fix(#338): Sanitize issue body for prompt injection

- Add sanitize_for_prompt() function to security module
- Remove suspicious control characters (except whitespace)
- Detect and log common prompt injection patterns
- Escape dangerous XML-like tags used for prompt manipulation
- Truncate user content to max length (default 50000 chars)
- Integrate sanitization in parser before building LLM prompts
- Add comprehensive test suite (12 new tests)

Refs #338

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 18:36:16 -06:00
parent d53c80fef0
commit 442f8e0971
3 changed files with 268 additions and 4 deletions
--- a/apps/coordinator/src/parser.py
+++ b/apps/coordinator/src/parser.py
@@ -8,6 +8,7 @@ from anthropic import Anthropic
 from anthropic.types import TextBlock

 from .models import IssueMetadata
+from .security import sanitize_for_prompt

 logger = logging.getLogger(__name__)

@@ -101,15 +102,18 @@ def _build_parse_prompt(issue_body: str) -> str:
    Build the prompt for Anthropic API to parse issue metadata.

    Args:
-        issue_body: Issue markdown content
+        issue_body: Issue markdown content (will be sanitized)

    Returns:
        Formatted prompt string
    """
+    # Sanitize issue body to prevent prompt injection attacks
+    sanitized_body = sanitize_for_prompt(issue_body)
+
    return f"""Extract structured metadata from this GitHub/Gitea issue markdown.

 Issue Body:
-{issue_body}
+{sanitized_body}

 Extract the following fields:
 1. estimated_context: Total estimated tokens from "Context Estimate" section