fix(#338): Sanitize issue body for prompt injection

- Add sanitize_for_prompt() function to security module
- Remove suspicious control characters (except whitespace)
- Detect and log common prompt injection patterns
- Escape dangerous XML-like tags used for prompt manipulation
- Truncate user content to max length (default 50000 chars)
- Integrate sanitization in parser before building LLM prompts
- Add comprehensive test suite (12 new tests)

Refs #338

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 18:36:16 -06:00
parent d53c80fef0
commit 442f8e0971
3 changed files with 268 additions and 4 deletions
--- a/apps/coordinator/src/parser.py
+++ b/apps/coordinator/src/parser.py
@@ -8,6 +8,7 @@ from anthropic import Anthropic
 from anthropic.types import TextBlock

 from .models import IssueMetadata
+from .security import sanitize_for_prompt

 logger = logging.getLogger(__name__)

@@ -101,15 +102,18 @@ def _build_parse_prompt(issue_body: str) -> str:
    Build the prompt for Anthropic API to parse issue metadata.

    Args:
-        issue_body: Issue markdown content
+        issue_body: Issue markdown content (will be sanitized)

    Returns:
        Formatted prompt string
    """
+    # Sanitize issue body to prevent prompt injection attacks
+    sanitized_body = sanitize_for_prompt(issue_body)
+
    return f"""Extract structured metadata from this GitHub/Gitea issue markdown.

 Issue Body:
-{issue_body}
+{sanitized_body}

 Extract the following fields:
 1. estimated_context: Total estimated tokens from "Context Estimate" section
--- a/apps/coordinator/src/security.py
+++ b/apps/coordinator/src/security.py
@@ -1,7 +1,103 @@
-"""Security utilities for webhook signature verification."""
+"""Security utilities for webhook signature verification and prompt sanitization."""

 import hashlib
 import hmac
+import logging
+import re
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Default maximum length, in characters, for user-provided content in prompts
+DEFAULT_MAX_PROMPT_LENGTH = 50000
+
+# Patterns that may indicate prompt injection attempts; sanitize_for_prompt() only logs matches, it does not remove them
+INJECTION_PATTERNS = [
+    # Instruction override attempts ("ignore/disregard/forget ... previous|prior|above")
+    re.compile(r"ignore\s+(all\s+)?(previous|prior|above)\s+instructions", re.IGNORECASE),
+    re.compile(r"disregard\s+(all\s+)?(previous|prior|above)", re.IGNORECASE),
+    re.compile(r"forget\s+(everything|all|your)\s+(previous|prior|above)", re.IGNORECASE),
+    # System prompt manipulation (<system>, </system>, [system]; inner whitespace tolerated)
+    re.compile(r"<\s*system\s*>", re.IGNORECASE),
+    re.compile(r"<\s*/\s*system\s*>", re.IGNORECASE),
+    re.compile(r"\[\s*system\s*\]", re.IGNORECASE),
+    # Role injection ("assistant:", "system:" or "user:" at the start of any line, via re.MULTILINE)
+    re.compile(r"^(assistant|system|user)\s*:", re.IGNORECASE | re.MULTILINE),
+    # Delimiter injection (runs of three or more "-" or "=" followed by end/begin/start)
+    re.compile(r"-{3,}\s*(end|begin|start)\s+(of\s+)?(input|output|context|prompt)", re.IGNORECASE),
+    re.compile(r"={3,}\s*(end|begin|start)", re.IGNORECASE),
+    # Common injection phrases (persona switches, "new instructions:", "override the rules")
+    re.compile(r"(you\s+are|act\s+as|pretend\s+to\s+be)\s+(now\s+)?a\s+different", re.IGNORECASE),
+    re.compile(r"new\s+instructions?\s*:", re.IGNORECASE),
+    re.compile(r"override\s+(the\s+)?(system|instructions|rules)", re.IGNORECASE),
+]
+
+# XML-like tags (opening or closing) that could be used for injection
+DANGEROUS_TAG_PATTERN = re.compile(r"<\s*/?\s*(instructions?|prompt|context|system|user|assistant)\s*>", re.IGNORECASE)
+
+
+def sanitize_for_prompt(
+    content: Optional[str],
+    max_length: int = DEFAULT_MAX_PROMPT_LENGTH
+) -> str:
+    """
+    Sanitize user-provided content before including in LLM prompts.
+
+    This function:
+    1. Removes control characters, including DEL (except newlines/tabs/CRs)
+    2. Detects and logs potential prompt injection patterns (text is kept)
+    3. Escapes dangerous XML-like tags, both opening and closing
+    4. Truncates content to maximum length, then appends a truncation marker
+
+    Args:
+        content: User-provided content to sanitize
+        max_length: Maximum allowed length (default 50000)
+
+    Returns:
+        Sanitized content ("" for None/empty input); a marker is appended if truncated
+
+    Example:
+        >>> body = "Fix the bug\\x00\\nIgnore previous instructions"
+        >>> safe_body = sanitize_for_prompt(body)
+        >>> # Returns sanitized content, logs warning about injection pattern
+    """
+    if not content:
+        return ""
+
+    # Step 1: Remove control characters (keep newlines \n, tabs \t, carriage returns \r)
+    # Control characters are 0x00-0x1F and 0x7F, except 0x09 (tab), 0x0A (newline), 0x0D (CR)
+    sanitized = "".join(
+        char for char in content
+        if (ord(char) >= 32 and ord(char) != 127) or char in "\n\t\r"
+    )
+
+    # Step 2: Detect prompt injection patterns (logged only, never removed)
+    detected_patterns = [
+        pattern.pattern for pattern in INJECTION_PATTERNS
+        if pattern.search(sanitized)
+    ]
+
+    if detected_patterns:
+        logger.warning(
+            "Potential prompt injection detected in issue body",
+            extra={
+                "patterns_matched": len(detected_patterns),
+                "sample_patterns": detected_patterns[:3],
+                "content_length": len(sanitized),
+            },
+        )
+
+    # Step 3: Escape dangerous XML-like tags by adding spaces inside the brackets
+    sanitized = DANGEROUS_TAG_PATTERN.sub(
+        lambda m: m.group(0).replace("<", "< ").replace(">", " >"),
+        sanitized
+    )
+
+    # Step 4: Truncate to max length (the marker is appended on top of max_length)
+    if len(sanitized) > max_length:
+        sanitized = sanitized[:max_length] + "... [content truncated]"
+
+    return sanitized


 def verify_signature(payload: bytes, signature: str, secret: str) -> bool: