fix(#338): Sanitize issue body for prompt injection

- Add sanitize_for_prompt() function to security module
- Remove suspicious control characters (except whitespace)
- Detect and log common prompt injection patterns
- Escape dangerous XML-like tags used for prompt manipulation
- Truncate user content to max length (default 50000 chars)
- Integrate sanitization in parser before building LLM prompts
- Add comprehensive test suite (12 new tests)

Refs #338

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-05 18:36:16 -06:00
parent d53c80fef0
commit 442f8e0971
3 changed files with 268 additions and 4 deletions

View File

@@ -8,6 +8,7 @@ from anthropic import Anthropic
from anthropic.types import TextBlock
from .models import IssueMetadata
from .security import sanitize_for_prompt
logger = logging.getLogger(__name__)
@@ -101,15 +102,18 @@ def _build_parse_prompt(issue_body: str) -> str:
Build the prompt for Anthropic API to parse issue metadata.
Args:
-issue_body: Issue markdown content
+issue_body: Issue markdown content (will be sanitized)
Returns:
Formatted prompt string
"""
# Sanitize issue body to prevent prompt injection attacks
sanitized_body = sanitize_for_prompt(issue_body)
return f"""Extract structured metadata from this GitHub/Gitea issue markdown.
Issue Body:
-{issue_body}
+{sanitized_body}
Extract the following fields:
1. estimated_context: Total estimated tokens from "Context Estimate" section