fix(#338): Sanitize issue body for prompt injection
- Add sanitize_for_prompt() function to security module
- Remove suspicious control characters (except whitespace)
- Detect and log common prompt injection patterns
- Escape dangerous XML-like tags used for prompt manipulation
- Truncate user content to max length (default 50000 chars)
- Integrate sanitization in parser before building LLM prompts
- Add comprehensive test suite (12 new tests)

Refs #338

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ from anthropic import Anthropic
|
||||
from anthropic.types import TextBlock
|
||||
|
||||
from .models import IssueMetadata
|
||||
from .security import sanitize_for_prompt
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -101,15 +102,18 @@ def _build_parse_prompt(issue_body: str) -> str:
|
||||
Build the prompt for Anthropic API to parse issue metadata.
|
||||
|
||||
Args:
|
||||
-        issue_body: Issue markdown content
+        issue_body: Issue markdown content (will be sanitized)
|
||||
|
||||
Returns:
|
||||
Formatted prompt string
|
||||
"""
|
||||
# Sanitize issue body to prevent prompt injection attacks
|
||||
sanitized_body = sanitize_for_prompt(issue_body)
|
||||
|
||||
return f"""Extract structured metadata from this GitHub/Gitea issue markdown.
|
||||
|
||||
Issue Body:
|
||||
-{issue_body}
+{sanitized_body}
|
||||
|
||||
Extract the following fields:
|
||||
1. estimated_context: Total estimated tokens from "Context Estimate" section
|
||||
|
||||
Reference in New Issue
Block a user