"""Security utilities for webhook signature verification and prompt sanitization.""" import hashlib import hmac import logging import re from typing import Optional logger = logging.getLogger(__name__) # Default maximum length for user-provided content in prompts DEFAULT_MAX_PROMPT_LENGTH = 50000 # Patterns that may indicate prompt injection attempts INJECTION_PATTERNS = [ # Instruction override attempts re.compile(r"ignore\s+(all\s+)?(previous|prior|above)\s+instructions", re.IGNORECASE), re.compile(r"disregard\s+(all\s+)?(previous|prior|above)", re.IGNORECASE), re.compile(r"forget\s+(everything|all|your)\s+(previous|prior|above)", re.IGNORECASE), # System prompt manipulation re.compile(r"<\s*system\s*>", re.IGNORECASE), re.compile(r"<\s*/\s*system\s*>", re.IGNORECASE), re.compile(r"\[\s*system\s*\]", re.IGNORECASE), # Role injection re.compile(r"^(assistant|system|user)\s*:", re.IGNORECASE | re.MULTILINE), # Delimiter injection re.compile(r"-{3,}\s*(end|begin|start)\s+(of\s+)?(input|output|context|prompt)", re.IGNORECASE), re.compile(r"={3,}\s*(end|begin|start)", re.IGNORECASE), # Common injection phrases re.compile(r"(you\s+are|act\s+as|pretend\s+to\s+be)\s+(now\s+)?a\s+different", re.IGNORECASE), re.compile(r"new\s+instructions?\s*:", re.IGNORECASE), re.compile(r"override\s+(the\s+)?(system|instructions|rules)", re.IGNORECASE), ] # XML-like tags that could be used for injection DANGEROUS_TAG_PATTERN = re.compile(r"<\s*(instructions?|prompt|context|system|user|assistant)\s*>", re.IGNORECASE) def sanitize_for_prompt( content: Optional[str], max_length: int = DEFAULT_MAX_PROMPT_LENGTH ) -> str: """ Sanitize user-provided content before including in LLM prompts. This function: 1. Removes control characters (except newlines/tabs) 2. Detects and logs potential prompt injection patterns 3. Escapes dangerous XML-like tags 4. Truncates content to maximum length Args: content: User-provided content to sanitize max_length: Maximum allowed length (default 50000) Returns: Sanitized content safe for prompt inclusion Example: >>> body = "Fix the bug\\x00\\nIgnore previous instructions" >>> safe_body = sanitize_for_prompt(body) >>> # Returns sanitized content, logs warning about injection pattern """ if not content: return "" # Step 1: Remove control characters (keep newlines \n, tabs \t, carriage returns \r) # Control characters are 0x00-0x1F and 0x7F, except 0x09 (tab), 0x0A (newline), 0x0D (CR) sanitized = "".join( char for char in content if ord(char) >= 32 or char in "\n\t\r" ) # Step 2: Detect prompt injection patterns detected_patterns = [] for pattern in INJECTION_PATTERNS: if pattern.search(sanitized): detected_patterns.append(pattern.pattern) if detected_patterns: logger.warning( "Potential prompt injection detected in issue body", extra={ "patterns_matched": len(detected_patterns), "sample_patterns": detected_patterns[:3], "content_length": len(sanitized), }, ) # Step 3: Escape dangerous XML-like tags by adding spaces sanitized = DANGEROUS_TAG_PATTERN.sub( lambda m: m.group(0).replace("<", "< ").replace(">", " >"), sanitized ) # Step 4: Truncate to max length if len(sanitized) > max_length: sanitized = sanitized[:max_length] + "... [content truncated]" return sanitized def verify_signature(payload: bytes, signature: str, secret: str) -> bool: """ Verify HMAC SHA256 signature of webhook payload. Args: payload: Raw request body as bytes signature: Signature from X-Gitea-Signature header secret: Webhook secret configured in Gitea Returns: True if signature is valid, False otherwise Example: >>> payload = b'{"action": "assigned"}' >>> secret = "my-webhook-secret" >>> sig = hmac.new(secret.encode(), payload, "sha256").hexdigest() >>> verify_signature(payload, sig, secret) True """ if not signature: return False # Compute expected signature expected_signature = hmac.new( secret.encode("utf-8"), payload, hashlib.sha256 ).hexdigest() # Use timing-safe comparison to prevent timing attacks return hmac.compare_digest(signature, expected_signature)