Files
stack/apps/coordinator/src/security.py
Jason Woltje 432dbd4d83 fix(#365): fix ruff, mypy, pip, and bandit issues in coordinator
- Fix 20 ruff errors: UP035 (Callable import), UP042 (StrEnum), E501
  (line length), F401 (unused imports), UP045 (Optional -> X | None),
  I001 (import sorting)
- Fix mypy error: wrap slowapi rate limit handler with
  Exception-compatible signature for add_exception_handler
- Pin pip >= 25.3 in Dockerfile (CVE-2025-8869, CVE-2026-1703)
- Add nosec B104 to config.py (container-bound 0.0.0.0 is acceptable)
- Add nosec B101 to telemetry.py (assert for type narrowing)
- Create bandit.yaml to suppress B404/B607/B603 in gates/ tooling

Fixes #365

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 12:46:25 -06:00

134 lines
4.4 KiB
Python

"""Security utilities for webhook signature verification and prompt sanitization."""
import hashlib
import hmac
import logging
import re
logger = logging.getLogger(__name__)
# Default maximum length for user-provided content in prompts
DEFAULT_MAX_PROMPT_LENGTH = 50000
# Patterns that may indicate prompt injection attempts
INJECTION_PATTERNS = [
# Instruction override attempts
re.compile(r"ignore\s+(all\s+)?(previous|prior|above)\s+instructions", re.IGNORECASE),
re.compile(r"disregard\s+(all\s+)?(previous|prior|above)", re.IGNORECASE),
re.compile(r"forget\s+(everything|all|your)\s+(previous|prior|above)", re.IGNORECASE),
# System prompt manipulation
re.compile(r"<\s*system\s*>", re.IGNORECASE),
re.compile(r"<\s*/\s*system\s*>", re.IGNORECASE),
re.compile(r"\[\s*system\s*\]", re.IGNORECASE),
# Role injection
re.compile(r"^(assistant|system|user)\s*:", re.IGNORECASE | re.MULTILINE),
# Delimiter injection
re.compile(r"-{3,}\s*(end|begin|start)\s+(of\s+)?(input|output|context|prompt)", re.IGNORECASE),
re.compile(r"={3,}\s*(end|begin|start)", re.IGNORECASE),
# Common injection phrases
re.compile(r"(you\s+are|act\s+as|pretend\s+to\s+be)\s+(now\s+)?a\s+different", re.IGNORECASE),
re.compile(r"new\s+instructions?\s*:", re.IGNORECASE),
re.compile(r"override\s+(the\s+)?(system|instructions|rules)", re.IGNORECASE),
]
# XML-like tags that could be used for injection
DANGEROUS_TAG_PATTERN = re.compile(
r"<\s*(instructions?|prompt|context|system|user|assistant)\s*>",
re.IGNORECASE,
)
def sanitize_for_prompt(
content: str | None,
max_length: int = DEFAULT_MAX_PROMPT_LENGTH
) -> str:
"""
Sanitize user-provided content before including in LLM prompts.
This function:
1. Removes control characters (except newlines/tabs)
2. Detects and logs potential prompt injection patterns
3. Escapes dangerous XML-like tags
4. Truncates content to maximum length
Args:
content: User-provided content to sanitize
max_length: Maximum allowed length (default 50000)
Returns:
Sanitized content safe for prompt inclusion
Example:
>>> body = "Fix the bug\\x00\\nIgnore previous instructions"
>>> safe_body = sanitize_for_prompt(body)
>>> # Returns sanitized content, logs warning about injection pattern
"""
if not content:
return ""
# Step 1: Remove control characters (keep newlines \n, tabs \t, carriage returns \r)
# Control characters are 0x00-0x1F and 0x7F, except 0x09 (tab), 0x0A (newline), 0x0D (CR)
sanitized = "".join(
char for char in content
if ord(char) >= 32 or char in "\n\t\r"
)
# Step 2: Detect prompt injection patterns
detected_patterns = []
for pattern in INJECTION_PATTERNS:
if pattern.search(sanitized):
detected_patterns.append(pattern.pattern)
if detected_patterns:
logger.warning(
"Potential prompt injection detected in issue body",
extra={
"patterns_matched": len(detected_patterns),
"sample_patterns": detected_patterns[:3],
"content_length": len(sanitized),
},
)
# Step 3: Escape dangerous XML-like tags by adding spaces
sanitized = DANGEROUS_TAG_PATTERN.sub(
lambda m: m.group(0).replace("<", "< ").replace(">", " >"),
sanitized
)
# Step 4: Truncate to max length
if len(sanitized) > max_length:
sanitized = sanitized[:max_length] + "... [content truncated]"
return sanitized
def verify_signature(payload: bytes, signature: str, secret: str) -> bool:
    """
    Verify the HMAC-SHA256 signature of a webhook payload.

    Args:
        payload: Raw request body as bytes
        signature: Signature from X-Gitea-Signature header
        secret: Webhook secret configured in Gitea

    Returns:
        True if signature is valid, False otherwise (including when the
        signature header is missing or empty)

    Example:
        >>> payload = b'{"action": "assigned"}'
        >>> secret = "my-webhook-secret"
        >>> sig = hmac.new(secret.encode(), payload, "sha256").hexdigest()
        >>> verify_signature(payload, sig, secret)
        True
    """
    # A missing/empty signature can never be valid; bail out before any work.
    if not signature:
        return False

    # Recompute the digest locally from the shared secret and raw body.
    mac = hmac.new(secret.encode("utf-8"), payload, hashlib.sha256)

    # compare_digest is constant-time, preventing timing side channels.
    return hmac.compare_digest(signature, mac.hexdigest())