# stack/apps/coordinator/src/parser.py

"""Issue parser agent using Anthropic API."""

import json
import logging
from typing import Any

from anthropic import Anthropic
from anthropic.types import TextBlock

from .models import IssueMetadata
from .security import sanitize_for_prompt

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# In-memory cache: issue_number -> IssueMetadata.
# Entries are written only after a successful parse and are keyed solely by
# issue number, so a cached result is returned even when a later call passes
# a different issue body. Nothing removes entries except clear_cache().
_parse_cache: dict[int, IssueMetadata] = {}


def clear_cache() -> None:
    """Drop every cached parse result (intended mainly for test isolation)."""
    # Mutate in place so every reference to the module-level dict sees it.
    _parse_cache.clear()


# Anthropic model IDs are hyphenated throughout ("4-5"); the dotted spelling
# "claude-sonnet-4.5-20250929" is not a valid ID and the API rejects it.
_PARSE_MODEL = "claude-sonnet-4-5-20250929"


def _extract_json_object(response_text: str) -> dict[str, Any]:
    """
    Decode the JSON object contained in a model reply.

    The reply is first decoded as-is. If that fails (for example because the
    JSON is wrapped in a markdown code fence or surrounded by a sentence),
    the span from the first "{" to the last "}" is decoded instead.

    Args:
        response_text: Raw text returned by the model

    Returns:
        The decoded JSON object

    Raises:
        json.JSONDecodeError: If no decodable JSON is found
        ValueError: If the decoded JSON is not an object
    """
    text = response_text.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            # Nothing that looks like an object; surface the original error.
            raise
        parsed = json.loads(text[start : end + 1])

    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected JSON object in response, got {type(parsed).__name__}"
        )
    return parsed


def parse_issue_metadata(issue_body: str, issue_number: int) -> IssueMetadata:
    """
    Parse issue markdown body to extract structured metadata using Anthropic API.

    Results are cached in memory by issue number only: once an issue has been
    parsed successfully, later calls return the cached value without looking
    at issue_body. Failed parses are not cached, so the next call retries.

    Args:
        issue_body: Markdown content of the issue
        issue_number: Issue number, used as the cache key

    Returns:
        IssueMetadata with extracted fields, or a default-constructed
        IssueMetadata if anything fails (the error is logged, not raised)

    Example:
        >>> metadata = parse_issue_metadata(issue_body, 158)
        >>> metadata.difficulty
        'medium'
    """
    # Check cache first
    cached = _parse_cache.get(issue_number)
    if cached is not None:
        logger.debug(f"Cache hit for issue #{issue_number}")
        return cached

    # Parse using Anthropic API. The broad except below is deliberate: this
    # function is best-effort and must hand back defaults rather than raise.
    try:
        # Imported lazily, as in the original code, so that importing this
        # module does not require settings to be loadable.
        from .config import settings

        client = Anthropic(api_key=settings.anthropic_api_key)

        response = client.messages.create(
            model=_PARSE_MODEL,
            max_tokens=1024,
            temperature=0,
            messages=[
                {
                    "role": "user",
                    "content": _build_parse_prompt(issue_body),
                }
            ],
        )

        # Use the first text block; an empty reply or one without any text
        # block is reported with an explicit error instead of an IndexError.
        text_block = next(
            (block for block in response.content if isinstance(block, TextBlock)),
            None,
        )
        if text_block is None:
            raise ValueError("Expected TextBlock in response")

        parsed_data = _extract_json_object(text_block.text)

        # Log token usage
        logger.info(
            f"Parsed issue #{issue_number}",
            extra={
                "issue_number": issue_number,
                "input_tokens": response.usage.input_tokens,
                "output_tokens": response.usage.output_tokens,
            },
        )

        # Create metadata with validation
        metadata = _create_metadata_from_parsed(parsed_data)

        # Cache only successful results so failures are retried next time
        _parse_cache[issue_number] = metadata

        return metadata

    except Exception as e:
        logger.error(
            f"Failed to parse issue #{issue_number}: {e}",
            extra={"issue_number": issue_number, "error": str(e)},
            exc_info=True,
        )
        # Return defaults on failure
        return IssueMetadata()


def _build_parse_prompt(issue_body: str) -> str:
    """
    Assemble the metadata-extraction prompt sent to the Anthropic API.

    Args:
        issue_body: Raw issue markdown; it is passed through
            sanitize_for_prompt before being embedded in the prompt

    Returns:
        Prompt text made of a task header, the sanitized issue body, the
        field-by-field instructions, the defaults and an example JSON output
    """
    # Fixed text that precedes the issue body.
    header = (
        "Extract structured metadata from this GitHub/Gitea issue markdown.\n"
        "\n"
        "Issue Body:\n"
    )

    # Fixed text that follows the issue body. This is a plain (non-f) string,
    # so the braces in the example JSON are written literally.
    instructions = """
Extract the following fields:
1. estimated_context: Total estimated tokens from "Context Estimate" section
   (look for "Total estimated: X tokens")
2. difficulty: From "Difficulty" section (easy/medium/hard)
3. assigned_agent: From "Recommended agent" in Context Estimate section
   (sonnet/haiku/opus/glm)
4. blocks: Issue numbers from "Dependencies" section after "Blocks:"
   (extract #XXX numbers)
5. blocked_by: Issue numbers from "Dependencies" section after "Blocked by:"
   (extract #XXX numbers)

Return ONLY a JSON object with these exact fields.
Use these defaults if fields are missing:
- estimated_context: 50000
- difficulty: "medium"
- assigned_agent: "sonnet"
- blocks: []
- blocked_by: []

Example output:
{"estimated_context": 46800, "difficulty": "medium", "assigned_agent": "sonnet",
  "blocks": [159], "blocked_by": [157]}
"""

    # The issue text is sanitized before embedding, as a guard against
    # prompt injection.
    safe_body = sanitize_for_prompt(issue_body)

    return f"{header}{safe_body}\n{instructions}"


def _create_metadata_from_parsed(parsed_data: dict[str, Any]) -> IssueMetadata:
    """
    Build an IssueMetadata from decoded JSON, filling in any missing keys.

    Only absent keys fall back to a default; a key that is present is passed
    through unchanged. Any validation is left to the IssueMetadata constructor.

    Args:
        parsed_data: Dictionary decoded from the model's JSON reply

    Returns:
        IssueMetadata built from the supplied values and fallbacks
    """
    # Built on every call so the list fallbacks are fresh objects and are
    # never shared between returned instances.
    fallbacks: dict[str, Any] = {
        "estimated_context": 50000,
        "difficulty": "medium",
        "assigned_agent": "sonnet",
        "blocks": [],
        "blocked_by": [],
    }
    fields = {
        key: parsed_data.get(key, fallback) for key, fallback in fallbacks.items()
    }
    return IssueMetadata(**fields)