feat(#158): Implement issue parser agent

Add AI-powered issue metadata parser using Anthropic Sonnet model. - Parse issue markdown to extract: estimated_context, difficulty, assigned_agent, blocks, blocked_by - Implement in-memory caching to avoid duplicate API calls - Graceful fallback to defaults on parse failures - Add comprehensive test suite (9 test cases) - 95% test coverage (exceeds 85% requirement) - Add ANTHROPIC_API_KEY to config - Update documentation and add .env.example Fixes #158 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-01 17:50:35 -06:00
parent d54c65360a
commit dad4b68f66
8 changed files with 689 additions and 10 deletions
--- a/apps/coordinator/src/config.py
+++ b/apps/coordinator/src/config.py
@@ -17,6 +17,9 @@ class Settings(BaseSettings):
    gitea_webhook_secret: str
    gitea_url: str = "https://git.mosaicstack.dev"

+    # Anthropic API
+    anthropic_api_key: str
+
    # Server Configuration
    host: str = "0.0.0.0"
    port: int = 8000
--- a/apps/coordinator/src/parser.py
+++ b/apps/coordinator/src/parser.py
@@ -0,0 +1,155 @@
+"""Issue parser agent using Anthropic API."""
+
+import json
+import logging
+from typing import Any
+
+from anthropic import Anthropic
+from anthropic.types import TextBlock
+
+from .models import IssueMetadata
+
+logger = logging.getLogger(__name__)
+
+# In-memory cache: issue_number -> IssueMetadata
+_parse_cache: dict[int, IssueMetadata] = {}
+
+
+def clear_cache() -> None:
+    """Clear the parse cache (primarily for testing)."""
+    _parse_cache.clear()
+
+
+def parse_issue_metadata(issue_body: str, issue_number: int) -> IssueMetadata:
+    """
+    Parse issue markdown body to extract structured metadata using Anthropic API.
+
+    Args:
+        issue_body: Markdown content of the issue
+        issue_number: Issue number for caching
+
+    Returns:
+        IssueMetadata with extracted fields or defaults on failure
+
+    Example:
+        >>> metadata = parse_issue_metadata(issue_body, 158)
+        >>> print(metadata.difficulty)
+        'medium'
+    """
+    # Check cache first
+    if issue_number in _parse_cache:
+        logger.debug(f"Cache hit for issue #{issue_number}")
+        return _parse_cache[issue_number]
+
+    # Parse using Anthropic API
+    try:
+        from .config import settings
+
+        client = Anthropic(api_key=settings.anthropic_api_key)
+
+        prompt = _build_parse_prompt(issue_body)
+
+        response = client.messages.create(
+            model="claude-sonnet-4.5-20250929",
+            max_tokens=1024,
+            temperature=0,
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ]
+        )
+
+        # Extract JSON from response
+        first_block = response.content[0]
+        if not isinstance(first_block, TextBlock):
+            raise ValueError("Expected TextBlock in response")
+        response_text = first_block.text
+        parsed_data = json.loads(response_text)
+
+        # Log token usage
+        logger.info(
+            f"Parsed issue #{issue_number}",
+            extra={
+                "issue_number": issue_number,
+                "input_tokens": response.usage.input_tokens,
+                "output_tokens": response.usage.output_tokens,
+            }
+        )
+
+        # Create metadata with validation
+        metadata = _create_metadata_from_parsed(parsed_data)
+
+        # Cache the result
+        _parse_cache[issue_number] = metadata
+
+        return metadata
+
+    except Exception as e:
+        logger.error(
+            f"Failed to parse issue #{issue_number}: {e}",
+            extra={"issue_number": issue_number, "error": str(e)},
+            exc_info=True
+        )
+        # Return defaults on failure
+        return IssueMetadata()
+
+
+def _build_parse_prompt(issue_body: str) -> str:
+    """
+    Build the prompt for Anthropic API to parse issue metadata.
+
+    Args:
+        issue_body: Issue markdown content
+
+    Returns:
+        Formatted prompt string
+    """
+    return f"""Extract structured metadata from this GitHub/Gitea issue markdown.
+
+Issue Body:
+{issue_body}
+
+Extract the following fields:
+1. estimated_context: Total estimated tokens from "Context Estimate" section
+   (look for "Total estimated: X tokens")
+2. difficulty: From "Difficulty" section (easy/medium/hard)
+3. assigned_agent: From "Recommended agent" in Context Estimate section
+   (sonnet/haiku/opus/glm)
+4. blocks: Issue numbers from "Dependencies" section after "Blocks:"
+   (extract #XXX numbers)
+5. blocked_by: Issue numbers from "Dependencies" section after "Blocked by:"
+   (extract #XXX numbers)
+
+Return ONLY a JSON object with these exact fields.
+Use these defaults if fields are missing:
+- estimated_context: 50000
+- difficulty: "medium"
+- assigned_agent: "sonnet"
+- blocks: []
+- blocked_by: []
+
+Example output:
+{{"estimated_context": 46800, "difficulty": "medium", "assigned_agent": "sonnet",
+  "blocks": [159], "blocked_by": [157]}}
+"""
+
+
+def _create_metadata_from_parsed(parsed_data: dict[str, Any]) -> IssueMetadata:
+    """
+    Create IssueMetadata from parsed JSON data with validation.
+
+    Args:
+        parsed_data: Dictionary from parsed JSON
+
+    Returns:
+        Validated IssueMetadata instance
+    """
+    return IssueMetadata(
+        estimated_context=parsed_data.get("estimated_context", 50000),
+        difficulty=parsed_data.get("difficulty", "medium"),
+        assigned_agent=parsed_data.get("assigned_agent", "sonnet"),
+        blocks=parsed_data.get("blocks", []),
+        blocked_by=parsed_data.get("blocked_by", []),
+    )