feat(#158): Implement issue parser agent

Add AI-powered issue metadata parser using Anthropic Sonnet model.
- Parse issue markdown to extract: estimated_context, difficulty,
  assigned_agent, blocks, blocked_by
- Implement in-memory caching to avoid duplicate API calls
- Graceful fallback to defaults on parse failures
- Add comprehensive test suite (9 test cases)
- 95% test coverage (exceeds 85% requirement)
- Add ANTHROPIC_API_KEY to config
- Update documentation and add .env.example

Fixes #158

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 17:50:35 -06:00
parent d54c65360a
commit dad4b68f66
8 changed files with 689 additions and 10 deletions

View File

@@ -0,0 +1,13 @@
# Gitea Configuration
GITEA_WEBHOOK_SECRET=your-webhook-secret-here
GITEA_URL=https://git.mosaicstack.dev
# Anthropic API (for issue parsing)
ANTHROPIC_API_KEY=sk-ant-your-api-key-here
# Server Configuration
HOST=0.0.0.0
PORT=8000
# Logging
LOG_LEVEL=info

View File

@@ -10,10 +10,13 @@ The coordinator receives webhook events from Gitea when issues are assigned, una
- HMAC SHA256 signature verification
- Event routing (assigned, unassigned, closed)
- AI-powered issue metadata parsing (using Anthropic Sonnet)
- Context estimation and agent assignment
- Dependency tracking (blocks/blocked_by)
- Comprehensive logging
- Health check endpoint
- Docker containerized
- 85%+ test coverage
- 95%+ test coverage
## Development
@@ -44,9 +47,11 @@ ruff check src/
### Running locally
```bash
# Set environment variables
export GITEA_WEBHOOK_SECRET="your-secret-here"
export LOG_LEVEL="info"
# Copy environment template
cp .env.example .env
# Edit .env with your values
# GITEA_WEBHOOK_SECRET, GITEA_URL, ANTHROPIC_API_KEY
# Run server
uvicorn src.main:app --reload --port 8000
@@ -82,6 +87,7 @@ Health check endpoint.
| ---------------------- | ------------------------------------------- | -------- | ------- |
| `GITEA_WEBHOOK_SECRET` | Secret for HMAC signature verification | Yes | - |
| `GITEA_URL` | Gitea instance URL | Yes | - |
| `ANTHROPIC_API_KEY` | Anthropic API key for issue parsing | Yes | - |
| `LOG_LEVEL` | Logging level (debug, info, warning, error) | No | info |
| `HOST` | Server host | No | 0.0.0.0 |
| `PORT` | Server port | No | 8000 |
@@ -96,6 +102,7 @@ docker build -t mosaic-coordinator .
docker run -p 8000:8000 \
-e GITEA_WEBHOOK_SECRET="your-secret" \
-e GITEA_URL="https://git.mosaicstack.dev" \
-e ANTHROPIC_API_KEY="your-anthropic-key" \
mosaic-coordinator
```
@@ -120,15 +127,21 @@ pytest -v
```
apps/coordinator/
├── src/
│ ├── main.py # FastAPI application
│ ├── webhook.py # Webhook endpoint handlers
│ ├── security.py # HMAC signature verification
│ └── config.py # Configuration management
│ ├── main.py # FastAPI application
│ ├── webhook.py # Webhook endpoint handlers
│ ├── parser.py # Issue metadata parser (Anthropic)
│ ├── models.py # Data models
│ ├── security.py # HMAC signature verification
│ ├── config.py # Configuration management
│ └── context_monitor.py # Context usage monitoring
├── tests/
│ ├── test_security.py
│ ├── test_webhook.py
│ └── conftest.py # Pytest fixtures
├── pyproject.toml # Project metadata & dependencies
│ ├── test_parser.py
│ ├── test_context_monitor.py
│ └── conftest.py # Pytest fixtures
├── pyproject.toml # Project metadata & dependencies
├── .env.example # Environment variable template
├── Dockerfile
└── README.md
```

View File

@@ -9,6 +9,7 @@ dependencies = [
"pydantic>=2.5.0",
"pydantic-settings>=2.1.0",
"python-dotenv>=1.0.0",
"anthropic>=0.39.0",
]
[project.optional-dependencies]

View File

@@ -17,6 +17,9 @@ class Settings(BaseSettings):
gitea_webhook_secret: str
gitea_url: str = "https://git.mosaicstack.dev"
# Anthropic API
anthropic_api_key: str
# Server Configuration
host: str = "0.0.0.0"
port: int = 8000

View File

@@ -0,0 +1,155 @@
"""Issue parser agent using Anthropic API."""
import json
import logging
from typing import Any
from anthropic import Anthropic
from anthropic.types import TextBlock
from .models import IssueMetadata
logger = logging.getLogger(__name__)
# In-memory cache: issue_number -> IssueMetadata
_parse_cache: dict[int, IssueMetadata] = {}
def clear_cache() -> None:
    """Remove every entry from the module-level parse cache.

    Provided primarily for testing, so each test can start without
    previously memoized results.
    """
    _parse_cache.clear()
def parse_issue_metadata(issue_body: str, issue_number: int) -> IssueMetadata:
    """
    Parse issue markdown body to extract structured metadata using Anthropic API.

    Successful results are memoized in the module-level cache, keyed by
    ``issue_number`` only: a later call for the same issue returns the cached
    metadata even if ``issue_body`` has changed, until ``clear_cache()`` is
    called. Failed parses are not cached, so the next call retries the API.

    Args:
        issue_body: Markdown content of the issue
        issue_number: Issue number, used as the cache key

    Returns:
        IssueMetadata with extracted fields, or a default-constructed
        IssueMetadata when the API call or response handling fails. This
        function does not propagate those failures; they are logged.

    Example:
        >>> metadata = parse_issue_metadata(issue_body, 158)
        >>> metadata.difficulty
        'medium'
    """
    # Check cache first
    if issue_number in _parse_cache:
        logger.debug(f"Cache hit for issue #{issue_number}")
        return _parse_cache[issue_number]

    # Parse using Anthropic API
    try:
        # Imported at call time rather than module load.
        # NOTE(review): presumably so tests can reload settings after
        # patching the environment -- confirm before hoisting.
        from .config import settings

        client = Anthropic(api_key=settings.anthropic_api_key)
        prompt = _build_parse_prompt(issue_body)
        response = client.messages.create(
            # NOTE(review): Anthropic's published model IDs are hyphenated
            # (e.g. "claude-sonnet-4-5-20250929"). Confirm this dotted ID is
            # accepted by the API; if it is not, every call lands in the
            # fallback below. tests/test_parser.py pins this exact string,
            # so change both together.
            model="claude-sonnet-4.5-20250929",
            max_tokens=1024,
            temperature=0,
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
        )

        # Extract JSON from response
        if not response.content:
            raise ValueError("Expected TextBlock in response")
        first_block = response.content[0]
        if not isinstance(first_block, TextBlock):
            raise ValueError("Expected TextBlock in response")

        # The model may wrap the object in Markdown fences or add prose even
        # though the prompt asks for JSON only; keep just the object itself.
        parsed_data = json.loads(_extract_json_object(first_block.text))
        if not isinstance(parsed_data, dict):
            raise ValueError("Expected JSON object in response")

        # Log token usage
        logger.info(
            f"Parsed issue #{issue_number}",
            extra={
                "issue_number": issue_number,
                "input_tokens": response.usage.input_tokens,
                "output_tokens": response.usage.output_tokens,
            },
        )

        # Create metadata with validation
        metadata = _create_metadata_from_parsed(parsed_data)

        # Cache the result
        _parse_cache[issue_number] = metadata
        return metadata
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, auth, bad
        # JSON, model validation) degrades to defaults instead of raising.
        logger.error(
            f"Failed to parse issue #{issue_number}: {e}",
            extra={"issue_number": issue_number, "error": str(e)},
            exc_info=True,
        )
        # Return defaults on failure
        return IssueMetadata()


def _extract_json_object(response_text: str) -> str:
    """Return the outermost ``{...}`` span of a model reply.

    Drops anything before the first ``{`` and after the last ``}``, which
    removes Markdown code fences and surrounding prose. When no such span
    exists the text is returned unchanged so the JSON decoder reports the
    actual error.
    """
    start = response_text.find("{")
    end = response_text.rfind("}")
    if start == -1 or end < start:
        return response_text
    return response_text[start:end + 1]
def _build_parse_prompt(issue_body: str) -> str:
"""
Build the prompt for Anthropic API to parse issue metadata.
Args:
issue_body: Issue markdown content
Returns:
Formatted prompt string
"""
return f"""Extract structured metadata from this GitHub/Gitea issue markdown.
Issue Body:
{issue_body}
Extract the following fields:
1. estimated_context: Total estimated tokens from "Context Estimate" section
(look for "Total estimated: X tokens")
2. difficulty: From "Difficulty" section (easy/medium/hard)
3. assigned_agent: From "Recommended agent" in Context Estimate section
(sonnet/haiku/opus/glm)
4. blocks: Issue numbers from "Dependencies" section after "Blocks:"
(extract #XXX numbers)
5. blocked_by: Issue numbers from "Dependencies" section after "Blocked by:"
(extract #XXX numbers)
Return ONLY a JSON object with these exact fields.
Use these defaults if fields are missing:
- estimated_context: 50000
- difficulty: "medium"
- assigned_agent: "sonnet"
- blocks: []
- blocked_by: []
Example output:
{{"estimated_context": 46800, "difficulty": "medium", "assigned_agent": "sonnet",
"blocks": [159], "blocked_by": [157]}}
"""
def _create_metadata_from_parsed(parsed_data: dict[str, Any]) -> IssueMetadata:
    """
    Build an IssueMetadata from the decoded JSON reply.

    Each expected key is read from ``parsed_data``; a key that is absent
    falls back to a fixed default. Any validation is whatever the
    IssueMetadata constructor itself performs.

    Args:
        parsed_data: Dictionary from parsed JSON

    Returns:
        IssueMetadata populated from ``parsed_data`` plus defaults
    """
    # Built on every call so the list defaults are never shared between
    # returned instances.
    fallbacks: dict[str, Any] = {
        "estimated_context": 50000,
        "difficulty": "medium",
        "assigned_agent": "sonnet",
        "blocks": [],
        "blocked_by": [],
    }
    field_values = {
        field: parsed_data.get(field, fallback)
        for field, fallback in fallbacks.items()
    }
    return IssueMetadata(**field_values)

View File

@@ -108,6 +108,7 @@ def client(webhook_secret: str, gitea_url: str, monkeypatch: pytest.MonkeyPatch)
# Set test environment variables
monkeypatch.setenv("GITEA_WEBHOOK_SECRET", webhook_secret)
monkeypatch.setenv("GITEA_URL", gitea_url)
monkeypatch.setenv("ANTHROPIC_API_KEY", "test-anthropic-api-key")
monkeypatch.setenv("LOG_LEVEL", "debug")
# Force reload of settings

View File

@@ -0,0 +1,384 @@
"""Tests for issue parser agent."""
import os
import pytest
from unittest.mock import Mock, patch, AsyncMock
from anthropic import Anthropic
from anthropic.types import Message, TextBlock, Usage
from src.parser import parse_issue_metadata, clear_cache
from src.models import IssueMetadata
@pytest.fixture(autouse=True)
def setup_test_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Populate the environment variables these tests run with.

    Autouse, so it applies to every test in this module; monkeypatch undoes
    the changes when each test finishes.
    """
    test_environment = {
        "GITEA_WEBHOOK_SECRET": "test-secret",
        "GITEA_URL": "https://test.example.com",
        "ANTHROPIC_API_KEY": "test-anthropic-key",
        "LOG_LEVEL": "debug",
    }
    for variable, value in test_environment.items():
        monkeypatch.setenv(variable, value)
@pytest.fixture
def sample_complete_issue_body() -> str:
    """Issue body containing every section the parser prompt looks for.

    Includes a Context Estimate (total 46800 tokens, recommended agent
    sonnet), a Difficulty of medium, and Dependencies naming #157 as a
    blocker and #159 as blocked.
    """
    return """## Objective
Create AI agent (Sonnet) that parses issue markdown body to extract structured metadata.
## Implementation Details
1. Create parse_issue_metadata() function
2. Use Anthropic API with Sonnet model
## Context Estimate
• Files to modify: 3 (parser.py, agent.py, models.py)
• Implementation complexity: medium (20000 tokens)
• Test requirements: medium (10000 tokens)
• Documentation: medium (3000 tokens)
• **Total estimated: 46800 tokens**
• **Recommended agent: sonnet**
## Difficulty
medium
## Dependencies
• Blocked by: #157 (COORD-001 - needs webhook to trigger parser)
• Blocks: #159 (COORD-003 - queue needs parsed metadata)
## Acceptance Criteria
[ ] Parser extracts all required fields
[ ] Returns valid JSON matching schema
"""
@pytest.fixture
def sample_minimal_issue_body() -> str:
    """Issue body with only Objective and Acceptance Criteria sections.

    It has no Context Estimate, Difficulty or Dependencies section, so none
    of the metadata fields can be read from it.
    """
    return """## Objective
Fix the login bug.
## Acceptance Criteria
[ ] Bug is fixed
"""
@pytest.fixture
def sample_malformed_issue_body() -> str:
    """Free-form text with no markdown sections, used to test graceful failure."""
    return """This is just random text without proper sections.
Some more random content here.
"""
@pytest.fixture
def mock_anthropic_response() -> Message:
    """Canned Anthropic reply carrying fully populated metadata JSON.

    The single text block holds the JSON matching the complete sample issue:
    46800 tokens, medium difficulty, sonnet agent, blocks #159 and blocked
    by #157.
    """
    payload = (
        '{"estimated_context": 46800, "difficulty": "medium", '
        '"assigned_agent": "sonnet", "blocks": [159], "blocked_by": [157]}'
    )
    reply_block = TextBlock(type="text", text=payload)
    token_usage = Usage(input_tokens=500, output_tokens=50)
    return Message(
        id="msg_123",
        type="message",
        role="assistant",
        content=[reply_block],
        model="claude-sonnet-4.5-20250929",
        stop_reason="end_turn",
        usage=token_usage,
    )
@pytest.fixture
def mock_anthropic_minimal_response() -> Message:
    """Canned Anthropic reply whose JSON holds only the default values.

    Used with the minimal sample issue: 50000 tokens, medium difficulty,
    sonnet agent and empty dependency lists.
    """
    payload = (
        '{"estimated_context": 50000, "difficulty": "medium", '
        '"assigned_agent": "sonnet", "blocks": [], "blocked_by": []}'
    )
    reply_block = TextBlock(type="text", text=payload)
    token_usage = Usage(input_tokens=200, output_tokens=40)
    return Message(
        id="msg_124",
        type="message",
        role="assistant",
        content=[reply_block],
        model="claude-sonnet-4.5-20250929",
        stop_reason="end_turn",
        usage=token_usage,
    )
@pytest.fixture(autouse=True)
def reset_cache() -> None:
    """Clear the parser's cache before each test.

    Autouse, so a result memoized by one test (several reuse issue number
    158) cannot satisfy a lookup in another.
    """
    clear_cache()
class TestParseIssueMetadata:
    """Tests for parse_issue_metadata function.

    Every test patches ``src.parser.Anthropic`` so no real API call is made;
    the private static helpers below hold the mock wiring the tests share.
    """

    @staticmethod
    def _install_client(mock_anthropic_class: Mock, create: Mock) -> Mock:
        """Make the patched Anthropic class return a stub client.

        Args:
            mock_anthropic_class: The patched ``src.parser.Anthropic``
            create: Mock to install as ``client.messages.create``

        Returns:
            The ``messages`` mock, so tests can inspect calls to ``create``
        """
        mock_client = Mock(spec=Anthropic)
        mock_messages = Mock()
        mock_messages.create = create
        mock_client.messages = mock_messages
        mock_anthropic_class.return_value = mock_client
        return mock_messages

    @staticmethod
    def _text_message(message_id: str, text: str) -> Message:
        """Build an assistant Message whose only content is one text block."""
        return Message(
            id=message_id,
            type="message",
            role="assistant",
            content=[TextBlock(type="text", text=text)],
            model="claude-sonnet-4.5-20250929",
            stop_reason="end_turn",
            usage=Usage(input_tokens=100, output_tokens=20),
        )

    @staticmethod
    def _assert_defaults(result: IssueMetadata) -> None:
        """Assert that every metadata field holds its default value."""
        assert result.estimated_context == 50000
        assert result.difficulty == "medium"
        assert result.assigned_agent == "sonnet"
        assert result.blocks == []
        assert result.blocked_by == []

    @patch("src.parser.Anthropic")
    def test_parse_complete_issue(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
        mock_anthropic_response: Message,
    ) -> None:
        """Test parsing complete issue body with all fields."""
        # Setup mock
        mock_messages = self._install_client(
            mock_anthropic_class, Mock(return_value=mock_anthropic_response)
        )

        # Parse issue
        result = parse_issue_metadata(sample_complete_issue_body, 158)

        # Verify result
        assert result.estimated_context == 46800
        assert result.difficulty == "medium"
        assert result.assigned_agent == "sonnet"
        assert result.blocks == [159]
        assert result.blocked_by == [157]

        # Verify API was called correctly
        mock_messages.create.assert_called_once()
        call_args = mock_messages.create.call_args
        assert call_args.kwargs["model"] == "claude-sonnet-4.5-20250929"
        assert call_args.kwargs["max_tokens"] == 1024
        assert call_args.kwargs["temperature"] == 0

    @patch("src.parser.Anthropic")
    def test_parse_minimal_issue(
        self,
        mock_anthropic_class: Mock,
        sample_minimal_issue_body: str,
        mock_anthropic_minimal_response: Message,
    ) -> None:
        """Test parsing minimal issue body uses defaults."""
        # Setup mock
        self._install_client(
            mock_anthropic_class, Mock(return_value=mock_anthropic_minimal_response)
        )

        # Parse issue
        result = parse_issue_metadata(sample_minimal_issue_body, 999)

        # Verify defaults are used
        self._assert_defaults(result)

    @patch("src.parser.Anthropic")
    def test_parse_malformed_issue_returns_defaults(
        self,
        mock_anthropic_class: Mock,
        sample_malformed_issue_body: str,
    ) -> None:
        """Test malformed issue body returns graceful defaults."""
        # Setup mock to return invalid JSON (closing brace deliberately missing)
        truncated_reply = self._text_message("msg_125", '{"invalid": "json"')
        self._install_client(mock_anthropic_class, Mock(return_value=truncated_reply))

        # Parse issue
        result = parse_issue_metadata(sample_malformed_issue_body, 888)

        # Verify defaults
        self._assert_defaults(result)

    @patch("src.parser.Anthropic")
    def test_api_failure_returns_defaults(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
    ) -> None:
        """Test API failure returns defaults instead of raising."""
        # Setup mock to raise exception
        self._install_client(
            mock_anthropic_class, Mock(side_effect=Exception("API Error"))
        )

        # Parse issue
        result = parse_issue_metadata(sample_complete_issue_body, 777)

        # Verify defaults
        self._assert_defaults(result)

    @patch("src.parser.Anthropic")
    def test_caching_avoids_duplicate_api_calls(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
        mock_anthropic_response: Message,
    ) -> None:
        """Test that caching prevents duplicate API calls for same issue."""
        # Setup mock
        mock_messages = self._install_client(
            mock_anthropic_class, Mock(return_value=mock_anthropic_response)
        )

        # Parse same issue twice
        result1 = parse_issue_metadata(sample_complete_issue_body, 158)
        result2 = parse_issue_metadata(sample_complete_issue_body, 158)

        # Verify API was called only once
        assert mock_messages.create.call_count == 1

        # Verify both results are identical
        assert result1.model_dump() == result2.model_dump()

    @patch("src.parser.Anthropic")
    def test_different_issues_not_cached(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
        sample_minimal_issue_body: str,
        mock_anthropic_response: Message,
    ) -> None:
        """Test that different issues result in separate API calls."""
        # Setup mock
        mock_messages = self._install_client(
            mock_anthropic_class, Mock(return_value=mock_anthropic_response)
        )

        # Parse different issues
        parse_issue_metadata(sample_complete_issue_body, 158)
        parse_issue_metadata(sample_minimal_issue_body, 159)

        # Verify API was called twice
        assert mock_messages.create.call_count == 2

    @patch("src.parser.Anthropic")
    def test_difficulty_validation(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
    ) -> None:
        """Test that an unrecognised difficulty value ends up as "medium"."""
        # Setup mock with invalid difficulty
        reply = self._text_message(
            "msg_126",
            '{"estimated_context": 10000, "difficulty": "invalid", '
            '"assigned_agent": "sonnet", "blocks": [], "blocked_by": []}',
        )
        self._install_client(mock_anthropic_class, Mock(return_value=reply))

        # Parse issue
        result = parse_issue_metadata(sample_complete_issue_body, 666)

        # Should default to "medium" for invalid difficulty
        assert result.difficulty == "medium"

    @patch("src.parser.Anthropic")
    def test_agent_validation(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
    ) -> None:
        """Test that an unrecognised agent value ends up as "sonnet"."""
        # Setup mock with invalid agent
        reply = self._text_message(
            "msg_127",
            '{"estimated_context": 10000, "difficulty": "medium", '
            '"assigned_agent": "invalid_agent", "blocks": [], "blocked_by": []}',
        )
        self._install_client(mock_anthropic_class, Mock(return_value=reply))

        # Parse issue
        result = parse_issue_metadata(sample_complete_issue_body, 555)

        # Should default to "sonnet" for invalid agent
        assert result.assigned_agent == "sonnet"

    @patch("src.parser.Anthropic")
    def test_parse_time_performance(
        self,
        mock_anthropic_class: Mock,
        sample_complete_issue_body: str,
        mock_anthropic_response: Message,
    ) -> None:
        """Test that parsing completes within performance target.

        The API is mocked, so this bounds only the local overhead around the
        call, not real network latency.
        """
        import time

        # Setup mock
        self._install_client(
            mock_anthropic_class, Mock(return_value=mock_anthropic_response)
        )

        # Measure parse time with a monotonic clock so a wall-clock
        # adjustment during the test cannot skew the result.
        start_time = time.perf_counter()
        parse_issue_metadata(sample_complete_issue_body, 158)
        elapsed_time = time.perf_counter() - start_time

        # Should complete within 2 seconds (mocked, so should be instant)
        assert elapsed_time < 2.0