feat(#372): track orchestrator agent task completions via telemetry

- Instrument Coordinator.process_queue() with timing and telemetry events
- Instrument OrchestrationLoop.process_next_issue() with quality gate tracking
- Add agent-to-telemetry mapping (model, provider, harness per agent name)
- Map difficulty levels to Complexity enum and gate names to QualityGate enum
- Track retry counts per issue (increment on failure, clear on success)
- Emit FAILURE outcome on agent spawn failure or quality gate rejection
- Non-blocking: telemetry errors are logged and swallowed, never delay tasks
- Pass telemetry client from FastAPI lifespan to Coordinator constructor
- Add 33 unit tests covering all telemetry scenarios

Refs #372

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 01:52:54 -06:00
parent ed23293e1a
commit d6c6af10d9
3 changed files with 1057 additions and 1 deletions

View File

@@ -0,0 +1,796 @@
"""Tests for task completion telemetry instrumentation in the coordinator.
These tests verify that the Coordinator and OrchestrationLoop correctly
emit TaskCompletionEvents via the Mosaic telemetry SDK after each task
dispatch attempt.
"""
from __future__ import annotations
import tempfile
from collections.abc import Generator
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
TaskCompletionEvent,
TaskType,
TelemetryClient,
)
from src.coordinator import (
_AGENT_TELEMETRY_MAP,
_DIFFICULTY_TO_COMPLEXITY,
_GATE_NAME_TO_ENUM,
Coordinator,
OrchestrationLoop,
_resolve_agent_fields,
)
from src.gates.quality_gate import GateResult
from src.models import IssueMetadata
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
from src.queue import QueueManager
VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def temp_queue_file() -> Generator[Path, None, None]:
    """Yield a throwaway JSON file path for queue persistence.

    The file is created up front (and closed immediately) so the queue can
    write to it, then removed once the test finishes.
    """
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as handle:
        queue_path = Path(handle.name)
    yield queue_path
    # Clean up after the test; tolerate tests that already deleted the file.
    queue_path.unlink(missing_ok=True)
@pytest.fixture
def queue_manager(temp_queue_file: Path) -> QueueManager:
    """Build a QueueManager that persists to the temporary queue file."""
    manager = QueueManager(queue_file=temp_queue_file)
    return manager
@pytest.fixture
def mock_telemetry_client() -> MagicMock:
    """Provide a TelemetryClient stand-in whose track() calls can be inspected."""
    mock_client = MagicMock(spec=TelemetryClient)
    # Explicit mock so tests can use assert_called_once / call_args directly.
    mock_client.track = MagicMock()
    return mock_client
@pytest.fixture
def sonnet_metadata() -> IssueMetadata:
    """Issue metadata describing a medium-difficulty task assigned to sonnet."""
    metadata = IssueMetadata(
        assigned_agent="sonnet",
        difficulty="medium",
        estimated_context=50000,
    )
    return metadata
@pytest.fixture
def opus_metadata() -> IssueMetadata:
    """Issue metadata describing a hard-difficulty task assigned to opus."""
    metadata = IssueMetadata(
        assigned_agent="opus",
        difficulty="hard",
        estimated_context=120000,
    )
    return metadata
# ---------------------------------------------------------------------------
# _resolve_agent_fields tests
# ---------------------------------------------------------------------------
class TestResolveAgentFields:
    """Tests for the _resolve_agent_fields helper.

    Every known agent name must resolve to a concrete (model, provider,
    harness) triple; unrecognised names fall back to UNKNOWN sentinels.
    """

    def test_known_agent_sonnet(self) -> None:
        """Should return correct fields for sonnet agent."""
        model, provider, harness = _resolve_agent_fields("sonnet")
        assert model == "claude-sonnet-4-20250514"
        assert provider == Provider.ANTHROPIC
        assert harness == Harness.CLAUDE_CODE

    def test_known_agent_opus(self) -> None:
        """Should return correct fields for opus agent."""
        model, provider, harness = _resolve_agent_fields("opus")
        assert model == "claude-opus-4-20250514"
        assert provider == Provider.ANTHROPIC
        assert harness == Harness.CLAUDE_CODE

    def test_known_agent_haiku(self) -> None:
        """Should return correct fields for haiku agent."""
        model, provider, harness = _resolve_agent_fields("haiku")
        assert model == "claude-haiku-3.5-20241022"
        assert provider == Provider.ANTHROPIC
        assert harness == Harness.CLAUDE_CODE

    def test_known_agent_glm(self) -> None:
        """Should return correct fields for glm (self-hosted) agent."""
        model, provider, harness = _resolve_agent_fields("glm")
        assert model == "glm-4"
        assert provider == Provider.CUSTOM
        assert harness == Harness.CUSTOM

    def test_known_agent_minimax(self) -> None:
        """Should return correct fields for minimax (self-hosted) agent."""
        model, provider, harness = _resolve_agent_fields("minimax")
        assert model == "minimax"
        assert provider == Provider.CUSTOM
        assert harness == Harness.CUSTOM

    def test_unknown_agent_returns_defaults(self) -> None:
        """Should return unknown values for unrecognised agent names."""
        model, provider, harness = _resolve_agent_fields("nonexistent")
        assert model == "unknown"
        assert provider == Provider.UNKNOWN
        assert harness == Harness.UNKNOWN

    def test_all_map_entries_covered(self) -> None:
        """Ensure every entry in _AGENT_TELEMETRY_MAP resolves to concrete values."""
        for agent_name in _AGENT_TELEMETRY_MAP:
            model, provider, harness = _resolve_agent_fields(agent_name)
            # Fix: previously only `model` was asserted, leaving the unpacked
            # provider/harness values unchecked. A mapped agent must never
            # resolve to any of the UNKNOWN fallbacks.
            assert model != "unknown"
            assert provider != Provider.UNKNOWN
            assert harness != Harness.UNKNOWN
# ---------------------------------------------------------------------------
# Coordinator telemetry emission tests
# ---------------------------------------------------------------------------
class TestCoordinatorTelemetry:
    """Tests for telemetry emission in the Coordinator class.

    Each test enqueues a single issue, runs one pass of ``process_queue()``,
    and inspects the TaskCompletionEvent handed to ``TelemetryClient.track``.
    """

    @pytest.mark.asyncio
    async def test_emits_success_event_on_completion(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should emit a SUCCESS event when task completes successfully."""
        queue_manager.enqueue(100, sonnet_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        await coordinator.process_queue()
        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert isinstance(event, TaskCompletionEvent)
        assert event.outcome == Outcome.SUCCESS
        assert event.task_type == TaskType.IMPLEMENTATION
        assert event.complexity == Complexity.MEDIUM
        assert event.provider == Provider.ANTHROPIC
        assert event.harness == Harness.CLAUDE_CODE
        assert str(event.instance_id) == VALID_INSTANCE_ID
        assert event.task_duration_ms >= 0

    @pytest.mark.asyncio
    async def test_emits_failure_event_when_agent_fails(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should emit a FAILURE event when spawn_agent returns False."""
        queue_manager.enqueue(101, sonnet_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        # Override spawn_agent to fail
        coordinator.spawn_agent = AsyncMock(return_value=False)  # type: ignore[method-assign]
        await coordinator.process_queue()
        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert isinstance(event, TaskCompletionEvent)
        assert event.outcome == Outcome.FAILURE

    @pytest.mark.asyncio
    async def test_emits_failure_event_on_exception(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should emit a FAILURE event when spawn_agent raises an exception."""
        queue_manager.enqueue(102, sonnet_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        coordinator.spawn_agent = AsyncMock(side_effect=RuntimeError("agent crashed"))  # type: ignore[method-assign]
        await coordinator.process_queue()
        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.outcome == Outcome.FAILURE

    @pytest.mark.asyncio
    async def test_maps_difficulty_to_complexity(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        opus_metadata: IssueMetadata,
    ) -> None:
        """Should map difficulty='hard' to Complexity.HIGH in the event."""
        queue_manager.enqueue(103, opus_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        await coordinator.process_queue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.complexity == Complexity.HIGH

    @pytest.mark.asyncio
    async def test_maps_agent_to_model_and_provider(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        opus_metadata: IssueMetadata,
    ) -> None:
        """Should map 'opus' agent to opus model and ANTHROPIC provider."""
        queue_manager.enqueue(104, opus_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        await coordinator.process_queue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert "opus" in event.model
        assert event.provider == Provider.ANTHROPIC
        assert event.harness == Harness.CLAUDE_CODE

    @pytest.mark.asyncio
    async def test_no_event_when_telemetry_disabled(
        self,
        queue_manager: QueueManager,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should process the task normally when telemetry_client is None."""
        queue_manager.enqueue(105, sonnet_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=None,
            instance_id=VALID_INSTANCE_ID,
        )
        # Fix: this test previously had no assertions at all, so it passed
        # vacuously. Assert the issue is actually processed without telemetry.
        result = await coordinator.process_queue()
        assert result is not None
        assert result.issue_number == 105

    @pytest.mark.asyncio
    async def test_no_event_when_instance_id_empty(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should not call track when instance_id is empty."""
        queue_manager.enqueue(106, sonnet_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id="",
        )
        await coordinator.process_queue()
        mock_telemetry_client.track.assert_not_called()

    @pytest.mark.asyncio
    async def test_telemetry_exception_does_not_propagate(
        self,
        queue_manager: QueueManager,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Telemetry failures must never break task processing."""
        queue_manager.enqueue(107, sonnet_metadata)
        bad_client = MagicMock(spec=TelemetryClient)
        bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=bad_client,
            instance_id=VALID_INSTANCE_ID,
        )
        # Should complete without raising, despite telemetry failure
        result = await coordinator.process_queue()
        assert result is not None
        assert result.issue_number == 107

    @pytest.mark.asyncio
    async def test_no_event_when_queue_empty(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
    ) -> None:
        """Should not emit any event when the queue is empty."""
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        result = await coordinator.process_queue()
        assert result is None
        mock_telemetry_client.track.assert_not_called()

    @pytest.mark.asyncio
    async def test_estimated_input_tokens_from_metadata(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should set estimated_input_tokens from issue metadata."""
        queue_manager.enqueue(108, sonnet_metadata)
        coordinator = Coordinator(
            queue_manager=queue_manager,
            telemetry_client=mock_telemetry_client,
            instance_id=VALID_INSTANCE_ID,
        )
        await coordinator.process_queue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.estimated_input_tokens == 50000
# ---------------------------------------------------------------------------
# OrchestrationLoop telemetry emission tests
# ---------------------------------------------------------------------------
def _make_orchestration_loop(
    queue_manager: QueueManager,
    telemetry_client: TelemetryClient | None = None,
    instance_id: str = VALID_INSTANCE_ID,
    quality_result: VerificationResult | None = None,
) -> OrchestrationLoop:
    """Create an OrchestrationLoop wired to mocked collaborators.

    Args:
        queue_manager: Queue manager instance.
        telemetry_client: Optional telemetry client.
        instance_id: Coordinator instance ID.
        quality_result: Override quality verification result.

    Returns:
        Configured OrchestrationLoop.
    """
    # Local imports keep these project modules out of module scope,
    # matching the original helper's lazy-import style.
    from src.context_monitor import ContextMonitor
    from src.forced_continuation import ForcedContinuationService

    # Default to an all-green verification result unless overridden.
    effective_result = quality_result or VerificationResult(
        all_passed=True,
        gate_results={
            name: GateResult(passed=True, message=message)
            for name, message in (
                ("build", "Build OK"),
                ("lint", "Lint OK"),
                ("test", "Test OK"),
                ("coverage", "Coverage OK"),
            )
        },
    )

    quality_orch = MagicMock(spec=QualityOrchestrator)
    quality_orch.verify_completion = AsyncMock(return_value=effective_result)

    continuation = MagicMock(spec=ForcedContinuationService)
    continuation.generate_prompt = MagicMock(return_value="Fix: build failed")

    monitor = MagicMock(spec=ContextMonitor)
    monitor.determine_action = AsyncMock(return_value="continue")

    return OrchestrationLoop(
        queue_manager=queue_manager,
        quality_orchestrator=quality_orch,
        continuation_service=continuation,
        context_monitor=monitor,
        telemetry_client=telemetry_client,
        instance_id=instance_id,
    )
class TestOrchestrationLoopTelemetry:
    """Tests for telemetry emission in the OrchestrationLoop class.

    These tests drive ``process_next_issue`` against mocked quality-gate
    results (built by ``_make_orchestration_loop``) and inspect the emitted
    TaskCompletionEvent: outcome, gate lists, retry counts, and duration.
    """

    @pytest.mark.asyncio
    async def test_emits_success_with_quality_gates(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should emit SUCCESS event with quality gate details."""
        queue_manager.enqueue(200, sonnet_metadata)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        await loop.process_next_issue()
        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.outcome == Outcome.SUCCESS
        assert event.quality_gate_passed is True
        # All four default gates from the helper should be reported as run.
        assert set(event.quality_gates_run) == {
            QualityGate.BUILD,
            QualityGate.LINT,
            QualityGate.TEST,
            QualityGate.COVERAGE,
        }
        assert event.quality_gates_failed == []

    @pytest.mark.asyncio
    async def test_emits_failure_with_failed_gates(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should emit FAILURE event with failed gate details."""
        queue_manager.enqueue(201, sonnet_metadata)
        # Two of four gates fail; the event must distinguish run vs failed.
        failed_result = VerificationResult(
            all_passed=False,
            gate_results={
                "build": GateResult(passed=True, message="Build OK"),
                "lint": GateResult(passed=True, message="Lint OK"),
                "test": GateResult(passed=False, message="3 tests failed"),
                "coverage": GateResult(passed=False, message="Coverage 70% < 85%"),
            },
        )
        loop = _make_orchestration_loop(
            queue_manager,
            telemetry_client=mock_telemetry_client,
            quality_result=failed_result,
        )
        await loop.process_next_issue()
        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.outcome == Outcome.FAILURE
        assert event.quality_gate_passed is False
        assert set(event.quality_gates_failed) == {
            QualityGate.TEST,
            QualityGate.COVERAGE,
        }
        assert set(event.quality_gates_run) == {
            QualityGate.BUILD,
            QualityGate.LINT,
            QualityGate.TEST,
            QualityGate.COVERAGE,
        }

    @pytest.mark.asyncio
    async def test_retry_count_starts_at_zero(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """First attempt should report retry_count=0."""
        queue_manager.enqueue(202, sonnet_metadata)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        await loop.process_next_issue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.retry_count == 0

    @pytest.mark.asyncio
    async def test_retry_count_increments_on_failure(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Retry count should increment after a quality gate failure."""
        queue_manager.enqueue(203, sonnet_metadata)
        failed_result = VerificationResult(
            all_passed=False,
            gate_results={
                "build": GateResult(passed=False, message="Build failed"),
            },
        )
        loop = _make_orchestration_loop(
            queue_manager,
            telemetry_client=mock_telemetry_client,
            quality_result=failed_result,
        )
        # First attempt
        await loop.process_next_issue()
        event1 = mock_telemetry_client.track.call_args[0][0]
        assert event1.retry_count == 0
        # Re-enqueue and process again (simulates retry)
        queue_manager.enqueue(203, sonnet_metadata)
        mock_telemetry_client.track.reset_mock()
        await loop.process_next_issue()
        event2 = mock_telemetry_client.track.call_args[0][0]
        assert event2.retry_count == 1

    @pytest.mark.asyncio
    async def test_retry_count_clears_on_success(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Retry count should be cleared after a successful completion."""
        queue_manager.enqueue(204, sonnet_metadata)
        # First: fail
        failed_result = VerificationResult(
            all_passed=False,
            gate_results={
                "build": GateResult(passed=False, message="Build failed"),
            },
        )
        loop = _make_orchestration_loop(
            queue_manager,
            telemetry_client=mock_telemetry_client,
            quality_result=failed_result,
        )
        await loop.process_next_issue()
        # NOTE(review): reaches into the loop's private retry bookkeeping;
        # acceptable in a white-box test of the retry-tracking feature.
        assert loop._retry_counts.get(204) == 1
        # Now succeed
        success_result = VerificationResult(
            all_passed=True,
            gate_results={
                "build": GateResult(passed=True, message="Build OK"),
            },
        )
        loop.quality_orchestrator.verify_completion = AsyncMock(return_value=success_result)  # type: ignore[method-assign]
        queue_manager.enqueue(204, sonnet_metadata)
        mock_telemetry_client.track.reset_mock()
        await loop.process_next_issue()
        # Success must drop the issue from the retry map entirely.
        assert 204 not in loop._retry_counts

    @pytest.mark.asyncio
    async def test_emits_failure_when_agent_spawn_fails(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should emit FAILURE when _spawn_agent returns False."""
        queue_manager.enqueue(205, sonnet_metadata)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        loop._spawn_agent = AsyncMock(return_value=False)  # type: ignore[method-assign]
        await loop.process_next_issue()
        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.outcome == Outcome.FAILURE

    @pytest.mark.asyncio
    async def test_no_event_when_telemetry_disabled(
        self,
        queue_manager: QueueManager,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Should not call track when telemetry_client is None."""
        queue_manager.enqueue(206, sonnet_metadata)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=None
        )
        # Should not raise
        result = await loop.process_next_issue()
        assert result is not None

    @pytest.mark.asyncio
    async def test_telemetry_exception_does_not_propagate(
        self,
        queue_manager: QueueManager,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Telemetry failures must never disrupt task processing."""
        queue_manager.enqueue(207, sonnet_metadata)
        bad_client = MagicMock(spec=TelemetryClient)
        bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=bad_client
        )
        # The issue still completes even though track() raises.
        result = await loop.process_next_issue()
        assert result is not None
        assert result.issue_number == 207

    @pytest.mark.asyncio
    async def test_duration_is_positive(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Duration should be a non-negative integer."""
        queue_manager.enqueue(208, sonnet_metadata)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        await loop.process_next_issue()
        event = mock_telemetry_client.track.call_args[0][0]
        # >= 0 (not > 0): a fast mocked run may legitimately measure 0 ms.
        assert event.task_duration_ms >= 0

    @pytest.mark.asyncio
    async def test_maps_glm_agent_correctly(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
    ) -> None:
        """Should map GLM (self-hosted) agent to CUSTOM provider/harness."""
        glm_meta = IssueMetadata(
            assigned_agent="glm",
            difficulty="medium",
            estimated_context=30000,
        )
        queue_manager.enqueue(209, glm_meta)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        await loop.process_next_issue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.model == "glm-4"
        assert event.provider == Provider.CUSTOM
        assert event.harness == Harness.CUSTOM

    @pytest.mark.asyncio
    async def test_maps_easy_difficulty_to_low_complexity(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
    ) -> None:
        """Should map difficulty='easy' to Complexity.LOW."""
        easy_meta = IssueMetadata(
            assigned_agent="haiku",
            difficulty="easy",
            estimated_context=10000,
        )
        queue_manager.enqueue(210, easy_meta)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        await loop.process_next_issue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.complexity == Complexity.LOW

    @pytest.mark.asyncio
    async def test_no_event_when_queue_empty(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
    ) -> None:
        """Should not emit an event when queue is empty."""
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )
        result = await loop.process_next_issue()
        assert result is None
        mock_telemetry_client.track.assert_not_called()

    @pytest.mark.asyncio
    async def test_unknown_gate_names_excluded(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Gate names not in _GATE_NAME_TO_ENUM should be excluded from telemetry."""
        queue_manager.enqueue(211, sonnet_metadata)
        result_with_unknown = VerificationResult(
            all_passed=False,
            gate_results={
                "build": GateResult(passed=True, message="Build OK"),
                "unknown_gate": GateResult(passed=False, message="Unknown gate"),
            },
        )
        loop = _make_orchestration_loop(
            queue_manager,
            telemetry_client=mock_telemetry_client,
            quality_result=result_with_unknown,
        )
        await loop.process_next_issue()
        event = mock_telemetry_client.track.call_args[0][0]
        assert QualityGate.BUILD in event.quality_gates_run
        # unknown_gate should not appear
        assert len(event.quality_gates_run) == 1
        assert len(event.quality_gates_failed) == 0
# ---------------------------------------------------------------------------
# Mapping dict completeness tests
# ---------------------------------------------------------------------------
class TestMappingCompleteness:
    """Tests to verify mapping dicts cover expected values.

    Guards against drift between the telemetry mapping dicts and the
    agents, difficulties, and gates the coordinator actually uses.
    """

    def test_difficulty_map_covers_all_metadata_values(self) -> None:
        """All valid difficulty levels should have Complexity mappings."""
        assert set(_DIFFICULTY_TO_COMPLEXITY.keys()) == {"easy", "medium", "hard"}

    def test_gate_name_map_covers_all_orchestrator_gates(self) -> None:
        """All gate names used by QualityOrchestrator should be mappable."""
        assert set(_GATE_NAME_TO_ENUM.keys()) == {"build", "lint", "test", "coverage"}

    def test_agent_map_covers_all_configured_agents(self) -> None:
        """All agents used by the coordinator should have telemetry mappings."""
        assert set(_AGENT_TELEMETRY_MAP.keys()) == {
            "sonnet",
            "opus",
            "haiku",
            "glm",
            "minimax",
        }