feat(#372): track orchestrator agent task completions via telemetry
- Instrument Coordinator.process_queue() with timing and telemetry events - Instrument OrchestrationLoop.process_next_issue() with quality gate tracking - Add agent-to-telemetry mapping (model, provider, harness per agent name) - Map difficulty levels to Complexity enum and gate names to QualityGate enum - Track retry counts per issue (increment on failure, clear on success) - Emit FAILURE outcome on agent spawn failure or quality gate rejection - Non-blocking: telemetry errors are logged and swallowed, never delay tasks - Pass telemetry client from FastAPI lifespan to Coordinator constructor - Add 33 unit tests covering all telemetry scenarios Refs #372 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,12 +2,24 @@
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
|
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
TaskType,
|
||||||
|
TelemetryClient,
|
||||||
|
)
|
||||||
|
|
||||||
from src.circuit_breaker import CircuitBreaker, CircuitBreakerError
|
from src.circuit_breaker import CircuitBreaker, CircuitBreakerError
|
||||||
from src.context_monitor import ContextMonitor
|
from src.context_monitor import ContextMonitor
|
||||||
from src.forced_continuation import ForcedContinuationService
|
from src.forced_continuation import ForcedContinuationService
|
||||||
from src.models import ContextAction
|
from src.models import ContextAction
|
||||||
|
from src.mosaic_telemetry import build_task_event
|
||||||
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
|
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
|
||||||
from src.queue import QueueItem, QueueManager
|
from src.queue import QueueItem, QueueManager
|
||||||
from src.tracing_decorators import trace_agent_operation
|
from src.tracing_decorators import trace_agent_operation
|
||||||
@@ -17,6 +29,49 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Agent-name → telemetry-field mapping helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Maps assigned_agent strings to (model, Provider, Harness)
|
||||||
|
_AGENT_TELEMETRY_MAP: dict[str, tuple[str, Provider, Harness]] = {
|
||||||
|
"sonnet": ("claude-sonnet-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
|
||||||
|
"opus": ("claude-opus-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
|
||||||
|
"haiku": ("claude-haiku-3.5-20241022", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
|
||||||
|
"glm": ("glm-4", Provider.CUSTOM, Harness.CUSTOM),
|
||||||
|
"minimax": ("minimax", Provider.CUSTOM, Harness.CUSTOM),
|
||||||
|
}
|
||||||
|
|
||||||
|
_DIFFICULTY_TO_COMPLEXITY: dict[str, Complexity] = {
|
||||||
|
"easy": Complexity.LOW,
|
||||||
|
"medium": Complexity.MEDIUM,
|
||||||
|
"hard": Complexity.HIGH,
|
||||||
|
}
|
||||||
|
|
||||||
|
_GATE_NAME_TO_ENUM: dict[str, QualityGate] = {
|
||||||
|
"build": QualityGate.BUILD,
|
||||||
|
"lint": QualityGate.LINT,
|
||||||
|
"test": QualityGate.TEST,
|
||||||
|
"coverage": QualityGate.COVERAGE,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_agent_fields(
|
||||||
|
assigned_agent: str,
|
||||||
|
) -> tuple[str, Provider, Harness]:
|
||||||
|
"""Resolve agent name to (model, provider, harness) for telemetry.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
assigned_agent: The agent name string from issue metadata.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (model_name, Provider, Harness).
|
||||||
|
"""
|
||||||
|
return _AGENT_TELEMETRY_MAP.get(
|
||||||
|
assigned_agent,
|
||||||
|
("unknown", Provider.UNKNOWN, Harness.UNKNOWN),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Coordinator:
|
class Coordinator:
|
||||||
"""Main orchestration loop for processing the issue queue.
|
"""Main orchestration loop for processing the issue queue.
|
||||||
@@ -41,6 +96,8 @@ class Coordinator:
|
|||||||
poll_interval: float = 5.0,
|
poll_interval: float = 5.0,
|
||||||
circuit_breaker_threshold: int = 5,
|
circuit_breaker_threshold: int = 5,
|
||||||
circuit_breaker_cooldown: float = 30.0,
|
circuit_breaker_cooldown: float = 30.0,
|
||||||
|
telemetry_client: TelemetryClient | None = None,
|
||||||
|
instance_id: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the Coordinator.
|
"""Initialize the Coordinator.
|
||||||
|
|
||||||
@@ -49,12 +106,16 @@ class Coordinator:
|
|||||||
poll_interval: Seconds between queue polls (default: 5.0)
|
poll_interval: Seconds between queue polls (default: 5.0)
|
||||||
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
|
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
|
||||||
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
|
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
|
||||||
|
telemetry_client: Optional Mosaic telemetry client for tracking task events
|
||||||
|
instance_id: UUID identifying this coordinator instance for telemetry
|
||||||
"""
|
"""
|
||||||
self.queue_manager = queue_manager
|
self.queue_manager = queue_manager
|
||||||
self.poll_interval = poll_interval
|
self.poll_interval = poll_interval
|
||||||
self._running = False
|
self._running = False
|
||||||
self._stop_event: asyncio.Event | None = None
|
self._stop_event: asyncio.Event | None = None
|
||||||
self._active_agents: dict[int, dict[str, Any]] = {}
|
self._active_agents: dict[int, dict[str, Any]] = {}
|
||||||
|
self._telemetry_client = telemetry_client
|
||||||
|
self._instance_id = instance_id
|
||||||
|
|
||||||
# Circuit breaker for preventing infinite retry loops (SEC-ORCH-7)
|
# Circuit breaker for preventing infinite retry loops (SEC-ORCH-7)
|
||||||
self._circuit_breaker = CircuitBreaker(
|
self._circuit_breaker = CircuitBreaker(
|
||||||
@@ -197,7 +258,8 @@ class Coordinator:
|
|||||||
"""Process the next ready item from the queue.
|
"""Process the next ready item from the queue.
|
||||||
|
|
||||||
Gets the next ready item, spawns an agent to process it,
|
Gets the next ready item, spawns an agent to process it,
|
||||||
and marks it complete on success.
|
and marks it complete on success. Emits a Mosaic telemetry
|
||||||
|
TaskCompletionEvent after each task attempt.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The QueueItem that was processed, or None if queue is empty
|
The QueueItem that was processed, or None if queue is empty
|
||||||
@@ -218,6 +280,10 @@ class Coordinator:
|
|||||||
# Mark as in progress
|
# Mark as in progress
|
||||||
self.queue_manager.mark_in_progress(item.issue_number)
|
self.queue_manager.mark_in_progress(item.issue_number)
|
||||||
|
|
||||||
|
# Track timing for telemetry
|
||||||
|
start_mono = time.monotonic()
|
||||||
|
outcome = Outcome.FAILURE
|
||||||
|
|
||||||
# Spawn agent (stub implementation)
|
# Spawn agent (stub implementation)
|
||||||
try:
|
try:
|
||||||
success = await self.spawn_agent(item)
|
success = await self.spawn_agent(item)
|
||||||
@@ -225,6 +291,7 @@ class Coordinator:
|
|||||||
if success:
|
if success:
|
||||||
# Mark as complete
|
# Mark as complete
|
||||||
self.queue_manager.mark_complete(item.issue_number)
|
self.queue_manager.mark_complete(item.issue_number)
|
||||||
|
outcome = Outcome.SUCCESS
|
||||||
logger.info(f"Issue #{item.issue_number} completed successfully")
|
logger.info(f"Issue #{item.issue_number} completed successfully")
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
|
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
|
||||||
@@ -233,8 +300,81 @@ class Coordinator:
|
|||||||
logger.error(f"Error spawning agent for issue #{item.issue_number}: {e}")
|
logger.error(f"Error spawning agent for issue #{item.issue_number}: {e}")
|
||||||
# Item remains in progress on error
|
# Item remains in progress on error
|
||||||
|
|
||||||
|
finally:
|
||||||
|
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
|
||||||
|
self._emit_task_telemetry(item, outcome=outcome, duration_ms=elapsed_ms)
|
||||||
|
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
def _emit_task_telemetry(
|
||||||
|
self,
|
||||||
|
item: QueueItem,
|
||||||
|
*,
|
||||||
|
outcome: Outcome,
|
||||||
|
duration_ms: int,
|
||||||
|
retry_count: int = 0,
|
||||||
|
actual_input_tokens: int = 0,
|
||||||
|
actual_output_tokens: int = 0,
|
||||||
|
quality_passed: bool = False,
|
||||||
|
quality_gates_run: list[QualityGate] | None = None,
|
||||||
|
quality_gates_failed: list[QualityGate] | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Emit a Mosaic telemetry TaskCompletionEvent (non-blocking).
|
||||||
|
|
||||||
|
This method never raises; any telemetry errors are logged and swallowed
|
||||||
|
so they do not interfere with task processing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
item: The QueueItem that was processed.
|
||||||
|
outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.).
|
||||||
|
duration_ms: Wall-clock duration in milliseconds.
|
||||||
|
retry_count: Number of retries before this attempt.
|
||||||
|
actual_input_tokens: Actual input tokens consumed by the harness.
|
||||||
|
actual_output_tokens: Actual output tokens consumed by the harness.
|
||||||
|
quality_passed: Whether all quality gates passed.
|
||||||
|
quality_gates_run: Quality gates that were executed.
|
||||||
|
quality_gates_failed: Quality gates that failed.
|
||||||
|
"""
|
||||||
|
if self._telemetry_client is None or not self._instance_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
model, provider, harness = _resolve_agent_fields(
|
||||||
|
item.metadata.assigned_agent,
|
||||||
|
)
|
||||||
|
complexity = _DIFFICULTY_TO_COMPLEXITY.get(
|
||||||
|
item.metadata.difficulty, Complexity.MEDIUM
|
||||||
|
)
|
||||||
|
|
||||||
|
event = build_task_event(
|
||||||
|
instance_id=self._instance_id,
|
||||||
|
task_type=TaskType.IMPLEMENTATION,
|
||||||
|
complexity=complexity,
|
||||||
|
outcome=outcome,
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
model=model,
|
||||||
|
provider=provider,
|
||||||
|
harness=harness,
|
||||||
|
actual_input_tokens=actual_input_tokens,
|
||||||
|
actual_output_tokens=actual_output_tokens,
|
||||||
|
estimated_input_tokens=item.metadata.estimated_context,
|
||||||
|
quality_passed=quality_passed,
|
||||||
|
quality_gates_run=quality_gates_run,
|
||||||
|
quality_gates_failed=quality_gates_failed,
|
||||||
|
retry_count=retry_count,
|
||||||
|
)
|
||||||
|
self._telemetry_client.track(event)
|
||||||
|
logger.debug(
|
||||||
|
"Telemetry event emitted for issue #%d (outcome=%s)",
|
||||||
|
item.issue_number,
|
||||||
|
outcome.value,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.exception(
|
||||||
|
"Failed to emit telemetry for issue #%d (non-fatal)",
|
||||||
|
item.issue_number,
|
||||||
|
)
|
||||||
|
|
||||||
@trace_agent_operation(operation_name="spawn_agent")
|
@trace_agent_operation(operation_name="spawn_agent")
|
||||||
async def spawn_agent(self, item: QueueItem) -> bool:
|
async def spawn_agent(self, item: QueueItem) -> bool:
|
||||||
"""Spawn an agent to process the given item.
|
"""Spawn an agent to process the given item.
|
||||||
@@ -294,6 +434,8 @@ class OrchestrationLoop:
|
|||||||
poll_interval: float = 5.0,
|
poll_interval: float = 5.0,
|
||||||
circuit_breaker_threshold: int = 5,
|
circuit_breaker_threshold: int = 5,
|
||||||
circuit_breaker_cooldown: float = 30.0,
|
circuit_breaker_cooldown: float = 30.0,
|
||||||
|
telemetry_client: TelemetryClient | None = None,
|
||||||
|
instance_id: str = "",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the OrchestrationLoop.
|
"""Initialize the OrchestrationLoop.
|
||||||
|
|
||||||
@@ -305,6 +447,8 @@ class OrchestrationLoop:
|
|||||||
poll_interval: Seconds between queue polls (default: 5.0)
|
poll_interval: Seconds between queue polls (default: 5.0)
|
||||||
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
|
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
|
||||||
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
|
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
|
||||||
|
telemetry_client: Optional Mosaic telemetry client for tracking task events
|
||||||
|
instance_id: UUID identifying this coordinator instance for telemetry
|
||||||
"""
|
"""
|
||||||
self.queue_manager = queue_manager
|
self.queue_manager = queue_manager
|
||||||
self.quality_orchestrator = quality_orchestrator
|
self.quality_orchestrator = quality_orchestrator
|
||||||
@@ -314,6 +458,11 @@ class OrchestrationLoop:
|
|||||||
self._running = False
|
self._running = False
|
||||||
self._stop_event: asyncio.Event | None = None
|
self._stop_event: asyncio.Event | None = None
|
||||||
self._active_agents: dict[int, dict[str, Any]] = {}
|
self._active_agents: dict[int, dict[str, Any]] = {}
|
||||||
|
self._telemetry_client = telemetry_client
|
||||||
|
self._instance_id = instance_id
|
||||||
|
|
||||||
|
# Per-issue retry tracking
|
||||||
|
self._retry_counts: dict[int, int] = {}
|
||||||
|
|
||||||
# Metrics tracking
|
# Metrics tracking
|
||||||
self._processed_count = 0
|
self._processed_count = 0
|
||||||
@@ -493,6 +642,7 @@ class OrchestrationLoop:
|
|||||||
3. Spawns an agent to process it
|
3. Spawns an agent to process it
|
||||||
4. Runs quality gates on completion
|
4. Runs quality gates on completion
|
||||||
5. Handles rejection with forced continuation or marks complete
|
5. Handles rejection with forced continuation or marks complete
|
||||||
|
6. Emits a Mosaic telemetry TaskCompletionEvent
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The QueueItem that was processed, or None if queue is empty
|
The QueueItem that was processed, or None if queue is empty
|
||||||
@@ -524,12 +674,21 @@ class OrchestrationLoop:
|
|||||||
"status": "running",
|
"status": "running",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Track timing for telemetry
|
||||||
|
start_mono = time.monotonic()
|
||||||
|
outcome = Outcome.FAILURE
|
||||||
|
quality_passed = False
|
||||||
|
gates_run: list[QualityGate] = []
|
||||||
|
gates_failed: list[QualityGate] = []
|
||||||
|
retry_count = self._retry_counts.get(item.issue_number, 0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Spawn agent (stub implementation)
|
# Spawn agent (stub implementation)
|
||||||
agent_success = await self._spawn_agent(item)
|
agent_success = await self._spawn_agent(item)
|
||||||
|
|
||||||
if not agent_success:
|
if not agent_success:
|
||||||
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
|
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
|
||||||
|
self._retry_counts[item.issue_number] = retry_count + 1
|
||||||
return item
|
return item
|
||||||
|
|
||||||
# Check context usage (stub - no real monitoring in Phase 0)
|
# Check context usage (stub - no real monitoring in Phase 0)
|
||||||
@@ -538,24 +697,123 @@ class OrchestrationLoop:
|
|||||||
# Run quality gates on completion
|
# Run quality gates on completion
|
||||||
verification = await self._verify_quality(item)
|
verification = await self._verify_quality(item)
|
||||||
|
|
||||||
|
# Map gate results for telemetry
|
||||||
|
gates_run = [
|
||||||
|
_GATE_NAME_TO_ENUM[name]
|
||||||
|
for name in verification.gate_results
|
||||||
|
if name in _GATE_NAME_TO_ENUM
|
||||||
|
]
|
||||||
|
gates_failed = [
|
||||||
|
_GATE_NAME_TO_ENUM[name]
|
||||||
|
for name, result in verification.gate_results.items()
|
||||||
|
if name in _GATE_NAME_TO_ENUM and not result.passed
|
||||||
|
]
|
||||||
|
quality_passed = verification.all_passed
|
||||||
|
|
||||||
if verification.all_passed:
|
if verification.all_passed:
|
||||||
# All gates passed - mark as complete
|
# All gates passed - mark as complete
|
||||||
self.queue_manager.mark_complete(item.issue_number)
|
self.queue_manager.mark_complete(item.issue_number)
|
||||||
self._success_count += 1
|
self._success_count += 1
|
||||||
|
outcome = Outcome.SUCCESS
|
||||||
|
# Clear retry counter on success
|
||||||
|
self._retry_counts.pop(item.issue_number, None)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Issue #{item.issue_number} completed successfully - all gates passed"
|
f"Issue #{item.issue_number} completed successfully - all gates passed"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Gates failed - generate continuation prompt
|
# Gates failed - generate continuation prompt
|
||||||
self._rejection_count += 1
|
self._rejection_count += 1
|
||||||
|
outcome = Outcome.FAILURE
|
||||||
|
self._retry_counts[item.issue_number] = retry_count + 1
|
||||||
await self._handle_rejection(item, verification)
|
await self._handle_rejection(item, verification)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing issue #{item.issue_number}: {e}")
|
logger.error(f"Error processing issue #{item.issue_number}: {e}")
|
||||||
# Item remains in progress on error
|
# Item remains in progress on error
|
||||||
|
|
||||||
|
finally:
|
||||||
|
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
|
||||||
|
self._emit_task_telemetry(
|
||||||
|
item,
|
||||||
|
outcome=outcome,
|
||||||
|
duration_ms=elapsed_ms,
|
||||||
|
retry_count=retry_count,
|
||||||
|
quality_passed=quality_passed,
|
||||||
|
quality_gates_run=gates_run,
|
||||||
|
quality_gates_failed=gates_failed,
|
||||||
|
)
|
||||||
|
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
def _emit_task_telemetry(
|
||||||
|
self,
|
||||||
|
item: QueueItem,
|
||||||
|
*,
|
||||||
|
outcome: Outcome,
|
||||||
|
duration_ms: int,
|
||||||
|
retry_count: int = 0,
|
||||||
|
actual_input_tokens: int = 0,
|
||||||
|
actual_output_tokens: int = 0,
|
||||||
|
quality_passed: bool = False,
|
||||||
|
quality_gates_run: list[QualityGate] | None = None,
|
||||||
|
quality_gates_failed: list[QualityGate] | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Emit a Mosaic telemetry TaskCompletionEvent (non-blocking).
|
||||||
|
|
||||||
|
This method never raises; any telemetry errors are logged and swallowed
|
||||||
|
so they do not interfere with task processing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
item: The QueueItem that was processed.
|
||||||
|
outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.).
|
||||||
|
duration_ms: Wall-clock duration in milliseconds.
|
||||||
|
retry_count: Number of retries before this attempt.
|
||||||
|
actual_input_tokens: Actual input tokens consumed by the harness.
|
||||||
|
actual_output_tokens: Actual output tokens consumed by the harness.
|
||||||
|
quality_passed: Whether all quality gates passed.
|
||||||
|
quality_gates_run: Quality gates that were executed.
|
||||||
|
quality_gates_failed: Quality gates that failed.
|
||||||
|
"""
|
||||||
|
if self._telemetry_client is None or not self._instance_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
model, provider, harness = _resolve_agent_fields(
|
||||||
|
item.metadata.assigned_agent,
|
||||||
|
)
|
||||||
|
complexity = _DIFFICULTY_TO_COMPLEXITY.get(
|
||||||
|
item.metadata.difficulty, Complexity.MEDIUM
|
||||||
|
)
|
||||||
|
|
||||||
|
event = build_task_event(
|
||||||
|
instance_id=self._instance_id,
|
||||||
|
task_type=TaskType.IMPLEMENTATION,
|
||||||
|
complexity=complexity,
|
||||||
|
outcome=outcome,
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
model=model,
|
||||||
|
provider=provider,
|
||||||
|
harness=harness,
|
||||||
|
actual_input_tokens=actual_input_tokens,
|
||||||
|
actual_output_tokens=actual_output_tokens,
|
||||||
|
estimated_input_tokens=item.metadata.estimated_context,
|
||||||
|
quality_passed=quality_passed,
|
||||||
|
quality_gates_run=quality_gates_run,
|
||||||
|
quality_gates_failed=quality_gates_failed,
|
||||||
|
retry_count=retry_count,
|
||||||
|
)
|
||||||
|
self._telemetry_client.track(event)
|
||||||
|
logger.debug(
|
||||||
|
"Telemetry event emitted for issue #%d (outcome=%s)",
|
||||||
|
item.issue_number,
|
||||||
|
outcome.value,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.exception(
|
||||||
|
"Failed to emit telemetry for issue #%d (non-fatal)",
|
||||||
|
item.issue_number,
|
||||||
|
)
|
||||||
|
|
||||||
async def _spawn_agent(self, item: QueueItem) -> bool:
|
async def _spawn_agent(self, item: QueueItem) -> bool:
|
||||||
"""Spawn an agent to process the given item.
|
"""Spawn an agent to process the given item.
|
||||||
|
|
||||||
|
|||||||
@@ -100,6 +100,8 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
|
|||||||
_coordinator = Coordinator(
|
_coordinator = Coordinator(
|
||||||
queue_manager=queue_manager,
|
queue_manager=queue_manager,
|
||||||
poll_interval=settings.coordinator_poll_interval,
|
poll_interval=settings.coordinator_poll_interval,
|
||||||
|
telemetry_client=mosaic_telemetry_client,
|
||||||
|
instance_id=mosaic_telemetry_config.instance_id or "",
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Coordinator initialized (poll interval: {settings.coordinator_poll_interval}s, "
|
f"Coordinator initialized (poll interval: {settings.coordinator_poll_interval}s, "
|
||||||
|
|||||||
796
apps/coordinator/tests/test_task_telemetry.py
Normal file
796
apps/coordinator/tests/test_task_telemetry.py
Normal file
@@ -0,0 +1,796 @@
|
|||||||
|
"""Tests for task completion telemetry instrumentation in the coordinator.
|
||||||
|
|
||||||
|
These tests verify that the Coordinator and OrchestrationLoop correctly
|
||||||
|
emit TaskCompletionEvents via the Mosaic telemetry SDK after each task
|
||||||
|
dispatch attempt.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
from collections.abc import Generator
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
TelemetryClient,
|
||||||
|
)
|
||||||
|
|
||||||
|
from src.coordinator import (
|
||||||
|
_AGENT_TELEMETRY_MAP,
|
||||||
|
_DIFFICULTY_TO_COMPLEXITY,
|
||||||
|
_GATE_NAME_TO_ENUM,
|
||||||
|
Coordinator,
|
||||||
|
OrchestrationLoop,
|
||||||
|
_resolve_agent_fields,
|
||||||
|
)
|
||||||
|
from src.gates.quality_gate import GateResult
|
||||||
|
from src.models import IssueMetadata
|
||||||
|
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
|
||||||
|
from src.queue import QueueManager
|
||||||
|
|
||||||
|
VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def temp_queue_file() -> Generator[Path, None, None]:
|
||||||
|
"""Create a temporary file for queue persistence."""
|
||||||
|
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f:
|
||||||
|
temp_path = Path(f.name)
|
||||||
|
yield temp_path
|
||||||
|
if temp_path.exists():
|
||||||
|
temp_path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def queue_manager(temp_queue_file: Path) -> QueueManager:
|
||||||
|
"""Create a queue manager with temporary storage."""
|
||||||
|
return QueueManager(queue_file=temp_queue_file)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_telemetry_client() -> MagicMock:
|
||||||
|
"""Create a mock TelemetryClient."""
|
||||||
|
client = MagicMock(spec=TelemetryClient)
|
||||||
|
client.track = MagicMock()
|
||||||
|
return client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def sonnet_metadata() -> IssueMetadata:
|
||||||
|
"""Metadata for a sonnet agent task."""
|
||||||
|
return IssueMetadata(
|
||||||
|
assigned_agent="sonnet",
|
||||||
|
difficulty="medium",
|
||||||
|
estimated_context=50000,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def opus_metadata() -> IssueMetadata:
|
||||||
|
"""Metadata for an opus agent task (hard difficulty)."""
|
||||||
|
return IssueMetadata(
|
||||||
|
assigned_agent="opus",
|
||||||
|
difficulty="hard",
|
||||||
|
estimated_context=120000,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _resolve_agent_fields tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveAgentFields:
|
||||||
|
"""Tests for the _resolve_agent_fields helper."""
|
||||||
|
|
||||||
|
def test_known_agent_sonnet(self) -> None:
|
||||||
|
"""Should return correct fields for sonnet agent."""
|
||||||
|
model, provider, harness = _resolve_agent_fields("sonnet")
|
||||||
|
assert model == "claude-sonnet-4-20250514"
|
||||||
|
assert provider == Provider.ANTHROPIC
|
||||||
|
assert harness == Harness.CLAUDE_CODE
|
||||||
|
|
||||||
|
def test_known_agent_opus(self) -> None:
|
||||||
|
"""Should return correct fields for opus agent."""
|
||||||
|
model, provider, harness = _resolve_agent_fields("opus")
|
||||||
|
assert model == "claude-opus-4-20250514"
|
||||||
|
assert provider == Provider.ANTHROPIC
|
||||||
|
assert harness == Harness.CLAUDE_CODE
|
||||||
|
|
||||||
|
def test_known_agent_haiku(self) -> None:
|
||||||
|
"""Should return correct fields for haiku agent."""
|
||||||
|
model, provider, harness = _resolve_agent_fields("haiku")
|
||||||
|
assert model == "claude-haiku-3.5-20241022"
|
||||||
|
assert provider == Provider.ANTHROPIC
|
||||||
|
assert harness == Harness.CLAUDE_CODE
|
||||||
|
|
||||||
|
def test_known_agent_glm(self) -> None:
|
||||||
|
"""Should return correct fields for glm (self-hosted) agent."""
|
||||||
|
model, provider, harness = _resolve_agent_fields("glm")
|
||||||
|
assert model == "glm-4"
|
||||||
|
assert provider == Provider.CUSTOM
|
||||||
|
assert harness == Harness.CUSTOM
|
||||||
|
|
||||||
|
def test_known_agent_minimax(self) -> None:
|
||||||
|
"""Should return correct fields for minimax (self-hosted) agent."""
|
||||||
|
model, provider, harness = _resolve_agent_fields("minimax")
|
||||||
|
assert model == "minimax"
|
||||||
|
assert provider == Provider.CUSTOM
|
||||||
|
assert harness == Harness.CUSTOM
|
||||||
|
|
||||||
|
def test_unknown_agent_returns_defaults(self) -> None:
|
||||||
|
"""Should return unknown values for unrecognised agent names."""
|
||||||
|
model, provider, harness = _resolve_agent_fields("nonexistent")
|
||||||
|
assert model == "unknown"
|
||||||
|
assert provider == Provider.UNKNOWN
|
||||||
|
assert harness == Harness.UNKNOWN
|
||||||
|
|
||||||
|
def test_all_map_entries_covered(self) -> None:
|
||||||
|
"""Ensure every entry in _AGENT_TELEMETRY_MAP is resolvable."""
|
||||||
|
for agent_name in _AGENT_TELEMETRY_MAP:
|
||||||
|
model, provider, harness = _resolve_agent_fields(agent_name)
|
||||||
|
assert model != "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Coordinator telemetry emission tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestCoordinatorTelemetry:
|
||||||
|
"""Tests for telemetry emission in the Coordinator class."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_emits_success_event_on_completion(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should emit a SUCCESS event when task completes successfully."""
|
||||||
|
queue_manager.enqueue(100, sonnet_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
mock_telemetry_client.track.assert_called_once()
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert isinstance(event, TaskCompletionEvent)
|
||||||
|
assert event.outcome == Outcome.SUCCESS
|
||||||
|
assert event.task_type == TaskType.IMPLEMENTATION
|
||||||
|
assert event.complexity == Complexity.MEDIUM
|
||||||
|
assert event.provider == Provider.ANTHROPIC
|
||||||
|
assert event.harness == Harness.CLAUDE_CODE
|
||||||
|
assert str(event.instance_id) == VALID_INSTANCE_ID
|
||||||
|
assert event.task_duration_ms >= 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_emits_failure_event_when_agent_fails(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should emit a FAILURE event when spawn_agent returns False."""
|
||||||
|
queue_manager.enqueue(101, sonnet_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
# Override spawn_agent to fail
|
||||||
|
coordinator.spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign]
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
mock_telemetry_client.track.assert_called_once()
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.outcome == Outcome.FAILURE
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_emits_failure_event_on_exception(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should emit a FAILURE event when spawn_agent raises an exception."""
|
||||||
|
queue_manager.enqueue(102, sonnet_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
coordinator.spawn_agent = AsyncMock(side_effect=RuntimeError("agent crashed")) # type: ignore[method-assign]
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
mock_telemetry_client.track.assert_called_once()
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.outcome == Outcome.FAILURE
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_maps_difficulty_to_complexity(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
opus_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should map difficulty='hard' to Complexity.HIGH in the event."""
|
||||||
|
queue_manager.enqueue(103, opus_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.complexity == Complexity.HIGH
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_maps_agent_to_model_and_provider(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
opus_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should map 'opus' agent to opus model and ANTHROPIC provider."""
|
||||||
|
queue_manager.enqueue(104, opus_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert "opus" in event.model
|
||||||
|
assert event.provider == Provider.ANTHROPIC
|
||||||
|
assert event.harness == Harness.CLAUDE_CODE
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_event_when_telemetry_disabled(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should not call track when telemetry_client is None."""
|
||||||
|
queue_manager.enqueue(105, sonnet_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=None,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should not raise
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_event_when_instance_id_empty(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should not call track when instance_id is empty."""
|
||||||
|
queue_manager.enqueue(106, sonnet_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id="",
|
||||||
|
)
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
mock_telemetry_client.track.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_telemetry_exception_does_not_propagate(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Telemetry failures must never break task processing."""
|
||||||
|
queue_manager.enqueue(107, sonnet_metadata)
|
||||||
|
|
||||||
|
bad_client = MagicMock(spec=TelemetryClient)
|
||||||
|
bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=bad_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should complete without raising, despite telemetry failure
|
||||||
|
result = await coordinator.process_queue()
|
||||||
|
assert result is not None
|
||||||
|
assert result.issue_number == 107
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_event_when_queue_empty(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
) -> None:
|
||||||
|
"""Should not emit any event when the queue is empty."""
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await coordinator.process_queue()
|
||||||
|
assert result is None
|
||||||
|
mock_telemetry_client.track.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_estimated_input_tokens_from_metadata(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should set estimated_input_tokens from issue metadata."""
|
||||||
|
queue_manager.enqueue(108, sonnet_metadata)
|
||||||
|
|
||||||
|
coordinator = Coordinator(
|
||||||
|
queue_manager=queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
instance_id=VALID_INSTANCE_ID,
|
||||||
|
)
|
||||||
|
|
||||||
|
await coordinator.process_queue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.estimated_input_tokens == 50000
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# OrchestrationLoop telemetry emission tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _make_orchestration_loop(
    queue_manager: QueueManager,
    telemetry_client: TelemetryClient | None = None,
    instance_id: str = VALID_INSTANCE_ID,
    quality_result: VerificationResult | None = None,
) -> OrchestrationLoop:
    """Build an OrchestrationLoop wired to mocked collaborators.

    Args:
        queue_manager: Queue manager instance.
        telemetry_client: Optional telemetry client.
        instance_id: Coordinator instance ID.
        quality_result: Override quality verification result.

    Returns:
        Configured OrchestrationLoop.

    """
    # Quality orchestrator mock: all four gates pass unless the caller
    # supplies an explicit verification result.
    all_gates_passing = VerificationResult(
        all_passed=True,
        gate_results={
            "build": GateResult(passed=True, message="Build OK"),
            "lint": GateResult(passed=True, message="Lint OK"),
            "test": GateResult(passed=True, message="Test OK"),
            "coverage": GateResult(passed=True, message="Coverage OK"),
        },
    )
    quality = MagicMock(spec=QualityOrchestrator)
    quality.verify_completion = AsyncMock(
        return_value=quality_result or all_gates_passing
    )

    # Forced-continuation service mock.
    from src.forced_continuation import ForcedContinuationService

    continuation = MagicMock(spec=ForcedContinuationService)
    continuation.generate_prompt = MagicMock(return_value="Fix: build failed")

    # Context monitor mock.
    from src.context_monitor import ContextMonitor

    monitor = MagicMock(spec=ContextMonitor)
    monitor.determine_action = AsyncMock(return_value="continue")

    return OrchestrationLoop(
        queue_manager=queue_manager,
        quality_orchestrator=quality,
        continuation_service=continuation,
        context_monitor=monitor,
        telemetry_client=telemetry_client,
        instance_id=instance_id,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TestOrchestrationLoopTelemetry:
    """Tests for telemetry emission in the OrchestrationLoop class."""

    @pytest.mark.asyncio
    async def test_emits_success_with_quality_gates(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """A clean run reports SUCCESS plus every gate that was executed."""
        queue_manager.enqueue(200, sonnet_metadata)
        loop = _make_orchestration_loop(
            queue_manager, telemetry_client=mock_telemetry_client
        )

        await loop.process_next_issue()

        mock_telemetry_client.track.assert_called_once()
        (emitted,), _kwargs = mock_telemetry_client.track.call_args
        assert emitted.outcome == Outcome.SUCCESS
        assert emitted.quality_gate_passed is True
        expected_gates = {
            QualityGate.BUILD,
            QualityGate.LINT,
            QualityGate.TEST,
            QualityGate.COVERAGE,
        }
        assert set(emitted.quality_gates_run) == expected_gates
        assert emitted.quality_gates_failed == []
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_emits_failure_with_failed_gates(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should emit FAILURE event with failed gate details."""
|
||||||
|
queue_manager.enqueue(201, sonnet_metadata)
|
||||||
|
|
||||||
|
failed_result = VerificationResult(
|
||||||
|
all_passed=False,
|
||||||
|
gate_results={
|
||||||
|
"build": GateResult(passed=True, message="Build OK"),
|
||||||
|
"lint": GateResult(passed=True, message="Lint OK"),
|
||||||
|
"test": GateResult(passed=False, message="3 tests failed"),
|
||||||
|
"coverage": GateResult(passed=False, message="Coverage 70% < 85%"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
quality_result=failed_result,
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
mock_telemetry_client.track.assert_called_once()
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.outcome == Outcome.FAILURE
|
||||||
|
assert event.quality_gate_passed is False
|
||||||
|
assert set(event.quality_gates_failed) == {
|
||||||
|
QualityGate.TEST,
|
||||||
|
QualityGate.COVERAGE,
|
||||||
|
}
|
||||||
|
assert set(event.quality_gates_run) == {
|
||||||
|
QualityGate.BUILD,
|
||||||
|
QualityGate.LINT,
|
||||||
|
QualityGate.TEST,
|
||||||
|
QualityGate.COVERAGE,
|
||||||
|
}
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retry_count_starts_at_zero(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""First attempt should report retry_count=0."""
|
||||||
|
queue_manager.enqueue(202, sonnet_metadata)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=mock_telemetry_client
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.retry_count == 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retry_count_increments_on_failure(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Retry count should increment after a quality gate failure."""
|
||||||
|
queue_manager.enqueue(203, sonnet_metadata)
|
||||||
|
|
||||||
|
failed_result = VerificationResult(
|
||||||
|
all_passed=False,
|
||||||
|
gate_results={
|
||||||
|
"build": GateResult(passed=False, message="Build failed"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
quality_result=failed_result,
|
||||||
|
)
|
||||||
|
|
||||||
|
# First attempt
|
||||||
|
await loop.process_next_issue()
|
||||||
|
event1 = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event1.retry_count == 0
|
||||||
|
|
||||||
|
# Re-enqueue and process again (simulates retry)
|
||||||
|
queue_manager.enqueue(203, sonnet_metadata)
|
||||||
|
mock_telemetry_client.track.reset_mock()
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
event2 = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event2.retry_count == 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retry_count_clears_on_success(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Retry count should be cleared after a successful completion."""
|
||||||
|
queue_manager.enqueue(204, sonnet_metadata)
|
||||||
|
|
||||||
|
# First: fail
|
||||||
|
failed_result = VerificationResult(
|
||||||
|
all_passed=False,
|
||||||
|
gate_results={
|
||||||
|
"build": GateResult(passed=False, message="Build failed"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
quality_result=failed_result,
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
assert loop._retry_counts.get(204) == 1
|
||||||
|
|
||||||
|
# Now succeed
|
||||||
|
success_result = VerificationResult(
|
||||||
|
all_passed=True,
|
||||||
|
gate_results={
|
||||||
|
"build": GateResult(passed=True, message="Build OK"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
loop.quality_orchestrator.verify_completion = AsyncMock(return_value=success_result) # type: ignore[method-assign]
|
||||||
|
queue_manager.enqueue(204, sonnet_metadata)
|
||||||
|
mock_telemetry_client.track.reset_mock()
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
assert 204 not in loop._retry_counts
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_emits_failure_when_agent_spawn_fails(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should emit FAILURE when _spawn_agent returns False."""
|
||||||
|
queue_manager.enqueue(205, sonnet_metadata)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=mock_telemetry_client
|
||||||
|
)
|
||||||
|
loop._spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign]
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
mock_telemetry_client.track.assert_called_once()
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.outcome == Outcome.FAILURE
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_event_when_telemetry_disabled(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Should not call track when telemetry_client is None."""
|
||||||
|
queue_manager.enqueue(206, sonnet_metadata)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=None
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should not raise
|
||||||
|
result = await loop.process_next_issue()
|
||||||
|
assert result is not None
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_telemetry_exception_does_not_propagate(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Telemetry failures must never disrupt task processing."""
|
||||||
|
queue_manager.enqueue(207, sonnet_metadata)
|
||||||
|
|
||||||
|
bad_client = MagicMock(spec=TelemetryClient)
|
||||||
|
bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=bad_client
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await loop.process_next_issue()
|
||||||
|
assert result is not None
|
||||||
|
assert result.issue_number == 207
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_duration_is_positive(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Duration should be a non-negative integer."""
|
||||||
|
queue_manager.enqueue(208, sonnet_metadata)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=mock_telemetry_client
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.task_duration_ms >= 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_maps_glm_agent_correctly(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
) -> None:
|
||||||
|
"""Should map GLM (self-hosted) agent to CUSTOM provider/harness."""
|
||||||
|
glm_meta = IssueMetadata(
|
||||||
|
assigned_agent="glm",
|
||||||
|
difficulty="medium",
|
||||||
|
estimated_context=30000,
|
||||||
|
)
|
||||||
|
queue_manager.enqueue(209, glm_meta)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=mock_telemetry_client
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.model == "glm-4"
|
||||||
|
assert event.provider == Provider.CUSTOM
|
||||||
|
assert event.harness == Harness.CUSTOM
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_maps_easy_difficulty_to_low_complexity(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
) -> None:
|
||||||
|
"""Should map difficulty='easy' to Complexity.LOW."""
|
||||||
|
easy_meta = IssueMetadata(
|
||||||
|
assigned_agent="haiku",
|
||||||
|
difficulty="easy",
|
||||||
|
estimated_context=10000,
|
||||||
|
)
|
||||||
|
queue_manager.enqueue(210, easy_meta)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=mock_telemetry_client
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert event.complexity == Complexity.LOW
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_event_when_queue_empty(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
) -> None:
|
||||||
|
"""Should not emit an event when queue is empty."""
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager, telemetry_client=mock_telemetry_client
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await loop.process_next_issue()
|
||||||
|
assert result is None
|
||||||
|
mock_telemetry_client.track.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_unknown_gate_names_excluded(
|
||||||
|
self,
|
||||||
|
queue_manager: QueueManager,
|
||||||
|
mock_telemetry_client: MagicMock,
|
||||||
|
sonnet_metadata: IssueMetadata,
|
||||||
|
) -> None:
|
||||||
|
"""Gate names not in _GATE_NAME_TO_ENUM should be excluded from telemetry."""
|
||||||
|
queue_manager.enqueue(211, sonnet_metadata)
|
||||||
|
|
||||||
|
result_with_unknown = VerificationResult(
|
||||||
|
all_passed=False,
|
||||||
|
gate_results={
|
||||||
|
"build": GateResult(passed=True, message="Build OK"),
|
||||||
|
"unknown_gate": GateResult(passed=False, message="Unknown gate"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
loop = _make_orchestration_loop(
|
||||||
|
queue_manager,
|
||||||
|
telemetry_client=mock_telemetry_client,
|
||||||
|
quality_result=result_with_unknown,
|
||||||
|
)
|
||||||
|
|
||||||
|
await loop.process_next_issue()
|
||||||
|
|
||||||
|
event = mock_telemetry_client.track.call_args[0][0]
|
||||||
|
assert QualityGate.BUILD in event.quality_gates_run
|
||||||
|
# unknown_gate should not appear
|
||||||
|
assert len(event.quality_gates_run) == 1
|
||||||
|
assert len(event.quality_gates_failed) == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Mapping dict completeness tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestMappingCompleteness:
    """Tests to verify mapping dicts cover expected values."""

    def test_difficulty_map_covers_all_metadata_values(self) -> None:
        """All valid difficulty levels should have Complexity mappings."""
        assert set(_DIFFICULTY_TO_COMPLEXITY) == {"easy", "medium", "hard"}

    def test_gate_name_map_covers_all_orchestrator_gates(self) -> None:
        """All gate names used by QualityOrchestrator should be mappable."""
        assert set(_GATE_NAME_TO_ENUM) == {"build", "lint", "test", "coverage"}

    def test_agent_map_covers_all_configured_agents(self) -> None:
        """All agents used by the coordinator should have telemetry mappings."""
        configured = {"sonnet", "opus", "haiku", "glm", "minimax"}
        assert set(_AGENT_TELEMETRY_MAP) == configured
|
||||||
Reference in New Issue
Block a user