feat(#160): Implement basic orchestration loop

Implements the Coordinator class with main orchestration loop:
- Async loop architecture with configurable poll interval
- process_queue() method gets next ready issue and spawns agent (stub)
- Graceful shutdown handling with stop() method
- Error handling that allows loop to continue after failures
- Logging for all actions (start, stop, processing, errors)
- Integration with QueueManager from #159
- Active agent tracking for future agent management

Configuration settings added:
- COORDINATOR_POLL_INTERVAL (default: 5.0s)
- COORDINATOR_MAX_CONCURRENT_AGENTS (default: 10)
- COORDINATOR_ENABLED (default: true)

Tests: 27 new tests covering all acceptance criteria
Coverage: 92% overall (100% for coordinator.py)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 18:03:12 -06:00
parent f0fd0bed41
commit 88953fc998
9 changed files with 1043 additions and 24 deletions

View File

@@ -27,6 +27,11 @@ class Settings(BaseSettings):
# Logging
log_level: str = "info"
# Coordinator Configuration
coordinator_poll_interval: float = 5.0
coordinator_max_concurrent_agents: int = 10
coordinator_enabled: bool = True
def get_settings() -> Settings:
"""Get settings instance (lazy loaded)."""

View File

@@ -0,0 +1,181 @@
"""Coordinator orchestration loop for processing issue queue."""
import asyncio
import logging
from typing import Any
from src.queue import QueueItem, QueueManager
logger = logging.getLogger(__name__)
class Coordinator:
    """Main orchestration loop for processing the issue queue.

    The Coordinator is responsible for:
    - Monitoring the queue for ready items
    - Spawning agents to process issues (stub implementation for Phase 0)
    - Marking items as complete when processing finishes
    - Handling errors gracefully
    - Supporting graceful shutdown
    """

    def __init__(
        self,
        queue_manager: "QueueManager",
        poll_interval: float = 5.0,
    ) -> None:
        """Initialize the Coordinator.

        Args:
            queue_manager: QueueManager instance for queue operations
            poll_interval: Seconds between queue polls (default: 5.0)
        """
        self.queue_manager = queue_manager
        self.poll_interval = poll_interval
        self._running = False
        # Created in start() so the event is bound to the loop that runs it.
        self._stop_event: asyncio.Event | None = None
        # Maps issue number -> info dict for the agent working on that issue.
        self._active_agents: dict[int, dict[str, Any]] = {}

    @property
    def is_running(self) -> bool:
        """Check if the coordinator is currently running.

        Returns:
            True if the orchestration loop is running
        """
        return self._running

    @property
    def active_agents(self) -> dict[int, dict[str, Any]]:
        """Get the dictionary of active agents.

        Returns:
            Dictionary mapping issue numbers to agent info
        """
        return self._active_agents

    def get_active_agent_count(self) -> int:
        """Get the count of currently active agents.

        Returns:
            Number of active agents
        """
        return len(self._active_agents)

    async def start(self) -> None:
        """Start the orchestration loop.

        Continuously processes the queue until stop() is called.
        """
        self._running = True
        self._stop_event = asyncio.Event()
        logger.info("Coordinator started - beginning orchestration loop")
        try:
            while self._running:
                try:
                    await self.process_queue()
                except Exception as e:
                    logger.error(f"Error in process_queue: {e}")
                    # Continue running despite errors
                # Wait for poll interval or stop signal
                try:
                    await asyncio.wait_for(
                        self._stop_event.wait(),
                        timeout=self.poll_interval,
                    )
                    # If we reach here, stop was requested
                    break
                except (TimeoutError, asyncio.TimeoutError):
                    # Normal timeout, continue polling.  Both names are caught
                    # because asyncio.TimeoutError is only an alias of the
                    # builtin TimeoutError from Python 3.11 onwards.
                    pass
        finally:
            self._running = False
            logger.info("Coordinator stopped")

    async def stop(self) -> None:
        """Stop the orchestration loop gracefully.

        Signals the loop to stop and waits for current processing to complete.
        This method is idempotent - can be called multiple times safely.
        """
        logger.info("Coordinator stop requested")
        self._running = False
        if self._stop_event is not None:
            self._stop_event.set()

    async def process_queue(self) -> "QueueItem | None":
        """Process the next ready item from the queue.

        Gets the next ready item, spawns an agent to process it,
        and marks it complete on success.

        Returns:
            The QueueItem that was processed, or None if queue is empty
        """
        # Get next ready item
        item = self.queue_manager.get_next_ready()
        if item is None:
            logger.debug("No items in queue to process")
            return None
        logger.info(
            f"Processing issue #{item.issue_number} "
            f"(agent: {item.metadata.assigned_agent}, "
            f"difficulty: {item.metadata.difficulty})"
        )
        # Mark as in progress
        self.queue_manager.mark_in_progress(item.issue_number)
        # Spawn agent (stub implementation)
        try:
            success = await self.spawn_agent(item)
            if success:
                # Mark as complete
                self.queue_manager.mark_complete(item.issue_number)
                logger.info(f"Issue #{item.issue_number} completed successfully")
            else:
                logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
        except Exception as e:
            logger.error(f"Error spawning agent for issue #{item.issue_number}: {e}")
            # Item remains in progress on error
        finally:
            # Fix: drop the tracking entry once this processing attempt is
            # finished; previously entries were never removed, so completed
            # agents accumulated forever and get_active_agent_count()
            # over-reported "active" agents.
            self._active_agents.pop(item.issue_number, None)
        return item

    async def spawn_agent(self, item: "QueueItem") -> bool:
        """Spawn an agent to process the given item.

        This is a stub implementation for Phase 0 that always succeeds.
        Future phases will implement actual agent spawning.

        Args:
            item: QueueItem containing issue details

        Returns:
            True if agent completed successfully, False otherwise
        """
        logger.info(
            f"[STUB] Spawning {item.metadata.assigned_agent} agent "
            f"for issue #{item.issue_number} "
            f"(estimated context: {item.metadata.estimated_context} tokens)"
        )
        # Track the agent
        self._active_agents[item.issue_number] = {
            "agent_type": item.metadata.assigned_agent,
            "issue_number": item.issue_number,
            "status": "running",
        }
        # Stub implementation: always succeed
        # In future phases, this will actually spawn a Claude agent process
        logger.info(f"[STUB] Agent completed for issue #{item.issue_number}")
        return True

View File

@@ -1,13 +1,18 @@
"""FastAPI application for mosaic-coordinator webhook receiver."""
import asyncio
import logging
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Any
from fastapi import FastAPI
from pydantic import BaseModel
from .config import settings
from .coordinator import Coordinator
from .queue import QueueManager
from .webhook import router as webhook_router
@@ -26,24 +31,77 @@ def setup_logging() -> None:
setup_logging()
logger = logging.getLogger(__name__)
# Module-level application state shared between the lifespan manager and
# request handlers (set during startup, cleared implicitly on process exit).
_coordinator: "Coordinator | None" = None
_coordinator_task: "asyncio.Task[None] | None" = None


def get_coordinator() -> "Coordinator | None":
    """Return the process-wide Coordinator instance.

    Returns:
        The Coordinator created at startup, or None if the coordinator
        has not been initialized (e.g. disabled via configuration).
    """
    return _coordinator
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
    """Application lifespan manager.

    Handles startup and shutdown logic including coordinator lifecycle.

    Yields:
        State dict with shared resources
    """
    global _coordinator, _coordinator_task
    # Startup
    logger.info("Starting mosaic-coordinator webhook receiver")
    logger.info(f"Gitea URL: {settings.gitea_url}")
    logger.info(f"Log level: {settings.log_level}")
    logger.info(f"Server: {settings.host}:{settings.port}")
    # Initialize queue manager
    queue_file = Path("queue.json")
    queue_manager = QueueManager(queue_file=queue_file)
    logger.info(f"Queue manager initialized (file: {queue_file})")
    # Initialize and start coordinator if enabled
    if settings.coordinator_enabled:
        _coordinator = Coordinator(
            queue_manager=queue_manager,
            poll_interval=settings.coordinator_poll_interval,
        )
        logger.info(
            f"Coordinator initialized (poll interval: {settings.coordinator_poll_interval}s, "
            f"max agents: {settings.coordinator_max_concurrent_agents})"
        )
        # Start coordinator in background
        _coordinator_task = asyncio.create_task(_coordinator.start())
        logger.info("Coordinator orchestration loop started")
    else:
        logger.info("Coordinator disabled via configuration")
    yield {"queue_manager": queue_manager, "coordinator": _coordinator}
    # Shutdown
    logger.info("Shutting down mosaic-coordinator")
    # Stop coordinator gracefully
    if _coordinator is not None:
        logger.info("Stopping coordinator...")
        await _coordinator.stop()
        if _coordinator_task is not None:
            # NOTE(review): cancel() may preempt a graceful loop exit already
            # in progress after stop(); acceptable for Phase 0, but consider
            # awaiting the task with a timeout before cancelling.
            _coordinator_task.cancel()
            try:
                await _coordinator_task
            except asyncio.CancelledError:
                pass
        logger.info("Coordinator stopped")
    logger.info("Mosaic-coordinator shutdown complete")
# Create FastAPI application
@@ -60,17 +118,30 @@ class HealthResponse(BaseModel):
status: str
service: str
coordinator_running: bool = False
active_agents: int = 0
@app.get("/health", response_model=HealthResponse)
async def health_check() -> HealthResponse:
    """Health check endpoint.

    Returns:
        HealthResponse indicating service is healthy with coordinator status
    """
    # Default to "not running / no agents" when the coordinator is disabled
    # or has not been initialized yet.
    coordinator_running = False
    active_agents = 0
    if _coordinator is not None:
        coordinator_running = _coordinator.is_running
        active_agents = _coordinator.get_active_agent_count()
    return HealthResponse(
        status="healthy",
        service="mosaic-coordinator",
        coordinator_running=coordinator_running,
        active_agents=active_agents,
    )
# Include webhook router