From 8d8d37dbf9e53559d3c00147e1d27f0ef7cd626d Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:33:54 -0600 Subject: [PATCH 01/15] feat(#370): install mosaicstack-telemetry in Coordinator - Add mosaicstack-telemetry>=0.1.0 to pyproject.toml dependencies - Configure Gitea PyPI registry via pip.conf (extra-index-url) - Integrate TelemetryClient in FastAPI lifespan (start_async/stop_async) - Store client on app.state.mosaic_telemetry for downstream access - Create mosaic_telemetry.py helper module with: - get_telemetry_client(): retrieve client from app state - build_task_event(): construct TaskCompletionEvent with coordinator defaults - create_telemetry_config(): create config from MOSAIC_TELEMETRY_* env vars - Add 28 unit tests covering config, helpers, disabled mode, and lifespan - New module has 100% test coverage Refs #370 Co-Authored-By: Claude Opus 4.6 --- apps/coordinator/pip.conf | 2 + apps/coordinator/pyproject.toml | 1 + apps/coordinator/src/main.py | 19 + apps/coordinator/src/mosaic_telemetry.py | 157 +++++++ .../tests/test_mosaic_telemetry.py | 426 ++++++++++++++++++ 5 files changed, 605 insertions(+) create mode 100644 apps/coordinator/pip.conf create mode 100644 apps/coordinator/src/mosaic_telemetry.py create mode 100644 apps/coordinator/tests/test_mosaic_telemetry.py diff --git a/apps/coordinator/pip.conf b/apps/coordinator/pip.conf new file mode 100644 index 0000000..a421c56 --- /dev/null +++ b/apps/coordinator/pip.conf @@ -0,0 +1,2 @@ +[global] +extra-index-url = https://git.mosaicstack.dev/api/packages/mosaic/pypi/simple/ diff --git a/apps/coordinator/pyproject.toml b/apps/coordinator/pyproject.toml index 62b6704..ca20f48 100644 --- a/apps/coordinator/pyproject.toml +++ b/apps/coordinator/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "opentelemetry-sdk>=1.20.0", "opentelemetry-instrumentation-fastapi>=0.41b0", "opentelemetry-exporter-otlp>=1.20.0", + "mosaicstack-telemetry>=0.1.0", ] [project.optional-dependencies] diff --git 
a/apps/coordinator/src/main.py b/apps/coordinator/src/main.py index f1f378c..8f345b5 100644 --- a/apps/coordinator/src/main.py +++ b/apps/coordinator/src/main.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Any from fastapi import FastAPI +from mosaicstack_telemetry import TelemetryClient # type: ignore[import-untyped] from pydantic import BaseModel from slowapi import Limiter, _rate_limit_exceeded_handler from slowapi.errors import RateLimitExceeded @@ -18,6 +19,7 @@ from starlette.responses import Response from .config import settings from .coordinator import Coordinator +from .mosaic_telemetry import create_telemetry_config from .queue import QueueManager from .telemetry import TelemetryService, shutdown_telemetry from .webhook import router as webhook_router @@ -76,6 +78,18 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]: telemetry_service.initialize() logger.info("OpenTelemetry telemetry initialized") + # Initialize Mosaic telemetry client + mosaic_telemetry_config = create_telemetry_config() + mosaic_telemetry_client: TelemetryClient | None = None + if mosaic_telemetry_config.enabled: + mosaic_telemetry_client = TelemetryClient(mosaic_telemetry_config) + await mosaic_telemetry_client.start_async() + app.state.mosaic_telemetry = mosaic_telemetry_client + logger.info("Mosaic telemetry client started") + else: + app.state.mosaic_telemetry = None + logger.info("Mosaic telemetry disabled via configuration") + # Initialize queue manager queue_file = Path("queue.json") queue_manager = QueueManager(queue_file=queue_file) @@ -115,6 +129,11 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]: pass logger.info("Coordinator stopped") + # Shutdown Mosaic telemetry client + if mosaic_telemetry_client is not None: + await mosaic_telemetry_client.stop_async() + logger.info("Mosaic telemetry client stopped") + # Shutdown OpenTelemetry if telemetry_enabled: shutdown_telemetry() diff --git 
a/apps/coordinator/src/mosaic_telemetry.py b/apps/coordinator/src/mosaic_telemetry.py new file mode 100644 index 0000000..c4793fd --- /dev/null +++ b/apps/coordinator/src/mosaic_telemetry.py @@ -0,0 +1,157 @@ +"""Mosaic Stack telemetry integration for the Coordinator. + +This module provides helpers for tracking task completion events using the +mosaicstack-telemetry SDK. It is separate from the OpenTelemetry distributed +tracing configured in telemetry.py. + +Environment variables (auto-read by the SDK): + MOSAIC_TELEMETRY_ENABLED: Enable/disable telemetry (default: true) + MOSAIC_TELEMETRY_SERVER_URL: Telemetry server endpoint + MOSAIC_TELEMETRY_API_KEY: API key for authentication + MOSAIC_TELEMETRY_INSTANCE_ID: UUID identifying this coordinator instance +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from mosaicstack_telemetry import ( + Complexity, + EventBuilder, + Harness, + Outcome, + Provider, + QualityGate, + TaskType, + TelemetryClient, + TelemetryConfig, +) + +if TYPE_CHECKING: + from fastapi import FastAPI + from mosaicstack_telemetry import TaskCompletionEvent + +logger = logging.getLogger(__name__) + + +def get_telemetry_client(app: FastAPI) -> TelemetryClient | None: + """Retrieve the Mosaic telemetry client from FastAPI app state. + + Args: + app: The FastAPI application instance. + + Returns: + The TelemetryClient if initialised and telemetry is enabled, + or None if telemetry is disabled or not yet initialised. 
+ """ + client: TelemetryClient | None = getattr(app.state, "mosaic_telemetry", None) + return client + + +def build_task_event( + *, + instance_id: str, + task_type: TaskType = TaskType.IMPLEMENTATION, + complexity: Complexity = Complexity.MEDIUM, + outcome: Outcome = Outcome.SUCCESS, + duration_ms: int = 0, + model: str = "claude-sonnet-4-20250514", + provider: Provider = Provider.ANTHROPIC, + harness: Harness = Harness.CLAUDE_CODE, + estimated_input_tokens: int = 0, + estimated_output_tokens: int = 0, + actual_input_tokens: int = 0, + actual_output_tokens: int = 0, + estimated_cost_micros: int = 0, + actual_cost_micros: int = 0, + quality_passed: bool = False, + quality_gates_run: list[QualityGate] | None = None, + quality_gates_failed: list[QualityGate] | None = None, + context_compactions: int = 0, + context_rotations: int = 0, + context_utilization: float = 0.0, + retry_count: int = 0, + language: str | None = "typescript", +) -> TaskCompletionEvent: + """Build a TaskCompletionEvent for a coordinator task. + + Provides sensible defaults for the coordinator context (Claude Code harness, + Anthropic provider, TypeScript language). + + Args: + instance_id: UUID identifying this coordinator instance. + task_type: The kind of task that was performed. + complexity: Complexity level of the task. + outcome: Whether the task succeeded, failed, etc. + duration_ms: Task duration in milliseconds. + model: The AI model used. + provider: The AI model provider. + harness: The coding harness used. + estimated_input_tokens: Estimated input token count. + estimated_output_tokens: Estimated output token count. + actual_input_tokens: Actual input token count. + actual_output_tokens: Actual output token count. + estimated_cost_micros: Estimated cost in USD micros. + actual_cost_micros: Actual cost in USD micros. + quality_passed: Whether all quality gates passed. + quality_gates_run: List of quality gates that were executed. 
+ quality_gates_failed: List of quality gates that failed. + context_compactions: Number of context compactions during the task. + context_rotations: Number of context rotations during the task. + context_utilization: Final context window utilization (0.0-1.0). + retry_count: Number of retries before the task completed. + language: Primary programming language (default: typescript). + + Returns: + A fully populated TaskCompletionEvent ready to be tracked. + """ + builder = ( + EventBuilder(instance_id=instance_id) + .task_type(task_type) + .complexity_level(complexity) + .harness_type(harness) + .model(model) + .provider(provider) + .duration_ms(duration_ms) + .outcome_value(outcome) + .tokens( + estimated_in=estimated_input_tokens, + estimated_out=estimated_output_tokens, + actual_in=actual_input_tokens, + actual_out=actual_output_tokens, + ) + .cost(estimated=estimated_cost_micros, actual=actual_cost_micros) + .quality( + passed=quality_passed, + gates_run=quality_gates_run or [], + gates_failed=quality_gates_failed or [], + ) + .context( + compactions=context_compactions, + rotations=context_rotations, + utilization=context_utilization, + ) + .retry_count(retry_count) + .language(language) + ) + return builder.build() + + +def create_telemetry_config() -> TelemetryConfig: + """Create a TelemetryConfig instance. + + The config reads from MOSAIC_TELEMETRY_* environment variables automatically. + Validation warnings are logged but do not prevent creation. + + Returns: + A TelemetryConfig instance with env-var overrides applied. 
+ """ + config = TelemetryConfig() + errors = config.validate() + if errors and config.enabled: + logger.warning( + "Mosaic telemetry config has validation issues (telemetry may not submit): %s", + "; ".join(errors), + ) + return config diff --git a/apps/coordinator/tests/test_mosaic_telemetry.py b/apps/coordinator/tests/test_mosaic_telemetry.py new file mode 100644 index 0000000..cbd92e6 --- /dev/null +++ b/apps/coordinator/tests/test_mosaic_telemetry.py @@ -0,0 +1,426 @@ +"""Tests for Mosaic Stack telemetry integration (mosaic_telemetry module). + +These tests cover the mosaicstack-telemetry SDK integration, NOT the +OpenTelemetry distributed tracing (which is tested in test_telemetry.py). +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest +from fastapi import FastAPI +from mosaicstack_telemetry import ( + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + TaskCompletionEvent, + TaskType, + TelemetryClient, + TelemetryConfig, +) + +from src.mosaic_telemetry import ( + build_task_event, + create_telemetry_config, + get_telemetry_client, +) + +# --------------------------------------------------------------------------- +# TelemetryConfig creation from environment variables +# --------------------------------------------------------------------------- + + +class TestCreateTelemetryConfig: + """Tests for create_telemetry_config helper.""" + + def test_config_reads_enabled_from_env(self) -> None: + """TelemetryConfig should read MOSAIC_TELEMETRY_ENABLED from env.""" + with patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_ENABLED": "true"}, + clear=False, + ): + config = create_telemetry_config() + assert config.enabled is True + + def test_config_disabled_from_env(self) -> None: + """TelemetryConfig should be disabled when env var is false.""" + with patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_ENABLED": "false"}, + clear=False, + ): + config = create_telemetry_config() + assert config.enabled is 
False + + def test_config_reads_server_url_from_env(self) -> None: + """TelemetryConfig should read MOSAIC_TELEMETRY_SERVER_URL from env.""" + with patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com"}, + clear=False, + ): + config = create_telemetry_config() + assert config.server_url == "https://telemetry.example.com" + + def test_config_reads_api_key_from_env(self) -> None: + """TelemetryConfig should read MOSAIC_TELEMETRY_API_KEY from env.""" + api_key = "a" * 64 # 64-char hex string + with patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_API_KEY": api_key}, + clear=False, + ): + config = create_telemetry_config() + assert config.api_key == api_key + + def test_config_reads_instance_id_from_env(self) -> None: + """TelemetryConfig should read MOSAIC_TELEMETRY_INSTANCE_ID from env.""" + instance_id = "12345678-1234-1234-1234-123456789abc" + with patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_INSTANCE_ID": instance_id}, + clear=False, + ): + config = create_telemetry_config() + assert config.instance_id == instance_id + + def test_config_defaults_to_enabled(self) -> None: + """TelemetryConfig should default to enabled when env var is not set.""" + with patch.dict( + "os.environ", + {}, + clear=True, + ): + config = create_telemetry_config() + assert config.enabled is True + + def test_config_logs_validation_warnings_when_enabled(self) -> None: + """Config creation should log warnings for validation errors when enabled.""" + with ( + patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_ENABLED": "true"}, + clear=True, + ), + patch("src.mosaic_telemetry.logger") as mock_logger, + ): + config = create_telemetry_config() + # server_url, api_key, and instance_id are all empty = validation errors + assert config.enabled is True + mock_logger.warning.assert_called_once() + warning_msg = mock_logger.warning.call_args[0][0] + assert "validation issues" in warning_msg + + def test_config_no_warnings_when_disabled(self) -> None: + 
"""Config creation should not log warnings when telemetry is disabled.""" + with ( + patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_ENABLED": "false"}, + clear=True, + ), + patch("src.mosaic_telemetry.logger") as mock_logger, + ): + create_telemetry_config() + mock_logger.warning.assert_not_called() + + def test_config_strips_trailing_slashes(self) -> None: + """TelemetryConfig should strip trailing slashes from server_url.""" + with patch.dict( + "os.environ", + {"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com/"}, + clear=False, + ): + config = create_telemetry_config() + assert config.server_url == "https://telemetry.example.com" + + +# --------------------------------------------------------------------------- +# get_telemetry_client from app state +# --------------------------------------------------------------------------- + + +class TestGetTelemetryClient: + """Tests for get_telemetry_client helper.""" + + def test_returns_client_when_set(self) -> None: + """Should return the telemetry client from app state.""" + app = FastAPI() + mock_client = MagicMock(spec=TelemetryClient) + app.state.mosaic_telemetry = mock_client + + result = get_telemetry_client(app) + assert result is mock_client + + def test_returns_none_when_not_set(self) -> None: + """Should return None when mosaic_telemetry is not in app state.""" + app = FastAPI() + # Do not set app.state.mosaic_telemetry + + result = get_telemetry_client(app) + assert result is None + + def test_returns_none_when_explicitly_none(self) -> None: + """Should return None when mosaic_telemetry is explicitly set to None.""" + app = FastAPI() + app.state.mosaic_telemetry = None + + result = get_telemetry_client(app) + assert result is None + + +# --------------------------------------------------------------------------- +# build_task_event helper +# --------------------------------------------------------------------------- + + +class TestBuildTaskEvent: + """Tests for build_task_event helper.""" + + 
VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc" + + def test_builds_event_with_defaults(self) -> None: + """Should build a TaskCompletionEvent with default values.""" + event = build_task_event(instance_id=self.VALID_INSTANCE_ID) + + assert isinstance(event, TaskCompletionEvent) + assert str(event.instance_id) == self.VALID_INSTANCE_ID + assert event.task_type == TaskType.IMPLEMENTATION + assert event.complexity == Complexity.MEDIUM + assert event.outcome == Outcome.SUCCESS + assert event.harness == Harness.CLAUDE_CODE + assert event.provider == Provider.ANTHROPIC + assert event.language == "typescript" + + def test_builds_event_with_custom_task_type(self) -> None: + """Should respect custom task_type parameter.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + task_type=TaskType.TESTING, + ) + assert event.task_type == TaskType.TESTING + + def test_builds_event_with_custom_outcome(self) -> None: + """Should respect custom outcome parameter.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + outcome=Outcome.FAILURE, + ) + assert event.outcome == Outcome.FAILURE + + def test_builds_event_with_duration(self) -> None: + """Should set duration_ms correctly.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + duration_ms=45000, + ) + assert event.task_duration_ms == 45000 + + def test_builds_event_with_token_counts(self) -> None: + """Should set all token counts correctly.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + estimated_input_tokens=1000, + estimated_output_tokens=500, + actual_input_tokens=1100, + actual_output_tokens=480, + ) + assert event.estimated_input_tokens == 1000 + assert event.estimated_output_tokens == 500 + assert event.actual_input_tokens == 1100 + assert event.actual_output_tokens == 480 + + def test_builds_event_with_cost(self) -> None: + """Should set cost fields correctly.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + 
estimated_cost_micros=50000, + actual_cost_micros=48000, + ) + assert event.estimated_cost_usd_micros == 50000 + assert event.actual_cost_usd_micros == 48000 + + def test_builds_event_with_quality_gates(self) -> None: + """Should set quality gate information correctly.""" + gates_run = [QualityGate.LINT, QualityGate.TEST, QualityGate.BUILD] + gates_failed = [QualityGate.TEST] + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + quality_passed=False, + quality_gates_run=gates_run, + quality_gates_failed=gates_failed, + ) + assert event.quality_gate_passed is False + assert event.quality_gates_run == gates_run + assert event.quality_gates_failed == gates_failed + + def test_builds_event_with_context_info(self) -> None: + """Should set context compaction/rotation/utilization correctly.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + context_compactions=2, + context_rotations=1, + context_utilization=0.75, + ) + assert event.context_compactions == 2 + assert event.context_rotations == 1 + assert event.context_utilization_final == 0.75 + + def test_builds_event_with_retry_count(self) -> None: + """Should set retry count correctly.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + retry_count=3, + ) + assert event.retry_count == 3 + + def test_builds_event_with_custom_language(self) -> None: + """Should allow overriding the default language.""" + event = build_task_event( + instance_id=self.VALID_INSTANCE_ID, + language="python", + ) + assert event.language == "python" + + +# --------------------------------------------------------------------------- +# TelemetryClient lifecycle (disabled mode) +# --------------------------------------------------------------------------- + + +class TestTelemetryDisabledMode: + """Tests for disabled telemetry mode (no HTTP calls).""" + + def test_disabled_client_does_not_start(self) -> None: + """Client start_async should be a no-op when disabled.""" + config = 
TelemetryConfig(enabled=False) + client = TelemetryClient(config) + # Should not raise + assert client.is_running is False + + def test_disabled_client_track_is_noop(self) -> None: + """Tracking events when disabled should silently drop them.""" + config = TelemetryConfig(enabled=False) + client = TelemetryClient(config) + + event = build_task_event( + instance_id="12345678-1234-1234-1234-123456789abc", + ) + # Should not raise, should silently drop + client.track(event) + assert client.queue_size == 0 + + @pytest.mark.asyncio + async def test_disabled_client_start_stop_async(self) -> None: + """Async start/stop should be safe when disabled.""" + config = TelemetryConfig(enabled=False) + client = TelemetryClient(config) + + await client.start_async() + assert client.is_running is False + await client.stop_async() + + +# --------------------------------------------------------------------------- +# Lifespan integration +# --------------------------------------------------------------------------- + + +class TestLifespanIntegration: + """Tests for Mosaic telemetry in the FastAPI lifespan.""" + + @pytest.mark.asyncio + async def test_lifespan_sets_mosaic_telemetry_on_app_state(self) -> None: + """Lifespan should store mosaic_telemetry client on app.state.""" + with patch.dict( + "os.environ", + { + "GITEA_WEBHOOK_SECRET": "test-secret", + "GITEA_URL": "https://git.mosaicstack.dev", + "ANTHROPIC_API_KEY": "test-key", + "MOSAIC_TELEMETRY_ENABLED": "true", + "MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com", + "MOSAIC_TELEMETRY_API_KEY": "a" * 64, + "MOSAIC_TELEMETRY_INSTANCE_ID": "12345678-1234-1234-1234-123456789abc", + "OTEL_ENABLED": "false", + "COORDINATOR_ENABLED": "false", + }, + ): + # Reload config to pick up test env vars + import importlib + + from src import config + importlib.reload(config) + + from src.main import lifespan + + app = FastAPI() + async with lifespan(app) as _state: + client = getattr(app.state, "mosaic_telemetry", None) + assert 
client is not None + assert isinstance(client, TelemetryClient) + + @pytest.mark.asyncio + async def test_lifespan_sets_none_when_disabled(self) -> None: + """Lifespan should set mosaic_telemetry to None when disabled.""" + with patch.dict( + "os.environ", + { + "GITEA_WEBHOOK_SECRET": "test-secret", + "GITEA_URL": "https://git.mosaicstack.dev", + "ANTHROPIC_API_KEY": "test-key", + "MOSAIC_TELEMETRY_ENABLED": "false", + "OTEL_ENABLED": "false", + "COORDINATOR_ENABLED": "false", + }, + ): + import importlib + + from src import config + importlib.reload(config) + + from src.main import lifespan + + app = FastAPI() + async with lifespan(app) as _state: + client = getattr(app.state, "mosaic_telemetry", None) + assert client is None + + @pytest.mark.asyncio + async def test_lifespan_stops_client_on_shutdown(self) -> None: + """Lifespan should call stop_async on shutdown.""" + with patch.dict( + "os.environ", + { + "GITEA_WEBHOOK_SECRET": "test-secret", + "GITEA_URL": "https://git.mosaicstack.dev", + "ANTHROPIC_API_KEY": "test-key", + "MOSAIC_TELEMETRY_ENABLED": "true", + "MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com", + "MOSAIC_TELEMETRY_API_KEY": "a" * 64, + "MOSAIC_TELEMETRY_INSTANCE_ID": "12345678-1234-1234-1234-123456789abc", + "OTEL_ENABLED": "false", + "COORDINATOR_ENABLED": "false", + }, + ): + import importlib + + from src import config + importlib.reload(config) + + from src.main import lifespan + + app = FastAPI() + async with lifespan(app) as _state: + client = app.state.mosaic_telemetry + assert isinstance(client, TelemetryClient) + # Client was started + # After context manager exits, stop_async should have been called + + # After lifespan exits, client should no longer be running + # (stop_async was called in the shutdown section) + assert not client.is_running From 314dd24dce79b96285bb32818524679ab20d3e98 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:36:53 -0600 Subject: [PATCH 02/15] feat(#369): install 
@mosaicstack/telemetry-client in API - Add .npmrc with scoped Gitea npm registry for @mosaicstack packages - Create MosaicTelemetryModule (global, lifecycle-aware) at apps/api/src/mosaic-telemetry/ - Create MosaicTelemetryService wrapping TelemetryClient with convenience methods: trackTaskCompletion, getPrediction, refreshPredictions, eventBuilder - Create mosaic-telemetry.config.ts for env var integration via NestJS ConfigService - Register MosaicTelemetryModule in AppModule - Add 32 unit tests covering module init, service methods, disabled mode, dry-run mode, and lifecycle management Refs #369 Co-Authored-By: Claude Opus 4.6 --- .npmrc | 1 + apps/api/package.json | 1 + apps/api/src/app.module.ts | 2 + apps/api/src/mosaic-telemetry/index.ts | 17 + .../mosaic-telemetry.config.ts | 78 +++ .../mosaic-telemetry.module.spec.ts | 212 ++++++++ .../mosaic-telemetry.module.ts | 37 ++ .../mosaic-telemetry.service.spec.ts | 506 ++++++++++++++++++ .../mosaic-telemetry.service.ts | 164 ++++++ pnpm-lock.yaml | 8 + 10 files changed, 1026 insertions(+) create mode 100644 .npmrc create mode 100644 apps/api/src/mosaic-telemetry/index.ts create mode 100644 apps/api/src/mosaic-telemetry/mosaic-telemetry.config.ts create mode 100644 apps/api/src/mosaic-telemetry/mosaic-telemetry.module.spec.ts create mode 100644 apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts create mode 100644 apps/api/src/mosaic-telemetry/mosaic-telemetry.service.spec.ts create mode 100644 apps/api/src/mosaic-telemetry/mosaic-telemetry.service.ts diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..db95609 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +@mosaicstack:registry=https://git.mosaicstack.dev/api/packages/mosaic/npm/ diff --git a/apps/api/package.json b/apps/api/package.json index ce11f92..0f40211 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -27,6 +27,7 @@ "dependencies": { "@anthropic-ai/sdk": "^0.72.1", "@mosaic/shared": "workspace:*", + 
"@mosaicstack/telemetry-client": "^0.1.0", "@nestjs/axios": "^4.0.1", "@nestjs/bullmq": "^11.0.4", "@nestjs/common": "^11.1.12", diff --git a/apps/api/src/app.module.ts b/apps/api/src/app.module.ts index 43733e3..704067b 100644 --- a/apps/api/src/app.module.ts +++ b/apps/api/src/app.module.ts @@ -37,6 +37,7 @@ import { JobStepsModule } from "./job-steps/job-steps.module"; import { CoordinatorIntegrationModule } from "./coordinator-integration/coordinator-integration.module"; import { FederationModule } from "./federation/federation.module"; import { CredentialsModule } from "./credentials/credentials.module"; +import { MosaicTelemetryModule } from "./mosaic-telemetry"; import { RlsContextInterceptor } from "./common/interceptors/rls-context.interceptor"; @Module({ @@ -97,6 +98,7 @@ import { RlsContextInterceptor } from "./common/interceptors/rls-context.interce CoordinatorIntegrationModule, FederationModule, CredentialsModule, + MosaicTelemetryModule, ], controllers: [AppController, CsrfController], providers: [ diff --git a/apps/api/src/mosaic-telemetry/index.ts b/apps/api/src/mosaic-telemetry/index.ts new file mode 100644 index 0000000..6f1c402 --- /dev/null +++ b/apps/api/src/mosaic-telemetry/index.ts @@ -0,0 +1,17 @@ +/** + * Mosaic Telemetry module — task completion tracking and crowd-sourced predictions. + * + * **Not to be confused with the OpenTelemetry (OTEL) TelemetryModule** at + * `src/telemetry/`, which handles distributed request tracing. 
+ * + * @module mosaic-telemetry + */ + +export { MosaicTelemetryModule } from "./mosaic-telemetry.module"; +export { MosaicTelemetryService } from "./mosaic-telemetry.service"; +export { + loadMosaicTelemetryConfig, + toSdkConfig, + MOSAIC_TELEMETRY_ENV, + type MosaicTelemetryModuleConfig, +} from "./mosaic-telemetry.config"; diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.config.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.config.ts new file mode 100644 index 0000000..f5fa6cf --- /dev/null +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.config.ts @@ -0,0 +1,78 @@ +import type { ConfigService } from "@nestjs/config"; +import type { TelemetryConfig } from "@mosaicstack/telemetry-client"; + +/** + * Configuration interface for the Mosaic Telemetry module. + * Maps environment variables to SDK configuration. + */ +export interface MosaicTelemetryModuleConfig { + /** Whether telemetry collection is enabled. Default: true */ + enabled: boolean; + /** Base URL of the telemetry server */ + serverUrl: string; + /** API key for authentication (64-char hex string) */ + apiKey: string; + /** Instance UUID for this client */ + instanceId: string; + /** If true, log events instead of sending them. Default: false */ + dryRun: boolean; +} + +/** + * Environment variable names used by the Mosaic Telemetry module. + */ +export const MOSAIC_TELEMETRY_ENV = { + ENABLED: "MOSAIC_TELEMETRY_ENABLED", + SERVER_URL: "MOSAIC_TELEMETRY_SERVER_URL", + API_KEY: "MOSAIC_TELEMETRY_API_KEY", + INSTANCE_ID: "MOSAIC_TELEMETRY_INSTANCE_ID", + DRY_RUN: "MOSAIC_TELEMETRY_DRY_RUN", +} as const; + +/** + * Read Mosaic Telemetry configuration from environment variables via NestJS ConfigService. 
+ * + * @param configService - NestJS ConfigService instance + * @returns Parsed module configuration + */ +export function loadMosaicTelemetryConfig( + configService: ConfigService +): MosaicTelemetryModuleConfig { + const enabledRaw = configService.get(MOSAIC_TELEMETRY_ENV.ENABLED, "true"); + const dryRunRaw = configService.get(MOSAIC_TELEMETRY_ENV.DRY_RUN, "false"); + + return { + enabled: enabledRaw.toLowerCase() === "true", + serverUrl: configService.get(MOSAIC_TELEMETRY_ENV.SERVER_URL, ""), + apiKey: configService.get(MOSAIC_TELEMETRY_ENV.API_KEY, ""), + instanceId: configService.get(MOSAIC_TELEMETRY_ENV.INSTANCE_ID, ""), + dryRun: dryRunRaw.toLowerCase() === "true", + }; +} + +/** + * Convert module config to SDK TelemetryConfig format. + * Includes the onError callback for NestJS Logger integration. + * + * @param config - Module configuration + * @param onError - Error callback (typically NestJS Logger) + * @returns SDK-compatible TelemetryConfig + */ +export function toSdkConfig( + config: MosaicTelemetryModuleConfig, + onError?: (error: Error) => void +): TelemetryConfig { + const sdkConfig: TelemetryConfig = { + serverUrl: config.serverUrl, + apiKey: config.apiKey, + instanceId: config.instanceId, + enabled: config.enabled, + dryRun: config.dryRun, + }; + + if (onError) { + sdkConfig.onError = onError; + } + + return sdkConfig; +} diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.spec.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.spec.ts new file mode 100644 index 0000000..37420ec --- /dev/null +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.spec.ts @@ -0,0 +1,212 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { Test, TestingModule } from "@nestjs/testing"; +import { ConfigModule } from "@nestjs/config"; +import { MosaicTelemetryModule } from "./mosaic-telemetry.module"; +import { MosaicTelemetryService } from "./mosaic-telemetry.service"; + +// Mock the telemetry client to avoid real 
HTTP calls +vi.mock("@mosaicstack/telemetry-client", async (importOriginal) => { + const actual = await importOriginal(); + + class MockTelemetryClient { + private _isRunning = false; + + constructor(_config: unknown) { + // no-op + } + + get eventBuilder() { + return { build: vi.fn().mockReturnValue({ event_id: "test-event-id" }) }; + } + + start(): void { + this._isRunning = true; + } + + async stop(): Promise { + this._isRunning = false; + } + + track(_event: unknown): void { + // no-op + } + + getPrediction(_query: unknown): unknown { + return null; + } + + async refreshPredictions(_queries: unknown): Promise { + // no-op + } + + get queueSize(): number { + return 0; + } + + get isRunning(): boolean { + return this._isRunning; + } + } + + return { + ...actual, + TelemetryClient: MockTelemetryClient, + }; +}); + +describe("MosaicTelemetryModule", () => { + let module: TestingModule; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe("module initialization", () => { + it("should compile the module successfully", async () => { + module = await Test.createTestingModule({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: [], + load: [ + () => ({ + MOSAIC_TELEMETRY_ENABLED: "false", + }), + ], + }), + MosaicTelemetryModule, + ], + }).compile(); + + expect(module).toBeDefined(); + await module.close(); + }); + + it("should provide MosaicTelemetryService", async () => { + module = await Test.createTestingModule({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: [], + load: [ + () => ({ + MOSAIC_TELEMETRY_ENABLED: "false", + }), + ], + }), + MosaicTelemetryModule, + ], + }).compile(); + + const service = module.get(MosaicTelemetryService); + expect(service).toBeDefined(); + expect(service).toBeInstanceOf(MosaicTelemetryService); + + await module.close(); + }); + + it("should export MosaicTelemetryService for injection in other modules", async () => { + module = await Test.createTestingModule({ + imports: [ + 
ConfigModule.forRoot({ + isGlobal: true, + envFilePath: [], + load: [ + () => ({ + MOSAIC_TELEMETRY_ENABLED: "false", + }), + ], + }), + MosaicTelemetryModule, + ], + }).compile(); + + const service = module.get(MosaicTelemetryService); + expect(service).toBeDefined(); + + await module.close(); + }); + }); + + describe("lifecycle integration", () => { + it("should initialize service on module init when enabled", async () => { + module = await Test.createTestingModule({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: [], + load: [ + () => ({ + MOSAIC_TELEMETRY_ENABLED: "true", + MOSAIC_TELEMETRY_SERVER_URL: "https://tel.test.local", + MOSAIC_TELEMETRY_API_KEY: "a".repeat(64), + MOSAIC_TELEMETRY_INSTANCE_ID: "550e8400-e29b-41d4-a716-446655440000", + MOSAIC_TELEMETRY_DRY_RUN: "false", + }), + ], + }), + MosaicTelemetryModule, + ], + }).compile(); + + await module.init(); + + const service = module.get(MosaicTelemetryService); + expect(service.isEnabled).toBe(true); + + await module.close(); + }); + + it("should not start client when disabled via env", async () => { + module = await Test.createTestingModule({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: [], + load: [ + () => ({ + MOSAIC_TELEMETRY_ENABLED: "false", + }), + ], + }), + MosaicTelemetryModule, + ], + }).compile(); + + await module.init(); + + const service = module.get(MosaicTelemetryService); + expect(service.isEnabled).toBe(false); + + await module.close(); + }); + + it("should cleanly shut down on module destroy", async () => { + module = await Test.createTestingModule({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: [], + load: [ + () => ({ + MOSAIC_TELEMETRY_ENABLED: "true", + MOSAIC_TELEMETRY_SERVER_URL: "https://tel.test.local", + MOSAIC_TELEMETRY_API_KEY: "a".repeat(64), + MOSAIC_TELEMETRY_INSTANCE_ID: "550e8400-e29b-41d4-a716-446655440000", + MOSAIC_TELEMETRY_DRY_RUN: "false", + }), + ], + }), + MosaicTelemetryModule, + ], + 
}).compile(); + + await module.init(); + + const service = module.get(MosaicTelemetryService); + expect(service.isEnabled).toBe(true); + + await expect(module.close()).resolves.not.toThrow(); + }); + }); +}); diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts new file mode 100644 index 0000000..a321dda --- /dev/null +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts @@ -0,0 +1,37 @@ +import { Module, Global } from "@nestjs/common"; +import { ConfigModule } from "@nestjs/config"; +import { MosaicTelemetryService } from "./mosaic-telemetry.service"; + +/** + * Global module providing Mosaic Telemetry integration via @mosaicstack/telemetry-client. + * + * Tracks task completion events and provides crowd-sourced predictions for + * token usage, cost estimation, and quality metrics. + * + * **This is separate from the OpenTelemetry (OTEL) TelemetryModule** which + * handles distributed request tracing. This module is specifically for + * Mosaic Stack's own telemetry aggregation service. 
+ * + * Configuration via environment variables: + * - MOSAIC_TELEMETRY_ENABLED (boolean, default: true) + * - MOSAIC_TELEMETRY_SERVER_URL (string) + * - MOSAIC_TELEMETRY_API_KEY (string, 64-char hex) + * - MOSAIC_TELEMETRY_INSTANCE_ID (string, UUID) + * - MOSAIC_TELEMETRY_DRY_RUN (boolean, default: false) + * + * @example + * ```typescript + * // In any service (no need to import module — it's global): + * @Injectable() + * export class MyService { + * constructor(private readonly telemetry: MosaicTelemetryService) {} + * } + * ``` + */ +@Global() +@Module({ + imports: [ConfigModule], + providers: [MosaicTelemetryService], + exports: [MosaicTelemetryService], +}) +export class MosaicTelemetryModule {} diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.service.spec.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.service.spec.ts new file mode 100644 index 0000000..a84e84a --- /dev/null +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.service.spec.ts @@ -0,0 +1,506 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { ConfigService } from "@nestjs/config"; +import { MOSAIC_TELEMETRY_ENV } from "./mosaic-telemetry.config"; +import type { + TaskCompletionEvent, + PredictionQuery, + PredictionResponse, +} from "@mosaicstack/telemetry-client"; +import { TaskType, Complexity, Provider, Outcome } from "@mosaicstack/telemetry-client"; + +// Track mock instances created during tests +const mockStartFn = vi.fn(); +const mockStopFn = vi.fn().mockResolvedValue(undefined); +const mockTrackFn = vi.fn(); +const mockGetPredictionFn = vi.fn().mockReturnValue(null); +const mockRefreshPredictionsFn = vi.fn().mockResolvedValue(undefined); +const mockBuildFn = vi.fn().mockReturnValue({ event_id: "test-event-id" }); + +vi.mock("@mosaicstack/telemetry-client", async (importOriginal) => { + const actual = await importOriginal(); + + class MockTelemetryClient { + private _isRunning = false; + + constructor(_config: unknown) { + // no-op + } 
+ + get eventBuilder() { + return { build: mockBuildFn }; + } + + start(): void { + this._isRunning = true; + mockStartFn(); + } + + async stop(): Promise { + this._isRunning = false; + await mockStopFn(); + } + + track(event: unknown): void { + mockTrackFn(event); + } + + getPrediction(query: unknown): unknown { + return mockGetPredictionFn(query); + } + + async refreshPredictions(queries: unknown): Promise { + await mockRefreshPredictionsFn(queries); + } + + get queueSize(): number { + return 0; + } + + get isRunning(): boolean { + return this._isRunning; + } + } + + return { + ...actual, + TelemetryClient: MockTelemetryClient, + }; +}); + +// Lazy-import the service after the mock is in place +const { MosaicTelemetryService } = await import("./mosaic-telemetry.service"); + +/** + * Create a ConfigService mock that returns environment values from the provided map. + */ +function createConfigService( + envMap: Record = {}, +): ConfigService { + const configService = { + get: vi.fn((key: string, defaultValue?: string): string => { + const value = envMap[key]; + if (value !== undefined) { + return value; + } + return defaultValue ?? ""; + }), + } as unknown as ConfigService; + return configService; +} + +/** + * Default env config for an enabled telemetry service. + */ +const ENABLED_CONFIG: Record = { + [MOSAIC_TELEMETRY_ENV.ENABLED]: "true", + [MOSAIC_TELEMETRY_ENV.SERVER_URL]: "https://tel.test.local", + [MOSAIC_TELEMETRY_ENV.API_KEY]: "a".repeat(64), + [MOSAIC_TELEMETRY_ENV.INSTANCE_ID]: "550e8400-e29b-41d4-a716-446655440000", + [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "false", +}; + +/** + * Create a minimal TaskCompletionEvent for testing. 
+ */ +function createTestEvent(): TaskCompletionEvent { + return { + schema_version: "1.0.0", + event_id: "test-event-123", + timestamp: new Date().toISOString(), + instance_id: "550e8400-e29b-41d4-a716-446655440000", + task_duration_ms: 5000, + task_type: TaskType.FEATURE, + complexity: Complexity.MEDIUM, + harness: "claude-code" as TaskCompletionEvent["harness"], + model: "claude-sonnet-4-20250514", + provider: Provider.ANTHROPIC, + estimated_input_tokens: 1000, + estimated_output_tokens: 500, + actual_input_tokens: 1100, + actual_output_tokens: 450, + estimated_cost_usd_micros: 5000, + actual_cost_usd_micros: 4800, + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + context_compactions: 0, + context_rotations: 0, + context_utilization_final: 0.45, + outcome: Outcome.SUCCESS, + retry_count: 0, + }; +} + +describe("MosaicTelemetryService", () => { + let service: InstanceType; + + afterEach(async () => { + if (service) { + await service.onModuleDestroy(); + } + vi.clearAllMocks(); + }); + + describe("onModuleInit", () => { + it("should initialize the client when enabled with valid config", () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + + service.onModuleInit(); + + expect(mockStartFn).toHaveBeenCalledOnce(); + expect(service.isEnabled).toBe(true); + }); + + it("should not initialize client when disabled", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + + service.onModuleInit(); + + expect(mockStartFn).not.toHaveBeenCalled(); + expect(service.isEnabled).toBe(false); + }); + + it("should disable when server URL is missing", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.SERVER_URL]: "", + }); + service = new MosaicTelemetryService(configService); + + service.onModuleInit(); + 
+ expect(service.isEnabled).toBe(false); + }); + + it("should disable when API key is missing", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.API_KEY]: "", + }); + service = new MosaicTelemetryService(configService); + + service.onModuleInit(); + + expect(service.isEnabled).toBe(false); + }); + + it("should disable when instance ID is missing", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.INSTANCE_ID]: "", + }); + service = new MosaicTelemetryService(configService); + + service.onModuleInit(); + + expect(service.isEnabled).toBe(false); + }); + + it("should log dry-run mode when configured", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true", + }); + service = new MosaicTelemetryService(configService); + + service.onModuleInit(); + + expect(mockStartFn).toHaveBeenCalledOnce(); + }); + }); + + describe("onModuleDestroy", () => { + it("should stop the client on shutdown", async () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + await service.onModuleDestroy(); + + expect(mockStopFn).toHaveBeenCalledOnce(); + }); + + it("should not throw when client is not initialized (disabled)", async () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + await expect(service.onModuleDestroy()).resolves.not.toThrow(); + }); + + it("should not throw when called multiple times", async () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + await service.onModuleDestroy(); + await expect(service.onModuleDestroy()).resolves.not.toThrow(); + }); + }); + + 
describe("trackTaskCompletion", () => { + it("should queue event via client.track() when enabled", () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + const event = createTestEvent(); + service.trackTaskCompletion(event); + + expect(mockTrackFn).toHaveBeenCalledWith(event); + }); + + it("should be a no-op when disabled", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + const event = createTestEvent(); + service.trackTaskCompletion(event); + + expect(mockTrackFn).not.toHaveBeenCalled(); + }); + }); + + describe("getPrediction", () => { + const testQuery: PredictionQuery = { + task_type: TaskType.FEATURE, + model: "claude-sonnet-4-20250514", + provider: Provider.ANTHROPIC, + complexity: Complexity.MEDIUM, + }; + + it("should return cached prediction when available", () => { + const mockPrediction: PredictionResponse = { + prediction: { + input_tokens: { p10: 100, p25: 200, median: 300, p75: 400, p90: 500 }, + output_tokens: { p10: 50, p25: 100, median: 150, p75: 200, p90: 250 }, + cost_usd_micros: { median: 5000 }, + duration_ms: { median: 10000 }, + correction_factors: { input: 1.0, output: 1.0 }, + quality: { gate_pass_rate: 0.95, success_rate: 0.90 }, + }, + metadata: { + sample_size: 100, + fallback_level: 0, + confidence: "high", + last_updated: new Date().toISOString(), + cache_hit: true, + }, + }; + + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + mockGetPredictionFn.mockReturnValueOnce(mockPrediction); + + const result = service.getPrediction(testQuery); + + expect(result).toEqual(mockPrediction); + expect(mockGetPredictionFn).toHaveBeenCalledWith(testQuery); + }); + + it("should return null when disabled", 
() => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + const result = service.getPrediction(testQuery); + + expect(result).toBeNull(); + }); + + it("should return null when no cached prediction exists", () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + mockGetPredictionFn.mockReturnValueOnce(null); + + const result = service.getPrediction(testQuery); + + expect(result).toBeNull(); + }); + }); + + describe("refreshPredictions", () => { + const testQueries: PredictionQuery[] = [ + { + task_type: TaskType.FEATURE, + model: "claude-sonnet-4-20250514", + provider: Provider.ANTHROPIC, + complexity: Complexity.MEDIUM, + }, + ]; + + it("should call client.refreshPredictions when enabled", async () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + await service.refreshPredictions(testQueries); + + expect(mockRefreshPredictionsFn).toHaveBeenCalledWith(testQueries); + }); + + it("should be a no-op when disabled", async () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + await service.refreshPredictions(testQueries); + + expect(mockRefreshPredictionsFn).not.toHaveBeenCalled(); + }); + }); + + describe("eventBuilder", () => { + it("should return EventBuilder when enabled", () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + const builder = service.eventBuilder; + + expect(builder).toBeDefined(); + expect(builder).not.toBeNull(); + expect(typeof 
builder?.build).toBe("function"); + }); + + it("should return null when disabled", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + const builder = service.eventBuilder; + + expect(builder).toBeNull(); + }); + }); + + describe("isEnabled", () => { + it("should return true when client is running", () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + expect(service.isEnabled).toBe(true); + }); + + it("should return false when disabled", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + expect(service.isEnabled).toBe(false); + }); + }); + + describe("queueSize", () => { + it("should return 0 when disabled", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + expect(service.queueSize).toBe(0); + }); + + it("should delegate to client.queueSize when enabled", () => { + const configService = createConfigService(ENABLED_CONFIG); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + expect(service.queueSize).toBe(0); + }); + }); + + describe("disabled mode (comprehensive)", () => { + beforeEach(() => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.ENABLED]: "false", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + }); + + it("should not make any HTTP calls when disabled", () => { + const event = createTestEvent(); + service.trackTaskCompletion(event); + + 
expect(mockTrackFn).not.toHaveBeenCalled(); + expect(mockStartFn).not.toHaveBeenCalled(); + }); + + it("should safely handle all method calls when disabled", async () => { + expect(() => service.trackTaskCompletion(createTestEvent())).not.toThrow(); + expect( + service.getPrediction({ + task_type: TaskType.FEATURE, + model: "test", + provider: Provider.ANTHROPIC, + complexity: Complexity.LOW, + }), + ).toBeNull(); + await expect(service.refreshPredictions([])).resolves.not.toThrow(); + expect(service.eventBuilder).toBeNull(); + expect(service.isEnabled).toBe(false); + expect(service.queueSize).toBe(0); + }); + }); + + describe("dry-run mode", () => { + it("should create client in dry-run mode", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + expect(mockStartFn).toHaveBeenCalledOnce(); + expect(service.isEnabled).toBe(true); + }); + + it("should accept events in dry-run mode", () => { + const configService = createConfigService({ + ...ENABLED_CONFIG, + [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true", + }); + service = new MosaicTelemetryService(configService); + service.onModuleInit(); + + const event = createTestEvent(); + service.trackTaskCompletion(event); + + expect(mockTrackFn).toHaveBeenCalledWith(event); + }); + }); +}); diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.service.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.service.ts new file mode 100644 index 0000000..a1a737f --- /dev/null +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.service.ts @@ -0,0 +1,164 @@ +import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { + TelemetryClient, + type TaskCompletionEvent, + type PredictionQuery, + type PredictionResponse, + type EventBuilder, +} from "@mosaicstack/telemetry-client"; +import { + 
loadMosaicTelemetryConfig, + toSdkConfig, + type MosaicTelemetryModuleConfig, +} from "./mosaic-telemetry.config"; + +/** + * NestJS service wrapping the @mosaicstack/telemetry-client SDK. + * + * Provides convenience methods for tracking task completions and reading + * crowd-sourced predictions. When telemetry is disabled via + * MOSAIC_TELEMETRY_ENABLED=false, all methods are safe no-ops. + * + * This service is provided globally by MosaicTelemetryModule — any service + * can inject it without importing the module explicitly. + * + * @example + * ```typescript + * @Injectable() + * export class TasksService { + * constructor(private readonly telemetry: MosaicTelemetryService) {} + * + * async completeTask(taskId: string): Promise { + * // ... complete the task ... + * const event = this.telemetry.eventBuilder.build({ ... }); + * this.telemetry.trackTaskCompletion(event); + * } + * } + * ``` + */ +@Injectable() +export class MosaicTelemetryService implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(MosaicTelemetryService.name); + private client: TelemetryClient | null = null; + private config: MosaicTelemetryModuleConfig | null = null; + + constructor(private readonly configService: ConfigService) {} + + /** + * Initialize the telemetry client on module startup. + * Reads configuration from environment variables and starts background submission. + */ + onModuleInit(): void { + this.config = loadMosaicTelemetryConfig(this.configService); + + if (!this.config.enabled) { + this.logger.log("Mosaic Telemetry is disabled"); + return; + } + + if (!this.config.serverUrl || !this.config.apiKey || !this.config.instanceId) { + this.logger.warn( + "Mosaic Telemetry is enabled but missing configuration " + + "(MOSAIC_TELEMETRY_SERVER_URL, MOSAIC_TELEMETRY_API_KEY, or MOSAIC_TELEMETRY_INSTANCE_ID). " + + "Telemetry will remain disabled." 
+ ); + this.config = { ...this.config, enabled: false }; + return; + } + + const sdkConfig = toSdkConfig(this.config, (error: Error) => { + this.logger.error(`Telemetry client error: ${error.message}`, error.stack); + }); + + this.client = new TelemetryClient(sdkConfig); + this.client.start(); + + const mode = this.config.dryRun ? "dry-run" : "live"; + this.logger.log(`Mosaic Telemetry client started (${mode}) -> ${this.config.serverUrl}`); + } + + /** + * Stop the telemetry client on module shutdown. + * Flushes any remaining queued events before stopping. + */ + async onModuleDestroy(): Promise { + if (this.client) { + this.logger.log("Stopping Mosaic Telemetry client..."); + await this.client.stop(); + this.client = null; + this.logger.log("Mosaic Telemetry client stopped"); + } + } + + /** + * Queue a task completion event for batch submission. + * No-op when telemetry is disabled. + * + * @param event - The task completion event to track + */ + trackTaskCompletion(event: TaskCompletionEvent): void { + if (!this.client) { + return; + } + this.client.track(event); + } + + /** + * Get a cached prediction for the given query. + * Returns null when telemetry is disabled or if not cached/expired. + * + * @param query - The prediction query parameters + * @returns Cached prediction response, or null + */ + getPrediction(query: PredictionQuery): PredictionResponse | null { + if (!this.client) { + return null; + } + return this.client.getPrediction(query); + } + + /** + * Force-refresh predictions from the telemetry server. + * No-op when telemetry is disabled. + * + * @param queries - Array of prediction queries to refresh + */ + async refreshPredictions(queries: PredictionQuery[]): Promise { + if (!this.client) { + return; + } + await this.client.refreshPredictions(queries); + } + + /** + * Get the EventBuilder for constructing TaskCompletionEvent objects. + * Returns null when telemetry is disabled. 
+ * + * @returns EventBuilder instance, or null if disabled + */ + get eventBuilder(): EventBuilder | null { + if (!this.client) { + return null; + } + return this.client.eventBuilder; + } + + /** + * Whether the telemetry client is currently active and running. + */ + get isEnabled(): boolean { + return this.client?.isRunning ?? false; + } + + /** + * Number of events currently queued for submission. + * Returns 0 when telemetry is disabled. + */ + get queueSize(): number { + if (!this.client) { + return 0; + } + return this.client.queueSize; + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4f24087..8450600 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -66,6 +66,9 @@ importers: '@mosaic/shared': specifier: workspace:* version: link:../../packages/shared + '@mosaicstack/telemetry-client': + specifier: ^0.1.0 + version: 0.1.0 '@nestjs/axios': specifier: ^4.0.1 version: 4.0.1(@nestjs/common@11.1.12(class-transformer@0.5.1)(class-validator@0.14.3)(reflect-metadata@0.2.2)(rxjs@7.8.2))(axios@1.13.5)(rxjs@7.8.2) @@ -1500,6 +1503,9 @@ packages: '@mermaid-js/parser@0.6.3': resolution: {integrity: sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==} + '@mosaicstack/telemetry-client@0.1.0': + resolution: {integrity: sha512-j78u9QDIAhTPzGfi9hfiM1wIhw9DUsjshGQ8PrXBdMcp22hfocdmjys4VwwvGiHYTn45um9rY04H4W1NdCaMiA==, tarball: https://git.mosaicstack.dev/api/packages/mosaic/npm/%40mosaicstack%2Ftelemetry-client/-/0.1.0/telemetry-client-0.1.0.tgz} + '@mrleebo/prisma-ast@0.13.1': resolution: {integrity: sha512-XyroGQXcHrZdvmrGJvsA9KNeOOgGMg1Vg9OlheUsBOSKznLMDl+YChxbkboRHvtFYJEMRYmlV3uoo/njCw05iw==} engines: {node: '>=16'} @@ -7732,6 +7738,8 @@ snapshots: dependencies: langium: 3.3.1 + '@mosaicstack/telemetry-client@0.1.0': {} + '@mrleebo/prisma-ast@0.13.1': dependencies: chevrotain: 10.5.0 From 24c21f45b3190ca5d99d06075b3351d99e35aa35 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:40:06 -0600 Subject: 
[PATCH 03/15] feat(#374): add telemetry config to docker-compose and .env - Add MOSAIC_TELEMETRY_* variables to .env.example with descriptions - Pass telemetry env vars to api service in production compose - Pass telemetry env vars to coordinator service in dev and swarm composes - Swarm composes default to production URL (https://tel-api.mosaicstack.dev) - Dev compose includes commented-out telemetry-api service placeholder - All compose files default MOSAIC_TELEMETRY_ENABLED to false for safety Refs #374 Co-Authored-By: Claude Opus 4.6 --- .env.example | 26 ++++++++++++++++++++++++++ docker-compose.swarm.portainer.yml | 12 ++++++++++++ docker-compose.swarm.yml | 12 ++++++++++++ docker-compose.yml | 6 ++++++ docker/docker-compose.yml | 29 +++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+) diff --git a/.env.example b/.env.example index 9ca59fd..e6cee35 100644 --- a/.env.example +++ b/.env.example @@ -350,6 +350,32 @@ OLLAMA_MODEL=llama3.1:latest # Get your API key from: https://platform.openai.com/api-keys # OPENAI_API_KEY=sk-... +# ====================== +# Mosaic Telemetry (Task Completion Tracking & Predictions) +# ====================== +# Telemetry tracks task completion patterns to provide time estimates and predictions. +# Data is sent to the Mosaic Telemetry API (a separate service). 
+ +# Master switch: set to false to completely disable telemetry (no HTTP calls will be made) +MOSAIC_TELEMETRY_ENABLED=true + +# URL of the telemetry API server +# For Docker Compose (internal): http://telemetry-api:8000 +# For production/swarm: https://tel-api.mosaicstack.dev +MOSAIC_TELEMETRY_SERVER_URL=http://telemetry-api:8000 + +# API key for authenticating with the telemetry server +# Generate with: openssl rand -hex 32 +MOSAIC_TELEMETRY_API_KEY=your-64-char-hex-api-key-here + +# Unique identifier for this Mosaic Stack instance +# Generate with: uuidgen or python -c "import uuid; print(uuid.uuid4())" +MOSAIC_TELEMETRY_INSTANCE_ID=your-instance-uuid-here + +# Dry run mode: set to true to log telemetry events to console instead of sending HTTP requests +# Useful for development and debugging telemetry payloads +MOSAIC_TELEMETRY_DRY_RUN=false + # ====================== # Logging & Debugging # ====================== diff --git a/docker-compose.swarm.portainer.yml b/docker-compose.swarm.portainer.yml index 855d15d..7ad1e55 100644 --- a/docker-compose.swarm.portainer.yml +++ b/docker-compose.swarm.portainer.yml @@ -255,6 +255,12 @@ services: COORDINATOR_POLL_INTERVAL: ${COORDINATOR_POLL_INTERVAL:-5.0} COORDINATOR_MAX_CONCURRENT_AGENTS: ${COORDINATOR_MAX_CONCURRENT_AGENTS:-10} COORDINATOR_ENABLED: ${COORDINATOR_ENABLED:-true} + # Telemetry (task completion tracking & predictions) + MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} healthcheck: test: [ @@ -295,6 +301,12 @@ services: OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT:-http://ollama:11434} OPENBAO_ADDR: ${OPENBAO_ADDR:-http://openbao:8200} ENCRYPTION_KEY: ${ENCRYPTION_KEY} + # Telemetry (task completion tracking & predictions) + 
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} healthcheck: test: [ diff --git a/docker-compose.swarm.yml b/docker-compose.swarm.yml index 69efcbf..1d3b1af 100644 --- a/docker-compose.swarm.yml +++ b/docker-compose.swarm.yml @@ -283,6 +283,12 @@ services: COORDINATOR_POLL_INTERVAL: ${COORDINATOR_POLL_INTERVAL:-5.0} COORDINATOR_MAX_CONCURRENT_AGENTS: ${COORDINATOR_MAX_CONCURRENT_AGENTS:-10} COORDINATOR_ENABLED: ${COORDINATOR_ENABLED:-true} + # Telemetry (task completion tracking & predictions) + MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} healthcheck: test: [ @@ -324,6 +330,12 @@ services: OPENBAO_ADDR: ${OPENBAO_ADDR:-http://openbao:8200} ORCHESTRATOR_URL: ${ORCHESTRATOR_URL:-http://orchestrator:3001} ENCRYPTION_KEY: ${ENCRYPTION_KEY} + # Telemetry (task completion tracking & predictions) + MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} healthcheck: test: [ diff --git a/docker-compose.yml b/docker-compose.yml index e88da0a..c6b4eb5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -377,6 +377,12 @@ services: OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT:-http://ollama:11434} # OpenBao 
(optional) OPENBAO_ADDR: ${OPENBAO_ADDR:-http://openbao:8200} + # Telemetry (task completion tracking & predictions) + MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-http://telemetry-api:8000} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} volumes: - openbao_init:/openbao/init:ro ports: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 465ecc1..1873e78 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -51,6 +51,12 @@ services: LOG_LEVEL: ${LOG_LEVEL:-info} HOST: 0.0.0.0 PORT: 8000 + # Telemetry (task completion tracking & predictions) + MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-http://telemetry-api:8000} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} ports: - "8000:8000" healthcheck: @@ -122,6 +128,29 @@ services: com.mosaic.service: "secrets-init" com.mosaic.description: "OpenBao auto-initialization sidecar" + # ====================== + # Telemetry API (Optional - for local development) + # ====================== + # Uncomment to run the telemetry API locally for self-contained development. + # For production, use an external telemetry API URL instead. 
+ # telemetry-api: + # image: git.mosaicstack.dev/mosaic/telemetry-api:latest + # container_name: mosaic-telemetry-api + # restart: unless-stopped + # environment: + # HOST: 0.0.0.0 + # PORT: 8000 + # ports: + # - "8001:8000" + # healthcheck: + # test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + # interval: 30s + # timeout: 10s + # retries: 3 + # start_period: 10s + # networks: + # - mosaic-network + volumes: postgres_data: name: mosaic-postgres-data From fcecf3654ba5d2b21fd3858a9275b886644621d1 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:44:29 -0600 Subject: [PATCH 04/15] feat(#371): track LLM task completions via Mosaic Telemetry - Create LlmTelemetryTrackerService for non-blocking event emission - Normalize token usage across Anthropic, OpenAI, Ollama providers - Add cost table with per-token pricing in microdollars - Instrument chat, chatStream, and embed methods - Infer task type from calling context - Aggregate streaming tokens after stream ends with fallback estimation - Add 69 unit tests for tracker service, cost table, and LLM service Refs #371 Co-Authored-By: Claude Opus 4.6 --- apps/api/src/llm/llm-cost-table.ts | 106 ++++ .../llm/llm-telemetry-tracker.service.spec.ts | 491 ++++++++++++++++++ .../src/llm/llm-telemetry-tracker.service.ts | 197 +++++++ apps/api/src/llm/llm.module.ts | 3 +- apps/api/src/llm/llm.service.spec.ts | 175 +++++++ apps/api/src/llm/llm.service.ts | 139 ++++- 6 files changed, 1103 insertions(+), 8 deletions(-) create mode 100644 apps/api/src/llm/llm-cost-table.ts create mode 100644 apps/api/src/llm/llm-telemetry-tracker.service.spec.ts create mode 100644 apps/api/src/llm/llm-telemetry-tracker.service.ts diff --git a/apps/api/src/llm/llm-cost-table.ts b/apps/api/src/llm/llm-cost-table.ts new file mode 100644 index 0000000..4aab2a9 --- /dev/null +++ b/apps/api/src/llm/llm-cost-table.ts @@ -0,0 +1,106 @@ +/** + * LLM Cost Table + * + * Maps model names to per-token costs in microdollars (USD * 
1,000,000). + * For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token. + * + * Costs are split into input (prompt) and output (completion) pricing. + * Ollama models run locally and are free (0 cost). + */ + +/** + * Per-token cost in microdollars for a single model. + */ +export interface ModelCost { + /** Cost per input token in microdollars */ + inputPerToken: number; + /** Cost per output token in microdollars */ + outputPerToken: number; +} + +/** + * Cost table mapping model name prefixes to per-token pricing. + * + * Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929". + * More specific prefixes are checked first (longest match wins). + * + * Prices sourced from provider pricing pages as of 2026-02. + */ +const MODEL_COSTS: Record = { + // Anthropic Claude models (per-token microdollars) + // claude-sonnet-4-5: $3/M input, $15/M output + "claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 }, + // claude-opus-4: $15/M input, $75/M output + "claude-opus-4": { inputPerToken: 15, outputPerToken: 75 }, + // claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output + "claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 }, + "claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 }, + // claude-3-5-sonnet: $3/M input, $15/M output + "claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 }, + // claude-3-opus: $15/M input, $75/M output + "claude-3-opus": { inputPerToken: 15, outputPerToken: 75 }, + // claude-3-sonnet: $3/M input, $15/M output + "claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 }, + // claude-3-haiku: $0.25/M input, $1.25/M output + "claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 }, + + // OpenAI models (per-token microdollars) + // gpt-4o: $2.50/M input, $10/M output + "gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 }, + "gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 }, + // gpt-4-turbo: $10/M input, $30/M 
output + "gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 }, + // gpt-4: $30/M input, $60/M output + "gpt-4": { inputPerToken: 30, outputPerToken: 60 }, + // gpt-3.5-turbo: $0.50/M input, $1.50/M output + "gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 }, + + // Ollama / local models: free + // These are catch-all entries; any model not matched above falls through to getModelCost default +}; + +/** + * Sorted model prefixes from longest to shortest for greedy prefix matching. + * Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku". + */ +const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length); + +/** + * Look up per-token cost for a given model name. + * + * Uses longest-prefix matching: the model name is compared against known + * prefixes from longest to shortest. If no prefix matches, returns zero cost + * (assumes local/free model). + * + * @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o") + * @returns Per-token cost in microdollars + */ +export function getModelCost(modelName: string): ModelCost { + const normalized = modelName.toLowerCase(); + + for (const prefix of SORTED_PREFIXES) { + if (normalized.startsWith(prefix)) { + return MODEL_COSTS[prefix]; + } + } + + // Unknown or local model — assume free + return { inputPerToken: 0, outputPerToken: 0 }; +} + +/** + * Calculate total cost in microdollars for a given model and token counts. 
+ * + * @param modelName - Full model name + * @param inputTokens - Number of input (prompt) tokens + * @param outputTokens - Number of output (completion) tokens + * @returns Total cost in microdollars (USD * 1,000,000) + */ +export function calculateCostMicrodollars( + modelName: string, + inputTokens: number, + outputTokens: number +): number { + const cost = getModelCost(modelName); + return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens); +} diff --git a/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts b/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts new file mode 100644 index 0000000..0f43489 --- /dev/null +++ b/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts @@ -0,0 +1,491 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { Test, TestingModule } from "@nestjs/testing"; +import { + TaskType, + Complexity, + Harness, + Provider, + Outcome, +} from "@mosaicstack/telemetry-client"; +import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client"; +import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service"; +import { + LlmTelemetryTrackerService, + estimateTokens, + mapProviderType, + mapHarness, + inferTaskType, +} from "./llm-telemetry-tracker.service"; +import type { LlmCompletionParams } from "./llm-telemetry-tracker.service"; +import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table"; + +// ---------- Cost Table Tests ---------- + +describe("llm-cost-table", () => { + describe("getModelCost", () => { + it("should return cost for claude-sonnet-4-5 models", () => { + const cost = getModelCost("claude-sonnet-4-5-20250929"); + expect(cost.inputPerToken).toBe(3); + expect(cost.outputPerToken).toBe(15); + }); + + it("should return cost for claude-opus-4 models", () => { + const cost = getModelCost("claude-opus-4-6"); + expect(cost.inputPerToken).toBe(15); + expect(cost.outputPerToken).toBe(75); + }); + + 
it("should return cost for claude-haiku-4-5 models", () => { + const cost = getModelCost("claude-haiku-4-5-20251001"); + expect(cost.inputPerToken).toBe(0.8); + expect(cost.outputPerToken).toBe(4); + }); + + it("should return cost for gpt-4o", () => { + const cost = getModelCost("gpt-4o"); + expect(cost.inputPerToken).toBe(2.5); + expect(cost.outputPerToken).toBe(10); + }); + + it("should return cost for gpt-4o-mini (longer prefix matches first)", () => { + const cost = getModelCost("gpt-4o-mini"); + expect(cost.inputPerToken).toBe(0.15); + expect(cost.outputPerToken).toBe(0.6); + }); + + it("should return zero cost for unknown/local models", () => { + const cost = getModelCost("llama3.2"); + expect(cost.inputPerToken).toBe(0); + expect(cost.outputPerToken).toBe(0); + }); + + it("should return zero cost for ollama models", () => { + const cost = getModelCost("mistral:7b"); + expect(cost.inputPerToken).toBe(0); + expect(cost.outputPerToken).toBe(0); + }); + + it("should be case-insensitive", () => { + const cost = getModelCost("Claude-Sonnet-4-5-20250929"); + expect(cost.inputPerToken).toBe(3); + }); + }); + + describe("calculateCostMicrodollars", () => { + it("should calculate cost for claude-sonnet-4-5 with token counts", () => { + // 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500 + const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500); + expect(cost).toBe(10500); + }); + + it("should return 0 for local models", () => { + const cost = calculateCostMicrodollars("llama3.2", 1000, 500); + expect(cost).toBe(0); + }); + + it("should return 0 when token counts are 0", () => { + const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0); + expect(cost).toBe(0); + }); + + it("should round the result to integer microdollars", () => { + // gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5 + const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7); + expect(cost).toBe(5); + }); + }); +}); + +// 
---------- Helper Function Tests ---------- + +describe("helper functions", () => { + describe("estimateTokens", () => { + it("should estimate ~1 token per 4 characters", () => { + expect(estimateTokens("abcd")).toBe(1); + expect(estimateTokens("abcdefgh")).toBe(2); + }); + + it("should round up for partial tokens", () => { + expect(estimateTokens("abc")).toBe(1); + expect(estimateTokens("abcde")).toBe(2); + }); + + it("should return 0 for empty string", () => { + expect(estimateTokens("")).toBe(0); + }); + }); + + describe("mapProviderType", () => { + it("should map claude to ANTHROPIC", () => { + expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC); + }); + + it("should map openai to OPENAI", () => { + expect(mapProviderType("openai")).toBe(Provider.OPENAI); + }); + + it("should map ollama to OLLAMA", () => { + expect(mapProviderType("ollama")).toBe(Provider.OLLAMA); + }); + }); + + describe("mapHarness", () => { + it("should map ollama to OLLAMA_LOCAL", () => { + expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL); + }); + + it("should map claude to API_DIRECT", () => { + expect(mapHarness("claude")).toBe(Harness.API_DIRECT); + }); + + it("should map openai to API_DIRECT", () => { + expect(mapHarness("openai")).toBe(Harness.API_DIRECT); + }); + }); + + describe("inferTaskType", () => { + it("should return IMPLEMENTATION for embed operation", () => { + expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION); + }); + + it("should return UNKNOWN when no context provided for chat", () => { + expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN); + }); + + it("should return PLANNING for brain context", () => { + expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING); + }); + + it("should return PLANNING for planning context", () => { + expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING); + }); + + it("should return CODE_REVIEW for review context", () => { + expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW); + 
}); + + it("should return TESTING for test context", () => { + expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING); + }); + + it("should return DEBUGGING for debug context", () => { + expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING); + }); + + it("should return REFACTORING for refactor context", () => { + expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING); + }); + + it("should return DOCUMENTATION for doc context", () => { + expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION); + }); + + it("should return CONFIGURATION for config context", () => { + expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION); + }); + + it("should return SECURITY_AUDIT for security context", () => { + expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT); + }); + + it("should return IMPLEMENTATION for chat context", () => { + expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION); + }); + + it("should be case-insensitive", () => { + expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING); + }); + + it("should return UNKNOWN for unrecognized context", () => { + expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN); + }); + }); +}); + +// ---------- LlmTelemetryTrackerService Tests ---------- + +describe("LlmTelemetryTrackerService", () => { + let service: LlmTelemetryTrackerService; + let mockTelemetryService: { + eventBuilder: { build: ReturnType } | null; + trackTaskCompletion: ReturnType; + isEnabled: boolean; + }; + + const mockEvent: TaskCompletionEvent = { + instance_id: "test-instance", + event_id: "test-event", + schema_version: "1.0.0", + timestamp: new Date().toISOString(), + task_duration_ms: 1000, + task_type: TaskType.IMPLEMENTATION, + complexity: Complexity.LOW, + harness: Harness.API_DIRECT, + model: "claude-sonnet-4-5-20250929", + provider: Provider.ANTHROPIC, + estimated_input_tokens: 100, + 
estimated_output_tokens: 200, + actual_input_tokens: 100, + actual_output_tokens: 200, + estimated_cost_usd_micros: 3300, + actual_cost_usd_micros: 3300, + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + context_compactions: 0, + context_rotations: 0, + context_utilization_final: 0, + outcome: Outcome.SUCCESS, + retry_count: 0, + }; + + beforeEach(async () => { + mockTelemetryService = { + eventBuilder: { + build: vi.fn().mockReturnValue(mockEvent), + }, + trackTaskCompletion: vi.fn(), + isEnabled: true, + }; + + const module: TestingModule = await Test.createTestingModule({ + providers: [ + LlmTelemetryTrackerService, + { + provide: MosaicTelemetryService, + useValue: mockTelemetryService, + }, + ], + }).compile(); + + service = module.get(LlmTelemetryTrackerService); + }); + + it("should be defined", () => { + expect(service).toBeDefined(); + }); + + describe("trackLlmCompletion", () => { + const baseParams: LlmCompletionParams = { + model: "claude-sonnet-4-5-20250929", + providerType: "claude", + operation: "chat", + durationMs: 1200, + inputTokens: 150, + outputTokens: 300, + callingContext: "chat", + success: true, + }; + + it("should build and track a telemetry event for Anthropic provider", () => { + service.trackLlmCompletion(baseParams); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + task_duration_ms: 1200, + task_type: TaskType.IMPLEMENTATION, + complexity: Complexity.LOW, + harness: Harness.API_DIRECT, + model: "claude-sonnet-4-5-20250929", + provider: Provider.ANTHROPIC, + actual_input_tokens: 150, + actual_output_tokens: 300, + outcome: Outcome.SUCCESS, + }), + ); + + expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent); + }); + + it("should build and track a telemetry event for OpenAI provider", () => { + service.trackLlmCompletion({ + ...baseParams, + model: "gpt-4o", + providerType: "openai", + }); + + 
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + model: "gpt-4o", + provider: Provider.OPENAI, + harness: Harness.API_DIRECT, + }), + ); + }); + + it("should build and track a telemetry event for Ollama provider", () => { + service.trackLlmCompletion({ + ...baseParams, + model: "llama3.2", + providerType: "ollama", + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + provider: Provider.OLLAMA, + harness: Harness.OLLAMA_LOCAL, + }), + ); + }); + + it("should calculate cost in microdollars correctly", () => { + service.trackLlmCompletion(baseParams); + + // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950 + const expectedCost = 4950; + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + estimated_cost_usd_micros: expectedCost, + actual_cost_usd_micros: expectedCost, + }), + ); + }); + + it("should calculate zero cost for ollama models", () => { + service.trackLlmCompletion({ + ...baseParams, + model: "llama3.2", + providerType: "ollama", + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + estimated_cost_usd_micros: 0, + actual_cost_usd_micros: 0, + }), + ); + }); + + it("should track FAILURE outcome when success is false", () => { + service.trackLlmCompletion({ + ...baseParams, + success: false, + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + outcome: Outcome.FAILURE, + }), + ); + }); + + it("should infer task type from calling context", () => { + service.trackLlmCompletion({ + ...baseParams, + callingContext: "brain", + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + task_type: TaskType.PLANNING, + }), + ); + }); + + it("should set empty quality gates arrays for direct LLM calls", () => { + 
service.trackLlmCompletion(baseParams); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + }), + ); + }); + + it("should silently skip when telemetry is disabled (eventBuilder is null)", () => { + mockTelemetryService.eventBuilder = null; + + // Should not throw + service.trackLlmCompletion(baseParams); + + expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled(); + }); + + it("should not throw when eventBuilder.build throws an error", () => { + mockTelemetryService.eventBuilder = { + build: vi.fn().mockImplementation(() => { + throw new Error("Build failed"); + }), + }; + + // Should not throw + expect(() => service.trackLlmCompletion(baseParams)).not.toThrow(); + }); + + it("should not throw when trackTaskCompletion throws an error", () => { + mockTelemetryService.trackTaskCompletion.mockImplementation(() => { + throw new Error("Track failed"); + }); + + // Should not throw + expect(() => service.trackLlmCompletion(baseParams)).not.toThrow(); + }); + + it("should handle streaming operation with estimated tokens", () => { + service.trackLlmCompletion({ + ...baseParams, + operation: "chatStream", + inputTokens: 50, + outputTokens: 100, + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + actual_input_tokens: 50, + actual_output_tokens: 100, + estimated_input_tokens: 50, + estimated_output_tokens: 100, + }), + ); + }); + + it("should handle embed operation", () => { + service.trackLlmCompletion({ + ...baseParams, + operation: "embed", + outputTokens: 0, + callingContext: undefined, + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + task_type: TaskType.IMPLEMENTATION, + actual_output_tokens: 0, + }), + ); + }); + + it("should pass all required EventBuilderParams fields", () => { + 
service.trackLlmCompletion(baseParams); + + const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType).mock + .calls[0][0] as EventBuilderParams; + + // Verify all required fields are present + expect(buildCall).toHaveProperty("task_duration_ms"); + expect(buildCall).toHaveProperty("task_type"); + expect(buildCall).toHaveProperty("complexity"); + expect(buildCall).toHaveProperty("harness"); + expect(buildCall).toHaveProperty("model"); + expect(buildCall).toHaveProperty("provider"); + expect(buildCall).toHaveProperty("estimated_input_tokens"); + expect(buildCall).toHaveProperty("estimated_output_tokens"); + expect(buildCall).toHaveProperty("actual_input_tokens"); + expect(buildCall).toHaveProperty("actual_output_tokens"); + expect(buildCall).toHaveProperty("estimated_cost_usd_micros"); + expect(buildCall).toHaveProperty("actual_cost_usd_micros"); + expect(buildCall).toHaveProperty("quality_gate_passed"); + expect(buildCall).toHaveProperty("quality_gates_run"); + expect(buildCall).toHaveProperty("quality_gates_failed"); + expect(buildCall).toHaveProperty("context_compactions"); + expect(buildCall).toHaveProperty("context_rotations"); + expect(buildCall).toHaveProperty("context_utilization_final"); + expect(buildCall).toHaveProperty("outcome"); + expect(buildCall).toHaveProperty("retry_count"); + }); + }); +}); diff --git a/apps/api/src/llm/llm-telemetry-tracker.service.ts b/apps/api/src/llm/llm-telemetry-tracker.service.ts new file mode 100644 index 0000000..e4905a9 --- /dev/null +++ b/apps/api/src/llm/llm-telemetry-tracker.service.ts @@ -0,0 +1,197 @@ +import { Injectable, Logger } from "@nestjs/common"; +import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service"; +import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client"; +import type { LlmProviderType } from "./providers/llm-provider.interface"; +import { calculateCostMicrodollars } from "./llm-cost-table"; + +/** + * Parameters for 
tracking an LLM completion event. + */ +export interface LlmCompletionParams { + /** Full model name (e.g. "claude-sonnet-4-5-20250929") */ + model: string; + /** Provider type discriminator */ + providerType: LlmProviderType; + /** Operation type that was performed */ + operation: "chat" | "chatStream" | "embed"; + /** Duration of the LLM call in milliseconds */ + durationMs: number; + /** Number of input (prompt) tokens consumed */ + inputTokens: number; + /** Number of output (completion) tokens generated */ + outputTokens: number; + /** + * Optional calling context hint for task type inference. + * Examples: "brain", "chat", "embed", "planning", "code-review" + */ + callingContext?: string; + /** Whether the call succeeded or failed */ + success: boolean; +} + +/** + * Estimated token count from text length. + * Uses a rough approximation of ~4 characters per token (GPT/Claude average). + */ +export function estimateTokens(text: string): number { + return Math.ceil(text.length / 4); +} + +/** Map LLM provider type to telemetry Provider enum */ +export function mapProviderType(providerType: LlmProviderType): Provider { + switch (providerType) { + case "claude": + return Provider.ANTHROPIC; + case "openai": + return Provider.OPENAI; + case "ollama": + return Provider.OLLAMA; + default: + return Provider.UNKNOWN; + } +} + +/** Map LLM provider type to telemetry Harness enum */ +export function mapHarness(providerType: LlmProviderType): Harness { + switch (providerType) { + case "ollama": + return Harness.OLLAMA_LOCAL; + default: + return Harness.API_DIRECT; + } +} + +/** + * Infer the task type from calling context and operation. 
+ * + * @param operation - The LLM operation (chat, chatStream, embed) + * @param callingContext - Optional hint about the caller's purpose + * @returns Inferred TaskType + */ +export function inferTaskType( + operation: "chat" | "chatStream" | "embed", + callingContext?: string +): TaskType { + // Embedding operations are typically for indexing/search + if (operation === "embed") { + return TaskType.IMPLEMENTATION; + } + + if (!callingContext) { + return TaskType.UNKNOWN; + } + + const ctx = callingContext.toLowerCase(); + + if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) { + return TaskType.PLANNING; + } + if (ctx.includes("review") || ctx.includes("code-review")) { + return TaskType.CODE_REVIEW; + } + if (ctx.includes("test")) { + return TaskType.TESTING; + } + if (ctx.includes("debug")) { + return TaskType.DEBUGGING; + } + if (ctx.includes("refactor")) { + return TaskType.REFACTORING; + } + if (ctx.includes("doc")) { + return TaskType.DOCUMENTATION; + } + if (ctx.includes("config")) { + return TaskType.CONFIGURATION; + } + if (ctx.includes("security") || ctx.includes("audit")) { + return TaskType.SECURITY_AUDIT; + } + if (ctx.includes("chat") || ctx.includes("implement")) { + return TaskType.IMPLEMENTATION; + } + + return TaskType.UNKNOWN; +} + +/** + * LLM Telemetry Tracker Service + * + * Builds and submits telemetry events for LLM completions. + * All tracking is non-blocking and fire-and-forget; telemetry errors + * never propagate to the caller. 
+ * + * @example + * ```typescript + * // After a successful chat completion + * this.telemetryTracker.trackLlmCompletion({ + * model: "claude-sonnet-4-5-20250929", + * providerType: "claude", + * operation: "chat", + * durationMs: 1200, + * inputTokens: 150, + * outputTokens: 300, + * callingContext: "chat", + * success: true, + * }); + * ``` + */ +@Injectable() +export class LlmTelemetryTrackerService { + private readonly logger = new Logger(LlmTelemetryTrackerService.name); + + constructor(private readonly telemetry: MosaicTelemetryService) {} + + /** + * Track an LLM completion event via Mosaic Telemetry. + * + * This method is intentionally fire-and-forget. It catches all errors + * internally and logs them without propagating to the caller. + * + * @param params - LLM completion parameters + */ + trackLlmCompletion(params: LlmCompletionParams): void { + try { + const builder = this.telemetry.eventBuilder; + if (!builder) { + // Telemetry is disabled — silently skip + return; + } + + const costMicrodollars = calculateCostMicrodollars( + params.model, + params.inputTokens, + params.outputTokens + ); + + const event = builder.build({ + task_duration_ms: params.durationMs, + task_type: inferTaskType(params.operation, params.callingContext), + complexity: Complexity.LOW, + harness: mapHarness(params.providerType), + model: params.model, + provider: mapProviderType(params.providerType), + estimated_input_tokens: params.inputTokens, + estimated_output_tokens: params.outputTokens, + actual_input_tokens: params.inputTokens, + actual_output_tokens: params.outputTokens, + estimated_cost_usd_micros: costMicrodollars, + actual_cost_usd_micros: costMicrodollars, + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + context_compactions: 0, + context_rotations: 0, + context_utilization_final: 0, + outcome: params.success ? 
Outcome.SUCCESS : Outcome.FAILURE, + retry_count: 0, + }); + + this.telemetry.trackTaskCompletion(event); + } catch (error: unknown) { + // Never let telemetry errors propagate + const msg = error instanceof Error ? error.message : String(error); + this.logger.warn(`Failed to track LLM telemetry event: ${msg}`); + } + } +} diff --git a/apps/api/src/llm/llm.module.ts b/apps/api/src/llm/llm.module.ts index 3d2fc4a..7528e0f 100644 --- a/apps/api/src/llm/llm.module.ts +++ b/apps/api/src/llm/llm.module.ts @@ -3,13 +3,14 @@ import { LlmController } from "./llm.controller"; import { LlmProviderAdminController } from "./llm-provider-admin.controller"; import { LlmService } from "./llm.service"; import { LlmManagerService } from "./llm-manager.service"; +import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service"; import { PrismaModule } from "../prisma/prisma.module"; import { LlmUsageModule } from "../llm-usage/llm-usage.module"; @Module({ imports: [PrismaModule, LlmUsageModule], controllers: [LlmController, LlmProviderAdminController], - providers: [LlmService, LlmManagerService], + providers: [LlmService, LlmManagerService, LlmTelemetryTrackerService], exports: [LlmService, LlmManagerService], }) export class LlmModule {} diff --git a/apps/api/src/llm/llm.service.spec.ts b/apps/api/src/llm/llm.service.spec.ts index 2b9d84d..cff6840 100644 --- a/apps/api/src/llm/llm.service.spec.ts +++ b/apps/api/src/llm/llm.service.spec.ts @@ -3,6 +3,7 @@ import { Test, TestingModule } from "@nestjs/testing"; import { ServiceUnavailableException } from "@nestjs/common"; import { LlmService } from "./llm.service"; import { LlmManagerService } from "./llm-manager.service"; +import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service"; import type { ChatRequestDto, EmbedRequestDto, ChatResponseDto, EmbedResponseDto } from "./dto"; import type { LlmProviderInterface, @@ -14,6 +15,9 @@ describe("LlmService", () => { let mockManagerService: { 
getDefaultProvider: ReturnType; }; + let mockTelemetryTracker: { + trackLlmCompletion: ReturnType; + }; let mockProvider: { chat: ReturnType; chatStream: ReturnType; @@ -41,6 +45,11 @@ describe("LlmService", () => { getDefaultProvider: vi.fn().mockResolvedValue(mockProvider), }; + // Create mock telemetry tracker + mockTelemetryTracker = { + trackLlmCompletion: vi.fn(), + }; + const module: TestingModule = await Test.createTestingModule({ providers: [ LlmService, @@ -48,6 +57,10 @@ describe("LlmService", () => { provide: LlmManagerService, useValue: mockManagerService, }, + { + provide: LlmTelemetryTrackerService, + useValue: mockTelemetryTracker, + }, ], }).compile(); @@ -135,6 +148,45 @@ describe("LlmService", () => { expect(result).toEqual(response); }); + it("should track telemetry on successful chat", async () => { + const response: ChatResponseDto = { + model: "llama3.2", + message: { role: "assistant", content: "Hello" }, + done: true, + promptEvalCount: 10, + evalCount: 20, + }; + mockProvider.chat.mockResolvedValue(response); + + await service.chat(request, "chat"); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + providerType: "ollama", + operation: "chat", + inputTokens: 10, + outputTokens: 20, + callingContext: "chat", + success: true, + }), + ); + }); + + it("should track telemetry on failed chat", async () => { + mockProvider.chat.mockRejectedValue(new Error("Chat failed")); + + await expect(service.chat(request)).rejects.toThrow(ServiceUnavailableException); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + operation: "chat", + success: false, + }), + ); + }); + it("should throw ServiceUnavailableException on error", async () => { mockProvider.chat.mockRejectedValue(new Error("Chat failed")); @@ -177,6 +229,94 @@ describe("LlmService", () => { expect(chunks[1].message.content).toBe(" world"); }); + 
it("should track telemetry after stream completes", async () => { + async function* mockGenerator(): AsyncGenerator { + yield { + model: "llama3.2", + message: { role: "assistant", content: "Hello" }, + done: false, + }; + yield { + model: "llama3.2", + message: { role: "assistant", content: " world" }, + done: true, + promptEvalCount: 5, + evalCount: 10, + }; + } + + mockProvider.chatStream.mockReturnValue(mockGenerator()); + + const chunks: ChatResponseDto[] = []; + for await (const chunk of service.chatStream(request, "brain")) { + chunks.push(chunk); + } + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + providerType: "ollama", + operation: "chatStream", + inputTokens: 5, + outputTokens: 10, + callingContext: "brain", + success: true, + }), + ); + }); + + it("should estimate tokens when provider does not return counts in stream", async () => { + async function* mockGenerator(): AsyncGenerator { + yield { + model: "llama3.2", + message: { role: "assistant", content: "Hello world" }, + done: false, + }; + yield { + model: "llama3.2", + message: { role: "assistant", content: "" }, + done: true, + }; + } + + mockProvider.chatStream.mockReturnValue(mockGenerator()); + + const chunks: ChatResponseDto[] = []; + for await (const chunk of service.chatStream(request)) { + chunks.push(chunk); + } + + // Should use estimated tokens since no actual counts provided + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + operation: "chatStream", + success: true, + // Input estimated from "Hi" -> ceil(2/4) = 1 + inputTokens: 1, + // Output estimated from "Hello world" -> ceil(11/4) = 3 + outputTokens: 3, + }), + ); + }); + + it("should track telemetry on stream failure", async () => { + async function* errorGenerator(): AsyncGenerator { + throw new Error("Stream failed"); + } + + mockProvider.chatStream.mockReturnValue(errorGenerator()); + + const generator = 
service.chatStream(request); + await expect(generator.next()).rejects.toThrow(ServiceUnavailableException); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + operation: "chatStream", + success: false, + }), + ); + }); + it("should throw ServiceUnavailableException on error", async () => { async function* errorGenerator(): AsyncGenerator { throw new Error("Stream failed"); @@ -210,6 +350,41 @@ describe("LlmService", () => { expect(result).toEqual(response); }); + it("should track telemetry on successful embed", async () => { + const response: EmbedResponseDto = { + model: "llama3.2", + embeddings: [[0.1, 0.2, 0.3]], + totalDuration: 500, + }; + mockProvider.embed.mockResolvedValue(response); + + await service.embed(request, "embed"); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + providerType: "ollama", + operation: "embed", + outputTokens: 0, + callingContext: "embed", + success: true, + }), + ); + }); + + it("should track telemetry on failed embed", async () => { + mockProvider.embed.mockRejectedValue(new Error("Embedding failed")); + + await expect(service.embed(request)).rejects.toThrow(ServiceUnavailableException); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + operation: "embed", + success: false, + }), + ); + }); + it("should throw ServiceUnavailableException on error", async () => { mockProvider.embed.mockRejectedValue(new Error("Embedding failed")); diff --git a/apps/api/src/llm/llm.service.ts b/apps/api/src/llm/llm.service.ts index 2dfc065..ee938fb 100644 --- a/apps/api/src/llm/llm.service.ts +++ b/apps/api/src/llm/llm.service.ts @@ -1,13 +1,15 @@ import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common"; import { LlmManagerService } from "./llm-manager.service"; +import { LlmTelemetryTrackerService, estimateTokens } from 
"./llm-telemetry-tracker.service"; import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto"; -import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface"; +import type { LlmProviderHealthStatus, LlmProviderType } from "./providers/llm-provider.interface"; /** * LLM Service * * High-level service for LLM operations. Delegates to providers via LlmManagerService. * Maintains backward compatibility with the original API while supporting multiple providers. + * Automatically tracks completions via Mosaic Telemetry (non-blocking). * * @example * ```typescript @@ -33,7 +35,10 @@ import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface export class LlmService implements OnModuleInit { private readonly logger = new Logger(LlmService.name); - constructor(private readonly llmManager: LlmManagerService) { + constructor( + private readonly llmManager: LlmManagerService, + private readonly telemetryTracker: LlmTelemetryTrackerService + ) { this.logger.log("LLM service initialized"); } @@ -91,14 +96,45 @@ export class LlmService implements OnModuleInit { * Perform a synchronous chat completion. 
* * @param request - Chat request with messages and configuration + * @param callingContext - Optional context hint for telemetry task type inference * @returns Complete chat response * @throws {ServiceUnavailableException} If provider is unavailable or request fails */ - async chat(request: ChatRequestDto): Promise { + async chat(request: ChatRequestDto, callingContext?: string): Promise { + const startTime = Date.now(); + let providerType: LlmProviderType = "ollama"; + try { const provider = await this.llmManager.getDefaultProvider(); - return await provider.chat(request); + providerType = provider.type; + const response = await provider.chat(request); + + // Fire-and-forget telemetry tracking + this.telemetryTracker.trackLlmCompletion({ + model: response.model, + providerType, + operation: "chat", + durationMs: Date.now() - startTime, + inputTokens: response.promptEvalCount ?? 0, + outputTokens: response.evalCount ?? 0, + callingContext, + success: true, + }); + + return response; } catch (error: unknown) { + // Track failure (fire-and-forget) + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "chat", + durationMs: Date.now() - startTime, + inputTokens: 0, + outputTokens: 0, + callingContext, + success: false, + }); + const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error(`Chat failed: ${errorMessage}`); throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`); @@ -107,20 +143,75 @@ export class LlmService implements OnModuleInit { /** * Perform a streaming chat completion. * Yields response chunks as they arrive from the provider. + * Aggregates token usage and tracks telemetry after the stream ends. 
* * @param request - Chat request with messages and configuration + * @param callingContext - Optional context hint for telemetry task type inference * @yields Chat response chunks * @throws {ServiceUnavailableException} If provider is unavailable or request fails */ - async *chatStream(request: ChatRequestDto): AsyncGenerator { + async *chatStream( + request: ChatRequestDto, + callingContext?: string + ): AsyncGenerator { + const startTime = Date.now(); + let providerType: LlmProviderType = "ollama"; + let aggregatedContent = ""; + let lastChunkInputTokens = 0; + let lastChunkOutputTokens = 0; + try { const provider = await this.llmManager.getDefaultProvider(); + providerType = provider.type; const stream = provider.chatStream(request); for await (const chunk of stream) { + // Accumulate content for token estimation + aggregatedContent += chunk.message.content; + + // Some providers include token counts on the final chunk + if (chunk.promptEvalCount !== undefined) { + lastChunkInputTokens = chunk.promptEvalCount; + } + if (chunk.evalCount !== undefined) { + lastChunkOutputTokens = chunk.evalCount; + } + yield chunk; } + + // After stream completes, track telemetry + // Use actual token counts if available, otherwise estimate from content length + const inputTokens = + lastChunkInputTokens > 0 + ? lastChunkInputTokens + : estimateTokens(request.messages.map((m) => m.content).join(" ")); + const outputTokens = + lastChunkOutputTokens > 0 ? 
lastChunkOutputTokens : estimateTokens(aggregatedContent); + + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "chatStream", + durationMs: Date.now() - startTime, + inputTokens, + outputTokens, + callingContext, + success: true, + }); } catch (error: unknown) { + // Track failure (fire-and-forget) + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "chatStream", + durationMs: Date.now() - startTime, + inputTokens: 0, + outputTokens: 0, + callingContext, + success: false, + }); + const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error(`Stream failed: ${errorMessage}`); throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`); @@ -130,14 +221,48 @@ export class LlmService implements OnModuleInit { * Generate embeddings for the given input texts. * * @param request - Embedding request with model and input texts + * @param callingContext - Optional context hint for telemetry task type inference * @returns Embeddings response with vector arrays * @throws {ServiceUnavailableException} If provider is unavailable or request fails */ - async embed(request: EmbedRequestDto): Promise { + async embed(request: EmbedRequestDto, callingContext?: string): Promise { + const startTime = Date.now(); + let providerType: LlmProviderType = "ollama"; + try { const provider = await this.llmManager.getDefaultProvider(); - return await provider.embed(request); + providerType = provider.type; + const response = await provider.embed(request); + + // Estimate input tokens from the input text + const inputTokens = estimateTokens(request.input.join(" ")); + + // Fire-and-forget telemetry tracking + this.telemetryTracker.trackLlmCompletion({ + model: response.model, + providerType, + operation: "embed", + durationMs: Date.now() - startTime, + inputTokens, + outputTokens: 0, // Embeddings don't produce output tokens + callingContext, + success: 
true, + }); + + return response; } catch (error: unknown) { + // Track failure (fire-and-forget) + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "embed", + durationMs: Date.now() - startTime, + inputTokens: 0, + outputTokens: 0, + callingContext, + success: false, + }); + const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error(`Embed failed: ${errorMessage}`); throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`); From ed23293e1add22fd7dfe2325906c510abf47f692 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:50:58 -0600 Subject: [PATCH 05/15] feat(#373): prediction integration for cost estimation - Create PredictionService for pre-task cost/token estimates - Refresh common predictions on startup - Integrate predictions into LLM telemetry tracker - Add GET /api/telemetry/estimate endpoint - Graceful degradation when no prediction data available - Add unit tests for prediction service Refs #373 Co-Authored-By: Claude Opus 4.6 --- .../llm/llm-telemetry-tracker.service.spec.ts | 12 +- .../src/llm/llm-telemetry-tracker.service.ts | 41 ++- .../mosaic-telemetry.controller.ts | 92 +++++ .../mosaic-telemetry.module.ts | 10 +- .../prediction.service.spec.ts | 320 ++++++++++++++++++ .../mosaic-telemetry/prediction.service.ts | 161 +++++++++ 6 files changed, 621 insertions(+), 15 deletions(-) create mode 100644 apps/api/src/mosaic-telemetry/mosaic-telemetry.controller.ts create mode 100644 apps/api/src/mosaic-telemetry/prediction.service.spec.ts create mode 100644 apps/api/src/mosaic-telemetry/prediction.service.ts diff --git a/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts b/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts index 0f43489..ca2a867 100644 --- a/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts +++ b/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts @@ -333,12 +333,13 @@ describe("LlmTelemetryTrackerService", () 
=> { service.trackLlmCompletion(baseParams); // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950 - const expectedCost = 4950; + const expectedActualCost = 4950; expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( expect.objectContaining({ - estimated_cost_usd_micros: expectedCost, - actual_cost_usd_micros: expectedCost, + // Estimated values are 0 when no PredictionService is injected + estimated_cost_usd_micros: 0, + actual_cost_usd_micros: expectedActualCost, }), ); }); @@ -437,8 +438,9 @@ describe("LlmTelemetryTrackerService", () => { expect.objectContaining({ actual_input_tokens: 50, actual_output_tokens: 100, - estimated_input_tokens: 50, - estimated_output_tokens: 100, + // Estimated values are 0 when no PredictionService is injected + estimated_input_tokens: 0, + estimated_output_tokens: 0, }), ); }); diff --git a/apps/api/src/llm/llm-telemetry-tracker.service.ts b/apps/api/src/llm/llm-telemetry-tracker.service.ts index e4905a9..0b79f8b 100644 --- a/apps/api/src/llm/llm-telemetry-tracker.service.ts +++ b/apps/api/src/llm/llm-telemetry-tracker.service.ts @@ -1,5 +1,6 @@ -import { Injectable, Logger } from "@nestjs/common"; +import { Injectable, Logger, Optional } from "@nestjs/common"; import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service"; +import { PredictionService } from "../mosaic-telemetry/prediction.service"; import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client"; import type { LlmProviderType } from "./providers/llm-provider.interface"; import { calculateCostMicrodollars } from "./llm-cost-table"; @@ -140,7 +141,10 @@ export function inferTaskType( export class LlmTelemetryTrackerService { private readonly logger = new Logger(LlmTelemetryTrackerService.name); - constructor(private readonly telemetry: MosaicTelemetryService) {} + constructor( + private readonly telemetry: MosaicTelemetryService, + @Optional() private readonly predictionService?: 
PredictionService + ) {} /** * Track an LLM completion event via Mosaic Telemetry. @@ -158,24 +162,47 @@ export class LlmTelemetryTrackerService { return; } + const taskType = inferTaskType(params.operation, params.callingContext); + const provider = mapProviderType(params.providerType); + const costMicrodollars = calculateCostMicrodollars( params.model, params.inputTokens, params.outputTokens ); + // Query predictions for estimated fields (graceful degradation) + let estimatedInputTokens = 0; + let estimatedOutputTokens = 0; + let estimatedCostMicros = 0; + + if (this.predictionService) { + const prediction = this.predictionService.getEstimate( + taskType, + params.model, + provider, + Complexity.LOW + ); + + if (prediction?.prediction && prediction.metadata.confidence !== "none") { + estimatedInputTokens = prediction.prediction.input_tokens.median; + estimatedOutputTokens = prediction.prediction.output_tokens.median; + estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 
0; + } + } + const event = builder.build({ task_duration_ms: params.durationMs, - task_type: inferTaskType(params.operation, params.callingContext), + task_type: taskType, complexity: Complexity.LOW, harness: mapHarness(params.providerType), model: params.model, - provider: mapProviderType(params.providerType), - estimated_input_tokens: params.inputTokens, - estimated_output_tokens: params.outputTokens, + provider, + estimated_input_tokens: estimatedInputTokens, + estimated_output_tokens: estimatedOutputTokens, actual_input_tokens: params.inputTokens, actual_output_tokens: params.outputTokens, - estimated_cost_usd_micros: costMicrodollars, + estimated_cost_usd_micros: estimatedCostMicros, actual_cost_usd_micros: costMicrodollars, quality_gate_passed: true, quality_gates_run: [], diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.controller.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.controller.ts new file mode 100644 index 0000000..a3d0f9c --- /dev/null +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.controller.ts @@ -0,0 +1,92 @@ +import { Controller, Get, Query, UseGuards, BadRequestException } from "@nestjs/common"; +import { AuthGuard } from "../auth/guards/auth.guard"; +import { PredictionService } from "./prediction.service"; +import { + TaskType, + Complexity, + Provider, + type PredictionResponse, +} from "@mosaicstack/telemetry-client"; + +/** + * Valid values for query parameter validation. + */ +const VALID_TASK_TYPES = new Set(Object.values(TaskType)); +const VALID_COMPLEXITIES = new Set(Object.values(Complexity)); +const VALID_PROVIDERS = new Set(Object.values(Provider)); + +/** + * Response DTO for the estimate endpoint. + */ +interface EstimateResponseDto { + data: PredictionResponse | null; +} + +/** + * Mosaic Telemetry Controller + * + * Provides API endpoints for accessing telemetry prediction data. + * All endpoints require authentication via AuthGuard. 
+ * + * This controller is intentionally lightweight - it delegates to PredictionService + * for the actual prediction logic and returns results directly to the frontend. + */ +@Controller("telemetry") +@UseGuards(AuthGuard) +export class MosaicTelemetryController { + constructor(private readonly predictionService: PredictionService) {} + + /** + * GET /api/telemetry/estimate + * + * Get a cost/token estimate for a given task configuration. + * Returns prediction data including confidence level, or null if + * no prediction is available. + * + * @param taskType - Task type enum value (e.g. "implementation", "planning") + * @param model - Model name (e.g. "claude-sonnet-4-5") + * @param provider - Provider enum value (e.g. "anthropic", "openai") + * @param complexity - Complexity level (e.g. "low", "medium", "high") + * @returns Prediction response with estimates and confidence + */ + @Get("estimate") + getEstimate( + @Query("taskType") taskType: string, + @Query("model") model: string, + @Query("provider") provider: string, + @Query("complexity") complexity: string + ): EstimateResponseDto { + if (!taskType || !model || !provider || !complexity) { + throw new BadRequestException( + "Missing query parameters. Required: taskType, model, provider, complexity" + ); + } + + if (!VALID_TASK_TYPES.has(taskType)) { + throw new BadRequestException( + `Invalid taskType "${taskType}". Valid values: ${[...VALID_TASK_TYPES].join(", ")}` + ); + } + + if (!VALID_PROVIDERS.has(provider)) { + throw new BadRequestException( + `Invalid provider "${provider}". Valid values: ${[...VALID_PROVIDERS].join(", ")}` + ); + } + + if (!VALID_COMPLEXITIES.has(complexity)) { + throw new BadRequestException( + `Invalid complexity "${complexity}". 
Valid values: ${[...VALID_COMPLEXITIES].join(", ")}` + ); + } + + const prediction = this.predictionService.getEstimate( + taskType as TaskType, + model, + provider as Provider, + complexity as Complexity + ); + + return { data: prediction }; + } +} diff --git a/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts b/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts index a321dda..55bb91c 100644 --- a/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts +++ b/apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts @@ -1,6 +1,9 @@ import { Module, Global } from "@nestjs/common"; import { ConfigModule } from "@nestjs/config"; +import { AuthModule } from "../auth/auth.module"; import { MosaicTelemetryService } from "./mosaic-telemetry.service"; +import { PredictionService } from "./prediction.service"; +import { MosaicTelemetryController } from "./mosaic-telemetry.controller"; /** * Global module providing Mosaic Telemetry integration via @mosaicstack/telemetry-client. @@ -30,8 +33,9 @@ import { MosaicTelemetryService } from "./mosaic-telemetry.service"; */ @Global() @Module({ - imports: [ConfigModule], - providers: [MosaicTelemetryService], - exports: [MosaicTelemetryService], + imports: [ConfigModule, AuthModule], + controllers: [MosaicTelemetryController], + providers: [MosaicTelemetryService, PredictionService], + exports: [MosaicTelemetryService, PredictionService], }) export class MosaicTelemetryModule {} diff --git a/apps/api/src/mosaic-telemetry/prediction.service.spec.ts b/apps/api/src/mosaic-telemetry/prediction.service.spec.ts new file mode 100644 index 0000000..f933f2e --- /dev/null +++ b/apps/api/src/mosaic-telemetry/prediction.service.spec.ts @@ -0,0 +1,320 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { Test, TestingModule } from "@nestjs/testing"; +import { + TaskType, + Complexity, + Provider, +} from "@mosaicstack/telemetry-client"; +import type { + PredictionResponse, + PredictionQuery, +} from 
"@mosaicstack/telemetry-client"; +import { MosaicTelemetryService } from "./mosaic-telemetry.service"; +import { PredictionService } from "./prediction.service"; + +describe("PredictionService", () => { + let service: PredictionService; + let mockTelemetryService: { + isEnabled: boolean; + getPrediction: ReturnType; + refreshPredictions: ReturnType; + }; + + const mockPredictionResponse: PredictionResponse = { + prediction: { + input_tokens: { + p10: 50, + p25: 80, + median: 120, + p75: 200, + p90: 350, + }, + output_tokens: { + p10: 100, + p25: 150, + median: 250, + p75: 400, + p90: 600, + }, + cost_usd_micros: { + p10: 500, + p25: 800, + median: 1200, + p75: 2000, + p90: 3500, + }, + duration_ms: { + p10: 200, + p25: 400, + median: 800, + p75: 1500, + p90: 3000, + }, + correction_factors: { + input: 1.0, + output: 1.0, + }, + quality: { + gate_pass_rate: 0.95, + success_rate: 0.92, + }, + }, + metadata: { + sample_size: 150, + fallback_level: 0, + confidence: "high", + last_updated: "2026-02-15T00:00:00Z", + cache_hit: true, + }, + }; + + const nullPredictionResponse: PredictionResponse = { + prediction: null, + metadata: { + sample_size: 0, + fallback_level: 3, + confidence: "none", + last_updated: null, + cache_hit: false, + }, + }; + + beforeEach(async () => { + mockTelemetryService = { + isEnabled: true, + getPrediction: vi.fn().mockReturnValue(mockPredictionResponse), + refreshPredictions: vi.fn().mockResolvedValue(undefined), + }; + + const module: TestingModule = await Test.createTestingModule({ + providers: [ + PredictionService, + { + provide: MosaicTelemetryService, + useValue: mockTelemetryService, + }, + ], + }).compile(); + + service = module.get(PredictionService); + }); + + it("should be defined", () => { + expect(service).toBeDefined(); + }); + + // ---------- getEstimate ---------- + + describe("getEstimate", () => { + it("should return prediction response for valid query", () => { + const result = service.getEstimate( + TaskType.IMPLEMENTATION, 
+ "claude-sonnet-4-5", + Provider.ANTHROPIC, + Complexity.LOW + ); + + expect(result).toEqual(mockPredictionResponse); + expect(mockTelemetryService.getPrediction).toHaveBeenCalledWith({ + task_type: TaskType.IMPLEMENTATION, + model: "claude-sonnet-4-5", + provider: Provider.ANTHROPIC, + complexity: Complexity.LOW, + }); + }); + + it("should pass correct query parameters to telemetry service", () => { + service.getEstimate( + TaskType.CODE_REVIEW, + "gpt-4o", + Provider.OPENAI, + Complexity.HIGH + ); + + expect(mockTelemetryService.getPrediction).toHaveBeenCalledWith({ + task_type: TaskType.CODE_REVIEW, + model: "gpt-4o", + provider: Provider.OPENAI, + complexity: Complexity.HIGH, + }); + }); + + it("should return null when telemetry returns null", () => { + mockTelemetryService.getPrediction.mockReturnValue(null); + + const result = service.getEstimate( + TaskType.IMPLEMENTATION, + "claude-sonnet-4-5", + Provider.ANTHROPIC, + Complexity.LOW + ); + + expect(result).toBeNull(); + }); + + it("should return null prediction response when confidence is none", () => { + mockTelemetryService.getPrediction.mockReturnValue(nullPredictionResponse); + + const result = service.getEstimate( + TaskType.IMPLEMENTATION, + "unknown-model", + Provider.UNKNOWN, + Complexity.LOW + ); + + expect(result).toEqual(nullPredictionResponse); + expect(result?.metadata.confidence).toBe("none"); + }); + + it("should return null and not throw when getPrediction throws", () => { + mockTelemetryService.getPrediction.mockImplementation(() => { + throw new Error("Prediction fetch failed"); + }); + + const result = service.getEstimate( + TaskType.IMPLEMENTATION, + "claude-sonnet-4-5", + Provider.ANTHROPIC, + Complexity.LOW + ); + + expect(result).toBeNull(); + }); + + it("should handle non-Error thrown objects gracefully", () => { + mockTelemetryService.getPrediction.mockImplementation(() => { + throw "string error"; + }); + + const result = service.getEstimate( + TaskType.IMPLEMENTATION, + 
"claude-sonnet-4-5", + Provider.ANTHROPIC, + Complexity.LOW + ); + + expect(result).toBeNull(); + }); + }); + + // ---------- refreshCommonPredictions ---------- + + describe("refreshCommonPredictions", () => { + it("should call refreshPredictions with multiple query combinations", async () => { + await service.refreshCommonPredictions(); + + expect(mockTelemetryService.refreshPredictions).toHaveBeenCalledTimes(1); + + const queries: PredictionQuery[] = + mockTelemetryService.refreshPredictions.mock.calls[0][0]; + + // Should have queries for cross-product of models, task types, and complexities + expect(queries.length).toBeGreaterThan(0); + + // Verify all queries have valid structure + for (const query of queries) { + expect(query).toHaveProperty("task_type"); + expect(query).toHaveProperty("model"); + expect(query).toHaveProperty("provider"); + expect(query).toHaveProperty("complexity"); + } + }); + + it("should include Anthropic model predictions", async () => { + await service.refreshCommonPredictions(); + + const queries: PredictionQuery[] = + mockTelemetryService.refreshPredictions.mock.calls[0][0]; + + const anthropicQueries = queries.filter( + (q: PredictionQuery) => q.provider === Provider.ANTHROPIC + ); + expect(anthropicQueries.length).toBeGreaterThan(0); + }); + + it("should include OpenAI model predictions", async () => { + await service.refreshCommonPredictions(); + + const queries: PredictionQuery[] = + mockTelemetryService.refreshPredictions.mock.calls[0][0]; + + const openaiQueries = queries.filter( + (q: PredictionQuery) => q.provider === Provider.OPENAI + ); + expect(openaiQueries.length).toBeGreaterThan(0); + }); + + it("should not call refreshPredictions when telemetry is disabled", async () => { + mockTelemetryService.isEnabled = false; + + await service.refreshCommonPredictions(); + + expect(mockTelemetryService.refreshPredictions).not.toHaveBeenCalled(); + }); + + it("should not throw when refreshPredictions rejects", async () => { + 
mockTelemetryService.refreshPredictions.mockRejectedValue( + new Error("Server unreachable") + ); + + // Should not throw + await expect(service.refreshCommonPredictions()).resolves.not.toThrow(); + }); + + it("should include common task types in queries", async () => { + await service.refreshCommonPredictions(); + + const queries: PredictionQuery[] = + mockTelemetryService.refreshPredictions.mock.calls[0][0]; + + const taskTypes = new Set(queries.map((q: PredictionQuery) => q.task_type)); + + expect(taskTypes.has(TaskType.IMPLEMENTATION)).toBe(true); + expect(taskTypes.has(TaskType.PLANNING)).toBe(true); + expect(taskTypes.has(TaskType.CODE_REVIEW)).toBe(true); + }); + + it("should include common complexity levels in queries", async () => { + await service.refreshCommonPredictions(); + + const queries: PredictionQuery[] = + mockTelemetryService.refreshPredictions.mock.calls[0][0]; + + const complexities = new Set(queries.map((q: PredictionQuery) => q.complexity)); + + expect(complexities.has(Complexity.LOW)).toBe(true); + expect(complexities.has(Complexity.MEDIUM)).toBe(true); + }); + }); + + // ---------- onModuleInit ---------- + + describe("onModuleInit", () => { + it("should trigger refreshCommonPredictions on init when telemetry is enabled", () => { + // refreshPredictions is async, but onModuleInit fires it and forgets + service.onModuleInit(); + + // Give the promise microtask a chance to execute + expect(mockTelemetryService.isEnabled).toBe(true); + // refreshPredictions will be called asynchronously + }); + + it("should not refresh when telemetry is disabled", () => { + mockTelemetryService.isEnabled = false; + + service.onModuleInit(); + + // refreshPredictions should not be called since we returned early + expect(mockTelemetryService.refreshPredictions).not.toHaveBeenCalled(); + }); + + it("should not throw when refresh fails on init", () => { + mockTelemetryService.refreshPredictions.mockRejectedValue( + new Error("Connection refused") + ); + + // 
Should not throw + expect(() => service.onModuleInit()).not.toThrow(); + }); + }); +}); diff --git a/apps/api/src/mosaic-telemetry/prediction.service.ts b/apps/api/src/mosaic-telemetry/prediction.service.ts new file mode 100644 index 0000000..7ffe6cb --- /dev/null +++ b/apps/api/src/mosaic-telemetry/prediction.service.ts @@ -0,0 +1,161 @@ +import { Injectable, Logger, OnModuleInit } from "@nestjs/common"; +import { + TaskType, + Complexity, + Provider, + type PredictionQuery, + type PredictionResponse, +} from "@mosaicstack/telemetry-client"; +import { MosaicTelemetryService } from "./mosaic-telemetry.service"; + +/** + * Common model-provider combinations used for pre-fetching predictions. + * These represent the most frequently used LLM configurations. + */ +const COMMON_MODELS: { model: string; provider: Provider }[] = [ + { model: "claude-sonnet-4-5", provider: Provider.ANTHROPIC }, + { model: "claude-opus-4", provider: Provider.ANTHROPIC }, + { model: "claude-haiku-4-5", provider: Provider.ANTHROPIC }, + { model: "gpt-4o", provider: Provider.OPENAI }, + { model: "gpt-4o-mini", provider: Provider.OPENAI }, +]; + +/** + * Common task types to pre-fetch predictions for. + */ +const COMMON_TASK_TYPES: TaskType[] = [ + TaskType.IMPLEMENTATION, + TaskType.PLANNING, + TaskType.CODE_REVIEW, +]; + +/** + * Common complexity levels to pre-fetch predictions for. + */ +const COMMON_COMPLEXITIES: Complexity[] = [Complexity.LOW, Complexity.MEDIUM]; + +/** + * PredictionService + * + * Provides pre-task cost and token estimates using crowd-sourced prediction data + * from the Mosaic Telemetry server. Predictions are cached by the underlying SDK + * with a 6-hour TTL. + * + * This service is intentionally non-blocking: if predictions are unavailable + * (telemetry disabled, server unreachable, no data), all methods return null + * without throwing errors. Task execution should never be blocked by prediction + * failures. 
+ * + * @example + * ```typescript + * const estimate = this.predictionService.getEstimate( + * TaskType.IMPLEMENTATION, + * "claude-sonnet-4-5", + * Provider.ANTHROPIC, + * Complexity.LOW, + * ); + * if (estimate?.prediction) { + * console.log(`Estimated cost: ${estimate.prediction.cost_usd_micros}`); + * } + * ``` + */ +@Injectable() +export class PredictionService implements OnModuleInit { + private readonly logger = new Logger(PredictionService.name); + + constructor(private readonly telemetry: MosaicTelemetryService) {} + + /** + * Refresh common predictions on startup. + * Runs asynchronously and never blocks module initialization. + */ + onModuleInit(): void { + if (!this.telemetry.isEnabled) { + this.logger.log("Telemetry disabled - skipping prediction refresh"); + return; + } + + // Fire-and-forget: refresh in the background + this.refreshCommonPredictions().catch((error: unknown) => { + const msg = error instanceof Error ? error.message : String(error); + this.logger.warn(`Failed to refresh common predictions on startup: ${msg}`); + }); + } + + /** + * Get a cost/token estimate for a given task configuration. + * + * Returns the cached prediction from the SDK, or null if: + * - Telemetry is disabled + * - No prediction data exists for this combination + * - The prediction has expired + * + * @param taskType - The type of task to estimate + * @param model - The model name (e.g. "claude-sonnet-4-5") + * @param provider - The provider enum value + * @param complexity - The complexity level + * @returns Prediction response with estimates and confidence, or null + */ + getEstimate( + taskType: TaskType, + model: string, + provider: Provider, + complexity: Complexity + ): PredictionResponse | null { + try { + const query: PredictionQuery = { + task_type: taskType, + model, + provider, + complexity, + }; + + return this.telemetry.getPrediction(query); + } catch (error: unknown) { + const msg = error instanceof Error ? 
error.message : String(error);
+      this.logger.warn(`Failed to get prediction estimate: ${msg}`);
+      return null;
+    }
+  }
+
+  /**
+   * Refresh predictions for commonly used (taskType, model, provider, complexity) combinations.
+   *
+   * Generates the cross-product of common models, task types, and complexities,
+   * then batch-refreshes them from the telemetry server. The SDK caches the
+   * results with a 6-hour TTL.
+   *
+   * This method is safe to call at any time. If telemetry is disabled or the
+   * server is unreachable, it completes without error.
+   */
+  async refreshCommonPredictions(): Promise<void> {
+    if (!this.telemetry.isEnabled) {
+      return;
+    }
+
+    const queries: PredictionQuery[] = [];
+
+    for (const { model, provider } of COMMON_MODELS) {
+      for (const taskType of COMMON_TASK_TYPES) {
+        for (const complexity of COMMON_COMPLEXITIES) {
+          queries.push({
+            task_type: taskType,
+            model,
+            provider,
+            complexity,
+          });
+        }
+      }
+    }
+
+    this.logger.log(`Refreshing ${String(queries.length)} common prediction queries...`);
+
+    try {
+      await this.telemetry.refreshPredictions(queries);
+      this.logger.log(`Successfully refreshed ${String(queries.length)} predictions`);
+    } catch (error: unknown) {
+      const msg = error instanceof Error ?
error.message : String(error); + this.logger.warn(`Failed to refresh predictions: ${msg}`); + } + } +} From d6c6af10d9e365c300b79c8e56c7d0b1a6edcd7b Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:52:54 -0600 Subject: [PATCH 06/15] feat(#372): track orchestrator agent task completions via telemetry - Instrument Coordinator.process_queue() with timing and telemetry events - Instrument OrchestrationLoop.process_next_issue() with quality gate tracking - Add agent-to-telemetry mapping (model, provider, harness per agent name) - Map difficulty levels to Complexity enum and gate names to QualityGate enum - Track retry counts per issue (increment on failure, clear on success) - Emit FAILURE outcome on agent spawn failure or quality gate rejection - Non-blocking: telemetry errors are logged and swallowed, never delay tasks - Pass telemetry client from FastAPI lifespan to Coordinator constructor - Add 33 unit tests covering all telemetry scenarios Refs #372 Co-Authored-By: Claude Opus 4.6 --- apps/coordinator/src/coordinator.py | 260 +++++- apps/coordinator/src/main.py | 2 + apps/coordinator/tests/test_task_telemetry.py | 796 ++++++++++++++++++ 3 files changed, 1057 insertions(+), 1 deletion(-) create mode 100644 apps/coordinator/tests/test_task_telemetry.py diff --git a/apps/coordinator/src/coordinator.py b/apps/coordinator/src/coordinator.py index aee45c2..090302b 100644 --- a/apps/coordinator/src/coordinator.py +++ b/apps/coordinator/src/coordinator.py @@ -2,12 +2,24 @@ import asyncio import logging +import time from typing import TYPE_CHECKING, Any +from mosaicstack_telemetry import ( # type: ignore[import-untyped] + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + TaskType, + TelemetryClient, +) + from src.circuit_breaker import CircuitBreaker, CircuitBreakerError from src.context_monitor import ContextMonitor from src.forced_continuation import ForcedContinuationService from src.models import ContextAction +from src.mosaic_telemetry 
import build_task_event from src.quality_orchestrator import QualityOrchestrator, VerificationResult from src.queue import QueueItem, QueueManager from src.tracing_decorators import trace_agent_operation @@ -17,6 +29,49 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Agent-name → telemetry-field mapping helpers +# --------------------------------------------------------------------------- + +# Maps assigned_agent strings to (model, Provider, Harness) +_AGENT_TELEMETRY_MAP: dict[str, tuple[str, Provider, Harness]] = { + "sonnet": ("claude-sonnet-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE), + "opus": ("claude-opus-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE), + "haiku": ("claude-haiku-3.5-20241022", Provider.ANTHROPIC, Harness.CLAUDE_CODE), + "glm": ("glm-4", Provider.CUSTOM, Harness.CUSTOM), + "minimax": ("minimax", Provider.CUSTOM, Harness.CUSTOM), +} + +_DIFFICULTY_TO_COMPLEXITY: dict[str, Complexity] = { + "easy": Complexity.LOW, + "medium": Complexity.MEDIUM, + "hard": Complexity.HIGH, +} + +_GATE_NAME_TO_ENUM: dict[str, QualityGate] = { + "build": QualityGate.BUILD, + "lint": QualityGate.LINT, + "test": QualityGate.TEST, + "coverage": QualityGate.COVERAGE, +} + + +def _resolve_agent_fields( + assigned_agent: str, +) -> tuple[str, Provider, Harness]: + """Resolve agent name to (model, provider, harness) for telemetry. + + Args: + assigned_agent: The agent name string from issue metadata. + + Returns: + Tuple of (model_name, Provider, Harness). + """ + return _AGENT_TELEMETRY_MAP.get( + assigned_agent, + ("unknown", Provider.UNKNOWN, Harness.UNKNOWN), + ) + class Coordinator: """Main orchestration loop for processing the issue queue. 
@@ -41,6 +96,8 @@ class Coordinator: poll_interval: float = 5.0, circuit_breaker_threshold: int = 5, circuit_breaker_cooldown: float = 30.0, + telemetry_client: TelemetryClient | None = None, + instance_id: str = "", ) -> None: """Initialize the Coordinator. @@ -49,12 +106,16 @@ class Coordinator: poll_interval: Seconds between queue polls (default: 5.0) circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5) circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30) + telemetry_client: Optional Mosaic telemetry client for tracking task events + instance_id: UUID identifying this coordinator instance for telemetry """ self.queue_manager = queue_manager self.poll_interval = poll_interval self._running = False self._stop_event: asyncio.Event | None = None self._active_agents: dict[int, dict[str, Any]] = {} + self._telemetry_client = telemetry_client + self._instance_id = instance_id # Circuit breaker for preventing infinite retry loops (SEC-ORCH-7) self._circuit_breaker = CircuitBreaker( @@ -197,7 +258,8 @@ class Coordinator: """Process the next ready item from the queue. Gets the next ready item, spawns an agent to process it, - and marks it complete on success. + and marks it complete on success. Emits a Mosaic telemetry + TaskCompletionEvent after each task attempt. 
Returns: The QueueItem that was processed, or None if queue is empty @@ -218,6 +280,10 @@ class Coordinator: # Mark as in progress self.queue_manager.mark_in_progress(item.issue_number) + # Track timing for telemetry + start_mono = time.monotonic() + outcome = Outcome.FAILURE + # Spawn agent (stub implementation) try: success = await self.spawn_agent(item) @@ -225,6 +291,7 @@ class Coordinator: if success: # Mark as complete self.queue_manager.mark_complete(item.issue_number) + outcome = Outcome.SUCCESS logger.info(f"Issue #{item.issue_number} completed successfully") else: logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress") @@ -233,8 +300,81 @@ class Coordinator: logger.error(f"Error spawning agent for issue #{item.issue_number}: {e}") # Item remains in progress on error + finally: + elapsed_ms = int((time.monotonic() - start_mono) * 1000) + self._emit_task_telemetry(item, outcome=outcome, duration_ms=elapsed_ms) + return item + def _emit_task_telemetry( + self, + item: QueueItem, + *, + outcome: Outcome, + duration_ms: int, + retry_count: int = 0, + actual_input_tokens: int = 0, + actual_output_tokens: int = 0, + quality_passed: bool = False, + quality_gates_run: list[QualityGate] | None = None, + quality_gates_failed: list[QualityGate] | None = None, + ) -> None: + """Emit a Mosaic telemetry TaskCompletionEvent (non-blocking). + + This method never raises; any telemetry errors are logged and swallowed + so they do not interfere with task processing. + + Args: + item: The QueueItem that was processed. + outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.). + duration_ms: Wall-clock duration in milliseconds. + retry_count: Number of retries before this attempt. + actual_input_tokens: Actual input tokens consumed by the harness. + actual_output_tokens: Actual output tokens consumed by the harness. + quality_passed: Whether all quality gates passed. + quality_gates_run: Quality gates that were executed. 
+ quality_gates_failed: Quality gates that failed. + """ + if self._telemetry_client is None or not self._instance_id: + return + + try: + model, provider, harness = _resolve_agent_fields( + item.metadata.assigned_agent, + ) + complexity = _DIFFICULTY_TO_COMPLEXITY.get( + item.metadata.difficulty, Complexity.MEDIUM + ) + + event = build_task_event( + instance_id=self._instance_id, + task_type=TaskType.IMPLEMENTATION, + complexity=complexity, + outcome=outcome, + duration_ms=duration_ms, + model=model, + provider=provider, + harness=harness, + actual_input_tokens=actual_input_tokens, + actual_output_tokens=actual_output_tokens, + estimated_input_tokens=item.metadata.estimated_context, + quality_passed=quality_passed, + quality_gates_run=quality_gates_run, + quality_gates_failed=quality_gates_failed, + retry_count=retry_count, + ) + self._telemetry_client.track(event) + logger.debug( + "Telemetry event emitted for issue #%d (outcome=%s)", + item.issue_number, + outcome.value, + ) + except Exception: + logger.exception( + "Failed to emit telemetry for issue #%d (non-fatal)", + item.issue_number, + ) + @trace_agent_operation(operation_name="spawn_agent") async def spawn_agent(self, item: QueueItem) -> bool: """Spawn an agent to process the given item. @@ -294,6 +434,8 @@ class OrchestrationLoop: poll_interval: float = 5.0, circuit_breaker_threshold: int = 5, circuit_breaker_cooldown: float = 30.0, + telemetry_client: TelemetryClient | None = None, + instance_id: str = "", ) -> None: """Initialize the OrchestrationLoop. 
@@ -305,6 +447,8 @@ class OrchestrationLoop: poll_interval: Seconds between queue polls (default: 5.0) circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5) circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30) + telemetry_client: Optional Mosaic telemetry client for tracking task events + instance_id: UUID identifying this coordinator instance for telemetry """ self.queue_manager = queue_manager self.quality_orchestrator = quality_orchestrator @@ -314,6 +458,11 @@ class OrchestrationLoop: self._running = False self._stop_event: asyncio.Event | None = None self._active_agents: dict[int, dict[str, Any]] = {} + self._telemetry_client = telemetry_client + self._instance_id = instance_id + + # Per-issue retry tracking + self._retry_counts: dict[int, int] = {} # Metrics tracking self._processed_count = 0 @@ -493,6 +642,7 @@ class OrchestrationLoop: 3. Spawns an agent to process it 4. Runs quality gates on completion 5. Handles rejection with forced continuation or marks complete + 6. 
Emits a Mosaic telemetry TaskCompletionEvent Returns: The QueueItem that was processed, or None if queue is empty @@ -524,12 +674,21 @@ class OrchestrationLoop: "status": "running", } + # Track timing for telemetry + start_mono = time.monotonic() + outcome = Outcome.FAILURE + quality_passed = False + gates_run: list[QualityGate] = [] + gates_failed: list[QualityGate] = [] + retry_count = self._retry_counts.get(item.issue_number, 0) + try: # Spawn agent (stub implementation) agent_success = await self._spawn_agent(item) if not agent_success: logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress") + self._retry_counts[item.issue_number] = retry_count + 1 return item # Check context usage (stub - no real monitoring in Phase 0) @@ -538,24 +697,123 @@ class OrchestrationLoop: # Run quality gates on completion verification = await self._verify_quality(item) + # Map gate results for telemetry + gates_run = [ + _GATE_NAME_TO_ENUM[name] + for name in verification.gate_results + if name in _GATE_NAME_TO_ENUM + ] + gates_failed = [ + _GATE_NAME_TO_ENUM[name] + for name, result in verification.gate_results.items() + if name in _GATE_NAME_TO_ENUM and not result.passed + ] + quality_passed = verification.all_passed + if verification.all_passed: # All gates passed - mark as complete self.queue_manager.mark_complete(item.issue_number) self._success_count += 1 + outcome = Outcome.SUCCESS + # Clear retry counter on success + self._retry_counts.pop(item.issue_number, None) logger.info( f"Issue #{item.issue_number} completed successfully - all gates passed" ) else: # Gates failed - generate continuation prompt self._rejection_count += 1 + outcome = Outcome.FAILURE + self._retry_counts[item.issue_number] = retry_count + 1 await self._handle_rejection(item, verification) except Exception as e: logger.error(f"Error processing issue #{item.issue_number}: {e}") # Item remains in progress on error + finally: + elapsed_ms = int((time.monotonic() - start_mono) * 1000) 
+ self._emit_task_telemetry( + item, + outcome=outcome, + duration_ms=elapsed_ms, + retry_count=retry_count, + quality_passed=quality_passed, + quality_gates_run=gates_run, + quality_gates_failed=gates_failed, + ) + return item + def _emit_task_telemetry( + self, + item: QueueItem, + *, + outcome: Outcome, + duration_ms: int, + retry_count: int = 0, + actual_input_tokens: int = 0, + actual_output_tokens: int = 0, + quality_passed: bool = False, + quality_gates_run: list[QualityGate] | None = None, + quality_gates_failed: list[QualityGate] | None = None, + ) -> None: + """Emit a Mosaic telemetry TaskCompletionEvent (non-blocking). + + This method never raises; any telemetry errors are logged and swallowed + so they do not interfere with task processing. + + Args: + item: The QueueItem that was processed. + outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.). + duration_ms: Wall-clock duration in milliseconds. + retry_count: Number of retries before this attempt. + actual_input_tokens: Actual input tokens consumed by the harness. + actual_output_tokens: Actual output tokens consumed by the harness. + quality_passed: Whether all quality gates passed. + quality_gates_run: Quality gates that were executed. + quality_gates_failed: Quality gates that failed. 
+ """ + if self._telemetry_client is None or not self._instance_id: + return + + try: + model, provider, harness = _resolve_agent_fields( + item.metadata.assigned_agent, + ) + complexity = _DIFFICULTY_TO_COMPLEXITY.get( + item.metadata.difficulty, Complexity.MEDIUM + ) + + event = build_task_event( + instance_id=self._instance_id, + task_type=TaskType.IMPLEMENTATION, + complexity=complexity, + outcome=outcome, + duration_ms=duration_ms, + model=model, + provider=provider, + harness=harness, + actual_input_tokens=actual_input_tokens, + actual_output_tokens=actual_output_tokens, + estimated_input_tokens=item.metadata.estimated_context, + quality_passed=quality_passed, + quality_gates_run=quality_gates_run, + quality_gates_failed=quality_gates_failed, + retry_count=retry_count, + ) + self._telemetry_client.track(event) + logger.debug( + "Telemetry event emitted for issue #%d (outcome=%s)", + item.issue_number, + outcome.value, + ) + except Exception: + logger.exception( + "Failed to emit telemetry for issue #%d (non-fatal)", + item.issue_number, + ) + async def _spawn_agent(self, item: QueueItem) -> bool: """Spawn an agent to process the given item. 
diff --git a/apps/coordinator/src/main.py b/apps/coordinator/src/main.py index 8f345b5..9c37f91 100644 --- a/apps/coordinator/src/main.py +++ b/apps/coordinator/src/main.py @@ -100,6 +100,8 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]: _coordinator = Coordinator( queue_manager=queue_manager, poll_interval=settings.coordinator_poll_interval, + telemetry_client=mosaic_telemetry_client, + instance_id=mosaic_telemetry_config.instance_id or "", ) logger.info( f"Coordinator initialized (poll interval: {settings.coordinator_poll_interval}s, " diff --git a/apps/coordinator/tests/test_task_telemetry.py b/apps/coordinator/tests/test_task_telemetry.py new file mode 100644 index 0000000..ddcc2c4 --- /dev/null +++ b/apps/coordinator/tests/test_task_telemetry.py @@ -0,0 +1,796 @@ +"""Tests for task completion telemetry instrumentation in the coordinator. + +These tests verify that the Coordinator and OrchestrationLoop correctly +emit TaskCompletionEvents via the Mosaic telemetry SDK after each task +dispatch attempt. 
+""" + +from __future__ import annotations + +import tempfile +from collections.abc import Generator +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest +from mosaicstack_telemetry import ( # type: ignore[import-untyped] + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + TaskCompletionEvent, + TaskType, + TelemetryClient, +) + +from src.coordinator import ( + _AGENT_TELEMETRY_MAP, + _DIFFICULTY_TO_COMPLEXITY, + _GATE_NAME_TO_ENUM, + Coordinator, + OrchestrationLoop, + _resolve_agent_fields, +) +from src.gates.quality_gate import GateResult +from src.models import IssueMetadata +from src.quality_orchestrator import QualityOrchestrator, VerificationResult +from src.queue import QueueManager + +VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc" + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def temp_queue_file() -> Generator[Path, None, None]: + """Create a temporary file for queue persistence.""" + with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f: + temp_path = Path(f.name) + yield temp_path + if temp_path.exists(): + temp_path.unlink() + + +@pytest.fixture +def queue_manager(temp_queue_file: Path) -> QueueManager: + """Create a queue manager with temporary storage.""" + return QueueManager(queue_file=temp_queue_file) + + +@pytest.fixture +def mock_telemetry_client() -> MagicMock: + """Create a mock TelemetryClient.""" + client = MagicMock(spec=TelemetryClient) + client.track = MagicMock() + return client + + +@pytest.fixture +def sonnet_metadata() -> IssueMetadata: + """Metadata for a sonnet agent task.""" + return IssueMetadata( + assigned_agent="sonnet", + difficulty="medium", + estimated_context=50000, + ) + + +@pytest.fixture +def opus_metadata() -> IssueMetadata: + """Metadata for an opus agent task (hard difficulty).""" + 
return IssueMetadata( + assigned_agent="opus", + difficulty="hard", + estimated_context=120000, + ) + + +# --------------------------------------------------------------------------- +# _resolve_agent_fields tests +# --------------------------------------------------------------------------- + + +class TestResolveAgentFields: + """Tests for the _resolve_agent_fields helper.""" + + def test_known_agent_sonnet(self) -> None: + """Should return correct fields for sonnet agent.""" + model, provider, harness = _resolve_agent_fields("sonnet") + assert model == "claude-sonnet-4-20250514" + assert provider == Provider.ANTHROPIC + assert harness == Harness.CLAUDE_CODE + + def test_known_agent_opus(self) -> None: + """Should return correct fields for opus agent.""" + model, provider, harness = _resolve_agent_fields("opus") + assert model == "claude-opus-4-20250514" + assert provider == Provider.ANTHROPIC + assert harness == Harness.CLAUDE_CODE + + def test_known_agent_haiku(self) -> None: + """Should return correct fields for haiku agent.""" + model, provider, harness = _resolve_agent_fields("haiku") + assert model == "claude-haiku-3.5-20241022" + assert provider == Provider.ANTHROPIC + assert harness == Harness.CLAUDE_CODE + + def test_known_agent_glm(self) -> None: + """Should return correct fields for glm (self-hosted) agent.""" + model, provider, harness = _resolve_agent_fields("glm") + assert model == "glm-4" + assert provider == Provider.CUSTOM + assert harness == Harness.CUSTOM + + def test_known_agent_minimax(self) -> None: + """Should return correct fields for minimax (self-hosted) agent.""" + model, provider, harness = _resolve_agent_fields("minimax") + assert model == "minimax" + assert provider == Provider.CUSTOM + assert harness == Harness.CUSTOM + + def test_unknown_agent_returns_defaults(self) -> None: + """Should return unknown values for unrecognised agent names.""" + model, provider, harness = _resolve_agent_fields("nonexistent") + assert model == "unknown" 
+ assert provider == Provider.UNKNOWN + assert harness == Harness.UNKNOWN + + def test_all_map_entries_covered(self) -> None: + """Ensure every entry in _AGENT_TELEMETRY_MAP is resolvable.""" + for agent_name in _AGENT_TELEMETRY_MAP: + model, provider, harness = _resolve_agent_fields(agent_name) + assert model != "unknown" + + +# --------------------------------------------------------------------------- +# Coordinator telemetry emission tests +# --------------------------------------------------------------------------- + + +class TestCoordinatorTelemetry: + """Tests for telemetry emission in the Coordinator class.""" + + @pytest.mark.asyncio + async def test_emits_success_event_on_completion( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should emit a SUCCESS event when task completes successfully.""" + queue_manager.enqueue(100, sonnet_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + + await coordinator.process_queue() + + mock_telemetry_client.track.assert_called_once() + event = mock_telemetry_client.track.call_args[0][0] + assert isinstance(event, TaskCompletionEvent) + assert event.outcome == Outcome.SUCCESS + assert event.task_type == TaskType.IMPLEMENTATION + assert event.complexity == Complexity.MEDIUM + assert event.provider == Provider.ANTHROPIC + assert event.harness == Harness.CLAUDE_CODE + assert str(event.instance_id) == VALID_INSTANCE_ID + assert event.task_duration_ms >= 0 + + @pytest.mark.asyncio + async def test_emits_failure_event_when_agent_fails( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should emit a FAILURE event when spawn_agent returns False.""" + queue_manager.enqueue(101, sonnet_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + 
telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + # Override spawn_agent to fail + coordinator.spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign] + + await coordinator.process_queue() + + mock_telemetry_client.track.assert_called_once() + event = mock_telemetry_client.track.call_args[0][0] + assert event.outcome == Outcome.FAILURE + + @pytest.mark.asyncio + async def test_emits_failure_event_on_exception( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should emit a FAILURE event when spawn_agent raises an exception.""" + queue_manager.enqueue(102, sonnet_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + coordinator.spawn_agent = AsyncMock(side_effect=RuntimeError("agent crashed")) # type: ignore[method-assign] + + await coordinator.process_queue() + + mock_telemetry_client.track.assert_called_once() + event = mock_telemetry_client.track.call_args[0][0] + assert event.outcome == Outcome.FAILURE + + @pytest.mark.asyncio + async def test_maps_difficulty_to_complexity( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + opus_metadata: IssueMetadata, + ) -> None: + """Should map difficulty='hard' to Complexity.HIGH in the event.""" + queue_manager.enqueue(103, opus_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + + await coordinator.process_queue() + + event = mock_telemetry_client.track.call_args[0][0] + assert event.complexity == Complexity.HIGH + + @pytest.mark.asyncio + async def test_maps_agent_to_model_and_provider( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + opus_metadata: IssueMetadata, + ) -> None: + """Should map 'opus' agent to opus model and ANTHROPIC provider.""" + 
queue_manager.enqueue(104, opus_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + + await coordinator.process_queue() + + event = mock_telemetry_client.track.call_args[0][0] + assert "opus" in event.model + assert event.provider == Provider.ANTHROPIC + assert event.harness == Harness.CLAUDE_CODE + + @pytest.mark.asyncio + async def test_no_event_when_telemetry_disabled( + self, + queue_manager: QueueManager, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should not call track when telemetry_client is None.""" + queue_manager.enqueue(105, sonnet_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=None, + instance_id=VALID_INSTANCE_ID, + ) + + # Should not raise + await coordinator.process_queue() + + @pytest.mark.asyncio + async def test_no_event_when_instance_id_empty( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should not call track when instance_id is empty.""" + queue_manager.enqueue(106, sonnet_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id="", + ) + + await coordinator.process_queue() + mock_telemetry_client.track.assert_not_called() + + @pytest.mark.asyncio + async def test_telemetry_exception_does_not_propagate( + self, + queue_manager: QueueManager, + sonnet_metadata: IssueMetadata, + ) -> None: + """Telemetry failures must never break task processing.""" + queue_manager.enqueue(107, sonnet_metadata) + + bad_client = MagicMock(spec=TelemetryClient) + bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down")) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=bad_client, + instance_id=VALID_INSTANCE_ID, + ) + + # Should complete without raising, despite telemetry failure + result = await 
coordinator.process_queue() + assert result is not None + assert result.issue_number == 107 + + @pytest.mark.asyncio + async def test_no_event_when_queue_empty( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + ) -> None: + """Should not emit any event when the queue is empty.""" + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + + result = await coordinator.process_queue() + assert result is None + mock_telemetry_client.track.assert_not_called() + + @pytest.mark.asyncio + async def test_estimated_input_tokens_from_metadata( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should set estimated_input_tokens from issue metadata.""" + queue_manager.enqueue(108, sonnet_metadata) + + coordinator = Coordinator( + queue_manager=queue_manager, + telemetry_client=mock_telemetry_client, + instance_id=VALID_INSTANCE_ID, + ) + + await coordinator.process_queue() + + event = mock_telemetry_client.track.call_args[0][0] + assert event.estimated_input_tokens == 50000 + + +# --------------------------------------------------------------------------- +# OrchestrationLoop telemetry emission tests +# --------------------------------------------------------------------------- + + +def _make_orchestration_loop( + queue_manager: QueueManager, + telemetry_client: TelemetryClient | None = None, + instance_id: str = VALID_INSTANCE_ID, + quality_result: VerificationResult | None = None, +) -> OrchestrationLoop: + """Create an OrchestrationLoop with mocked dependencies. + + Args: + queue_manager: Queue manager instance. + telemetry_client: Optional telemetry client. + instance_id: Coordinator instance ID. + quality_result: Override quality verification result. + + Returns: + Configured OrchestrationLoop. 
+ """ + # Create quality orchestrator mock + qo = MagicMock(spec=QualityOrchestrator) + default_result = quality_result or VerificationResult( + all_passed=True, + gate_results={ + "build": GateResult(passed=True, message="Build OK"), + "lint": GateResult(passed=True, message="Lint OK"), + "test": GateResult(passed=True, message="Test OK"), + "coverage": GateResult(passed=True, message="Coverage OK"), + }, + ) + qo.verify_completion = AsyncMock(return_value=default_result) + + # Continuation service mock + from src.forced_continuation import ForcedContinuationService + + cs = MagicMock(spec=ForcedContinuationService) + cs.generate_prompt = MagicMock(return_value="Fix: build failed") + + # Context monitor mock + from src.context_monitor import ContextMonitor + + cm = MagicMock(spec=ContextMonitor) + cm.determine_action = AsyncMock(return_value="continue") + + return OrchestrationLoop( + queue_manager=queue_manager, + quality_orchestrator=qo, + continuation_service=cs, + context_monitor=cm, + telemetry_client=telemetry_client, + instance_id=instance_id, + ) + + +class TestOrchestrationLoopTelemetry: + """Tests for telemetry emission in the OrchestrationLoop class.""" + + @pytest.mark.asyncio + async def test_emits_success_with_quality_gates( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should emit SUCCESS event with quality gate details.""" + queue_manager.enqueue(200, sonnet_metadata) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + + await loop.process_next_issue() + + mock_telemetry_client.track.assert_called_once() + event = mock_telemetry_client.track.call_args[0][0] + assert event.outcome == Outcome.SUCCESS + assert event.quality_gate_passed is True + assert set(event.quality_gates_run) == { + QualityGate.BUILD, + QualityGate.LINT, + QualityGate.TEST, + QualityGate.COVERAGE, + } + assert event.quality_gates_failed == [] + + 
@pytest.mark.asyncio + async def test_emits_failure_with_failed_gates( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should emit FAILURE event with failed gate details.""" + queue_manager.enqueue(201, sonnet_metadata) + + failed_result = VerificationResult( + all_passed=False, + gate_results={ + "build": GateResult(passed=True, message="Build OK"), + "lint": GateResult(passed=True, message="Lint OK"), + "test": GateResult(passed=False, message="3 tests failed"), + "coverage": GateResult(passed=False, message="Coverage 70% < 85%"), + }, + ) + + loop = _make_orchestration_loop( + queue_manager, + telemetry_client=mock_telemetry_client, + quality_result=failed_result, + ) + + await loop.process_next_issue() + + mock_telemetry_client.track.assert_called_once() + event = mock_telemetry_client.track.call_args[0][0] + assert event.outcome == Outcome.FAILURE + assert event.quality_gate_passed is False + assert set(event.quality_gates_failed) == { + QualityGate.TEST, + QualityGate.COVERAGE, + } + assert set(event.quality_gates_run) == { + QualityGate.BUILD, + QualityGate.LINT, + QualityGate.TEST, + QualityGate.COVERAGE, + } + + @pytest.mark.asyncio + async def test_retry_count_starts_at_zero( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """First attempt should report retry_count=0.""" + queue_manager.enqueue(202, sonnet_metadata) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + + await loop.process_next_issue() + + event = mock_telemetry_client.track.call_args[0][0] + assert event.retry_count == 0 + + @pytest.mark.asyncio + async def test_retry_count_increments_on_failure( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Retry count should increment after a quality gate failure.""" + 
queue_manager.enqueue(203, sonnet_metadata) + + failed_result = VerificationResult( + all_passed=False, + gate_results={ + "build": GateResult(passed=False, message="Build failed"), + }, + ) + + loop = _make_orchestration_loop( + queue_manager, + telemetry_client=mock_telemetry_client, + quality_result=failed_result, + ) + + # First attempt + await loop.process_next_issue() + event1 = mock_telemetry_client.track.call_args[0][0] + assert event1.retry_count == 0 + + # Re-enqueue and process again (simulates retry) + queue_manager.enqueue(203, sonnet_metadata) + mock_telemetry_client.track.reset_mock() + + await loop.process_next_issue() + event2 = mock_telemetry_client.track.call_args[0][0] + assert event2.retry_count == 1 + + @pytest.mark.asyncio + async def test_retry_count_clears_on_success( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Retry count should be cleared after a successful completion.""" + queue_manager.enqueue(204, sonnet_metadata) + + # First: fail + failed_result = VerificationResult( + all_passed=False, + gate_results={ + "build": GateResult(passed=False, message="Build failed"), + }, + ) + loop = _make_orchestration_loop( + queue_manager, + telemetry_client=mock_telemetry_client, + quality_result=failed_result, + ) + + await loop.process_next_issue() + assert loop._retry_counts.get(204) == 1 + + # Now succeed + success_result = VerificationResult( + all_passed=True, + gate_results={ + "build": GateResult(passed=True, message="Build OK"), + }, + ) + loop.quality_orchestrator.verify_completion = AsyncMock(return_value=success_result) # type: ignore[method-assign] + queue_manager.enqueue(204, sonnet_metadata) + mock_telemetry_client.track.reset_mock() + + await loop.process_next_issue() + assert 204 not in loop._retry_counts + + @pytest.mark.asyncio + async def test_emits_failure_when_agent_spawn_fails( + self, + queue_manager: QueueManager, + mock_telemetry_client: 
MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should emit FAILURE when _spawn_agent returns False.""" + queue_manager.enqueue(205, sonnet_metadata) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + loop._spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign] + + await loop.process_next_issue() + + mock_telemetry_client.track.assert_called_once() + event = mock_telemetry_client.track.call_args[0][0] + assert event.outcome == Outcome.FAILURE + + @pytest.mark.asyncio + async def test_no_event_when_telemetry_disabled( + self, + queue_manager: QueueManager, + sonnet_metadata: IssueMetadata, + ) -> None: + """Should not call track when telemetry_client is None.""" + queue_manager.enqueue(206, sonnet_metadata) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=None + ) + + # Should not raise + result = await loop.process_next_issue() + assert result is not None + + @pytest.mark.asyncio + async def test_telemetry_exception_does_not_propagate( + self, + queue_manager: QueueManager, + sonnet_metadata: IssueMetadata, + ) -> None: + """Telemetry failures must never disrupt task processing.""" + queue_manager.enqueue(207, sonnet_metadata) + + bad_client = MagicMock(spec=TelemetryClient) + bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down")) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=bad_client + ) + + result = await loop.process_next_issue() + assert result is not None + assert result.issue_number == 207 + + @pytest.mark.asyncio + async def test_duration_is_positive( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Duration should be a non-negative integer.""" + queue_manager.enqueue(208, sonnet_metadata) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + + await loop.process_next_issue() + + event = 
mock_telemetry_client.track.call_args[0][0] + assert event.task_duration_ms >= 0 + + @pytest.mark.asyncio + async def test_maps_glm_agent_correctly( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + ) -> None: + """Should map GLM (self-hosted) agent to CUSTOM provider/harness.""" + glm_meta = IssueMetadata( + assigned_agent="glm", + difficulty="medium", + estimated_context=30000, + ) + queue_manager.enqueue(209, glm_meta) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + + await loop.process_next_issue() + + event = mock_telemetry_client.track.call_args[0][0] + assert event.model == "glm-4" + assert event.provider == Provider.CUSTOM + assert event.harness == Harness.CUSTOM + + @pytest.mark.asyncio + async def test_maps_easy_difficulty_to_low_complexity( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + ) -> None: + """Should map difficulty='easy' to Complexity.LOW.""" + easy_meta = IssueMetadata( + assigned_agent="haiku", + difficulty="easy", + estimated_context=10000, + ) + queue_manager.enqueue(210, easy_meta) + + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + + await loop.process_next_issue() + + event = mock_telemetry_client.track.call_args[0][0] + assert event.complexity == Complexity.LOW + + @pytest.mark.asyncio + async def test_no_event_when_queue_empty( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + ) -> None: + """Should not emit an event when queue is empty.""" + loop = _make_orchestration_loop( + queue_manager, telemetry_client=mock_telemetry_client + ) + + result = await loop.process_next_issue() + assert result is None + mock_telemetry_client.track.assert_not_called() + + @pytest.mark.asyncio + async def test_unknown_gate_names_excluded( + self, + queue_manager: QueueManager, + mock_telemetry_client: MagicMock, + sonnet_metadata: IssueMetadata, + ) -> None: + """Gate names 
not in _GATE_NAME_TO_ENUM should be excluded from telemetry.""" + queue_manager.enqueue(211, sonnet_metadata) + + result_with_unknown = VerificationResult( + all_passed=False, + gate_results={ + "build": GateResult(passed=True, message="Build OK"), + "unknown_gate": GateResult(passed=False, message="Unknown gate"), + }, + ) + + loop = _make_orchestration_loop( + queue_manager, + telemetry_client=mock_telemetry_client, + quality_result=result_with_unknown, + ) + + await loop.process_next_issue() + + event = mock_telemetry_client.track.call_args[0][0] + assert QualityGate.BUILD in event.quality_gates_run + # unknown_gate should not appear + assert len(event.quality_gates_run) == 1 + assert len(event.quality_gates_failed) == 0 + + +# --------------------------------------------------------------------------- +# Mapping dict completeness tests +# --------------------------------------------------------------------------- + + +class TestMappingCompleteness: + """Tests to verify mapping dicts cover expected values.""" + + def test_difficulty_map_covers_all_metadata_values(self) -> None: + """All valid difficulty levels should have Complexity mappings.""" + expected_difficulties = {"easy", "medium", "hard"} + assert expected_difficulties == set(_DIFFICULTY_TO_COMPLEXITY.keys()) + + def test_gate_name_map_covers_all_orchestrator_gates(self) -> None: + """All gate names used by QualityOrchestrator should be mappable.""" + expected_gates = {"build", "lint", "test", "coverage"} + assert expected_gates == set(_GATE_NAME_TO_ENUM.keys()) + + def test_agent_map_covers_all_configured_agents(self) -> None: + """All agents used by the coordinator should have telemetry mappings.""" + expected_agents = {"sonnet", "opus", "haiku", "glm", "minimax"} + assert expected_agents == set(_AGENT_TELEMETRY_MAP.keys()) From 5958569cbaf48b2ed41d531fafb7b3731f4ba01a Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:58:01 -0600 Subject: [PATCH 07/15] docs(#376): telemetry 
integration guide - Create comprehensive telemetry documentation at docs/telemetry.md - Cover configuration, event schema, predictions, SDK reference - Include development guide with dry-run mode and troubleshooting - Link from main README.md Refs #376 Co-Authored-By: Claude Opus 4.6 --- README.md | 1 + docs/telemetry.md | 736 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 737 insertions(+) create mode 100644 docs/telemetry.md diff --git a/README.md b/README.md index 65b2ab2..0f31495 100644 --- a/README.md +++ b/README.md @@ -758,6 +758,7 @@ Complete documentation is organized in a Bookstack-compatible structure in the ` - **[Overview](docs/3-architecture/1-overview/)** — System design and components - **[Authentication](docs/3-architecture/2-authentication/)** — BetterAuth and OIDC integration - **[Design Principles](docs/3-architecture/3-design-principles/1-pda-friendly.md)** — PDA-friendly patterns (non-negotiable) +- **[Telemetry](docs/telemetry.md)** — AI task completion tracking, predictions, and SDK reference ### 🔌 API Reference diff --git a/docs/telemetry.md b/docs/telemetry.md new file mode 100644 index 0000000..ed37eca --- /dev/null +++ b/docs/telemetry.md @@ -0,0 +1,736 @@ +# Mosaic Telemetry Integration Guide + +## 1. Overview + +### What is Mosaic Telemetry? + +Mosaic Telemetry is a task completion tracking system purpose-built for AI operations within Mosaic Stack. It captures detailed metrics about every AI task execution -- token usage, cost, duration, outcome, and quality gate results -- and submits them to a central telemetry API for aggregation and analysis. + +The aggregated data powers a **prediction system** that provides pre-task estimates for cost, token usage, and expected quality, enabling informed decisions before dispatching work to AI agents. 
+ +### How It Differs from OpenTelemetry + +Mosaic Stack uses **two separate telemetry systems** that serve different purposes: + +| Aspect | OpenTelemetry (OTEL) | Mosaic Telemetry | +|--------|---------------------|------------------| +| **Purpose** | Distributed request tracing and observability | AI task completion metrics and predictions | +| **What it tracks** | HTTP requests, spans, latency, errors | Token counts, costs, outcomes, quality gates | +| **Data destination** | OTEL Collector (Jaeger, Grafana, etc.) | Mosaic Telemetry API (PostgreSQL-backed) | +| **Module location (API)** | `apps/api/src/telemetry/` | `apps/api/src/mosaic-telemetry/` | +| **Module location (Coordinator)** | `apps/coordinator/src/telemetry.py` | `apps/coordinator/src/mosaic_telemetry.py` | + +Both systems can run simultaneously. They are completely independent. + +### Architecture + +``` ++------------------+ +------------------+ +| Mosaic API | | Coordinator | +| (NestJS) | | (FastAPI) | ++--------+---------+ +--------+---------+ + | | + Track events Track events + | | + v v ++------------------------------------------+ +| Telemetry Client SDK | +| (JS: @mosaicstack/telemetry-client) | +| (Py: mosaicstack-telemetry) | +| | +| - Event queue (in-memory) | +| - Batch submission (5-min intervals) | +| - Prediction cache (6hr TTL) | ++-------------------+----------------------+ + | + HTTP POST /events + HTTP POST /predictions + | + v ++------------------------------------------+ +| Mosaic Telemetry API | +| (Separate service) | +| | +| - Event ingestion & validation | +| - Aggregation & statistics | +| - Prediction generation | ++-------------------+----------------------+ + | + v + +---------------+ + | PostgreSQL | + +---------------+ +``` + +**Data flow:** + +1. Application code calls `trackTaskCompletion()` (JS) or `client.track()` (Python) +2. Events are queued in memory (up to 1,000 events) +3. A background timer flushes the queue every 5 minutes in batches of up to 100 +4. 
The telemetry API ingests events, validates them, and stores them in PostgreSQL
+5. Prediction queries are served from aggregated data with a 6-hour cache TTL
+
+---
+
+## 2. Configuration Guide
+
+### Environment Variables
+
+All configuration is done through environment variables prefixed with `MOSAIC_TELEMETRY_`:
+
+| Variable | Type | Default | Description |
+|----------|------|---------|-------------|
+| `MOSAIC_TELEMETRY_ENABLED` | boolean | `true` | Master switch. Set to `false` to completely disable telemetry (no HTTP calls). |
+| `MOSAIC_TELEMETRY_SERVER_URL` | string | (none) | URL of the telemetry API server. For Docker Compose: `http://telemetry-api:8000`. For production: `https://tel-api.mosaicstack.dev`. |
+| `MOSAIC_TELEMETRY_API_KEY` | string | (none) | API key for authenticating with the telemetry server. Generate with: `openssl rand -hex 32` (64-char hex string). |
+| `MOSAIC_TELEMETRY_INSTANCE_ID` | string | (none) | Unique UUID identifying this Mosaic Stack instance. Generate with: `uuidgen` or `python -c "import uuid; print(uuid.uuid4())"`. |
+| `MOSAIC_TELEMETRY_DRY_RUN` | boolean | `false` | When `true`, events are logged to console instead of being sent via HTTP. Useful for development. |
+
+### Enabling Telemetry
+
+To enable telemetry, set all three required variables in your `.env` file:
+
+```bash
+MOSAIC_TELEMETRY_ENABLED=true
+MOSAIC_TELEMETRY_SERVER_URL=http://telemetry-api:8000
+MOSAIC_TELEMETRY_API_KEY=<64-char-hex-api-key>
+MOSAIC_TELEMETRY_INSTANCE_ID=<instance-uuid>
+```
+
+If `MOSAIC_TELEMETRY_ENABLED` is `true` but any of `SERVER_URL`, `API_KEY`, or `INSTANCE_ID` is missing, the service logs a warning and disables telemetry gracefully. This is intentional: telemetry configuration issues never prevent the application from starting.
+
+### Disabling Telemetry
+
+Set `MOSAIC_TELEMETRY_ENABLED=false` in your `.env`. No HTTP calls will be made, and all tracking methods become safe no-ops.
+ +### Dry-Run Mode + +For local development and debugging, enable dry-run mode: + +```bash +MOSAIC_TELEMETRY_ENABLED=true +MOSAIC_TELEMETRY_DRY_RUN=true +MOSAIC_TELEMETRY_SERVER_URL=http://localhost:8000 # Not actually called +MOSAIC_TELEMETRY_API_KEY=0000000000000000000000000000000000000000000000000000000000000000 +MOSAIC_TELEMETRY_INSTANCE_ID=00000000-0000-0000-0000-000000000000 +``` + +In dry-run mode, the SDK logs event payloads to the console instead of submitting them via HTTP. This lets you verify that tracking points are firing correctly without needing a running telemetry API. + +### Docker Compose Configuration + +Both `docker-compose.yml` (root) and `docker/docker-compose.yml` pass telemetry environment variables to the API service: + +```yaml +services: + mosaic-api: + environment: + # Telemetry (task completion tracking & predictions) + MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false} + MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-http://telemetry-api:8000} + MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-} + MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-} + MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false} +``` + +Note that telemetry defaults to `false` in Docker Compose. Set `MOSAIC_TELEMETRY_ENABLED=true` in your `.env` to activate it. + +An optional local telemetry API service is available (commented out in `docker/docker-compose.yml`). Uncomment it to run a self-contained development environment: + +```yaml +# Uncomment in docker/docker-compose.yml +telemetry-api: + image: git.mosaicstack.dev/mosaic/telemetry-api:latest + container_name: mosaic-telemetry-api + restart: unless-stopped + environment: + HOST: 0.0.0.0 + PORT: 8000 + ports: + - "8001:8000" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + networks: + - mosaic-network +``` + +--- + +## 3. 
What Gets Tracked + +### TaskCompletionEvent Schema + +Every tracked event conforms to the `TaskCompletionEvent` interface. This is the core data structure submitted to the telemetry API: + +| Field | Type | Description | +|-------|------|-------------| +| `instance_id` | `string` | UUID of the Mosaic Stack instance that generated the event | +| `event_id` | `string` | Unique UUID for this event (auto-generated by the SDK) | +| `schema_version` | `string` | Schema version for forward compatibility (auto-set by the SDK) | +| `timestamp` | `string` | ISO 8601 timestamp of event creation (auto-set by the SDK) | +| `task_duration_ms` | `number` | How long the task took in milliseconds | +| `task_type` | `TaskType` | Type of task performed (see enum below) | +| `complexity` | `Complexity` | Complexity level of the task | +| `harness` | `Harness` | The coding harness or tool used | +| `model` | `string` | AI model name (e.g., `"claude-sonnet-4-5"`) | +| `provider` | `Provider` | AI model provider | +| `estimated_input_tokens` | `number` | Pre-task estimated input tokens (from predictions) | +| `estimated_output_tokens` | `number` | Pre-task estimated output tokens (from predictions) | +| `actual_input_tokens` | `number` | Actual input tokens consumed | +| `actual_output_tokens` | `number` | Actual output tokens generated | +| `estimated_cost_usd_micros` | `number` | Pre-task estimated cost in microdollars (USD * 1,000,000) | +| `actual_cost_usd_micros` | `number` | Actual cost in microdollars | +| `quality_gate_passed` | `boolean` | Whether all quality gates passed | +| `quality_gates_run` | `QualityGate[]` | List of quality gates that were executed | +| `quality_gates_failed` | `QualityGate[]` | List of quality gates that failed | +| `context_compactions` | `number` | Number of context window compactions during the task | +| `context_rotations` | `number` | Number of context window rotations during the task | +| `context_utilization_final` | `number` | Final context 
window utilization (0.0 to 1.0) | +| `outcome` | `Outcome` | Task outcome | +| `retry_count` | `number` | Number of retries before completion | +| `language` | `string?` | Primary programming language (optional) | +| `repo_size_category` | `RepoSizeCategory?` | Repository size category (optional) | + +### Enum Values + +**TaskType:** +`planning`, `implementation`, `code_review`, `testing`, `debugging`, `refactoring`, `documentation`, `configuration`, `security_audit`, `unknown` + +**Complexity:** +`low`, `medium`, `high`, `critical` + +**Harness:** +`claude_code`, `opencode`, `kilo_code`, `aider`, `api_direct`, `ollama_local`, `custom`, `unknown` + +**Provider:** +`anthropic`, `openai`, `openrouter`, `ollama`, `google`, `mistral`, `custom`, `unknown` + +**QualityGate:** +`build`, `lint`, `test`, `coverage`, `typecheck`, `security` + +**Outcome:** +`success`, `failure`, `partial`, `timeout` + +**RepoSizeCategory:** +`tiny`, `small`, `medium`, `large`, `huge` + +### API Service: LLM Call Tracking + +The NestJS API tracks every LLM service call (chat, streaming chat, and embeddings) via `LlmTelemetryTrackerService` at `apps/api/src/llm/llm-telemetry-tracker.service.ts`. + +Tracked operations: +- **`chat`** -- Synchronous chat completions +- **`chatStream`** -- Streaming chat completions +- **`embed`** -- Embedding generation + +For each call, the tracker captures: +- Model name and provider type +- Input and output token counts +- Duration in milliseconds +- Success or failure outcome +- Calculated cost from the built-in cost table (`apps/api/src/llm/llm-cost-table.ts`) +- Task type inferred from calling context (e.g., `"brain"` maps to `planning`, `"review"` maps to `code_review`) + +The cost table uses longest-prefix matching on model names and covers all major Anthropic and OpenAI models. Ollama/local models are treated as zero-cost. 
+ +### Coordinator: Agent Task Dispatch Tracking + +The FastAPI coordinator tracks agent task completions in `apps/coordinator/src/mosaic_telemetry.py` and `apps/coordinator/src/coordinator.py`. + +After each agent task dispatch (success or failure), the coordinator emits a `TaskCompletionEvent` capturing: +- Task duration from start to finish +- Agent model, provider, and harness (resolved from the `assigned_agent` field) +- Task outcome (`success`, `failure`, `partial`, `timeout`) +- Quality gate results (build, lint, test, etc.) +- Retry count for the issue +- Complexity level from issue metadata + +The coordinator uses the `build_task_event()` helper function which provides sensible defaults for the coordinator context (Claude Code harness, Anthropic provider, TypeScript language). + +### Event Lifecycle + +``` +1. Application code calls trackTaskCompletion() or client.track() + | + v +2. Event is added to in-memory queue (max 1,000 events) + | + v +3. Background timer fires every 5 minutes (submitIntervalMs) + | + v +4. Queue is drained in batches of up to 100 events (batchSize) + | + v +5. Each batch is POSTed to the telemetry API + | + v +6. API validates, stores, and acknowledges each event +``` + +If the telemetry API is unreachable, events remain in the queue and are retried on the next interval (up to 3 retries per submission). Telemetry errors are logged but never propagated to calling code. + +--- + +## 4. Prediction System + +### How Predictions Work + +The Mosaic Telemetry API aggregates historical task completion data across all contributing instances. From this data, it generates statistical predictions for new tasks based on their characteristics (task type, model, provider, complexity). + +Predictions include percentile distributions (p10, p25, median, p75, p90) for token usage and cost, plus quality metrics (gate pass rate, success rate). 
+ +### Querying Predictions via API + +The API exposes a prediction endpoint at: + +``` +GET /api/telemetry/estimate?taskType=&model=&provider=&complexity= +``` + +**Authentication:** Requires a valid session (Bearer token via `AuthGuard`). + +**Query Parameters (all required):** + +| Parameter | Type | Example | Description | +|-----------|------|---------|-------------| +| `taskType` | `TaskType` | `implementation` | Task type to estimate | +| `model` | `string` | `claude-sonnet-4-5` | Model name | +| `provider` | `Provider` | `anthropic` | Provider name | +| `complexity` | `Complexity` | `medium` | Complexity level | + +**Example Request:** + +```bash +curl -X GET \ + 'http://localhost:3001/api/telemetry/estimate?taskType=implementation&model=claude-sonnet-4-5&provider=anthropic&complexity=medium' \ + -H 'Authorization: Bearer YOUR_SESSION_TOKEN' +``` + +**Response:** + +```json +{ + "data": { + "prediction": { + "input_tokens": { + "p10": 500, + "p25": 1200, + "median": 2500, + "p75": 5000, + "p90": 10000 + }, + "output_tokens": { + "p10": 200, + "p25": 800, + "median": 1500, + "p75": 3000, + "p90": 6000 + }, + "cost_usd_micros": { + "median": 30000 + }, + "duration_ms": { + "median": 5000 + }, + "correction_factors": { + "input": 1.0, + "output": 1.0 + }, + "quality": { + "gate_pass_rate": 0.85, + "success_rate": 0.92 + } + }, + "metadata": { + "sample_size": 150, + "fallback_level": 0, + "confidence": "high", + "last_updated": "2026-02-15T10:00:00Z", + "cache_hit": true + } + } +} +``` + +If no prediction data is available, the response returns `{ "data": null }`. 
+ +### Confidence Levels + +The prediction system reports a confidence level based on sample size and data freshness: + +| Confidence | Meaning | +|------------|---------| +| `high` | Substantial sample size, recent data, all dimensions matched | +| `medium` | Moderate sample, some dimension fallback | +| `low` | Small sample or significant fallback from requested dimensions | +| `none` | No data available for this combination | + +### Fallback Behavior + +When exact matches are unavailable, the prediction system falls back through progressively broader aggregations: + +1. **Exact match** -- task_type + model + provider + complexity +2. **Drop complexity** -- task_type + model + provider +3. **Drop model** -- task_type + provider +4. **Global** -- task_type only + +The `fallback_level` field in metadata indicates which level was used (0 = exact match). + +### Cache Strategy + +Predictions are cached in-memory by the SDK with a **6-hour TTL** (`predictionCacheTtlMs: 21_600_000`). The `PredictionService` pre-fetches common combinations on startup to warm the cache: + +- **Models:** claude-sonnet-4-5, claude-opus-4, claude-haiku-4-5, gpt-4o, gpt-4o-mini +- **Task types:** implementation, planning, code_review +- **Complexities:** low, medium + +This produces 30 pre-cached queries (5 models x 3 task types x 2 complexities). Subsequent requests for these combinations are served from cache without any HTTP call. + +--- + +## 5. 
SDK Reference
+
+### JavaScript: @mosaicstack/telemetry-client
+
+**Registry:** Gitea npm registry at `git.mosaicstack.dev`
+**Version:** 0.1.0
+
+**Installation:**
+
+```bash
+pnpm add @mosaicstack/telemetry-client
+```
+
+**Key Exports:**
+
+```typescript
+// Client
+import {
+  TelemetryClient,
+  EventBuilder,
+  resolveConfig,
+} from "@mosaicstack/telemetry-client";
+
+// Types
+import type {
+  TelemetryConfig,
+  TaskCompletionEvent,
+  EventBuilderParams,
+  PredictionQuery,
+  PredictionResponse,
+  PredictionData,
+  PredictionMetadata,
+  TokenDistribution,
+} from "@mosaicstack/telemetry-client";
+
+// Enums
+import {
+  TaskType,
+  Complexity,
+  Harness,
+  Provider,
+  QualityGate,
+  Outcome,
+  RepoSizeCategory,
+} from "@mosaicstack/telemetry-client";
+```
+
+**TelemetryClient API:**
+
+| Method | Description |
+|--------|-------------|
+| `constructor(config: TelemetryConfig)` | Create a new client with the given configuration |
+| `start(): void` | Start background batch submission (idempotent) |
+| `stop(): Promise<void>` | Stop background submission, flush remaining events |
+| `track(event: TaskCompletionEvent): void` | Queue an event for batch submission (never throws) |
+| `getPrediction(query: PredictionQuery): PredictionResponse \| null` | Get a cached prediction (returns null if not cached/expired) |
+| `refreshPredictions(queries: PredictionQuery[]): Promise<void>` | Force-refresh predictions from the server |
+| `eventBuilder: EventBuilder` | Get the EventBuilder for constructing events |
+| `queueSize: number` | Number of events currently queued |
+| `isRunning: boolean` | Whether the client is currently running |
+
+**TelemetryConfig Options:**
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `serverUrl` | `string` | (required) | Base URL of the telemetry server |
+| `apiKey` | `string` | (required) | 64-char hex API key |
+| `instanceId` | `string` | (required) | UUID for this instance |
+| `enabled` | `boolean` | `true` | Enable/disable telemetry |
+| `submitIntervalMs` | `number` | `300_000` (5 min) | Interval between batch submissions |
+| `maxQueueSize` | `number` | `1000` | Maximum queued events |
+| `batchSize` | `number` | `100` | Maximum events per batch |
+| `requestTimeoutMs` | `number` | `10_000` (10 sec) | HTTP request timeout |
+| `predictionCacheTtlMs` | `number` | `21_600_000` (6 hr) | Prediction cache TTL |
+| `dryRun` | `boolean` | `false` | Log events instead of sending |
+| `maxRetries` | `number` | `3` | Retries per submission |
+| `onError` | `(error: Error) => void` | noop | Error callback |
+
+**EventBuilder Usage:**
+
+```typescript
+const event = client.eventBuilder.build({
+  task_duration_ms: 1500,
+  task_type: TaskType.IMPLEMENTATION,
+  complexity: Complexity.LOW,
+  harness: Harness.API_DIRECT,
+  model: "claude-sonnet-4-5",
+  provider: Provider.ANTHROPIC,
+  estimated_input_tokens: 0,
+  estimated_output_tokens: 0,
+  actual_input_tokens: 200,
+  actual_output_tokens: 500,
+  estimated_cost_usd_micros: 0,
+  actual_cost_usd_micros: 8100,
+  quality_gate_passed: true,
+  quality_gates_run: [QualityGate.LINT, QualityGate.TEST],
+  quality_gates_failed: [],
+  context_compactions: 0,
+  context_rotations: 0,
+  context_utilization_final: 0.3,
+  outcome: Outcome.SUCCESS,
+  retry_count: 0,
+  language: "typescript",
+});
+
+client.track(event);
+```
+
+### Python: mosaicstack-telemetry
+
+**Registry:** Gitea PyPI registry at `git.mosaicstack.dev`
+**Version:** 0.1.0
+
+**Installation:**
+
+```bash
+pip install mosaicstack-telemetry
+```
+
+**Key Imports:**
+
+```python
+from mosaicstack_telemetry import (
+    TelemetryClient,
+    TelemetryConfig,
+    EventBuilder,
+    TaskType,
+    Complexity,
+    Harness,
+    Provider,
+    QualityGate,
+    Outcome,
+)
+```
+
+**Python Client Usage:**
+
+```python
+# Create config (reads MOSAIC_TELEMETRY_* env vars automatically)
+config = TelemetryConfig()
+errors = config.validate()
+
+# Create and start client
+client = TelemetryClient(config)
+await client.start_async() + +# Build and track an event +builder = EventBuilder(instance_id=config.instance_id) +event = ( + builder + .task_type(TaskType.IMPLEMENTATION) + .complexity_level(Complexity.MEDIUM) + .harness_type(Harness.CLAUDE_CODE) + .model("claude-sonnet-4-5") + .provider(Provider.ANTHROPIC) + .duration_ms(5000) + .outcome_value(Outcome.SUCCESS) + .tokens( + estimated_in=0, + estimated_out=0, + actual_in=3000, + actual_out=1500, + ) + .cost(estimated=0, actual=52500) + .quality( + passed=True, + gates_run=[QualityGate.BUILD, QualityGate.LINT, QualityGate.TEST], + gates_failed=[], + ) + .context(compactions=0, rotations=0, utilization=0.4) + .retry_count(0) + .language("typescript") + .build() +) + +client.track(event) + +# Shutdown (flushes remaining events) +await client.stop_async() +``` + +--- + +## 6. Development Guide + +### Testing Locally with Dry-Run Mode + +The fastest way to develop with telemetry is to use dry-run mode. This logs event payloads to the console without needing a running telemetry API: + +```bash +# In your .env +MOSAIC_TELEMETRY_ENABLED=true +MOSAIC_TELEMETRY_DRY_RUN=true +MOSAIC_TELEMETRY_SERVER_URL=http://localhost:8000 +MOSAIC_TELEMETRY_API_KEY=0000000000000000000000000000000000000000000000000000000000000000 +MOSAIC_TELEMETRY_INSTANCE_ID=00000000-0000-0000-0000-000000000000 +``` + +Start the API server and trigger LLM operations. You will see telemetry event payloads logged in the console output. + +### Adding New Tracking Points + +To add telemetry tracking to a new service in the NestJS API: + +**Step 1:** Inject `MosaicTelemetryService` into your service. 
Because `MosaicTelemetryModule` is global, no module import is needed: + +```typescript +import { Injectable } from "@nestjs/common"; +import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service"; +import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client"; + +@Injectable() +export class MyService { + constructor(private readonly telemetry: MosaicTelemetryService) {} +} +``` + +**Step 2:** Build and track events after task completion: + +```typescript +async performTask(): Promise { + const start = Date.now(); + + // ... perform the task ... + + const duration = Date.now() - start; + const builder = this.telemetry.eventBuilder; + + if (builder) { + const event = builder.build({ + task_duration_ms: duration, + task_type: TaskType.IMPLEMENTATION, + complexity: Complexity.MEDIUM, + harness: Harness.API_DIRECT, + model: "claude-sonnet-4-5", + provider: Provider.ANTHROPIC, + estimated_input_tokens: 0, + estimated_output_tokens: 0, + actual_input_tokens: inputTokens, + actual_output_tokens: outputTokens, + estimated_cost_usd_micros: 0, + actual_cost_usd_micros: costMicros, + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + context_compactions: 0, + context_rotations: 0, + context_utilization_final: 0, + outcome: Outcome.SUCCESS, + retry_count: 0, + }); + + this.telemetry.trackTaskCompletion(event); + } +} +``` + +**Step 3:** For LLM-specific tracking, use `LlmTelemetryTrackerService` instead, which handles cost calculation and task type inference automatically: + +```typescript +import { LlmTelemetryTrackerService } from "../llm/llm-telemetry-tracker.service"; + +@Injectable() +export class MyLlmService { + constructor(private readonly telemetryTracker: LlmTelemetryTrackerService) {} + + async chat(): Promise { + const start = Date.now(); + + // ... call LLM ... 
+ + this.telemetryTracker.trackLlmCompletion({ + model: "claude-sonnet-4-5", + providerType: "claude", + operation: "chat", + durationMs: Date.now() - start, + inputTokens: 150, + outputTokens: 300, + callingContext: "brain", // Used for task type inference + success: true, + }); + } +} +``` + +### Adding Tracking in the Coordinator (Python) + +Use the `build_task_event()` helper from `src/mosaic_telemetry.py`: + +```python +from src.mosaic_telemetry import build_task_event, get_telemetry_client + +client = get_telemetry_client(app) +if client is not None: + event = build_task_event( + instance_id=instance_id, + task_type=TaskType.IMPLEMENTATION, + complexity=Complexity.MEDIUM, + outcome=Outcome.SUCCESS, + duration_ms=5000, + model="claude-sonnet-4-5", + provider=Provider.ANTHROPIC, + harness=Harness.CLAUDE_CODE, + actual_input_tokens=3000, + actual_output_tokens=1500, + actual_cost_micros=52500, + ) + client.track(event) +``` + +### Troubleshooting + +**Telemetry events not appearing:** + +1. Check that `MOSAIC_TELEMETRY_ENABLED=true` is set +2. Verify all three required variables are set: `SERVER_URL`, `API_KEY`, `INSTANCE_ID` +3. Look for warning logs: `"Mosaic Telemetry is enabled but missing configuration"` indicates a missing variable +4. Try dry-run mode to confirm events are being generated + +**Console shows "Mosaic Telemetry is disabled":** + +This is the expected message when `MOSAIC_TELEMETRY_ENABLED=false`. If you intended telemetry to be active, set it to `true`. 
+ +**Events queuing but not submitting:** + +- Check that the telemetry API server at `MOSAIC_TELEMETRY_SERVER_URL` is reachable +- Verify the API key is a valid 64-character hex string +- The default submission interval is 5 minutes; wait at least one interval or call `stop()` to force a flush + +**Prediction endpoint returns null:** + +- Predictions require sufficient historical data in the telemetry API +- Check the `metadata.confidence` field; `"none"` means no data exists for this combination +- Predictions are cached for 6 hours; new data takes time to appear +- The `PredictionService` logs startup refresh status; check logs for errors + +**"Telemetry client error" in logs:** + +- These are non-fatal. The SDK never blocks application logic. +- Common causes: network timeout, invalid API key, server-side validation failure +- Check the telemetry API logs for corresponding errors From a5ee974765295d4ea88b0be7ddd11d5ae0954e0c Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:02:55 -0600 Subject: [PATCH 08/15] feat(#375): frontend token usage and cost dashboard - Install recharts for data visualization - Add Usage nav item to sidebar navigation - Create telemetry API service with data fetching functions - Build dashboard page with summary cards, charts, and time range selector - Token usage line chart, cost breakdown bar chart, task outcome pie chart - Loading and empty states handled - Responsive layout with PDA-friendly design - Add unit tests (14 tests passing) Refs #375 Co-Authored-By: Claude Opus 4.6 --- apps/web/package.json | 1 + .../app/(authenticated)/usage/page.test.tsx | 288 ++++++++++++ .../src/app/(authenticated)/usage/page.tsx | 427 ++++++++++++++++++ apps/web/src/components/layout/Navigation.tsx | 1 + apps/web/src/lib/api/index.ts | 1 + apps/web/src/lib/api/telemetry.ts | 187 ++++++++ pnpm-lock.yaml | 175 ++++++- 7 files changed, 1073 insertions(+), 7 deletions(-) create mode 100644 
apps/web/src/app/(authenticated)/usage/page.test.tsx create mode 100644 apps/web/src/app/(authenticated)/usage/page.tsx create mode 100644 apps/web/src/lib/api/telemetry.ts diff --git a/apps/web/package.json b/apps/web/package.json index 0ee4a5a..024c4ce 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -33,6 +33,7 @@ "react": "^19.0.0", "react-dom": "^19.0.0", "react-grid-layout": "^2.2.2", + "recharts": "^3.7.0", "socket.io-client": "^4.8.3" }, "devDependencies": { diff --git a/apps/web/src/app/(authenticated)/usage/page.test.tsx b/apps/web/src/app/(authenticated)/usage/page.test.tsx new file mode 100644 index 0000000..4d97ff6 --- /dev/null +++ b/apps/web/src/app/(authenticated)/usage/page.test.tsx @@ -0,0 +1,288 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { render, screen, waitFor, fireEvent } from "@testing-library/react"; +import type { ReactNode } from "react"; +import UsagePage from "./page"; + +// ─── Component Prop Types ──────────────────────────────────────────── + +interface ChildrenProps { + children: ReactNode; +} + +interface StyledChildrenProps extends ChildrenProps { + className?: string; +} + +// ─── Mocks ─────────────────────────────────────────────────────────── + +// Mock @/components/ui/card — @mosaic/ui can't be resolved in vitest +vi.mock("@/components/ui/card", () => ({ + Card: ({ children, className }: StyledChildrenProps): React.JSX.Element => ( +
{children}
+ ), + CardHeader: ({ children }: ChildrenProps): React.JSX.Element =>
{children}
, + CardContent: ({ children, className }: StyledChildrenProps): React.JSX.Element => ( +
{children}
+ ), + CardFooter: ({ children }: ChildrenProps): React.JSX.Element =>
{children}
, + CardTitle: ({ children, className }: StyledChildrenProps): React.JSX.Element => ( +

{children}

+ ), + CardDescription: ({ children, className }: StyledChildrenProps): React.JSX.Element => ( +

{children}

+ ), +})); + +// Mock recharts — jsdom has no SVG layout engine, so we render stubs +vi.mock("recharts", () => ({ + LineChart: ({ children }: ChildrenProps): React.JSX.Element => ( +
{children}
+ ), + Line: (): React.JSX.Element =>
, + BarChart: ({ children }: ChildrenProps): React.JSX.Element => ( +
{children}
+ ), + Bar: (): React.JSX.Element =>
, + PieChart: ({ children }: ChildrenProps): React.JSX.Element => ( +
{children}
+ ), + Pie: (): React.JSX.Element =>
, + Cell: (): React.JSX.Element =>
, + XAxis: (): React.JSX.Element =>
, + YAxis: (): React.JSX.Element =>
, + CartesianGrid: (): React.JSX.Element =>
, + Tooltip: (): React.JSX.Element =>
, + ResponsiveContainer: ({ children }: ChildrenProps): React.JSX.Element =>
{children}
, + Legend: (): React.JSX.Element =>
, +})); + +// Mock the telemetry API module +vi.mock("@/lib/api/telemetry", () => ({ + fetchUsageSummary: vi.fn(), + fetchTokenUsage: vi.fn(), + fetchCostBreakdown: vi.fn(), + fetchTaskOutcomes: vi.fn(), +})); + +// Import mocked modules after vi.mock +import { + fetchUsageSummary, + fetchTokenUsage, + fetchCostBreakdown, + fetchTaskOutcomes, +} from "@/lib/api/telemetry"; + +// ─── Test Data ─────────────────────────────────────────────────────── + +const mockSummary = { + totalTokens: 245800, + totalCost: 3.42, + taskCount: 47, + avgQualityGatePassRate: 0.87, +}; + +const mockTokenUsage = [ + { date: "2026-02-08", inputTokens: 10000, outputTokens: 5000, totalTokens: 15000 }, + { date: "2026-02-09", inputTokens: 12000, outputTokens: 6000, totalTokens: 18000 }, +]; + +const mockCostBreakdown = [ + { model: "claude-sonnet-4-5", provider: "anthropic", cost: 18.5, taskCount: 124 }, + { model: "gpt-4o", provider: "openai", cost: 12.3, taskCount: 89 }, +]; + +const mockTaskOutcomes = [ + { outcome: "Success", count: 312, color: "#6EBF8B" }, + { outcome: "Partial", count: 48, color: "#F5C862" }, +]; + +function setupMocks(overrides?: { empty?: boolean; error?: boolean }): void { + if (overrides?.error) { + vi.mocked(fetchUsageSummary).mockRejectedValue(new Error("Network error")); + vi.mocked(fetchTokenUsage).mockRejectedValue(new Error("Network error")); + vi.mocked(fetchCostBreakdown).mockRejectedValue(new Error("Network error")); + vi.mocked(fetchTaskOutcomes).mockRejectedValue(new Error("Network error")); + return; + } + + const summary = overrides?.empty ? 
{ ...mockSummary, taskCount: 0 } : mockSummary; + + vi.mocked(fetchUsageSummary).mockResolvedValue(summary); + vi.mocked(fetchTokenUsage).mockResolvedValue(mockTokenUsage); + vi.mocked(fetchCostBreakdown).mockResolvedValue(mockCostBreakdown); + vi.mocked(fetchTaskOutcomes).mockResolvedValue(mockTaskOutcomes); +} + +// ─── Tests ─────────────────────────────────────────────────────────── + +describe("UsagePage", (): void => { + beforeEach((): void => { + vi.clearAllMocks(); + }); + + it("should render the page title and subtitle", (): void => { + setupMocks(); + render(); + + expect(screen.getByRole("heading", { level: 1 })).toHaveTextContent("Usage"); + expect(screen.getByText("Token usage and cost overview")).toBeInTheDocument(); + }); + + it("should have proper layout structure", (): void => { + setupMocks(); + const { container } = render(); + const main = container.querySelector("main"); + expect(main).toBeInTheDocument(); + }); + + it("should show loading skeleton initially", (): void => { + setupMocks(); + render(); + expect(screen.getByTestId("loading-skeleton")).toBeInTheDocument(); + }); + + it("should render summary cards after loading", async (): Promise => { + setupMocks(); + render(); + + await waitFor((): void => { + expect(screen.getByTestId("summary-cards")).toBeInTheDocument(); + }); + + // Check summary card values + expect(screen.getByText("Total Tokens")).toBeInTheDocument(); + expect(screen.getByText("245.8K")).toBeInTheDocument(); + expect(screen.getByText("Estimated Cost")).toBeInTheDocument(); + expect(screen.getByText("$3.42")).toBeInTheDocument(); + expect(screen.getByText("Task Count")).toBeInTheDocument(); + expect(screen.getByText("47")).toBeInTheDocument(); + expect(screen.getByText("Quality Gate Pass Rate")).toBeInTheDocument(); + expect(screen.getByText("87.0%")).toBeInTheDocument(); + }); + + it("should render all chart sections after loading", async (): Promise => { + setupMocks(); + render(); + + await waitFor((): void => { + 
expect(screen.getByTestId("token-usage-chart")).toBeInTheDocument(); + expect(screen.getByTestId("cost-breakdown-chart")).toBeInTheDocument(); + expect(screen.getByTestId("task-outcomes-chart")).toBeInTheDocument(); + }); + }); + + it("should render the time range selector with three options", (): void => { + setupMocks(); + render(); + + expect(screen.getByText("7 Days")).toBeInTheDocument(); + expect(screen.getByText("30 Days")).toBeInTheDocument(); + expect(screen.getByText("90 Days")).toBeInTheDocument(); + }); + + it("should have 30 Days selected by default", (): void => { + setupMocks(); + render(); + + const button30d = screen.getByText("30 Days"); + expect(button30d).toHaveAttribute("aria-pressed", "true"); + }); + + it("should change time range when a different option is clicked", async (): Promise => { + setupMocks(); + render(); + + // Wait for initial load + await waitFor((): void => { + expect(screen.getByTestId("summary-cards")).toBeInTheDocument(); + }); + + // Click 7 Days + const button7d = screen.getByText("7 Days"); + fireEvent.click(button7d); + + expect(button7d).toHaveAttribute("aria-pressed", "true"); + expect(screen.getByText("30 Days")).toHaveAttribute("aria-pressed", "false"); + }); + + it("should refetch data when time range changes", async (): Promise => { + setupMocks(); + render(); + + // Wait for initial load (30d default) + await waitFor((): void => { + expect(screen.getByTestId("summary-cards")).toBeInTheDocument(); + }); + + // Initial call was with "30d" + expect(fetchUsageSummary).toHaveBeenCalledWith("30d"); + + // Change to 7d + fireEvent.click(screen.getByText("7 Days")); + + await waitFor((): void => { + expect(fetchUsageSummary).toHaveBeenCalledWith("7d"); + }); + }); + + it("should show empty state when no tasks exist", async (): Promise => { + setupMocks({ empty: true }); + render(); + + await waitFor((): void => { + expect(screen.getByTestId("empty-state")).toBeInTheDocument(); + }); + + expect(screen.getByText("No usage 
data yet")).toBeInTheDocument(); + }); + + it("should show error state on fetch failure", async (): Promise => { + setupMocks({ error: true }); + render(); + + await waitFor((): void => { + expect(screen.getByText("Network error")).toBeInTheDocument(); + }); + + expect(screen.getByText("Try again")).toBeInTheDocument(); + }); + + it("should retry loading when Try again button is clicked after error", async (): Promise => { + setupMocks({ error: true }); + render(); + + await waitFor((): void => { + expect(screen.getByText("Try again")).toBeInTheDocument(); + }); + + // Now set up success mocks and click retry + setupMocks(); + fireEvent.click(screen.getByText("Try again")); + + await waitFor((): void => { + expect(screen.getByTestId("summary-cards")).toBeInTheDocument(); + }); + }); + + it("should display chart section titles", async (): Promise => { + setupMocks(); + render(); + + await waitFor((): void => { + expect(screen.getByText("Token Usage Over Time")).toBeInTheDocument(); + expect(screen.getByText("Cost by Model")).toBeInTheDocument(); + expect(screen.getByText("Task Outcomes")).toBeInTheDocument(); + }); + }); + + it("should render recharts components within chart containers", async (): Promise => { + setupMocks(); + render(); + + await waitFor((): void => { + expect(screen.getByTestId("recharts-line-chart")).toBeInTheDocument(); + expect(screen.getByTestId("recharts-bar-chart")).toBeInTheDocument(); + expect(screen.getByTestId("recharts-pie-chart")).toBeInTheDocument(); + }); + }); +}); diff --git a/apps/web/src/app/(authenticated)/usage/page.tsx b/apps/web/src/app/(authenticated)/usage/page.tsx new file mode 100644 index 0000000..d90917b --- /dev/null +++ b/apps/web/src/app/(authenticated)/usage/page.tsx @@ -0,0 +1,427 @@ +"use client"; + +import { useState, useEffect, useCallback } from "react"; +import type { ReactElement } from "react"; +import { + LineChart, + Line, + BarChart, + Bar, + PieChart, + Pie, + XAxis, + YAxis, + CartesianGrid, + Tooltip, 
+ ResponsiveContainer, + Legend, +} from "recharts"; +import { Card, CardHeader, CardContent, CardTitle, CardDescription } from "@/components/ui/card"; +import { + fetchUsageSummary, + fetchTokenUsage, + fetchCostBreakdown, + fetchTaskOutcomes, +} from "@/lib/api/telemetry"; +import type { + TimeRange, + UsageSummary, + TokenUsagePoint, + CostBreakdownItem, + TaskOutcomeItem, +} from "@/lib/api/telemetry"; + +// ─── Constants ─────────────────────────────────────────────────────── + +const TIME_RANGES: { value: TimeRange; label: string }[] = [ + { value: "7d", label: "7 Days" }, + { value: "30d", label: "30 Days" }, + { value: "90d", label: "90 Days" }, +]; + +// Calm, PDA-friendly chart colors (no aggressive reds) +const CHART_COLORS = { + inputTokens: "#6366F1", // Indigo + outputTokens: "#38BDF8", // Sky blue + grid: "#E2E8F0", // Slate 200 + barFill: "#818CF8", // Indigo 400 +}; + +// ─── Helpers ───────────────────────────────────────────────────────── + +function formatNumber(value: number): string { + if (value >= 1_000_000) { + return `${(value / 1_000_000).toFixed(1)}M`; + } + if (value >= 1_000) { + return `${(value / 1_000).toFixed(1)}K`; + } + return value.toFixed(0); +} + +function formatCurrency(value: number): string { + return `$${value.toFixed(2)}`; +} + +function formatPercent(value: number): string { + return `${(value * 100).toFixed(1)}%`; +} + +function formatDateLabel(dateStr: string): string { + const date = new Date(dateStr + "T00:00:00"); + return date.toLocaleDateString("en-US", { month: "short", day: "numeric" }); +} + +/** + * Map TaskOutcomeItem[] to recharts-compatible data with `fill` property. + * This replaces deprecated Cell component (removed in Recharts 4.0). 
+ */ +function toFillData( + outcomes: TaskOutcomeItem[] +): { outcome: string; count: number; fill: string }[] { + return outcomes.map((item) => ({ + outcome: item.outcome, + count: item.count, + fill: item.color, + })); +} + +// ─── Sub-components ────────────────────────────────────────────────── + +function SummaryCard({ + title, + value, + subtitle, +}: { + title: string; + value: string; + subtitle?: string; +}): ReactElement { + return ( + + +

{title}

+

{value}

+ {subtitle ?

{subtitle}

: null} +
+
+ ); +} + +function LoadingSkeleton(): ReactElement { + return ( +
+ {/* Summary cards skeleton */} +
+ {Array.from({ length: 4 }).map((_, i) => ( + + +
+
+ + + ))} +
+ {/* Chart skeletons */} +
+ {Array.from({ length: 3 }).map((_, i) => ( + + +
+ + +
+ + + ))} +
+
+ ); +} + +function EmptyState(): ReactElement { + return ( +
+
📊
+

No usage data yet

+

+ Once you start using AI-powered features, your token usage and cost data will appear here. +

+
+ ); +} + +// ─── Main Page Component ───────────────────────────────────────────── + +export default function UsagePage(): ReactElement { + const [timeRange, setTimeRange] = useState("30d"); + const [isLoading, setIsLoading] = useState(true); + const [isEmpty, setIsEmpty] = useState(false); + const [error, setError] = useState(null); + + const [summary, setSummary] = useState(null); + const [tokenUsage, setTokenUsage] = useState([]); + const [costBreakdown, setCostBreakdown] = useState([]); + const [taskOutcomes, setTaskOutcomes] = useState([]); + + const loadData = useCallback(async (range: TimeRange): Promise => { + setIsLoading(true); + setError(null); + + try { + const [summaryData, tokenData, costData, outcomeData] = await Promise.all([ + fetchUsageSummary(range), + fetchTokenUsage(range), + fetchCostBreakdown(range), + fetchTaskOutcomes(range), + ]); + + setSummary(summaryData); + setTokenUsage(tokenData); + setCostBreakdown(costData); + setTaskOutcomes(outcomeData); + + // Check if there's any meaningful data + setIsEmpty(summaryData.taskCount === 0); + } catch (err) { + setError( + err instanceof Error + ? err.message + : "We had trouble loading usage data. Please try again when you're ready." + ); + } finally { + setIsLoading(false); + } + }, []); + + useEffect(() => { + void loadData(timeRange); + }, [timeRange, loadData]); + + function handleTimeRangeChange(range: TimeRange): void { + setTimeRange(range); + } + + return ( +
+ {/* Header */} +
+
+

Usage

+

Token usage and cost overview

+
+ + {/* Time range selector */} +
+ {TIME_RANGES.map(({ value, label }) => ( + + ))} +
+
+ + {/* Error state */} + {error !== null ? ( +
+

{error}

+ +
+ ) : isLoading ? ( + + ) : isEmpty ? ( + + ) : ( +
+ {/* Summary Cards */} +
+ + + + +
+ + {/* Charts */} +
+ {/* Token Usage Over Time — Full width */} + + + Token Usage Over Time + Input and output tokens by day + + +
+ + + + + + [ + formatNumber(value), + name === "inputTokens" ? "Input Tokens" : "Output Tokens", + ]} + labelFormatter={formatDateLabel} + contentStyle={{ + borderRadius: "8px", + border: "1px solid #E2E8F0", + boxShadow: "0 2px 8px rgba(0,0,0,0.08)", + }} + /> + + value === "inputTokens" ? "Input Tokens" : "Output Tokens" + } + /> + + + + +
+
+
+ + {/* Cost Breakdown by Model */} + + + Cost by Model + Estimated cost breakdown + + +
+ + + + formatCurrency(v)} + tick={{ fontSize: 12, fill: "#64748B" }} + /> + + [formatCurrency(value), "Cost"]} + contentStyle={{ + borderRadius: "8px", + border: "1px solid #E2E8F0", + boxShadow: "0 2px 8px rgba(0,0,0,0.08)", + }} + /> + + + +
+
+
+ + {/* Task Outcomes */} + + + Task Outcomes + Distribution of task completion results + + +
+ + + + `${outcome}: ${String(count)}` + } + /> + [value, name]} + contentStyle={{ + borderRadius: "8px", + border: "1px solid #E2E8F0", + boxShadow: "0 2px 8px rgba(0,0,0,0.08)", + }} + /> + + + +
+
+
+
+
+ )} +
+ ); +} diff --git a/apps/web/src/components/layout/Navigation.tsx b/apps/web/src/components/layout/Navigation.tsx index e961e47..5757717 100644 --- a/apps/web/src/components/layout/Navigation.tsx +++ b/apps/web/src/components/layout/Navigation.tsx @@ -16,6 +16,7 @@ export function Navigation(): React.JSX.Element { { href: "/tasks", label: "Tasks" }, { href: "/calendar", label: "Calendar" }, { href: "/knowledge", label: "Knowledge" }, + { href: "/usage", label: "Usage" }, ]; // Global keyboard shortcut for search (Cmd+K or Ctrl+K) diff --git a/apps/web/src/lib/api/index.ts b/apps/web/src/lib/api/index.ts index 8c1629c..5877de4 100644 --- a/apps/web/src/lib/api/index.ts +++ b/apps/web/src/lib/api/index.ts @@ -12,3 +12,4 @@ export * from "./knowledge"; export * from "./domains"; export * from "./teams"; export * from "./personalities"; +export * from "./telemetry"; diff --git a/apps/web/src/lib/api/telemetry.ts b/apps/web/src/lib/api/telemetry.ts new file mode 100644 index 0000000..49cb779 --- /dev/null +++ b/apps/web/src/lib/api/telemetry.ts @@ -0,0 +1,187 @@ +/** + * Telemetry API Client + * Handles telemetry data fetching for the usage dashboard. + * + * NOTE: Currently returns mock/placeholder data since the telemetry API + * aggregation endpoints don't exist yet. The important thing is the UI structure. + * When the backend endpoints are ready, replace mock calls with real apiGet() calls. 
+ */ + +import { apiGet, type ApiResponse } from "./client"; + +// ─── Types ─────────────────────────────────────────────────────────── + +export type TimeRange = "7d" | "30d" | "90d"; + +export interface UsageSummary { + totalTokens: number; + totalCost: number; + taskCount: number; + avgQualityGatePassRate: number; +} + +export interface TokenUsagePoint { + date: string; + inputTokens: number; + outputTokens: number; + totalTokens: number; +} + +export interface CostBreakdownItem { + model: string; + provider: string; + cost: number; + taskCount: number; +} + +export interface TaskOutcomeItem { + outcome: string; + count: number; + color: string; +} + +export interface EstimateParams { + taskType: string; + model: string; + provider: string; + complexity: string; +} + +export interface EstimateResponse { + prediction: { + input_tokens: { median: number; p75: number; p90: number }; + output_tokens: { median: number; p75: number; p90: number }; + cost_usd_micros: Record; + quality: { gate_pass_rate: number; success_rate: number }; + } | null; + metadata: { + sample_size: number; + confidence: "none" | "low" | "medium" | "high"; + }; +} + +// ─── Mock Data Generators ──────────────────────────────────────────── + +function generateDateRange(range: TimeRange): string[] { + const days = range === "7d" ? 7 : range === "30d" ? 
30 : 90; + const dates: string[] = []; + const now = new Date(); + + for (let i = days - 1; i >= 0; i--) { + const d = new Date(now); + d.setDate(d.getDate() - i); + dates.push(d.toISOString().split("T")[0]); + } + + return dates; +} + +function generateMockTokenUsage(range: TimeRange): TokenUsagePoint[] { + const dates = generateDateRange(range); + + return dates.map((date) => { + const baseInput = 8000 + Math.floor(Math.random() * 12000); + const baseOutput = 3000 + Math.floor(Math.random() * 7000); + return { + date, + inputTokens: baseInput, + outputTokens: baseOutput, + totalTokens: baseInput + baseOutput, + }; + }); +} + +function generateMockSummary(range: TimeRange): UsageSummary { + const multiplier = range === "7d" ? 1 : range === "30d" ? 4 : 12; + return { + totalTokens: 245_800 * multiplier, + totalCost: 3.42 * multiplier, + taskCount: 47 * multiplier, + avgQualityGatePassRate: 0.87, + }; +} + +function generateMockCostBreakdown(): CostBreakdownItem[] { + return [ + { model: "claude-sonnet-4-5", provider: "anthropic", cost: 18.5, taskCount: 124 }, + { model: "gpt-4o", provider: "openai", cost: 12.3, taskCount: 89 }, + { model: "claude-haiku-3.5", provider: "anthropic", cost: 4.2, taskCount: 156 }, + { model: "llama-3.3-70b", provider: "ollama", cost: 0, taskCount: 67 }, + { model: "gemini-2.0-flash", provider: "google", cost: 2.8, taskCount: 42 }, + ]; +} + +// PDA-friendly colors: calm, no aggressive reds +function generateMockTaskOutcomes(): TaskOutcomeItem[] { + return [ + { outcome: "Success", count: 312, color: "#6EBF8B" }, + { outcome: "Partial", count: 48, color: "#F5C862" }, + { outcome: "Timeout", count: 18, color: "#94A3B8" }, + { outcome: "Incomplete", count: 22, color: "#C4A5DE" }, + ]; +} + +// ─── API Functions ─────────────────────────────────────────────────── + +/** + * Fetch usage summary data (total tokens, cost, task count, quality rate) + */ +export async function fetchUsageSummary(timeRange: TimeRange): Promise { + // TODO: Replace 
with real API call when backend aggregation endpoints are ready + // const response = await apiGet>(`/api/telemetry/summary?range=${timeRange}`); + // return response.data; + void apiGet; // suppress unused import warning in the meantime + await new Promise((resolve) => setTimeout(resolve, 200)); + return generateMockSummary(timeRange); +} + +/** + * Fetch token usage time series for charts + */ +export async function fetchTokenUsage(timeRange: TimeRange): Promise { + // TODO: Replace with real API call + // const response = await apiGet>(`/api/telemetry/tokens?range=${timeRange}`); + // return response.data; + await new Promise((resolve) => setTimeout(resolve, 250)); + return generateMockTokenUsage(timeRange); +} + +/** + * Fetch cost breakdown by model + */ +export async function fetchCostBreakdown(timeRange: TimeRange): Promise { + // TODO: Replace with real API call + // const response = await apiGet>(`/api/telemetry/costs?range=${timeRange}`); + // return response.data; + await new Promise((resolve) => setTimeout(resolve, 200)); + void timeRange; + return generateMockCostBreakdown(); +} + +/** + * Fetch task outcome distribution + */ +export async function fetchTaskOutcomes(timeRange: TimeRange): Promise { + // TODO: Replace with real API call + // const response = await apiGet>(`/api/telemetry/outcomes?range=${timeRange}`); + // return response.data; + await new Promise((resolve) => setTimeout(resolve, 150)); + void timeRange; + return generateMockTaskOutcomes(); +} + +/** + * Fetch cost/token estimate for a given task configuration. + * Uses the real GET /api/telemetry/estimate endpoint from TEL-006. 
+ */ +export async function fetchEstimate(params: EstimateParams): Promise { + const query = new URLSearchParams({ + taskType: params.taskType, + model: params.model, + provider: params.provider, + complexity: params.complexity, + }).toString(); + + const response = await apiGet>(`/api/telemetry/estimate?${query}`); + return response.data; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8450600..6678c42 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -391,7 +391,7 @@ importers: version: 3.2.0 '@xyflow/react': specifier: ^12.5.3 - version: 12.10.0(@types/react@19.2.10)(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + version: 12.10.0(@types/react@19.2.10)(immer@11.1.4)(react-dom@19.2.4(react@19.2.4))(react@19.2.4) better-auth: specifier: ^1.4.17 version: 1.4.17(@prisma/client@6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3))(better-sqlite3@12.6.2)(drizzle-orm@0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)))(next@16.1.6(@babel/core@7.28.6)(@opentelemetry/api@1.9.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(vitest@3.2.4(@types/node@22.19.7)(jiti@2.6.1)(jsdom@26.1.0)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2)) @@ -422,6 +422,9 @@ importers: react-grid-layout: specifier: ^2.2.2 version: 2.2.2(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + recharts: + specifier: ^3.7.0 + version: 3.7.0(@types/react@19.2.10)(react-dom@19.2.4(react@19.2.4))(react-is@17.0.2)(react@19.2.4)(redux@5.0.1) socket.io-client: specifier: ^4.8.3 version: 4.8.3 @@ -2431,6 +2434,17 @@ packages: '@protobufjs/utf8@1.1.0': resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} + '@reduxjs/toolkit@2.11.2': + 
resolution: {integrity: sha512-Kd6kAHTA6/nUpp8mySPqj3en3dm0tdMIgbttnQ1xFMVpufoj+ADi8pXLBsd4xzTRHQa7t/Jv8W5UnCuW4kuWMQ==} + peerDependencies: + react: ^16.9.0 || ^17.0.0 || ^18 || ^19 + react-redux: ^7.2.1 || ^8.1.3 || ^9.0.0 + peerDependenciesMeta: + react: + optional: true + react-redux: + optional: true + '@rolldown/pluginutils@1.0.0-beta.27': resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==} @@ -2586,6 +2600,9 @@ packages: '@standard-schema/spec@1.1.0': resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + '@standard-schema/utils@0.3.0': + resolution: {integrity: sha512-e7Mew686owMaPJVNNLs55PUvgz371nKgwsc4vxE49zsODpJEnxgxRo2y/OKrqueavXgZNMDVj3DdHFlaSAeU8g==} + '@swc/core-darwin-arm64@1.15.11': resolution: {integrity: sha512-QoIupRWVH8AF1TgxYyeA5nS18dtqMuxNwchjBIwJo3RdwLEFiJq6onOx9JAxHtuPwUkIVuU2Xbp+jCJ7Vzmgtg==} engines: {node: '>=10'} @@ -2980,6 +2997,9 @@ packages: '@types/trusted-types@2.0.7': resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==} + '@types/use-sync-external-store@0.0.6': + resolution: {integrity: sha512-zFDAD+tlpf2r4asuHEj0XH6pY6i0g5NeAHPn+15wk3BV6JA69eERFXC1gyGThDkVa1zCyKr5jox1+2LbV/AMLg==} + '@types/validator@13.15.10': resolution: {integrity: sha512-T8L6i7wCuyoK8A/ZeLYt1+q0ty3Zb9+qbSSvrIVitzT3YjZqkTZ40IbRsPanlB4h1QB3JVL1SYCdR6ngtFYcuA==} @@ -3989,6 +4009,9 @@ packages: supports-color: optional: true + decimal.js-light@2.5.1: + resolution: {integrity: sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==} + decimal.js@10.6.0: resolution: {integrity: sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==} @@ -4299,6 +4322,9 @@ packages: resolution: {integrity: sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==} 
engines: {node: '>= 0.4'} + es-toolkit@1.44.0: + resolution: {integrity: sha512-6penXeZalaV88MM3cGkFZZfOoLGWshWWfdy0tWw/RlVVyhvMaWSBTOvXNeiW3e5FwdS5ePW0LGEu17zT139ktg==} + esbuild@0.27.2: resolution: {integrity: sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==} engines: {node: '>=18'} @@ -4708,6 +4734,12 @@ packages: resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==} engines: {node: '>= 4'} + immer@10.2.0: + resolution: {integrity: sha512-d/+XTN3zfODyjr89gM3mPq1WNX2B8pYsu7eORitdwyA2sBubnTl3laYlBk4sXY5FUa5qTZGBDPJICVbvqzjlbw==} + + immer@11.1.4: + resolution: {integrity: sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw==} + import-fresh@3.3.1: resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==} engines: {node: '>=6'} @@ -5650,6 +5682,18 @@ packages: react-is@17.0.2: resolution: {integrity: sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==} + react-redux@9.2.0: + resolution: {integrity: sha512-ROY9fvHhwOD9ySfrF0wmvu//bKCQ6AeZZq1nJNtbDC+kk5DuSuNX/n6YWYF/SYy7bSba4D4FSz8DJeKY/S/r+g==} + peerDependencies: + '@types/react': ^18.2.25 || ^19 + react: ^18.0 || ^19 + redux: ^5.0.0 + peerDependenciesMeta: + '@types/react': + optional: true + redux: + optional: true + react-refresh@0.17.0: resolution: {integrity: sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==} engines: {node: '>=0.10.0'} @@ -5686,6 +5730,14 @@ packages: resolution: {integrity: sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ==} engines: {node: '>= 20.19.0'} + recharts@3.7.0: + resolution: {integrity: sha512-l2VCsy3XXeraxIID9fx23eCb6iCBsxUQDnE8tWm6DFdszVAO7WVY/ChAD9wVit01y6B2PMupYiMmQwhgPHc9Ew==} + engines: {node: '>=18'} + peerDependencies: + 
react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + react-dom: ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + react-is: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + redent@3.0.0: resolution: {integrity: sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==} engines: {node: '>=8'} @@ -5698,6 +5750,14 @@ packages: resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==} engines: {node: '>=4'} + redux-thunk@3.1.0: + resolution: {integrity: sha512-NW2r5T6ksUKXCabzhL9z+h206HQw/NJkcLm1GPImRQ8IzfXwRGqjVhKJGauHirT0DAuyy6hjdnMZaRoAcy0Klw==} + peerDependencies: + redux: ^5.0.0 + + redux@5.0.1: + resolution: {integrity: sha512-M9/ELqF6fy8FwmkpnF0S3YKOqMyoWJ4+CS5Efg2ct3oY9daQvd/Pc71FpGZsVsbl3Cpb+IIcjBDUnnyBdQbq4w==} + reflect-metadata@0.2.2: resolution: {integrity: sha512-urBwgfrvVP/eAyXx4hluJivBKzuEbSQs9rKWCrCkbSxNv8mxPcUZKeuoF3Uy4mJl3Lwprp6yy5/39VWigZ4K6Q==} @@ -5720,6 +5780,9 @@ packages: resolution: {integrity: sha512-gAZ+kLqBdHarXB64XpAe2VCjB7rIRv+mU8tfRWziHRJ5umKsIHN2tLLv6EtMw7WCdP19S0ERVMldNvxYCHnhSQ==} engines: {node: '>=8.6.0'} + reselect@5.1.1: + resolution: {integrity: sha512-K/BG6eIky/SBpzfHZv/dd+9JBFiS4SWV7FIujVyJRux6e45+73RaUHXLmIR1f7WOMaQ0U1km6qwklRQxpJJY0w==} + resize-observer-polyfill@1.5.1: resolution: {integrity: sha512-LwZrotdHOo12nQuZlHEmtuXdqGoOD0OhaxopaNFxWzInpEgaLWoVuAMbTzixuosCx2nEG58ngzW3vxdWoxIgdg==} @@ -6115,6 +6178,9 @@ packages: text-decoder@1.2.3: resolution: {integrity: sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==} + tiny-invariant@1.3.3: + resolution: {integrity: sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==} + tinybench@2.9.0: resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} @@ -6366,6 +6432,9 @@ packages: resolution: {integrity: 
sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==} engines: {node: '>= 0.8'} + victory-vendor@37.3.6: + resolution: {integrity: sha512-SbPDPdDBYp+5MJHhBCAyI7wKM3d5ivekigc2Dk2s7pgbZ9wIgIBYGVw4zGHBml/qTFbexrofXW6Gu4noGxrOwQ==} + vite-node@3.2.4: resolution: {integrity: sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==} engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} @@ -7047,7 +7116,7 @@ snapshots: chalk: 5.6.2 commander: 12.1.0 dotenv: 17.2.4 - drizzle-orm: 0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) + drizzle-orm: 0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@5.22.0(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) open: 10.2.0 pg: 8.17.2 prettier: 3.8.1 @@ -8904,6 +8973,18 @@ snapshots: '@protobufjs/utf8@1.1.0': {} + '@reduxjs/toolkit@2.11.2(react-redux@9.2.0(@types/react@19.2.10)(react@19.2.4)(redux@5.0.1))(react@19.2.4)': + dependencies: + '@standard-schema/spec': 1.1.0 + '@standard-schema/utils': 0.3.0 + immer: 11.1.4 + redux: 5.0.1 + redux-thunk: 3.1.0(redux@5.0.1) + reselect: 5.1.1 + optionalDependencies: + react: 19.2.4 + react-redux: 9.2.0(@types/react@19.2.10)(react@19.2.4)(redux@5.0.1) + '@rolldown/pluginutils@1.0.0-beta.27': {} '@rollup/pluginutils@5.3.0(rollup@4.57.0)': @@ -9002,6 +9083,8 @@ snapshots: '@standard-schema/spec@1.1.0': {} + '@standard-schema/utils@0.3.0': {} + '@swc/core-darwin-arm64@1.15.11': optional: true @@ -9449,6 +9532,8 @@ snapshots: '@types/trusted-types@2.0.7': optional: true + '@types/use-sync-external-store@0.0.6': {} + '@types/validator@13.15.10': {} '@types/ws@8.18.1': @@ -9768,13 +9853,13 @@ snapshots: '@xtuc/long@4.2.2': {} - 
'@xyflow/react@12.10.0(@types/react@19.2.10)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)': + '@xyflow/react@12.10.0(@types/react@19.2.10)(immer@11.1.4)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)': dependencies: '@xyflow/system': 0.0.74 classcat: 5.0.5 react: 19.2.4 react-dom: 19.2.4(react@19.2.4) - zustand: 4.5.7(@types/react@19.2.10)(react@19.2.4) + zustand: 4.5.7(@types/react@19.2.10)(immer@11.1.4)(react@19.2.4) transitivePeerDependencies: - '@types/react' - immer @@ -9978,7 +10063,7 @@ snapshots: optionalDependencies: '@prisma/client': 5.22.0(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) better-sqlite3: 12.6.2 - drizzle-orm: 0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) + drizzle-orm: 0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@5.22.0(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) next: 16.1.6(@babel/core@7.28.6)(@opentelemetry/api@1.9.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4) pg: 8.17.2 prisma: 6.19.2(magicast@0.3.5)(typescript@5.9.3) @@ -10003,7 +10088,7 @@ snapshots: optionalDependencies: '@prisma/client': 6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3) better-sqlite3: 12.6.2 - drizzle-orm: 0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) + drizzle-orm: 0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@5.22.0(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) next: 
16.1.6(@babel/core@7.28.6)(@opentelemetry/api@1.9.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4) pg: 8.17.2 prisma: 6.19.2(magicast@0.3.5)(typescript@5.9.3) @@ -10626,6 +10711,8 @@ snapshots: dependencies: ms: 2.1.3 + decimal.js-light@2.5.1: {} + decimal.js@10.6.0: {} decompress-response@6.0.0: @@ -10758,6 +10845,16 @@ snapshots: dotenv@17.2.4: {} + drizzle-orm@0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@5.22.0(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)): + optionalDependencies: + '@opentelemetry/api': 1.9.0 + '@prisma/client': 5.22.0(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)) + '@types/pg': 8.16.0 + better-sqlite3: 12.6.2 + kysely: 0.28.10 + pg: 8.17.2 + prisma: 6.19.2(magicast@0.3.5)(typescript@5.9.3) + drizzle-orm@0.41.0(@opentelemetry/api@1.9.0)(@prisma/client@6.19.2(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3))(@types/pg@8.16.0)(better-sqlite3@12.6.2)(kysely@0.28.10)(pg@8.17.2)(prisma@6.19.2(magicast@0.3.5)(typescript@5.9.3)): optionalDependencies: '@opentelemetry/api': 1.9.0 @@ -10767,6 +10864,7 @@ snapshots: kysely: 0.28.10 pg: 8.17.2 prisma: 6.19.2(magicast@0.3.5)(typescript@5.9.3) + optional: true dunder-proto@1.0.1: dependencies: @@ -10865,6 +10963,8 @@ snapshots: has-tostringtag: 1.0.2 hasown: 2.0.2 + es-toolkit@1.44.0: {} + esbuild@0.27.2: optionalDependencies: '@esbuild/aix-ppc64': 0.27.2 @@ -11349,6 +11449,10 @@ snapshots: ignore@7.0.5: {} + immer@10.2.0: {} + + immer@11.1.4: {} + import-fresh@3.3.1: dependencies: parent-module: 1.0.1 @@ -12271,6 +12375,15 @@ snapshots: react-is@17.0.2: {} + react-redux@9.2.0(@types/react@19.2.10)(react@19.2.4)(redux@5.0.1): + dependencies: + '@types/use-sync-external-store': 0.0.6 + react: 19.2.4 + use-sync-external-store: 1.6.0(react@19.2.4) + optionalDependencies: + '@types/react': 19.2.10 + redux: 5.0.1 + react-refresh@0.17.0: {} 
react-resizable@3.1.3(react-dom@19.2.4(react@19.2.4))(react@19.2.4): @@ -12314,6 +12427,26 @@ snapshots: readdirp@5.0.0: {} + recharts@3.7.0(@types/react@19.2.10)(react-dom@19.2.4(react@19.2.4))(react-is@17.0.2)(react@19.2.4)(redux@5.0.1): + dependencies: + '@reduxjs/toolkit': 2.11.2(react-redux@9.2.0(@types/react@19.2.10)(react@19.2.4)(redux@5.0.1))(react@19.2.4) + clsx: 2.1.1 + decimal.js-light: 2.5.1 + es-toolkit: 1.44.0 + eventemitter3: 5.0.4 + immer: 10.2.0 + react: 19.2.4 + react-dom: 19.2.4(react@19.2.4) + react-is: 17.0.2 + react-redux: 9.2.0(@types/react@19.2.10)(react@19.2.4)(redux@5.0.1) + reselect: 5.1.1 + tiny-invariant: 1.3.3 + use-sync-external-store: 1.6.0(react@19.2.4) + victory-vendor: 37.3.6 + transitivePeerDependencies: + - '@types/react' + - redux + redent@3.0.0: dependencies: indent-string: 4.0.0 @@ -12325,6 +12458,12 @@ snapshots: dependencies: redis-errors: 1.2.0 + redux-thunk@3.1.0(redux@5.0.1): + dependencies: + redux: 5.0.1 + + redux@5.0.1: {} + reflect-metadata@0.2.2: {} regexp-to-ast@0.5.0: {} @@ -12343,6 +12482,8 @@ snapshots: transitivePeerDependencies: - supports-color + reselect@5.1.1: {} + resize-observer-polyfill@1.5.1: {} resolve-from@4.0.0: {} @@ -12863,6 +13004,8 @@ snapshots: transitivePeerDependencies: - react-native-b4a + tiny-invariant@1.3.3: {} + tinybench@2.9.0: {} tinyexec@0.3.2: {} @@ -13087,6 +13230,23 @@ snapshots: vary@1.1.2: {} + victory-vendor@37.3.6: + dependencies: + '@types/d3-array': 3.2.2 + '@types/d3-ease': 3.0.2 + '@types/d3-interpolate': 3.0.4 + '@types/d3-scale': 4.0.9 + '@types/d3-shape': 3.1.8 + '@types/d3-time': 3.0.4 + '@types/d3-timer': 3.0.2 + d3-array: 3.2.4 + d3-ease: 3.0.1 + d3-interpolate: 3.0.1 + d3-scale: 4.0.2 + d3-shape: 3.2.0 + d3-time: 3.1.0 + d3-timer: 3.0.1 + vite-node@3.2.4(@types/node@22.19.7)(jiti@2.6.1)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2): dependencies: cac: 6.7.14 @@ -13388,9 +13548,10 @@ snapshots: zod@4.3.6: {} - zustand@4.5.7(@types/react@19.2.10)(react@19.2.4): + 
zustand@4.5.7(@types/react@19.2.10)(immer@11.1.4)(react@19.2.4): dependencies: use-sync-external-store: 1.6.0(react@19.2.4) optionalDependencies: '@types/react': 19.2.10 + immer: 11.1.4 react: 19.2.4 From 746ab20c38050b5b024ce369df715f7b807bc4b6 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:05:09 -0600 Subject: [PATCH 09/15] =?UTF-8?q?chore:=20update=20tasks.md=20=E2=80=94=20?= =?UTF-8?q?all=20M10-Telemetry=20tasks=20complete?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- docs/tasks.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/tasks.md b/docs/tasks.md index 036bde0..c8c57a1 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -1,5 +1,25 @@ # Tasks +## M10-Telemetry (0.0.10) — Telemetry Integration + +**Orchestrator:** Claude Code +**Started:** 2026-02-15 +**Branch:** feature/m10-telemetry +**Milestone:** M10-Telemetry (0.0.10) + +| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | +| ------- | ----------- | -------------------------------------------------------- | ----- | ----------- | -------------------- | ------------------- | ------------------------------- | ----- | ---------- | ------------ | -------- | ---- | +| TEL-001 | done | Install @mosaicstack/telemetry-client in API + NestJS module | #369 | api | feature/m10-telemetry | | TEL-004,TEL-006,TEL-007 | w-1 | 2026-02-15T10:00Z | 2026-02-15T10:37Z | 20K | 25K | +| TEL-002 | done | Install mosaicstack-telemetry in Coordinator | #370 | coordinator | feature/m10-telemetry | | TEL-005,TEL-006 | w-2 | 2026-02-15T10:00Z | 2026-02-15T10:34Z | 15K | 20K | +| TEL-003 | done | Add telemetry config to docker-compose and .env | #374 | devops | feature/m10-telemetry | | | w-3 | 2026-02-15T10:38Z | 2026-02-15T10:40Z | 8K | 10K | +| TEL-004 | done | Track LLM task completions via Mosaic Telemetry | #371 | api | 
feature/m10-telemetry | TEL-001 | TEL-007 | w-4 | 2026-02-15T10:38Z | 2026-02-15T10:44Z | 25K | 30K | +| TEL-005 | done | Track orchestrator agent task completions | #372 | coordinator | feature/m10-telemetry | TEL-002 | | w-5 | 2026-02-15T10:45Z | 2026-02-15T10:52Z | 20K | 25K | +| TEL-006 | done | Prediction integration for cost estimation | #373 | api | feature/m10-telemetry | TEL-001,TEL-002 | TEL-007 | w-6 | 2026-02-15T10:45Z | 2026-02-15T10:51Z | 20K | 25K | +| TEL-007 | done | Frontend: Token usage and cost dashboard | #375 | web | feature/m10-telemetry | TEL-004,TEL-006 | TEL-008 | w-7 | 2026-02-15T10:53Z | 2026-02-15T11:03Z | 30K | 115K | +| TEL-008 | done | Documentation: Telemetry integration guide | #376 | docs | feature/m10-telemetry | TEL-007 | | w-8 | 2026-02-15T10:53Z | 2026-02-15T10:58Z | 15K | 75K | + +--- + ## M11-CIPipeline (0.0.11) — CI Pipeline #360 Remediation **Orchestrator:** Claude Code From 306c2e5bd81f5e45430fa09f036d545a522310d4 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:07:45 -0600 Subject: [PATCH 10/15] fix(#371): resolve TypeScript strictness errors in telemetry tracking - llm-cost-table.ts: Add undefined guard for MODEL_COSTS lookup - llm-telemetry-tracker.service.ts: Allow undefined in callingContext for exactOptionalPropertyTypes compatibility Refs #371 Co-Authored-By: Claude Opus 4.6 --- apps/api/src/llm/llm-cost-table.ts | 5 ++++- apps/api/src/llm/llm-telemetry-tracker.service.ts | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/api/src/llm/llm-cost-table.ts b/apps/api/src/llm/llm-cost-table.ts index 4aab2a9..acc07fd 100644 --- a/apps/api/src/llm/llm-cost-table.ts +++ b/apps/api/src/llm/llm-cost-table.ts @@ -80,7 +80,10 @@ export function getModelCost(modelName: string): ModelCost { for (const prefix of SORTED_PREFIXES) { if (normalized.startsWith(prefix)) { - return MODEL_COSTS[prefix]; + const cost = MODEL_COSTS[prefix]; + if (cost !== undefined) { + return cost; + } } } diff 
--git a/apps/api/src/llm/llm-telemetry-tracker.service.ts b/apps/api/src/llm/llm-telemetry-tracker.service.ts index 0b79f8b..1713882 100644 --- a/apps/api/src/llm/llm-telemetry-tracker.service.ts +++ b/apps/api/src/llm/llm-telemetry-tracker.service.ts @@ -25,7 +25,7 @@ export interface LlmCompletionParams { * Optional calling context hint for task type inference. * Examples: "brain", "chat", "embed", "planning", "code-review" */ - callingContext?: string; + callingContext?: string | undefined; /** Whether the call succeeded or failed */ success: boolean; } From 248f7115715fff3f65c5f92360a2d7a2bd962466 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:14:11 -0600 Subject: [PATCH 11/15] fix(#370): add Gitea PyPI registry to coordinator CI install step The mosaicstack-telemetry package is hosted on the Gitea PyPI registry. CI pip install needs --extra-index-url to find it. Refs #370 Co-Authored-By: Claude Opus 4.6 --- .woodpecker/coordinator.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.woodpecker/coordinator.yml b/.woodpecker/coordinator.yml index 8bce34e..1af4c5f 100644 --- a/.woodpecker/coordinator.yml +++ b/.woodpecker/coordinator.yml @@ -30,7 +30,7 @@ steps: - python -m venv venv - . venv/bin/activate - pip install --no-cache-dir --upgrade "pip>=25.3" - - pip install --no-cache-dir -e ".[dev]" + - pip install --no-cache-dir --extra-index-url https://git.mosaicstack.dev/api/packages/mosaic/pypi/simple/ -e ".[dev]" - pip install --no-cache-dir bandit pip-audit ruff-check: From 2eafa91e708c6591e96d7045439dd09debbc6b46 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:16:44 -0600 Subject: [PATCH 12/15] fix(#370): add mypy import-untyped ignore for mosaicstack_telemetry The mosaicstack-telemetry package lacks py.typed marker. Add type ignore comment consistent with other import sites. 
Refs #370 Co-Authored-By: Claude Opus 4.6 --- apps/coordinator/src/mosaic_telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/coordinator/src/mosaic_telemetry.py b/apps/coordinator/src/mosaic_telemetry.py index c4793fd..c178d7b 100644 --- a/apps/coordinator/src/mosaic_telemetry.py +++ b/apps/coordinator/src/mosaic_telemetry.py @@ -16,7 +16,7 @@ from __future__ import annotations import logging from typing import TYPE_CHECKING -from mosaicstack_telemetry import ( +from mosaicstack_telemetry import ( # type: ignore[import-untyped] Complexity, EventBuilder, Harness, From 8e27f73f8fb6d3a39a2955aec88f82a4b9cf3a2b Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:21:54 -0600 Subject: [PATCH 13/15] fix(#375): resolve recharts TypeScript strict mode type errors - Fix Tooltip formatter/labelFormatter type overload conflicts - Fix Pie label render props type mismatch - Fix telemetry.ts date split array access type Refs #375 Co-Authored-By: Claude Opus 4.6 --- .../src/app/(authenticated)/usage/page.tsx | 25 ++++++++++++------- apps/web/src/lib/api/telemetry.ts | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/apps/web/src/app/(authenticated)/usage/page.tsx b/apps/web/src/app/(authenticated)/usage/page.tsx index d90917b..0eb2fce 100644 --- a/apps/web/src/app/(authenticated)/usage/page.tsx +++ b/apps/web/src/app/(authenticated)/usage/page.tsx @@ -304,11 +304,13 @@ export default function UsagePage(): ReactElement { width={60} /> [ - formatNumber(value), - name === "inputTokens" ? "Input Tokens" : "Output Tokens", - ]} - labelFormatter={formatDateLabel} + formatter={ + ((value: number, name: string) => [ + formatNumber(value), + name === "inputTokens" ? 
"Input Tokens" : "Output Tokens", + ]) as never + } + labelFormatter={((label: string) => formatDateLabel(label)) as never} contentStyle={{ borderRadius: "8px", border: "1px solid #E2E8F0", @@ -365,7 +367,9 @@ export default function UsagePage(): ReactElement { width={140} /> [formatCurrency(value), "Cost"]} + formatter={ + ((value: number) => [formatCurrency(value), "Cost"]) as never + } contentStyle={{ borderRadius: "8px", border: "1px solid #E2E8F0", @@ -401,12 +405,15 @@ export default function UsagePage(): ReactElement { paddingAngle={2} dataKey="count" nameKey="outcome" - label={({ outcome, count }: { outcome: string; count: number }) => - `${outcome}: ${String(count)}` + label={ + ((props: Record) => + `${String(props.outcome ?? "")}: ${String(props.count ?? 0)}`) as never } /> [value, name]} + formatter={ + ((value: number, name: string) => [value, name]) as never + } contentStyle={{ borderRadius: "8px", border: "1px solid #E2E8F0", diff --git a/apps/web/src/lib/api/telemetry.ts b/apps/web/src/lib/api/telemetry.ts index 49cb779..551c9d5 100644 --- a/apps/web/src/lib/api/telemetry.ts +++ b/apps/web/src/lib/api/telemetry.ts @@ -70,7 +70,7 @@ function generateDateRange(range: TimeRange): string[] { for (let i = days - 1; i >= 0; i--) { const d = new Date(now); d.setDate(d.getDate() - i); - dates.push(d.toISOString().split("T")[0]); + dates.push(d.toISOString().split("T")[0] ?? 
""); } return dates; From a943ae139aeea6821f9e13d9605e3e0b33b3321c Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 02:25:51 -0600 Subject: [PATCH 14/15] fix(#375): resolve lint errors in usage dashboard - Fix prettier formatting for Tooltip formatter props (single-line) - Fix no-base-to-string by using typed props instead of Record - Fix restrict-template-expressions by wrapping number in String() Refs #375 Co-Authored-By: Claude Opus 4.6 --- apps/web/src/app/(authenticated)/usage/page.tsx | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/apps/web/src/app/(authenticated)/usage/page.tsx b/apps/web/src/app/(authenticated)/usage/page.tsx index 0eb2fce..14cccf8 100644 --- a/apps/web/src/app/(authenticated)/usage/page.tsx +++ b/apps/web/src/app/(authenticated)/usage/page.tsx @@ -367,9 +367,7 @@ export default function UsagePage(): ReactElement { width={140} /> [formatCurrency(value), "Cost"]) as never - } + formatter={((value: number) => [formatCurrency(value), "Cost"]) as never} contentStyle={{ borderRadius: "8px", border: "1px solid #E2E8F0", @@ -406,14 +404,12 @@ export default function UsagePage(): ReactElement { dataKey="count" nameKey="outcome" label={ - ((props: Record) => - `${String(props.outcome ?? "")}: ${String(props.count ?? 0)}`) as never + ((props: { outcome?: string; count?: number }) => + `${props.outcome ?? ""}: ${String(props.count ?? 0)}`) as never } /> [value, name]) as never - } + formatter={((value: number, name: string) => [value, name]) as never} contentStyle={{ borderRadius: "8px", border: "1px solid #E2E8F0", From c5a87df6e1afc7cb79b8994adce1b5690b1c7fdd Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 12:05:04 -0600 Subject: [PATCH 15/15] fix(#374): add pip.conf to coordinator Docker build for private registry The Docker build failed because pip couldn't find mosaicstack-telemetry from the private Gitea PyPI registry. 
Copy pip.conf into the image so pip resolves the extra-index-url during docker build. Co-Authored-By: Claude Opus 4.6 --- apps/coordinator/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/coordinator/Dockerfile b/apps/coordinator/Dockerfile index 28cac96..04d85a2 100644 --- a/apps/coordinator/Dockerfile +++ b/apps/coordinator/Dockerfile @@ -9,8 +9,9 @@ RUN apt-get update && \ build-essential \ && rm -rf /var/lib/apt/lists/* -# Copy dependency files +# Copy dependency files and private registry config COPY pyproject.toml . +COPY pip.conf /etc/pip.conf # Create virtual environment and install dependencies RUN python -m venv /opt/venv