From fcecf3654ba5d2b21fd3858a9275b886644621d1 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 15 Feb 2026 01:44:29 -0600 Subject: [PATCH] feat(#371): track LLM task completions via Mosaic Telemetry - Create LlmTelemetryTrackerService for non-blocking event emission - Normalize token usage across Anthropic, OpenAI, Ollama providers - Add cost table with per-token pricing in microdollars - Instrument chat, chatStream, and embed methods - Infer task type from calling context - Aggregate streaming tokens after stream ends with fallback estimation - Add 69 unit tests for tracker service, cost table, and LLM service Refs #371 Co-Authored-By: Claude Opus 4.6 --- apps/api/src/llm/llm-cost-table.ts | 106 ++++ .../llm/llm-telemetry-tracker.service.spec.ts | 491 ++++++++++++++++++ .../src/llm/llm-telemetry-tracker.service.ts | 197 +++++++ apps/api/src/llm/llm.module.ts | 3 +- apps/api/src/llm/llm.service.spec.ts | 175 +++++++ apps/api/src/llm/llm.service.ts | 139 ++++- 6 files changed, 1103 insertions(+), 8 deletions(-) create mode 100644 apps/api/src/llm/llm-cost-table.ts create mode 100644 apps/api/src/llm/llm-telemetry-tracker.service.spec.ts create mode 100644 apps/api/src/llm/llm-telemetry-tracker.service.ts diff --git a/apps/api/src/llm/llm-cost-table.ts b/apps/api/src/llm/llm-cost-table.ts new file mode 100644 index 0000000..4aab2a9 --- /dev/null +++ b/apps/api/src/llm/llm-cost-table.ts @@ -0,0 +1,106 @@ +/** + * LLM Cost Table + * + * Maps model names to per-token costs in microdollars (USD * 1,000,000). + * For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token. + * + * Costs are split into input (prompt) and output (completion) pricing. + * Ollama models run locally and are free (0 cost). + */ + +/** + * Per-token cost in microdollars for a single model. 
+ */
+export interface ModelCost {
+  /** Cost per input token in microdollars */
+  inputPerToken: number;
+  /** Cost per output token in microdollars */
+  outputPerToken: number;
+}
+
+/**
+ * Cost table mapping model name prefixes to per-token pricing.
+ *
+ * Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929".
+ * More specific prefixes are checked first (longest match wins).
+ *
+ * Prices sourced from provider pricing pages as of 2026-02.
+ */
+const MODEL_COSTS: Record<string, ModelCost> = {
+  // Anthropic Claude models (per-token microdollars)
+  // claude-sonnet-4-5: $3/M input, $15/M output
+  "claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
+  // claude-opus-4: $15/M input, $75/M output
+  "claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
+  // claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output
+  "claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 },
+  "claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
+  // claude-3-5-sonnet: $3/M input, $15/M output
+  "claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
+  // claude-3-opus: $15/M input, $75/M output
+  "claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
+  // claude-3-sonnet: $3/M input, $15/M output
+  "claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
+  // claude-3-haiku: $0.25/M input, $1.25/M output
+  "claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },
+
+  // OpenAI models (per-token microdollars)
+  // gpt-4o: $2.50/M input, $10/M output
+  "gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
+  "gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
+  // gpt-4-turbo: $10/M input, $30/M output
+  "gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
+  // gpt-4: $30/M input, $60/M output
+  "gpt-4": { inputPerToken: 30, outputPerToken: 60 },
+  // gpt-3.5-turbo: $0.50/M input, $1.50/M output
+  "gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },
+
+  // Ollama / local models: free
+
// These are catch-all entries; any model not matched above falls through to getModelCost default +}; + +/** + * Sorted model prefixes from longest to shortest for greedy prefix matching. + * Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku". + */ +const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length); + +/** + * Look up per-token cost for a given model name. + * + * Uses longest-prefix matching: the model name is compared against known + * prefixes from longest to shortest. If no prefix matches, returns zero cost + * (assumes local/free model). + * + * @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o") + * @returns Per-token cost in microdollars + */ +export function getModelCost(modelName: string): ModelCost { + const normalized = modelName.toLowerCase(); + + for (const prefix of SORTED_PREFIXES) { + if (normalized.startsWith(prefix)) { + return MODEL_COSTS[prefix]; + } + } + + // Unknown or local model — assume free + return { inputPerToken: 0, outputPerToken: 0 }; +} + +/** + * Calculate total cost in microdollars for a given model and token counts. 
+ * + * @param modelName - Full model name + * @param inputTokens - Number of input (prompt) tokens + * @param outputTokens - Number of output (completion) tokens + * @returns Total cost in microdollars (USD * 1,000,000) + */ +export function calculateCostMicrodollars( + modelName: string, + inputTokens: number, + outputTokens: number +): number { + const cost = getModelCost(modelName); + return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens); +} diff --git a/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts b/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts new file mode 100644 index 0000000..0f43489 --- /dev/null +++ b/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts @@ -0,0 +1,491 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { Test, TestingModule } from "@nestjs/testing"; +import { + TaskType, + Complexity, + Harness, + Provider, + Outcome, +} from "@mosaicstack/telemetry-client"; +import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client"; +import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service"; +import { + LlmTelemetryTrackerService, + estimateTokens, + mapProviderType, + mapHarness, + inferTaskType, +} from "./llm-telemetry-tracker.service"; +import type { LlmCompletionParams } from "./llm-telemetry-tracker.service"; +import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table"; + +// ---------- Cost Table Tests ---------- + +describe("llm-cost-table", () => { + describe("getModelCost", () => { + it("should return cost for claude-sonnet-4-5 models", () => { + const cost = getModelCost("claude-sonnet-4-5-20250929"); + expect(cost.inputPerToken).toBe(3); + expect(cost.outputPerToken).toBe(15); + }); + + it("should return cost for claude-opus-4 models", () => { + const cost = getModelCost("claude-opus-4-6"); + expect(cost.inputPerToken).toBe(15); + expect(cost.outputPerToken).toBe(75); + }); + + 
it("should return cost for claude-haiku-4-5 models", () => { + const cost = getModelCost("claude-haiku-4-5-20251001"); + expect(cost.inputPerToken).toBe(0.8); + expect(cost.outputPerToken).toBe(4); + }); + + it("should return cost for gpt-4o", () => { + const cost = getModelCost("gpt-4o"); + expect(cost.inputPerToken).toBe(2.5); + expect(cost.outputPerToken).toBe(10); + }); + + it("should return cost for gpt-4o-mini (longer prefix matches first)", () => { + const cost = getModelCost("gpt-4o-mini"); + expect(cost.inputPerToken).toBe(0.15); + expect(cost.outputPerToken).toBe(0.6); + }); + + it("should return zero cost for unknown/local models", () => { + const cost = getModelCost("llama3.2"); + expect(cost.inputPerToken).toBe(0); + expect(cost.outputPerToken).toBe(0); + }); + + it("should return zero cost for ollama models", () => { + const cost = getModelCost("mistral:7b"); + expect(cost.inputPerToken).toBe(0); + expect(cost.outputPerToken).toBe(0); + }); + + it("should be case-insensitive", () => { + const cost = getModelCost("Claude-Sonnet-4-5-20250929"); + expect(cost.inputPerToken).toBe(3); + }); + }); + + describe("calculateCostMicrodollars", () => { + it("should calculate cost for claude-sonnet-4-5 with token counts", () => { + // 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500 + const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500); + expect(cost).toBe(10500); + }); + + it("should return 0 for local models", () => { + const cost = calculateCostMicrodollars("llama3.2", 1000, 500); + expect(cost).toBe(0); + }); + + it("should return 0 when token counts are 0", () => { + const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0); + expect(cost).toBe(0); + }); + + it("should round the result to integer microdollars", () => { + // gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5 + const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7); + expect(cost).toBe(5); + }); + }); +}); + +// 
---------- Helper Function Tests ---------- + +describe("helper functions", () => { + describe("estimateTokens", () => { + it("should estimate ~1 token per 4 characters", () => { + expect(estimateTokens("abcd")).toBe(1); + expect(estimateTokens("abcdefgh")).toBe(2); + }); + + it("should round up for partial tokens", () => { + expect(estimateTokens("abc")).toBe(1); + expect(estimateTokens("abcde")).toBe(2); + }); + + it("should return 0 for empty string", () => { + expect(estimateTokens("")).toBe(0); + }); + }); + + describe("mapProviderType", () => { + it("should map claude to ANTHROPIC", () => { + expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC); + }); + + it("should map openai to OPENAI", () => { + expect(mapProviderType("openai")).toBe(Provider.OPENAI); + }); + + it("should map ollama to OLLAMA", () => { + expect(mapProviderType("ollama")).toBe(Provider.OLLAMA); + }); + }); + + describe("mapHarness", () => { + it("should map ollama to OLLAMA_LOCAL", () => { + expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL); + }); + + it("should map claude to API_DIRECT", () => { + expect(mapHarness("claude")).toBe(Harness.API_DIRECT); + }); + + it("should map openai to API_DIRECT", () => { + expect(mapHarness("openai")).toBe(Harness.API_DIRECT); + }); + }); + + describe("inferTaskType", () => { + it("should return IMPLEMENTATION for embed operation", () => { + expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION); + }); + + it("should return UNKNOWN when no context provided for chat", () => { + expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN); + }); + + it("should return PLANNING for brain context", () => { + expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING); + }); + + it("should return PLANNING for planning context", () => { + expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING); + }); + + it("should return CODE_REVIEW for review context", () => { + expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW); + 
});
+
+    it("should return TESTING for test context", () => {
+      expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING);
+    });
+
+    it("should return DEBUGGING for debug context", () => {
+      expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING);
+    });
+
+    it("should return REFACTORING for refactor context", () => {
+      expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING);
+    });
+
+    it("should return DOCUMENTATION for doc context", () => {
+      expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION);
+    });
+
+    it("should return CONFIGURATION for config context", () => {
+      expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION);
+    });
+
+    it("should return SECURITY_AUDIT for security context", () => {
+      expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT);
+    });
+
+    it("should return IMPLEMENTATION for chat context", () => {
+      expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION);
+    });
+
+    it("should be case-insensitive", () => {
+      expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING);
+    });
+
+    it("should return UNKNOWN for unrecognized context", () => {
+      expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN);
+    });
+  });
+});
+
+// ---------- LlmTelemetryTrackerService Tests ----------
+
+describe("LlmTelemetryTrackerService", () => {
+  let service: LlmTelemetryTrackerService;
+  let mockTelemetryService: {
+    eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
+    trackTaskCompletion: ReturnType<typeof vi.fn>;
+    isEnabled: boolean;
+  };
+
+  const mockEvent: TaskCompletionEvent = {
+    instance_id: "test-instance",
+    event_id: "test-event",
+    schema_version: "1.0.0",
+    timestamp: new Date().toISOString(),
+    task_duration_ms: 1000,
+    task_type: TaskType.IMPLEMENTATION,
+    complexity: Complexity.LOW,
+    harness: Harness.API_DIRECT,
+    model: "claude-sonnet-4-5-20250929",
+    provider: Provider.ANTHROPIC,
+    estimated_input_tokens: 100,
+
estimated_output_tokens: 200, + actual_input_tokens: 100, + actual_output_tokens: 200, + estimated_cost_usd_micros: 3300, + actual_cost_usd_micros: 3300, + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + context_compactions: 0, + context_rotations: 0, + context_utilization_final: 0, + outcome: Outcome.SUCCESS, + retry_count: 0, + }; + + beforeEach(async () => { + mockTelemetryService = { + eventBuilder: { + build: vi.fn().mockReturnValue(mockEvent), + }, + trackTaskCompletion: vi.fn(), + isEnabled: true, + }; + + const module: TestingModule = await Test.createTestingModule({ + providers: [ + LlmTelemetryTrackerService, + { + provide: MosaicTelemetryService, + useValue: mockTelemetryService, + }, + ], + }).compile(); + + service = module.get(LlmTelemetryTrackerService); + }); + + it("should be defined", () => { + expect(service).toBeDefined(); + }); + + describe("trackLlmCompletion", () => { + const baseParams: LlmCompletionParams = { + model: "claude-sonnet-4-5-20250929", + providerType: "claude", + operation: "chat", + durationMs: 1200, + inputTokens: 150, + outputTokens: 300, + callingContext: "chat", + success: true, + }; + + it("should build and track a telemetry event for Anthropic provider", () => { + service.trackLlmCompletion(baseParams); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + task_duration_ms: 1200, + task_type: TaskType.IMPLEMENTATION, + complexity: Complexity.LOW, + harness: Harness.API_DIRECT, + model: "claude-sonnet-4-5-20250929", + provider: Provider.ANTHROPIC, + actual_input_tokens: 150, + actual_output_tokens: 300, + outcome: Outcome.SUCCESS, + }), + ); + + expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent); + }); + + it("should build and track a telemetry event for OpenAI provider", () => { + service.trackLlmCompletion({ + ...baseParams, + model: "gpt-4o", + providerType: "openai", + }); + + 
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + model: "gpt-4o", + provider: Provider.OPENAI, + harness: Harness.API_DIRECT, + }), + ); + }); + + it("should build and track a telemetry event for Ollama provider", () => { + service.trackLlmCompletion({ + ...baseParams, + model: "llama3.2", + providerType: "ollama", + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + provider: Provider.OLLAMA, + harness: Harness.OLLAMA_LOCAL, + }), + ); + }); + + it("should calculate cost in microdollars correctly", () => { + service.trackLlmCompletion(baseParams); + + // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950 + const expectedCost = 4950; + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + estimated_cost_usd_micros: expectedCost, + actual_cost_usd_micros: expectedCost, + }), + ); + }); + + it("should calculate zero cost for ollama models", () => { + service.trackLlmCompletion({ + ...baseParams, + model: "llama3.2", + providerType: "ollama", + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + estimated_cost_usd_micros: 0, + actual_cost_usd_micros: 0, + }), + ); + }); + + it("should track FAILURE outcome when success is false", () => { + service.trackLlmCompletion({ + ...baseParams, + success: false, + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + outcome: Outcome.FAILURE, + }), + ); + }); + + it("should infer task type from calling context", () => { + service.trackLlmCompletion({ + ...baseParams, + callingContext: "brain", + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + task_type: TaskType.PLANNING, + }), + ); + }); + + it("should set empty quality gates arrays for direct LLM calls", () => { + 
service.trackLlmCompletion(baseParams); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + }), + ); + }); + + it("should silently skip when telemetry is disabled (eventBuilder is null)", () => { + mockTelemetryService.eventBuilder = null; + + // Should not throw + service.trackLlmCompletion(baseParams); + + expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled(); + }); + + it("should not throw when eventBuilder.build throws an error", () => { + mockTelemetryService.eventBuilder = { + build: vi.fn().mockImplementation(() => { + throw new Error("Build failed"); + }), + }; + + // Should not throw + expect(() => service.trackLlmCompletion(baseParams)).not.toThrow(); + }); + + it("should not throw when trackTaskCompletion throws an error", () => { + mockTelemetryService.trackTaskCompletion.mockImplementation(() => { + throw new Error("Track failed"); + }); + + // Should not throw + expect(() => service.trackLlmCompletion(baseParams)).not.toThrow(); + }); + + it("should handle streaming operation with estimated tokens", () => { + service.trackLlmCompletion({ + ...baseParams, + operation: "chatStream", + inputTokens: 50, + outputTokens: 100, + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + actual_input_tokens: 50, + actual_output_tokens: 100, + estimated_input_tokens: 50, + estimated_output_tokens: 100, + }), + ); + }); + + it("should handle embed operation", () => { + service.trackLlmCompletion({ + ...baseParams, + operation: "embed", + outputTokens: 0, + callingContext: undefined, + }); + + expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith( + expect.objectContaining({ + task_type: TaskType.IMPLEMENTATION, + actual_output_tokens: 0, + }), + ); + }); + + it("should pass all required EventBuilderParams fields", () => { + 
service.trackLlmCompletion(baseParams);
+
+      const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>).mock
+        .calls[0][0] as EventBuilderParams;
+
+      // Verify all required fields are present
+      expect(buildCall).toHaveProperty("task_duration_ms");
+      expect(buildCall).toHaveProperty("task_type");
+      expect(buildCall).toHaveProperty("complexity");
+      expect(buildCall).toHaveProperty("harness");
+      expect(buildCall).toHaveProperty("model");
+      expect(buildCall).toHaveProperty("provider");
+      expect(buildCall).toHaveProperty("estimated_input_tokens");
+      expect(buildCall).toHaveProperty("estimated_output_tokens");
+      expect(buildCall).toHaveProperty("actual_input_tokens");
+      expect(buildCall).toHaveProperty("actual_output_tokens");
+      expect(buildCall).toHaveProperty("estimated_cost_usd_micros");
+      expect(buildCall).toHaveProperty("actual_cost_usd_micros");
+      expect(buildCall).toHaveProperty("quality_gate_passed");
+      expect(buildCall).toHaveProperty("quality_gates_run");
+      expect(buildCall).toHaveProperty("quality_gates_failed");
+      expect(buildCall).toHaveProperty("context_compactions");
+      expect(buildCall).toHaveProperty("context_rotations");
+      expect(buildCall).toHaveProperty("context_utilization_final");
+      expect(buildCall).toHaveProperty("outcome");
+      expect(buildCall).toHaveProperty("retry_count");
+    });
+  });
+});
diff --git a/apps/api/src/llm/llm-telemetry-tracker.service.ts b/apps/api/src/llm/llm-telemetry-tracker.service.ts
new file mode 100644
index 0000000..e4905a9
--- /dev/null
+++ b/apps/api/src/llm/llm-telemetry-tracker.service.ts
@@ -0,0 +1,197 @@
+import { Injectable, Logger } from "@nestjs/common";
+import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
+import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
+import type { LlmProviderType } from "./providers/llm-provider.interface";
+import { calculateCostMicrodollars } from "./llm-cost-table";
+
+/**
+ * Parameters for
tracking an LLM completion event. + */ +export interface LlmCompletionParams { + /** Full model name (e.g. "claude-sonnet-4-5-20250929") */ + model: string; + /** Provider type discriminator */ + providerType: LlmProviderType; + /** Operation type that was performed */ + operation: "chat" | "chatStream" | "embed"; + /** Duration of the LLM call in milliseconds */ + durationMs: number; + /** Number of input (prompt) tokens consumed */ + inputTokens: number; + /** Number of output (completion) tokens generated */ + outputTokens: number; + /** + * Optional calling context hint for task type inference. + * Examples: "brain", "chat", "embed", "planning", "code-review" + */ + callingContext?: string; + /** Whether the call succeeded or failed */ + success: boolean; +} + +/** + * Estimated token count from text length. + * Uses a rough approximation of ~4 characters per token (GPT/Claude average). + */ +export function estimateTokens(text: string): number { + return Math.ceil(text.length / 4); +} + +/** Map LLM provider type to telemetry Provider enum */ +export function mapProviderType(providerType: LlmProviderType): Provider { + switch (providerType) { + case "claude": + return Provider.ANTHROPIC; + case "openai": + return Provider.OPENAI; + case "ollama": + return Provider.OLLAMA; + default: + return Provider.UNKNOWN; + } +} + +/** Map LLM provider type to telemetry Harness enum */ +export function mapHarness(providerType: LlmProviderType): Harness { + switch (providerType) { + case "ollama": + return Harness.OLLAMA_LOCAL; + default: + return Harness.API_DIRECT; + } +} + +/** + * Infer the task type from calling context and operation. 
+ * + * @param operation - The LLM operation (chat, chatStream, embed) + * @param callingContext - Optional hint about the caller's purpose + * @returns Inferred TaskType + */ +export function inferTaskType( + operation: "chat" | "chatStream" | "embed", + callingContext?: string +): TaskType { + // Embedding operations are typically for indexing/search + if (operation === "embed") { + return TaskType.IMPLEMENTATION; + } + + if (!callingContext) { + return TaskType.UNKNOWN; + } + + const ctx = callingContext.toLowerCase(); + + if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) { + return TaskType.PLANNING; + } + if (ctx.includes("review") || ctx.includes("code-review")) { + return TaskType.CODE_REVIEW; + } + if (ctx.includes("test")) { + return TaskType.TESTING; + } + if (ctx.includes("debug")) { + return TaskType.DEBUGGING; + } + if (ctx.includes("refactor")) { + return TaskType.REFACTORING; + } + if (ctx.includes("doc")) { + return TaskType.DOCUMENTATION; + } + if (ctx.includes("config")) { + return TaskType.CONFIGURATION; + } + if (ctx.includes("security") || ctx.includes("audit")) { + return TaskType.SECURITY_AUDIT; + } + if (ctx.includes("chat") || ctx.includes("implement")) { + return TaskType.IMPLEMENTATION; + } + + return TaskType.UNKNOWN; +} + +/** + * LLM Telemetry Tracker Service + * + * Builds and submits telemetry events for LLM completions. + * All tracking is non-blocking and fire-and-forget; telemetry errors + * never propagate to the caller. 
+ * + * @example + * ```typescript + * // After a successful chat completion + * this.telemetryTracker.trackLlmCompletion({ + * model: "claude-sonnet-4-5-20250929", + * providerType: "claude", + * operation: "chat", + * durationMs: 1200, + * inputTokens: 150, + * outputTokens: 300, + * callingContext: "chat", + * success: true, + * }); + * ``` + */ +@Injectable() +export class LlmTelemetryTrackerService { + private readonly logger = new Logger(LlmTelemetryTrackerService.name); + + constructor(private readonly telemetry: MosaicTelemetryService) {} + + /** + * Track an LLM completion event via Mosaic Telemetry. + * + * This method is intentionally fire-and-forget. It catches all errors + * internally and logs them without propagating to the caller. + * + * @param params - LLM completion parameters + */ + trackLlmCompletion(params: LlmCompletionParams): void { + try { + const builder = this.telemetry.eventBuilder; + if (!builder) { + // Telemetry is disabled — silently skip + return; + } + + const costMicrodollars = calculateCostMicrodollars( + params.model, + params.inputTokens, + params.outputTokens + ); + + const event = builder.build({ + task_duration_ms: params.durationMs, + task_type: inferTaskType(params.operation, params.callingContext), + complexity: Complexity.LOW, + harness: mapHarness(params.providerType), + model: params.model, + provider: mapProviderType(params.providerType), + estimated_input_tokens: params.inputTokens, + estimated_output_tokens: params.outputTokens, + actual_input_tokens: params.inputTokens, + actual_output_tokens: params.outputTokens, + estimated_cost_usd_micros: costMicrodollars, + actual_cost_usd_micros: costMicrodollars, + quality_gate_passed: true, + quality_gates_run: [], + quality_gates_failed: [], + context_compactions: 0, + context_rotations: 0, + context_utilization_final: 0, + outcome: params.success ? 
Outcome.SUCCESS : Outcome.FAILURE, + retry_count: 0, + }); + + this.telemetry.trackTaskCompletion(event); + } catch (error: unknown) { + // Never let telemetry errors propagate + const msg = error instanceof Error ? error.message : String(error); + this.logger.warn(`Failed to track LLM telemetry event: ${msg}`); + } + } +} diff --git a/apps/api/src/llm/llm.module.ts b/apps/api/src/llm/llm.module.ts index 3d2fc4a..7528e0f 100644 --- a/apps/api/src/llm/llm.module.ts +++ b/apps/api/src/llm/llm.module.ts @@ -3,13 +3,14 @@ import { LlmController } from "./llm.controller"; import { LlmProviderAdminController } from "./llm-provider-admin.controller"; import { LlmService } from "./llm.service"; import { LlmManagerService } from "./llm-manager.service"; +import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service"; import { PrismaModule } from "../prisma/prisma.module"; import { LlmUsageModule } from "../llm-usage/llm-usage.module"; @Module({ imports: [PrismaModule, LlmUsageModule], controllers: [LlmController, LlmProviderAdminController], - providers: [LlmService, LlmManagerService], + providers: [LlmService, LlmManagerService, LlmTelemetryTrackerService], exports: [LlmService, LlmManagerService], }) export class LlmModule {} diff --git a/apps/api/src/llm/llm.service.spec.ts b/apps/api/src/llm/llm.service.spec.ts index 2b9d84d..cff6840 100644 --- a/apps/api/src/llm/llm.service.spec.ts +++ b/apps/api/src/llm/llm.service.spec.ts @@ -3,6 +3,7 @@ import { Test, TestingModule } from "@nestjs/testing"; import { ServiceUnavailableException } from "@nestjs/common"; import { LlmService } from "./llm.service"; import { LlmManagerService } from "./llm-manager.service"; +import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service"; import type { ChatRequestDto, EmbedRequestDto, ChatResponseDto, EmbedResponseDto } from "./dto"; import type { LlmProviderInterface, @@ -14,6 +15,9 @@ describe("LlmService", () => { let mockManagerService: { 
getDefaultProvider: ReturnType; }; + let mockTelemetryTracker: { + trackLlmCompletion: ReturnType; + }; let mockProvider: { chat: ReturnType; chatStream: ReturnType; @@ -41,6 +45,11 @@ describe("LlmService", () => { getDefaultProvider: vi.fn().mockResolvedValue(mockProvider), }; + // Create mock telemetry tracker + mockTelemetryTracker = { + trackLlmCompletion: vi.fn(), + }; + const module: TestingModule = await Test.createTestingModule({ providers: [ LlmService, @@ -48,6 +57,10 @@ describe("LlmService", () => { provide: LlmManagerService, useValue: mockManagerService, }, + { + provide: LlmTelemetryTrackerService, + useValue: mockTelemetryTracker, + }, ], }).compile(); @@ -135,6 +148,45 @@ describe("LlmService", () => { expect(result).toEqual(response); }); + it("should track telemetry on successful chat", async () => { + const response: ChatResponseDto = { + model: "llama3.2", + message: { role: "assistant", content: "Hello" }, + done: true, + promptEvalCount: 10, + evalCount: 20, + }; + mockProvider.chat.mockResolvedValue(response); + + await service.chat(request, "chat"); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + providerType: "ollama", + operation: "chat", + inputTokens: 10, + outputTokens: 20, + callingContext: "chat", + success: true, + }), + ); + }); + + it("should track telemetry on failed chat", async () => { + mockProvider.chat.mockRejectedValue(new Error("Chat failed")); + + await expect(service.chat(request)).rejects.toThrow(ServiceUnavailableException); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + operation: "chat", + success: false, + }), + ); + }); + it("should throw ServiceUnavailableException on error", async () => { mockProvider.chat.mockRejectedValue(new Error("Chat failed")); @@ -177,6 +229,94 @@ describe("LlmService", () => { expect(chunks[1].message.content).toBe(" world"); }); + 
it("should track telemetry after stream completes", async () => {
+      async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
+        yield {
+          model: "llama3.2",
+          message: { role: "assistant", content: "Hello" },
+          done: false,
+        };
+        yield {
+          model: "llama3.2",
+          message: { role: "assistant", content: " world" },
+          done: true,
+          promptEvalCount: 5,
+          evalCount: 10,
+        };
+      }
+
+      mockProvider.chatStream.mockReturnValue(mockGenerator());
+
+      const chunks: ChatResponseDto[] = [];
+      for await (const chunk of service.chatStream(request, "brain")) {
+        chunks.push(chunk);
+      }
+
+      expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
+        expect.objectContaining({
+          model: "llama3.2",
+          providerType: "ollama",
+          operation: "chatStream",
+          inputTokens: 5,
+          outputTokens: 10,
+          callingContext: "brain",
+          success: true,
+        }),
+      );
+    });
+
+    it("should estimate tokens when provider does not return counts in stream", async () => {
+      async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
+        yield {
+          model: "llama3.2",
+          message: { role: "assistant", content: "Hello world" },
+          done: false,
+        };
+        yield {
+          model: "llama3.2",
+          message: { role: "assistant", content: "" },
+          done: true,
+        };
+      }
+
+      mockProvider.chatStream.mockReturnValue(mockGenerator());
+
+      const chunks: ChatResponseDto[] = [];
+      for await (const chunk of service.chatStream(request)) {
+        chunks.push(chunk);
+      }
+
+      // Should use estimated tokens since no actual counts provided
+      expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
+        expect.objectContaining({
+          operation: "chatStream",
+          success: true,
+          // Input estimated from "Hi" -> ceil(2/4) = 1
+          inputTokens: 1,
+          // Output estimated from "Hello world" -> ceil(11/4) = 3
+          outputTokens: 3,
+        }),
+      );
+    });
+
+    it("should track telemetry on stream failure", async () => {
+      async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
+        throw new Error("Stream failed");
+      }
+
+      mockProvider.chatStream.mockReturnValue(errorGenerator());
+
+      const generator =
service.chatStream(request); + await expect(generator.next()).rejects.toThrow(ServiceUnavailableException); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + operation: "chatStream", + success: false, + }), + ); + }); + it("should throw ServiceUnavailableException on error", async () => { async function* errorGenerator(): AsyncGenerator { throw new Error("Stream failed"); @@ -210,6 +350,41 @@ describe("LlmService", () => { expect(result).toEqual(response); }); + it("should track telemetry on successful embed", async () => { + const response: EmbedResponseDto = { + model: "llama3.2", + embeddings: [[0.1, 0.2, 0.3]], + totalDuration: 500, + }; + mockProvider.embed.mockResolvedValue(response); + + await service.embed(request, "embed"); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + model: "llama3.2", + providerType: "ollama", + operation: "embed", + outputTokens: 0, + callingContext: "embed", + success: true, + }), + ); + }); + + it("should track telemetry on failed embed", async () => { + mockProvider.embed.mockRejectedValue(new Error("Embedding failed")); + + await expect(service.embed(request)).rejects.toThrow(ServiceUnavailableException); + + expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith( + expect.objectContaining({ + operation: "embed", + success: false, + }), + ); + }); + it("should throw ServiceUnavailableException on error", async () => { mockProvider.embed.mockRejectedValue(new Error("Embedding failed")); diff --git a/apps/api/src/llm/llm.service.ts b/apps/api/src/llm/llm.service.ts index 2dfc065..ee938fb 100644 --- a/apps/api/src/llm/llm.service.ts +++ b/apps/api/src/llm/llm.service.ts @@ -1,13 +1,15 @@ import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common"; import { LlmManagerService } from "./llm-manager.service"; +import { LlmTelemetryTrackerService, estimateTokens } from 
"./llm-telemetry-tracker.service"; import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto"; -import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface"; +import type { LlmProviderHealthStatus, LlmProviderType } from "./providers/llm-provider.interface"; /** * LLM Service * * High-level service for LLM operations. Delegates to providers via LlmManagerService. * Maintains backward compatibility with the original API while supporting multiple providers. + * Automatically tracks completions via Mosaic Telemetry (non-blocking). * * @example * ```typescript @@ -33,7 +35,10 @@ import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface export class LlmService implements OnModuleInit { private readonly logger = new Logger(LlmService.name); - constructor(private readonly llmManager: LlmManagerService) { + constructor( + private readonly llmManager: LlmManagerService, + private readonly telemetryTracker: LlmTelemetryTrackerService + ) { this.logger.log("LLM service initialized"); } @@ -91,14 +96,45 @@ export class LlmService implements OnModuleInit { * Perform a synchronous chat completion. 
* * @param request - Chat request with messages and configuration + * @param callingContext - Optional context hint for telemetry task type inference * @returns Complete chat response * @throws {ServiceUnavailableException} If provider is unavailable or request fails */ - async chat(request: ChatRequestDto): Promise { + async chat(request: ChatRequestDto, callingContext?: string): Promise { + const startTime = Date.now(); + let providerType: LlmProviderType = "ollama"; + try { const provider = await this.llmManager.getDefaultProvider(); - return await provider.chat(request); + providerType = provider.type; + const response = await provider.chat(request); + + // Fire-and-forget telemetry tracking + this.telemetryTracker.trackLlmCompletion({ + model: response.model, + providerType, + operation: "chat", + durationMs: Date.now() - startTime, + inputTokens: response.promptEvalCount ?? 0, + outputTokens: response.evalCount ?? 0, + callingContext, + success: true, + }); + + return response; } catch (error: unknown) { + // Track failure (fire-and-forget) + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "chat", + durationMs: Date.now() - startTime, + inputTokens: 0, + outputTokens: 0, + callingContext, + success: false, + }); + const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error(`Chat failed: ${errorMessage}`); throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`); @@ -107,20 +143,75 @@ export class LlmService implements OnModuleInit { /** * Perform a streaming chat completion. * Yields response chunks as they arrive from the provider. + * Aggregates token usage and tracks telemetry after the stream ends. 
* * @param request - Chat request with messages and configuration + * @param callingContext - Optional context hint for telemetry task type inference * @yields Chat response chunks * @throws {ServiceUnavailableException} If provider is unavailable or request fails */ - async *chatStream(request: ChatRequestDto): AsyncGenerator { + async *chatStream( + request: ChatRequestDto, + callingContext?: string + ): AsyncGenerator { + const startTime = Date.now(); + let providerType: LlmProviderType = "ollama"; + let aggregatedContent = ""; + let lastChunkInputTokens = 0; + let lastChunkOutputTokens = 0; + try { const provider = await this.llmManager.getDefaultProvider(); + providerType = provider.type; const stream = provider.chatStream(request); for await (const chunk of stream) { + // Accumulate content for token estimation + aggregatedContent += chunk.message.content; + + // Some providers include token counts on the final chunk + if (chunk.promptEvalCount !== undefined) { + lastChunkInputTokens = chunk.promptEvalCount; + } + if (chunk.evalCount !== undefined) { + lastChunkOutputTokens = chunk.evalCount; + } + yield chunk; } + + // After stream completes, track telemetry + // Use actual token counts if available, otherwise estimate from content length + const inputTokens = + lastChunkInputTokens > 0 + ? lastChunkInputTokens + : estimateTokens(request.messages.map((m) => m.content).join(" ")); + const outputTokens = + lastChunkOutputTokens > 0 ? 
lastChunkOutputTokens : estimateTokens(aggregatedContent); + + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "chatStream", + durationMs: Date.now() - startTime, + inputTokens, + outputTokens, + callingContext, + success: true, + }); } catch (error: unknown) { + // Track failure (fire-and-forget) + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "chatStream", + durationMs: Date.now() - startTime, + inputTokens: 0, + outputTokens: 0, + callingContext, + success: false, + }); + const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error(`Stream failed: ${errorMessage}`); throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`); @@ -130,14 +221,48 @@ export class LlmService implements OnModuleInit { * Generate embeddings for the given input texts. * * @param request - Embedding request with model and input texts + * @param callingContext - Optional context hint for telemetry task type inference * @returns Embeddings response with vector arrays * @throws {ServiceUnavailableException} If provider is unavailable or request fails */ - async embed(request: EmbedRequestDto): Promise { + async embed(request: EmbedRequestDto, callingContext?: string): Promise { + const startTime = Date.now(); + let providerType: LlmProviderType = "ollama"; + try { const provider = await this.llmManager.getDefaultProvider(); - return await provider.embed(request); + providerType = provider.type; + const response = await provider.embed(request); + + // Estimate input tokens from the input text + const inputTokens = estimateTokens(request.input.join(" ")); + + // Fire-and-forget telemetry tracking + this.telemetryTracker.trackLlmCompletion({ + model: response.model, + providerType, + operation: "embed", + durationMs: Date.now() - startTime, + inputTokens, + outputTokens: 0, // Embeddings don't produce output tokens + callingContext, + success: 
true, + }); + + return response; } catch (error: unknown) { + // Track failure (fire-and-forget) + this.telemetryTracker.trackLlmCompletion({ + model: request.model, + providerType, + operation: "embed", + durationMs: Date.now() - startTime, + inputTokens: 0, + outputTokens: 0, + callingContext, + success: false, + }); + const errorMessage = error instanceof Error ? error.message : String(error); this.logger.error(`Embed failed: ${errorMessage}`); throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);