import { Injectable, Logger, Optional } from "@nestjs/common";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import { PredictionService } from "../mosaic-telemetry/prediction.service";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
import type { LlmProviderType } from "./providers/llm-provider.interface";
import { calculateCostMicrodollars } from "./llm-cost-table";

/**
 * Parameters for tracking a single LLM completion event.
 *
 * Consumed by `LlmTelemetryTrackerService.trackLlmCompletion`; token counts
 * and duration are the *actual* observed values for the call being tracked.
 */
export interface LlmCompletionParams {
  /** Full model name (e.g. "claude-sonnet-4-5-20250929") */
  model: string;
  /** Provider type discriminator */
  providerType: LlmProviderType;
  /** Operation type that was performed */
  operation: "chat" | "chatStream" | "embed";
  /** Duration of the LLM call in milliseconds */
  durationMs: number;
  /** Number of input (prompt) tokens consumed */
  inputTokens: number;
  /** Number of output (completion) tokens generated */
  outputTokens: number;
  /**
   * Optional calling context hint for task type inference (matched as a
   * case-insensitive substring by `inferTaskType`).
   * Examples: "brain", "chat", "embed", "planning", "code-review"
   */
  callingContext?: string | undefined;
  /** Whether the call succeeded or failed */
  success: boolean;
}

/**
 * Estimated token count from text length.
 * Uses a rough approximation of ~4 characters per token (GPT/Claude average).
*/ export function estimateTokens(text: string): number { return Math.ceil(text.length / 4); } /** Map LLM provider type to telemetry Provider enum */ export function mapProviderType(providerType: LlmProviderType): Provider { switch (providerType) { case "claude": return Provider.ANTHROPIC; case "openai": return Provider.OPENAI; case "ollama": return Provider.OLLAMA; default: return Provider.UNKNOWN; } } /** Map LLM provider type to telemetry Harness enum */ export function mapHarness(providerType: LlmProviderType): Harness { switch (providerType) { case "ollama": return Harness.OLLAMA_LOCAL; default: return Harness.API_DIRECT; } } /** * Infer the task type from calling context and operation. * * @param operation - The LLM operation (chat, chatStream, embed) * @param callingContext - Optional hint about the caller's purpose * @returns Inferred TaskType */ export function inferTaskType( operation: "chat" | "chatStream" | "embed", callingContext?: string ): TaskType { // Embedding operations are typically for indexing/search if (operation === "embed") { return TaskType.IMPLEMENTATION; } if (!callingContext) { return TaskType.UNKNOWN; } const ctx = callingContext.toLowerCase(); if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) { return TaskType.PLANNING; } if (ctx.includes("review") || ctx.includes("code-review")) { return TaskType.CODE_REVIEW; } if (ctx.includes("test")) { return TaskType.TESTING; } if (ctx.includes("debug")) { return TaskType.DEBUGGING; } if (ctx.includes("refactor")) { return TaskType.REFACTORING; } if (ctx.includes("doc")) { return TaskType.DOCUMENTATION; } if (ctx.includes("config")) { return TaskType.CONFIGURATION; } if (ctx.includes("security") || ctx.includes("audit")) { return TaskType.SECURITY_AUDIT; } if (ctx.includes("chat") || ctx.includes("implement")) { return TaskType.IMPLEMENTATION; } return TaskType.UNKNOWN; } /** * LLM Telemetry Tracker Service * * Builds and submits telemetry events for LLM completions. 
* All tracking is non-blocking and fire-and-forget; telemetry errors * never propagate to the caller. * * @example * ```typescript * // After a successful chat completion * this.telemetryTracker.trackLlmCompletion({ * model: "claude-sonnet-4-5-20250929", * providerType: "claude", * operation: "chat", * durationMs: 1200, * inputTokens: 150, * outputTokens: 300, * callingContext: "chat", * success: true, * }); * ``` */ @Injectable() export class LlmTelemetryTrackerService { private readonly logger = new Logger(LlmTelemetryTrackerService.name); constructor( private readonly telemetry: MosaicTelemetryService, @Optional() private readonly predictionService?: PredictionService ) {} /** * Track an LLM completion event via Mosaic Telemetry. * * This method is intentionally fire-and-forget. It catches all errors * internally and logs them without propagating to the caller. * * @param params - LLM completion parameters */ trackLlmCompletion(params: LlmCompletionParams): void { try { const builder = this.telemetry.eventBuilder; if (!builder) { // Telemetry is disabled — silently skip return; } const taskType = inferTaskType(params.operation, params.callingContext); const provider = mapProviderType(params.providerType); const costMicrodollars = calculateCostMicrodollars( params.model, params.inputTokens, params.outputTokens ); // Query predictions for estimated fields (graceful degradation) let estimatedInputTokens = 0; let estimatedOutputTokens = 0; let estimatedCostMicros = 0; if (this.predictionService) { const prediction = this.predictionService.getEstimate( taskType, params.model, provider, Complexity.LOW ); if (prediction?.prediction && prediction.metadata.confidence !== "none") { estimatedInputTokens = prediction.prediction.input_tokens.median; estimatedOutputTokens = prediction.prediction.output_tokens.median; estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 
0; } } const event = builder.build({ task_duration_ms: params.durationMs, task_type: taskType, complexity: Complexity.LOW, harness: mapHarness(params.providerType), model: params.model, provider, estimated_input_tokens: estimatedInputTokens, estimated_output_tokens: estimatedOutputTokens, actual_input_tokens: params.inputTokens, actual_output_tokens: params.outputTokens, estimated_cost_usd_micros: estimatedCostMicros, actual_cost_usd_micros: costMicrodollars, quality_gate_passed: true, quality_gates_run: [], quality_gates_failed: [], context_compactions: 0, context_rotations: 0, context_utilization_final: 0, outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE, retry_count: 0, }); this.telemetry.trackTaskCompletion(event); } catch (error: unknown) { // Never let telemetry errors propagate const msg = error instanceof Error ? error.message : String(error); this.logger.warn(`Failed to track LLM telemetry event: ${msg}`); } } }