feat(#371): track LLM task completions via Mosaic Telemetry

- Create LlmTelemetryTrackerService for non-blocking event emission
- Normalize token usage across Anthropic, OpenAI, Ollama providers
- Add cost table with per-token pricing in microdollars
- Instrument chat, chatStream, and embed methods
- Infer task type from calling context
- Aggregate streaming tokens after stream ends with fallback estimation
- Add 69 unit tests for tracker service, cost table, and LLM service

Refs #371

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 01:44:29 -06:00
parent 24c21f45b3
commit fcecf3654b
6 changed files with 1103 additions and 8 deletions
--- a/apps/api/src/llm/llm-telemetry-tracker.service.ts
+++ b/apps/api/src/llm/llm-telemetry-tracker.service.ts
@@ -0,0 +1,197 @@
+import { Injectable, Logger } from "@nestjs/common";
+import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
+import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
+import type { LlmProviderType } from "./providers/llm-provider.interface";
+import { calculateCostMicrodollars } from "./llm-cost-table";
+
+/**
+ * Input to `LlmTelemetryTrackerService.trackLlmCompletion`: one finished (or failed) LLM call.
+ */
+export interface LlmCompletionParams {
+  /** Full model name (e.g. "claude-sonnet-4-5-20250929"); also passed to the cost calculation */
+  model: string;
+  /** Provider type discriminator; mapped to the telemetry Provider and Harness values */
+  providerType: LlmProviderType;
+  /** Operation type that was performed; "embed" always yields TaskType.IMPLEMENTATION */
+  operation: "chat" | "chatStream" | "embed";
+  /** Duration of the LLM call in milliseconds (reported as `task_duration_ms`) */
+  durationMs: number;
+  /** Input (prompt) tokens; reported as both the estimated and the actual input count */
+  inputTokens: number;
+  /** Output (completion) tokens; reported as both the estimated and the actual output count */
+  outputTokens: number;
+  /**
+   * Optional hint matched case-insensitively, by substring, to infer the task type.
+   * Examples: "brain", "chat", "embed", "planning", "code-review"
+   */
+  callingContext?: string;
+  /** Whether the call succeeded; selects Outcome.SUCCESS or Outcome.FAILURE */
+  success: boolean;
+}
+
+/** Approximate token count for `text`, assuming roughly four characters per token. */
+export function estimateTokens(text: string): number {
+  const charsPerToken = 4;
+  // Round up so that any non-empty text counts as at least one token.
+  const charCount = text.length;
+  return Math.ceil(charCount / charsPerToken);
+}
+
+/**
+ * Translate the internal provider discriminator into the telemetry `Provider`
+ * value; anything other than the three known providers maps to `Provider.UNKNOWN`.
+ */
+export function mapProviderType(providerType: LlmProviderType): Provider {
+  if (providerType === "claude") {
+    return Provider.ANTHROPIC;
+  }
+  if (providerType === "openai") {
+    return Provider.OPENAI;
+  }
+  return providerType === "ollama" ? Provider.OLLAMA : Provider.UNKNOWN;
+}
+
+/**
+ * Choose the telemetry `Harness` for a provider: "ollama" is reported as
+ * `Harness.OLLAMA_LOCAL`, and every other provider as `Harness.API_DIRECT`.
+ */
+export function mapHarness(providerType: LlmProviderType): Harness {
+  const isOllama = providerType === "ollama";
+
+  return isOllama ? Harness.OLLAMA_LOCAL : Harness.API_DIRECT;
+}
+
+/**
+ * Derive a telemetry task type for an LLM call.
+ *
+ * Embedding calls always map to `TaskType.IMPLEMENTATION`. For the chat operations
+ * the lower-cased `callingContext` is searched for keywords as plain substrings, so
+ * a hint such as "explanation" matches "plan".
+ * The first rule with a matching keyword wins.
+ *
+ * @param operation - The LLM operation (chat, chatStream, embed)
+ * @param callingContext - Optional hint about the caller's purpose
+ * @returns The matched TaskType, or `TaskType.UNKNOWN` when the hint is missing,
+ *   empty, or contains none of the keywords
+ */
+export function inferTaskType(
+  operation: "chat" | "chatStream" | "embed",
+  callingContext?: string
+): TaskType {
+  // Embeddings are classified without looking at the hint.
+  if (operation === "embed") {
+    return TaskType.IMPLEMENTATION;
+  }
+
+  // A missing or empty hint gives nothing to match against.
+  if (!callingContext) {
+    return TaskType.UNKNOWN;
+  }
+
+  // Keyword rules in priority order: when a hint contains keywords from several
+  // rules (e.g. "plan-review"), the rule listed first decides the task type.
+  const rules: readonly (readonly [TaskType, readonly string[]])[] = [
+    [TaskType.PLANNING, ["brain", "planning", "plan"]],
+    [TaskType.CODE_REVIEW, ["review", "code-review"]],
+    [TaskType.TESTING, ["test"]],
+    [TaskType.DEBUGGING, ["debug"]],
+    [TaskType.REFACTORING, ["refactor"]],
+    [TaskType.DOCUMENTATION, ["doc"]],
+    [TaskType.CONFIGURATION, ["config"]],
+    [TaskType.SECURITY_AUDIT, ["security", "audit"]],
+    [TaskType.IMPLEMENTATION, ["chat", "implement"]],
+  ];
+
+  // Lower-casing the hint makes the keyword search case-insensitive.
+  const hint = callingContext.toLowerCase();
+  for (const [taskType, keywords] of rules) {
+    if (keywords.some((keyword) => hint.includes(keyword))) {
+      return taskType;
+    }
+  }
+
+  // No keyword occurred anywhere in the hint.
+  return TaskType.UNKNOWN;
+}
+
+/**
+ * LLM Telemetry Tracker Service
+ *
+ * Turns the result of a single LLM call into a task-completion event and hands it
+ * to Mosaic Telemetry. Tracking is best-effort: any error thrown synchronously while
+ * the event is built or submitted is logged as a warning and never rethrown.
+ *
+ * @example
+ * ```typescript
+ * // After a successful chat completion
+ * this.telemetryTracker.trackLlmCompletion({
+ *   model: "claude-sonnet-4-5-20250929",
+ *   providerType: "claude",
+ *   operation: "chat",
+ *   durationMs: 1200,
+ *   inputTokens: 150,
+ *   outputTokens: 300,
+ *   callingContext: "chat",
+ *   success: true,
+ * });
+ * ```
+ */
+@Injectable()
+export class LlmTelemetryTrackerService {
+  private readonly logger = new Logger(LlmTelemetryTrackerService.name);
+
+  constructor(private readonly telemetry: MosaicTelemetryService) {}
+
+  /**
+   * Report one LLM completion to Mosaic Telemetry.
+   *
+   * Does nothing when the telemetry service exposes no event builder. The
+   * same token counts and cost are reported as both "estimated" and "actual".
+   *
+   * @param params - LLM completion parameters
+   */
+  trackLlmCompletion(params: LlmCompletionParams): void {
+    try {
+      const eventBuilder = this.telemetry.eventBuilder;
+      if (!eventBuilder) {
+        return; // Telemetry is disabled — silently skip
+      }
+
+      const { model, inputTokens, outputTokens } = params;
+      const costMicros = calculateCostMicrodollars(model, inputTokens, outputTokens);
+      const taskType = inferTaskType(params.operation, params.callingContext);
+      const harness = mapHarness(params.providerType);
+      const provider = mapProviderType(params.providerType);
+      const outcome = params.success ? Outcome.SUCCESS : Outcome.FAILURE;
+
+      const completionEvent = eventBuilder.build({
+        task_duration_ms: params.durationMs,
+        task_type: taskType,
+        complexity: Complexity.LOW,
+        harness,
+        model,
+        provider,
+        estimated_input_tokens: inputTokens,
+        estimated_output_tokens: outputTokens,
+        actual_input_tokens: inputTokens,
+        actual_output_tokens: outputTokens,
+        estimated_cost_usd_micros: costMicros,
+        actual_cost_usd_micros: costMicros,
+        quality_gate_passed: true,
+        quality_gates_run: [],
+        quality_gates_failed: [],
+        context_compactions: 0,
+        context_rotations: 0,
+        context_utilization_final: 0,
+        outcome,
+        retry_count: 0,
+      });
+
+      this.telemetry.trackTaskCompletion(completionEvent);
+    } catch (error: unknown) {
+      // Telemetry must never break the caller: log the failure and carry on.
+      const reason = error instanceof Error ? error.message : String(error);
+      this.logger.warn(`Failed to track LLM telemetry event: ${reason}`);
+    }
+  }
+}