feat(#371): track LLM task completions via Mosaic Telemetry
- Create LlmTelemetryTrackerService for non-blocking event emission
- Normalize token usage across Anthropic, OpenAI, and Ollama providers
- Add cost table with per-token pricing in microdollars
- Instrument chat, chatStream, and embed methods
- Infer task type from the calling context
- Aggregate streaming tokens after the stream ends, with fallback estimation
- Add 69 unit tests for the tracker service, cost table, and LLM service

Refs #371

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
106
apps/api/src/llm/llm-cost-table.ts
Normal file
106
apps/api/src/llm/llm-cost-table.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
* LLM Cost Table
|
||||
*
|
||||
* Maps model names to per-token costs in microdollars (USD * 1,000,000).
|
||||
* For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token.
|
||||
*
|
||||
* Costs are split into input (prompt) and output (completion) pricing.
|
||||
* Ollama models run locally and are free (0 cost).
|
||||
*/
|
||||
|
||||
/**
 * Per-token cost in microdollars for a single model.
 *
 * Both fields are expressed in microdollars (USD * 1,000,000) per token,
 * matching the unit used by {@link calculateCostMicrodollars}.
 */
export interface ModelCost {
  /** Cost per input (prompt) token in microdollars */
  inputPerToken: number;
  /** Cost per output (completion) token in microdollars */
  outputPerToken: number;
}
|
||||
|
||||
/**
 * Cost table mapping model name prefixes to per-token pricing.
 *
 * Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929".
 * More specific prefixes are checked first (longest match wins).
 *
 * Prices sourced from provider pricing pages as of 2026-02.
 */
const MODEL_COSTS: Record<string, ModelCost> = {
  // Anthropic Claude models (per-token microdollars)
  // claude-sonnet-4-5: $3/M input, $15/M output
  "claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
  // claude-opus-4: $15/M input, $75/M output
  "claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
  // claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output
  "claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 },
  "claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
  // claude-3-5-sonnet: $3/M input, $15/M output
  "claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
  // claude-3-opus: $15/M input, $75/M output
  "claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
  // claude-3-sonnet: $3/M input, $15/M output
  "claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
  // claude-3-haiku: $0.25/M input, $1.25/M output
  "claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },

  // OpenAI models (per-token microdollars)
  // gpt-4o-mini: $0.15/M input, $0.60/M output (must precede gpt-4o in prefix matching)
  "gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
  // gpt-4o: $2.50/M input, $10/M output
  "gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
  // gpt-4-turbo: $10/M input, $30/M output
  "gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
  // gpt-4: $30/M input, $60/M output
  "gpt-4": { inputPerToken: 30, outputPerToken: 60 },
  // gpt-3.5-turbo: $0.50/M input, $1.50/M output
  "gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },

  // Ollama / local models: free
  // These are catch-all entries; any model not matched above falls through to getModelCost default
};

/**
 * Sorted model prefixes from longest to shortest for greedy prefix matching.
 * Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku".
 */
const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length);
|
||||
|
||||
/**
|
||||
* Look up per-token cost for a given model name.
|
||||
*
|
||||
* Uses longest-prefix matching: the model name is compared against known
|
||||
* prefixes from longest to shortest. If no prefix matches, returns zero cost
|
||||
* (assumes local/free model).
|
||||
*
|
||||
* @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o")
|
||||
* @returns Per-token cost in microdollars
|
||||
*/
|
||||
export function getModelCost(modelName: string): ModelCost {
|
||||
const normalized = modelName.toLowerCase();
|
||||
|
||||
for (const prefix of SORTED_PREFIXES) {
|
||||
if (normalized.startsWith(prefix)) {
|
||||
return MODEL_COSTS[prefix];
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown or local model — assume free
|
||||
return { inputPerToken: 0, outputPerToken: 0 };
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate total cost in microdollars for a given model and token counts.
|
||||
*
|
||||
* @param modelName - Full model name
|
||||
* @param inputTokens - Number of input (prompt) tokens
|
||||
* @param outputTokens - Number of output (completion) tokens
|
||||
* @returns Total cost in microdollars (USD * 1,000,000)
|
||||
*/
|
||||
export function calculateCostMicrodollars(
|
||||
modelName: string,
|
||||
inputTokens: number,
|
||||
outputTokens: number
|
||||
): number {
|
||||
const cost = getModelCost(modelName);
|
||||
return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens);
|
||||
}
|
||||
491
apps/api/src/llm/llm-telemetry-tracker.service.spec.ts
Normal file
491
apps/api/src/llm/llm-telemetry-tracker.service.spec.ts
Normal file
@@ -0,0 +1,491 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { Test, TestingModule } from "@nestjs/testing";
|
||||
import {
|
||||
TaskType,
|
||||
Complexity,
|
||||
Harness,
|
||||
Provider,
|
||||
Outcome,
|
||||
} from "@mosaicstack/telemetry-client";
|
||||
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
|
||||
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
||||
import {
|
||||
LlmTelemetryTrackerService,
|
||||
estimateTokens,
|
||||
mapProviderType,
|
||||
mapHarness,
|
||||
inferTaskType,
|
||||
} from "./llm-telemetry-tracker.service";
|
||||
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
|
||||
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";
|
||||
|
||||
// ---------- Cost Table Tests ----------
// Verifies prefix matching (longest-first), case-insensitivity, the
// zero-cost fallback for unknown/local models, and integer rounding.

describe("llm-cost-table", () => {
  describe("getModelCost", () => {
    it("should return cost for claude-sonnet-4-5 models", () => {
      const cost = getModelCost("claude-sonnet-4-5-20250929");
      expect(cost.inputPerToken).toBe(3);
      expect(cost.outputPerToken).toBe(15);
    });

    it("should return cost for claude-opus-4 models", () => {
      const cost = getModelCost("claude-opus-4-6");
      expect(cost.inputPerToken).toBe(15);
      expect(cost.outputPerToken).toBe(75);
    });

    it("should return cost for claude-haiku-4-5 models", () => {
      const cost = getModelCost("claude-haiku-4-5-20251001");
      expect(cost.inputPerToken).toBe(0.8);
      expect(cost.outputPerToken).toBe(4);
    });

    it("should return cost for gpt-4o", () => {
      const cost = getModelCost("gpt-4o");
      expect(cost.inputPerToken).toBe(2.5);
      expect(cost.outputPerToken).toBe(10);
    });

    it("should return cost for gpt-4o-mini (longer prefix matches first)", () => {
      const cost = getModelCost("gpt-4o-mini");
      expect(cost.inputPerToken).toBe(0.15);
      expect(cost.outputPerToken).toBe(0.6);
    });

    it("should return zero cost for unknown/local models", () => {
      const cost = getModelCost("llama3.2");
      expect(cost.inputPerToken).toBe(0);
      expect(cost.outputPerToken).toBe(0);
    });

    it("should return zero cost for ollama models", () => {
      const cost = getModelCost("mistral:7b");
      expect(cost.inputPerToken).toBe(0);
      expect(cost.outputPerToken).toBe(0);
    });

    it("should be case-insensitive", () => {
      const cost = getModelCost("Claude-Sonnet-4-5-20250929");
      expect(cost.inputPerToken).toBe(3);
    });
  });

  describe("calculateCostMicrodollars", () => {
    it("should calculate cost for claude-sonnet-4-5 with token counts", () => {
      // 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
      const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500);
      expect(cost).toBe(10500);
    });

    it("should return 0 for local models", () => {
      const cost = calculateCostMicrodollars("llama3.2", 1000, 500);
      expect(cost).toBe(0);
    });

    it("should return 0 when token counts are 0", () => {
      const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0);
      expect(cost).toBe(0);
    });

    it("should round the result to integer microdollars", () => {
      // gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
      const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7);
      expect(cost).toBe(5);
    });
  });
});
|
||||
|
||||
// ---------- Helper Function Tests ----------
// Covers the exported pure helpers: token estimation, provider/harness
// enum mapping, and context-keyword based task-type inference.

describe("helper functions", () => {
  describe("estimateTokens", () => {
    it("should estimate ~1 token per 4 characters", () => {
      expect(estimateTokens("abcd")).toBe(1);
      expect(estimateTokens("abcdefgh")).toBe(2);
    });

    it("should round up for partial tokens", () => {
      expect(estimateTokens("abc")).toBe(1);
      expect(estimateTokens("abcde")).toBe(2);
    });

    it("should return 0 for empty string", () => {
      expect(estimateTokens("")).toBe(0);
    });
  });

  describe("mapProviderType", () => {
    it("should map claude to ANTHROPIC", () => {
      expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC);
    });

    it("should map openai to OPENAI", () => {
      expect(mapProviderType("openai")).toBe(Provider.OPENAI);
    });

    it("should map ollama to OLLAMA", () => {
      expect(mapProviderType("ollama")).toBe(Provider.OLLAMA);
    });
  });

  describe("mapHarness", () => {
    it("should map ollama to OLLAMA_LOCAL", () => {
      expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL);
    });

    it("should map claude to API_DIRECT", () => {
      expect(mapHarness("claude")).toBe(Harness.API_DIRECT);
    });

    it("should map openai to API_DIRECT", () => {
      expect(mapHarness("openai")).toBe(Harness.API_DIRECT);
    });
  });

  describe("inferTaskType", () => {
    it("should return IMPLEMENTATION for embed operation", () => {
      expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION);
    });

    it("should return UNKNOWN when no context provided for chat", () => {
      expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN);
    });

    it("should return PLANNING for brain context", () => {
      expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING);
    });

    it("should return PLANNING for planning context", () => {
      expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING);
    });

    it("should return CODE_REVIEW for review context", () => {
      expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW);
    });

    it("should return TESTING for test context", () => {
      expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING);
    });

    it("should return DEBUGGING for debug context", () => {
      expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING);
    });

    it("should return REFACTORING for refactor context", () => {
      expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING);
    });

    it("should return DOCUMENTATION for doc context", () => {
      expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION);
    });

    it("should return CONFIGURATION for config context", () => {
      expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION);
    });

    it("should return SECURITY_AUDIT for security context", () => {
      expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT);
    });

    it("should return IMPLEMENTATION for chat context", () => {
      expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION);
    });

    it("should be case-insensitive", () => {
      expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING);
    });

    it("should return UNKNOWN for unrecognized context", () => {
      expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN);
    });
  });
});
|
||||
|
||||
// ---------- LlmTelemetryTrackerService Tests ----------
// Exercises trackLlmCompletion against a mocked MosaicTelemetryService:
// event construction per provider, cost calculation, outcome mapping,
// and the fire-and-forget error-swallowing contract.

describe("LlmTelemetryTrackerService", () => {
  let service: LlmTelemetryTrackerService;
  // Hand-rolled mock of MosaicTelemetryService; eventBuilder is nullable so
  // tests can simulate telemetry being disabled.
  let mockTelemetryService: {
    eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
    trackTaskCompletion: ReturnType<typeof vi.fn>;
    isEnabled: boolean;
  };

  // Minimal but complete TaskCompletionEvent returned by the mocked builder.
  const mockEvent: TaskCompletionEvent = {
    instance_id: "test-instance",
    event_id: "test-event",
    schema_version: "1.0.0",
    timestamp: new Date().toISOString(),
    task_duration_ms: 1000,
    task_type: TaskType.IMPLEMENTATION,
    complexity: Complexity.LOW,
    harness: Harness.API_DIRECT,
    model: "claude-sonnet-4-5-20250929",
    provider: Provider.ANTHROPIC,
    estimated_input_tokens: 100,
    estimated_output_tokens: 200,
    actual_input_tokens: 100,
    actual_output_tokens: 200,
    estimated_cost_usd_micros: 3300,
    actual_cost_usd_micros: 3300,
    quality_gate_passed: true,
    quality_gates_run: [],
    quality_gates_failed: [],
    context_compactions: 0,
    context_rotations: 0,
    context_utilization_final: 0,
    outcome: Outcome.SUCCESS,
    retry_count: 0,
  };

  // Fresh mocks and a fresh Nest testing module for every test.
  beforeEach(async () => {
    mockTelemetryService = {
      eventBuilder: {
        build: vi.fn().mockReturnValue(mockEvent),
      },
      trackTaskCompletion: vi.fn(),
      isEnabled: true,
    };

    const module: TestingModule = await Test.createTestingModule({
      providers: [
        LlmTelemetryTrackerService,
        {
          provide: MosaicTelemetryService,
          useValue: mockTelemetryService,
        },
      ],
    }).compile();

    service = module.get<LlmTelemetryTrackerService>(LlmTelemetryTrackerService);
  });

  it("should be defined", () => {
    expect(service).toBeDefined();
  });

  describe("trackLlmCompletion", () => {
    // Baseline successful Anthropic chat call; individual tests override fields.
    const baseParams: LlmCompletionParams = {
      model: "claude-sonnet-4-5-20250929",
      providerType: "claude",
      operation: "chat",
      durationMs: 1200,
      inputTokens: 150,
      outputTokens: 300,
      callingContext: "chat",
      success: true,
    };

    it("should build and track a telemetry event for Anthropic provider", () => {
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          task_duration_ms: 1200,
          task_type: TaskType.IMPLEMENTATION,
          complexity: Complexity.LOW,
          harness: Harness.API_DIRECT,
          model: "claude-sonnet-4-5-20250929",
          provider: Provider.ANTHROPIC,
          actual_input_tokens: 150,
          actual_output_tokens: 300,
          outcome: Outcome.SUCCESS,
        }),
      );

      expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent);
    });

    it("should build and track a telemetry event for OpenAI provider", () => {
      service.trackLlmCompletion({
        ...baseParams,
        model: "gpt-4o",
        providerType: "openai",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          model: "gpt-4o",
          provider: Provider.OPENAI,
          harness: Harness.API_DIRECT,
        }),
      );
    });

    it("should build and track a telemetry event for Ollama provider", () => {
      service.trackLlmCompletion({
        ...baseParams,
        model: "llama3.2",
        providerType: "ollama",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          model: "llama3.2",
          provider: Provider.OLLAMA,
          harness: Harness.OLLAMA_LOCAL,
        }),
      );
    });

    it("should calculate cost in microdollars correctly", () => {
      service.trackLlmCompletion(baseParams);

      // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
      const expectedCost = 4950;

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          estimated_cost_usd_micros: expectedCost,
          actual_cost_usd_micros: expectedCost,
        }),
      );
    });

    it("should calculate zero cost for ollama models", () => {
      service.trackLlmCompletion({
        ...baseParams,
        model: "llama3.2",
        providerType: "ollama",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          estimated_cost_usd_micros: 0,
          actual_cost_usd_micros: 0,
        }),
      );
    });

    it("should track FAILURE outcome when success is false", () => {
      service.trackLlmCompletion({
        ...baseParams,
        success: false,
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          outcome: Outcome.FAILURE,
        }),
      );
    });

    it("should infer task type from calling context", () => {
      service.trackLlmCompletion({
        ...baseParams,
        callingContext: "brain",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          task_type: TaskType.PLANNING,
        }),
      );
    });

    it("should set empty quality gates arrays for direct LLM calls", () => {
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          quality_gate_passed: true,
          quality_gates_run: [],
          quality_gates_failed: [],
        }),
      );
    });

    it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
      mockTelemetryService.eventBuilder = null;

      // Should not throw
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled();
    });

    it("should not throw when eventBuilder.build throws an error", () => {
      mockTelemetryService.eventBuilder = {
        build: vi.fn().mockImplementation(() => {
          throw new Error("Build failed");
        }),
      };

      // Should not throw
      expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
    });

    it("should not throw when trackTaskCompletion throws an error", () => {
      mockTelemetryService.trackTaskCompletion.mockImplementation(() => {
        throw new Error("Track failed");
      });

      // Should not throw
      expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
    });

    it("should handle streaming operation with estimated tokens", () => {
      service.trackLlmCompletion({
        ...baseParams,
        operation: "chatStream",
        inputTokens: 50,
        outputTokens: 100,
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          actual_input_tokens: 50,
          actual_output_tokens: 100,
          estimated_input_tokens: 50,
          estimated_output_tokens: 100,
        }),
      );
    });

    it("should handle embed operation", () => {
      service.trackLlmCompletion({
        ...baseParams,
        operation: "embed",
        outputTokens: 0,
        callingContext: undefined,
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          task_type: TaskType.IMPLEMENTATION,
          actual_output_tokens: 0,
        }),
      );
    });

    it("should pass all required EventBuilderParams fields", () => {
      service.trackLlmCompletion(baseParams);

      const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>).mock
        .calls[0][0] as EventBuilderParams;

      // Verify all required fields are present
      expect(buildCall).toHaveProperty("task_duration_ms");
      expect(buildCall).toHaveProperty("task_type");
      expect(buildCall).toHaveProperty("complexity");
      expect(buildCall).toHaveProperty("harness");
      expect(buildCall).toHaveProperty("model");
      expect(buildCall).toHaveProperty("provider");
      expect(buildCall).toHaveProperty("estimated_input_tokens");
      expect(buildCall).toHaveProperty("estimated_output_tokens");
      expect(buildCall).toHaveProperty("actual_input_tokens");
      expect(buildCall).toHaveProperty("actual_output_tokens");
      expect(buildCall).toHaveProperty("estimated_cost_usd_micros");
      expect(buildCall).toHaveProperty("actual_cost_usd_micros");
      expect(buildCall).toHaveProperty("quality_gate_passed");
      expect(buildCall).toHaveProperty("quality_gates_run");
      expect(buildCall).toHaveProperty("quality_gates_failed");
      expect(buildCall).toHaveProperty("context_compactions");
      expect(buildCall).toHaveProperty("context_rotations");
      expect(buildCall).toHaveProperty("context_utilization_final");
      expect(buildCall).toHaveProperty("outcome");
      expect(buildCall).toHaveProperty("retry_count");
    });
  });
});
|
||||
197
apps/api/src/llm/llm-telemetry-tracker.service.ts
Normal file
197
apps/api/src/llm/llm-telemetry-tracker.service.ts
Normal file
@@ -0,0 +1,197 @@
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
||||
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
||||
import type { LlmProviderType } from "./providers/llm-provider.interface";
|
||||
import { calculateCostMicrodollars } from "./llm-cost-table";
|
||||
|
||||
/**
 * Parameters for tracking an LLM completion event.
 */
export interface LlmCompletionParams {
  /** Full model name (e.g. "claude-sonnet-4-5-20250929") */
  model: string;
  /** Provider type discriminator */
  providerType: LlmProviderType;
  /** Operation type that was performed */
  operation: "chat" | "chatStream" | "embed";
  /** Duration of the LLM call in milliseconds */
  durationMs: number;
  /**
   * Number of input (prompt) tokens consumed.
   * NOTE(review): for chatStream callers this may be an estimate rather than
   * a provider-reported count — confirm against instrumentation sites.
   */
  inputTokens: number;
  /** Number of output (completion) tokens generated (same caveat for streams) */
  outputTokens: number;
  /**
   * Optional calling context hint for task type inference.
   * Examples: "brain", "chat", "embed", "planning", "code-review"
   */
  callingContext?: string;
  /** Whether the call succeeded or failed */
  success: boolean;
}
|
||||
|
||||
/**
|
||||
* Estimated token count from text length.
|
||||
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
|
||||
*/
|
||||
export function estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/** Map LLM provider type to telemetry Provider enum */
|
||||
export function mapProviderType(providerType: LlmProviderType): Provider {
|
||||
switch (providerType) {
|
||||
case "claude":
|
||||
return Provider.ANTHROPIC;
|
||||
case "openai":
|
||||
return Provider.OPENAI;
|
||||
case "ollama":
|
||||
return Provider.OLLAMA;
|
||||
default:
|
||||
return Provider.UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
/** Map LLM provider type to telemetry Harness enum */
|
||||
export function mapHarness(providerType: LlmProviderType): Harness {
|
||||
switch (providerType) {
|
||||
case "ollama":
|
||||
return Harness.OLLAMA_LOCAL;
|
||||
default:
|
||||
return Harness.API_DIRECT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Infer the task type from calling context and operation.
|
||||
*
|
||||
* @param operation - The LLM operation (chat, chatStream, embed)
|
||||
* @param callingContext - Optional hint about the caller's purpose
|
||||
* @returns Inferred TaskType
|
||||
*/
|
||||
export function inferTaskType(
|
||||
operation: "chat" | "chatStream" | "embed",
|
||||
callingContext?: string
|
||||
): TaskType {
|
||||
// Embedding operations are typically for indexing/search
|
||||
if (operation === "embed") {
|
||||
return TaskType.IMPLEMENTATION;
|
||||
}
|
||||
|
||||
if (!callingContext) {
|
||||
return TaskType.UNKNOWN;
|
||||
}
|
||||
|
||||
const ctx = callingContext.toLowerCase();
|
||||
|
||||
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
|
||||
return TaskType.PLANNING;
|
||||
}
|
||||
if (ctx.includes("review") || ctx.includes("code-review")) {
|
||||
return TaskType.CODE_REVIEW;
|
||||
}
|
||||
if (ctx.includes("test")) {
|
||||
return TaskType.TESTING;
|
||||
}
|
||||
if (ctx.includes("debug")) {
|
||||
return TaskType.DEBUGGING;
|
||||
}
|
||||
if (ctx.includes("refactor")) {
|
||||
return TaskType.REFACTORING;
|
||||
}
|
||||
if (ctx.includes("doc")) {
|
||||
return TaskType.DOCUMENTATION;
|
||||
}
|
||||
if (ctx.includes("config")) {
|
||||
return TaskType.CONFIGURATION;
|
||||
}
|
||||
if (ctx.includes("security") || ctx.includes("audit")) {
|
||||
return TaskType.SECURITY_AUDIT;
|
||||
}
|
||||
if (ctx.includes("chat") || ctx.includes("implement")) {
|
||||
return TaskType.IMPLEMENTATION;
|
||||
}
|
||||
|
||||
return TaskType.UNKNOWN;
|
||||
}
|
||||
|
||||
/**
|
||||
* LLM Telemetry Tracker Service
|
||||
*
|
||||
* Builds and submits telemetry events for LLM completions.
|
||||
* All tracking is non-blocking and fire-and-forget; telemetry errors
|
||||
* never propagate to the caller.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* // After a successful chat completion
|
||||
* this.telemetryTracker.trackLlmCompletion({
|
||||
* model: "claude-sonnet-4-5-20250929",
|
||||
* providerType: "claude",
|
||||
* operation: "chat",
|
||||
* durationMs: 1200,
|
||||
* inputTokens: 150,
|
||||
* outputTokens: 300,
|
||||
* callingContext: "chat",
|
||||
* success: true,
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
@Injectable()
|
||||
export class LlmTelemetryTrackerService {
|
||||
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
|
||||
|
||||
constructor(private readonly telemetry: MosaicTelemetryService) {}
|
||||
|
||||
/**
|
||||
* Track an LLM completion event via Mosaic Telemetry.
|
||||
*
|
||||
* This method is intentionally fire-and-forget. It catches all errors
|
||||
* internally and logs them without propagating to the caller.
|
||||
*
|
||||
* @param params - LLM completion parameters
|
||||
*/
|
||||
trackLlmCompletion(params: LlmCompletionParams): void {
|
||||
try {
|
||||
const builder = this.telemetry.eventBuilder;
|
||||
if (!builder) {
|
||||
// Telemetry is disabled — silently skip
|
||||
return;
|
||||
}
|
||||
|
||||
const costMicrodollars = calculateCostMicrodollars(
|
||||
params.model,
|
||||
params.inputTokens,
|
||||
params.outputTokens
|
||||
);
|
||||
|
||||
const event = builder.build({
|
||||
task_duration_ms: params.durationMs,
|
||||
task_type: inferTaskType(params.operation, params.callingContext),
|
||||
complexity: Complexity.LOW,
|
||||
harness: mapHarness(params.providerType),
|
||||
model: params.model,
|
||||
provider: mapProviderType(params.providerType),
|
||||
estimated_input_tokens: params.inputTokens,
|
||||
estimated_output_tokens: params.outputTokens,
|
||||
actual_input_tokens: params.inputTokens,
|
||||
actual_output_tokens: params.outputTokens,
|
||||
estimated_cost_usd_micros: costMicrodollars,
|
||||
actual_cost_usd_micros: costMicrodollars,
|
||||
quality_gate_passed: true,
|
||||
quality_gates_run: [],
|
||||
quality_gates_failed: [],
|
||||
context_compactions: 0,
|
||||
context_rotations: 0,
|
||||
context_utilization_final: 0,
|
||||
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
|
||||
retry_count: 0,
|
||||
});
|
||||
|
||||
this.telemetry.trackTaskCompletion(event);
|
||||
} catch (error: unknown) {
|
||||
// Never let telemetry errors propagate
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,13 +3,14 @@ import { LlmController } from "./llm.controller";
|
||||
import { LlmProviderAdminController } from "./llm-provider-admin.controller";
|
||||
import { LlmService } from "./llm.service";
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
|
||||
import { PrismaModule } from "../prisma/prisma.module";
|
||||
import { LlmUsageModule } from "../llm-usage/llm-usage.module";
|
||||
|
||||
@Module({
|
||||
imports: [PrismaModule, LlmUsageModule],
|
||||
controllers: [LlmController, LlmProviderAdminController],
|
||||
providers: [LlmService, LlmManagerService],
|
||||
providers: [LlmService, LlmManagerService, LlmTelemetryTrackerService],
|
||||
exports: [LlmService, LlmManagerService],
|
||||
})
|
||||
export class LlmModule {}
|
||||
|
||||
@@ -3,6 +3,7 @@ import { Test, TestingModule } from "@nestjs/testing";
|
||||
import { ServiceUnavailableException } from "@nestjs/common";
|
||||
import { LlmService } from "./llm.service";
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
|
||||
import type { ChatRequestDto, EmbedRequestDto, ChatResponseDto, EmbedResponseDto } from "./dto";
|
||||
import type {
|
||||
LlmProviderInterface,
|
||||
@@ -14,6 +15,9 @@ describe("LlmService", () => {
|
||||
let mockManagerService: {
|
||||
getDefaultProvider: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
let mockTelemetryTracker: {
|
||||
trackLlmCompletion: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
let mockProvider: {
|
||||
chat: ReturnType<typeof vi.fn>;
|
||||
chatStream: ReturnType<typeof vi.fn>;
|
||||
@@ -41,6 +45,11 @@ describe("LlmService", () => {
|
||||
getDefaultProvider: vi.fn().mockResolvedValue(mockProvider),
|
||||
};
|
||||
|
||||
// Create mock telemetry tracker
|
||||
mockTelemetryTracker = {
|
||||
trackLlmCompletion: vi.fn(),
|
||||
};
|
||||
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
LlmService,
|
||||
@@ -48,6 +57,10 @@ describe("LlmService", () => {
|
||||
provide: LlmManagerService,
|
||||
useValue: mockManagerService,
|
||||
},
|
||||
{
|
||||
provide: LlmTelemetryTrackerService,
|
||||
useValue: mockTelemetryTracker,
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
@@ -135,6 +148,45 @@ describe("LlmService", () => {
|
||||
expect(result).toEqual(response);
|
||||
});
|
||||
|
||||
it("should track telemetry on successful chat", async () => {
|
||||
const response: ChatResponseDto = {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "Hello" },
|
||||
done: true,
|
||||
promptEvalCount: 10,
|
||||
evalCount: 20,
|
||||
};
|
||||
mockProvider.chat.mockResolvedValue(response);
|
||||
|
||||
await service.chat(request, "chat");
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
providerType: "ollama",
|
||||
operation: "chat",
|
||||
inputTokens: 10,
|
||||
outputTokens: 20,
|
||||
callingContext: "chat",
|
||||
success: true,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should track telemetry on failed chat", async () => {
|
||||
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
|
||||
|
||||
await expect(service.chat(request)).rejects.toThrow(ServiceUnavailableException);
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
operation: "chat",
|
||||
success: false,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw ServiceUnavailableException on error", async () => {
|
||||
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
|
||||
|
||||
@@ -177,6 +229,94 @@ describe("LlmService", () => {
|
||||
expect(chunks[1].message.content).toBe(" world");
|
||||
});
|
||||
|
||||
it("should track telemetry after stream completes", async () => {
|
||||
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "Hello" },
|
||||
done: false,
|
||||
};
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: " world" },
|
||||
done: true,
|
||||
promptEvalCount: 5,
|
||||
evalCount: 10,
|
||||
};
|
||||
}
|
||||
|
||||
mockProvider.chatStream.mockReturnValue(mockGenerator());
|
||||
|
||||
const chunks: ChatResponseDto[] = [];
|
||||
for await (const chunk of service.chatStream(request, "brain")) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
providerType: "ollama",
|
||||
operation: "chatStream",
|
||||
inputTokens: 5,
|
||||
outputTokens: 10,
|
||||
callingContext: "brain",
|
||||
success: true,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should estimate tokens when provider does not return counts in stream", async () => {
|
||||
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "Hello world" },
|
||||
done: false,
|
||||
};
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "" },
|
||||
done: true,
|
||||
};
|
||||
}
|
||||
|
||||
mockProvider.chatStream.mockReturnValue(mockGenerator());
|
||||
|
||||
const chunks: ChatResponseDto[] = [];
|
||||
for await (const chunk of service.chatStream(request)) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
// Should use estimated tokens since no actual counts provided
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
operation: "chatStream",
|
||||
success: true,
|
||||
// Input estimated from "Hi" -> ceil(2/4) = 1
|
||||
inputTokens: 1,
|
||||
// Output estimated from "Hello world" -> ceil(11/4) = 3
|
||||
outputTokens: 3,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should track telemetry on stream failure", async () => {
|
||||
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
throw new Error("Stream failed");
|
||||
}
|
||||
|
||||
mockProvider.chatStream.mockReturnValue(errorGenerator());
|
||||
|
||||
const generator = service.chatStream(request);
|
||||
await expect(generator.next()).rejects.toThrow(ServiceUnavailableException);
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
operation: "chatStream",
|
||||
success: false,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw ServiceUnavailableException on error", async () => {
|
||||
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
throw new Error("Stream failed");
|
||||
@@ -210,6 +350,41 @@ describe("LlmService", () => {
|
||||
expect(result).toEqual(response);
|
||||
});
|
||||
|
||||
it("should track telemetry on successful embed", async () => {
|
||||
const response: EmbedResponseDto = {
|
||||
model: "llama3.2",
|
||||
embeddings: [[0.1, 0.2, 0.3]],
|
||||
totalDuration: 500,
|
||||
};
|
||||
mockProvider.embed.mockResolvedValue(response);
|
||||
|
||||
await service.embed(request, "embed");
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
providerType: "ollama",
|
||||
operation: "embed",
|
||||
outputTokens: 0,
|
||||
callingContext: "embed",
|
||||
success: true,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should track telemetry on failed embed", async () => {
|
||||
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));
|
||||
|
||||
await expect(service.embed(request)).rejects.toThrow(ServiceUnavailableException);
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
operation: "embed",
|
||||
success: false,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw ServiceUnavailableException on error", async () => {
|
||||
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import { LlmTelemetryTrackerService, estimateTokens } from "./llm-telemetry-tracker.service";
|
||||
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
|
||||
import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface";
|
||||
import type { LlmProviderHealthStatus, LlmProviderType } from "./providers/llm-provider.interface";
|
||||
|
||||
/**
|
||||
* LLM Service
|
||||
*
|
||||
* High-level service for LLM operations. Delegates to providers via LlmManagerService.
|
||||
* Maintains backward compatibility with the original API while supporting multiple providers.
|
||||
* Automatically tracks completions via Mosaic Telemetry (non-blocking).
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
@@ -33,7 +35,10 @@ import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface
|
||||
export class LlmService implements OnModuleInit {
|
||||
private readonly logger = new Logger(LlmService.name);
|
||||
|
||||
constructor(private readonly llmManager: LlmManagerService) {
|
||||
constructor(
|
||||
private readonly llmManager: LlmManagerService,
|
||||
private readonly telemetryTracker: LlmTelemetryTrackerService
|
||||
) {
|
||||
this.logger.log("LLM service initialized");
|
||||
}
|
||||
|
||||
@@ -91,14 +96,45 @@ export class LlmService implements OnModuleInit {
|
||||
* Perform a synchronous chat completion.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @param callingContext - Optional context hint for telemetry task type inference
|
||||
* @returns Complete chat response
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
|
||||
async chat(request: ChatRequestDto, callingContext?: string): Promise<ChatResponseDto> {
|
||||
const startTime = Date.now();
|
||||
let providerType: LlmProviderType = "ollama";
|
||||
|
||||
try {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.chat(request);
|
||||
providerType = provider.type;
|
||||
const response = await provider.chat(request);
|
||||
|
||||
// Fire-and-forget telemetry tracking
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: response.model,
|
||||
providerType,
|
||||
operation: "chat",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: response.promptEvalCount ?? 0,
|
||||
outputTokens: response.evalCount ?? 0,
|
||||
callingContext,
|
||||
success: true,
|
||||
});
|
||||
|
||||
return response;
|
||||
} catch (error: unknown) {
|
||||
// Track failure (fire-and-forget)
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "chat",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
callingContext,
|
||||
success: false,
|
||||
});
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Chat failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`);
|
||||
@@ -107,20 +143,75 @@ export class LlmService implements OnModuleInit {
|
||||
/**
|
||||
* Perform a streaming chat completion.
|
||||
* Yields response chunks as they arrive from the provider.
|
||||
* Aggregates token usage and tracks telemetry after the stream ends.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @param callingContext - Optional context hint for telemetry task type inference
|
||||
* @yields Chat response chunks
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto, void, unknown> {
|
||||
async *chatStream(
|
||||
request: ChatRequestDto,
|
||||
callingContext?: string
|
||||
): AsyncGenerator<ChatResponseDto, void, unknown> {
|
||||
const startTime = Date.now();
|
||||
let providerType: LlmProviderType = "ollama";
|
||||
let aggregatedContent = "";
|
||||
let lastChunkInputTokens = 0;
|
||||
let lastChunkOutputTokens = 0;
|
||||
|
||||
try {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
providerType = provider.type;
|
||||
const stream = provider.chatStream(request);
|
||||
|
||||
for await (const chunk of stream) {
|
||||
// Accumulate content for token estimation
|
||||
aggregatedContent += chunk.message.content;
|
||||
|
||||
// Some providers include token counts on the final chunk
|
||||
if (chunk.promptEvalCount !== undefined) {
|
||||
lastChunkInputTokens = chunk.promptEvalCount;
|
||||
}
|
||||
if (chunk.evalCount !== undefined) {
|
||||
lastChunkOutputTokens = chunk.evalCount;
|
||||
}
|
||||
|
||||
yield chunk;
|
||||
}
|
||||
|
||||
// After stream completes, track telemetry
|
||||
// Use actual token counts if available, otherwise estimate from content length
|
||||
const inputTokens =
|
||||
lastChunkInputTokens > 0
|
||||
? lastChunkInputTokens
|
||||
: estimateTokens(request.messages.map((m) => m.content).join(" "));
|
||||
const outputTokens =
|
||||
lastChunkOutputTokens > 0 ? lastChunkOutputTokens : estimateTokens(aggregatedContent);
|
||||
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "chatStream",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
callingContext,
|
||||
success: true,
|
||||
});
|
||||
} catch (error: unknown) {
|
||||
// Track failure (fire-and-forget)
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "chatStream",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
callingContext,
|
||||
success: false,
|
||||
});
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Stream failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`);
|
||||
@@ -130,14 +221,48 @@ export class LlmService implements OnModuleInit {
|
||||
* Generate embeddings for the given input texts.
|
||||
*
|
||||
* @param request - Embedding request with model and input texts
|
||||
* @param callingContext - Optional context hint for telemetry task type inference
|
||||
* @returns Embeddings response with vector arrays
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
|
||||
async embed(request: EmbedRequestDto, callingContext?: string): Promise<EmbedResponseDto> {
|
||||
const startTime = Date.now();
|
||||
let providerType: LlmProviderType = "ollama";
|
||||
|
||||
try {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.embed(request);
|
||||
providerType = provider.type;
|
||||
const response = await provider.embed(request);
|
||||
|
||||
// Estimate input tokens from the input text
|
||||
const inputTokens = estimateTokens(request.input.join(" "));
|
||||
|
||||
// Fire-and-forget telemetry tracking
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: response.model,
|
||||
providerType,
|
||||
operation: "embed",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens,
|
||||
outputTokens: 0, // Embeddings don't produce output tokens
|
||||
callingContext,
|
||||
success: true,
|
||||
});
|
||||
|
||||
return response;
|
||||
} catch (error: unknown) {
|
||||
// Track failure (fire-and-forget)
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "embed",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
callingContext,
|
||||
success: false,
|
||||
});
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Embed failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);
|
||||
|
||||
Reference in New Issue
Block a user