feat(#371): track LLM task completions via Mosaic Telemetry

- Create LlmTelemetryTrackerService for non-blocking event emission
- Normalize token usage across Anthropic, OpenAI, Ollama providers
- Add cost table with per-token pricing in microdollars
- Instrument chat, chatStream, and embed methods
- Infer task type from calling context
- Aggregate streaming tokens after stream ends with fallback estimation
- Add 69 unit tests for tracker service, cost table, and LLM service

Refs #371

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 01:44:29 -06:00
parent 24c21f45b3
commit fcecf3654b
6 changed files with 1103 additions and 8 deletions

View File

@@ -0,0 +1,197 @@
import { Injectable, Logger } from "@nestjs/common";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
import type { LlmProviderType } from "./providers/llm-provider.interface";
import { calculateCostMicrodollars } from "./llm-cost-table";
/**
* Parameters for tracking an LLM completion event.
*/
export interface LlmCompletionParams {
/** Full model name (e.g. "claude-sonnet-4-5-20250929") */
model: string;
/** Provider type discriminator */
providerType: LlmProviderType;
/** Operation type that was performed */
operation: "chat" | "chatStream" | "embed";
/** Duration of the LLM call in milliseconds */
durationMs: number;
/** Number of input (prompt) tokens consumed */
inputTokens: number;
/** Number of output (completion) tokens generated */
outputTokens: number;
/**
* Optional calling context hint for task type inference.
* Examples: "brain", "chat", "embed", "planning", "code-review"
*/
callingContext?: string;
/** Whether the call succeeded or failed */
success: boolean;
}
/**
* Estimated token count from text length.
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
*/
export function estimateTokens(text: string): number {
return Math.ceil(text.length / 4);
}
/** Map LLM provider type to telemetry Provider enum */
export function mapProviderType(providerType: LlmProviderType): Provider {
switch (providerType) {
case "claude":
return Provider.ANTHROPIC;
case "openai":
return Provider.OPENAI;
case "ollama":
return Provider.OLLAMA;
default:
return Provider.UNKNOWN;
}
}
/** Map LLM provider type to telemetry Harness enum */
export function mapHarness(providerType: LlmProviderType): Harness {
switch (providerType) {
case "ollama":
return Harness.OLLAMA_LOCAL;
default:
return Harness.API_DIRECT;
}
}
/**
* Infer the task type from calling context and operation.
*
* @param operation - The LLM operation (chat, chatStream, embed)
* @param callingContext - Optional hint about the caller's purpose
* @returns Inferred TaskType
*/
export function inferTaskType(
operation: "chat" | "chatStream" | "embed",
callingContext?: string
): TaskType {
// Embedding operations are typically for indexing/search
if (operation === "embed") {
return TaskType.IMPLEMENTATION;
}
if (!callingContext) {
return TaskType.UNKNOWN;
}
const ctx = callingContext.toLowerCase();
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
return TaskType.PLANNING;
}
if (ctx.includes("review") || ctx.includes("code-review")) {
return TaskType.CODE_REVIEW;
}
if (ctx.includes("test")) {
return TaskType.TESTING;
}
if (ctx.includes("debug")) {
return TaskType.DEBUGGING;
}
if (ctx.includes("refactor")) {
return TaskType.REFACTORING;
}
if (ctx.includes("doc")) {
return TaskType.DOCUMENTATION;
}
if (ctx.includes("config")) {
return TaskType.CONFIGURATION;
}
if (ctx.includes("security") || ctx.includes("audit")) {
return TaskType.SECURITY_AUDIT;
}
if (ctx.includes("chat") || ctx.includes("implement")) {
return TaskType.IMPLEMENTATION;
}
return TaskType.UNKNOWN;
}
/**
* LLM Telemetry Tracker Service
*
* Builds and submits telemetry events for LLM completions.
* All tracking is non-blocking and fire-and-forget; telemetry errors
* never propagate to the caller.
*
* @example
* ```typescript
* // After a successful chat completion
* this.telemetryTracker.trackLlmCompletion({
* model: "claude-sonnet-4-5-20250929",
* providerType: "claude",
* operation: "chat",
* durationMs: 1200,
* inputTokens: 150,
* outputTokens: 300,
* callingContext: "chat",
* success: true,
* });
* ```
*/
@Injectable()
export class LlmTelemetryTrackerService {
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
constructor(private readonly telemetry: MosaicTelemetryService) {}
/**
* Track an LLM completion event via Mosaic Telemetry.
*
* This method is intentionally fire-and-forget. It catches all errors
* internally and logs them without propagating to the caller.
*
* @param params - LLM completion parameters
*/
trackLlmCompletion(params: LlmCompletionParams): void {
try {
const builder = this.telemetry.eventBuilder;
if (!builder) {
// Telemetry is disabled — silently skip
return;
}
const costMicrodollars = calculateCostMicrodollars(
params.model,
params.inputTokens,
params.outputTokens
);
const event = builder.build({
task_duration_ms: params.durationMs,
task_type: inferTaskType(params.operation, params.callingContext),
complexity: Complexity.LOW,
harness: mapHarness(params.providerType),
model: params.model,
provider: mapProviderType(params.providerType),
estimated_input_tokens: params.inputTokens,
estimated_output_tokens: params.outputTokens,
actual_input_tokens: params.inputTokens,
actual_output_tokens: params.outputTokens,
estimated_cost_usd_micros: costMicrodollars,
actual_cost_usd_micros: costMicrodollars,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0,
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
retry_count: 0,
});
this.telemetry.trackTaskCompletion(event);
} catch (error: unknown) {
// Never let telemetry errors propagate
const msg = error instanceof Error ? error.message : String(error);
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
}
}
}