feat(#371): track LLM task completions via Mosaic Telemetry
- Create LlmTelemetryTrackerService for non-blocking event emission - Normalize token usage across Anthropic, OpenAI, Ollama providers - Add cost table with per-token pricing in microdollars - Instrument chat, chatStream, and embed methods - Infer task type from calling context - Aggregate streaming tokens after stream ends with fallback estimation - Add 69 unit tests for tracker service, cost table, and LLM service Refs #371 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
197
apps/api/src/llm/llm-telemetry-tracker.service.ts
Normal file
197
apps/api/src/llm/llm-telemetry-tracker.service.ts
Normal file
@@ -0,0 +1,197 @@
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
||||
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
||||
import type { LlmProviderType } from "./providers/llm-provider.interface";
|
||||
import { calculateCostMicrodollars } from "./llm-cost-table";
|
||||
|
||||
/**
 * Parameters for tracking an LLM completion event.
 *
 * One instance describes a single finished (or failed) call to an LLM
 * provider; it is consumed by {@link LlmTelemetryTrackerService.trackLlmCompletion}.
 */
export interface LlmCompletionParams {
  /** Full model name as sent to the provider (e.g. "claude-sonnet-4-5-20250929") */
  model: string;
  /** Provider type discriminator (e.g. "claude", "openai", "ollama") */
  providerType: LlmProviderType;
  /** Operation type that was performed */
  operation: "chat" | "chatStream" | "embed";
  /** Duration of the LLM call in milliseconds */
  durationMs: number;
  /** Number of input (prompt) tokens consumed */
  inputTokens: number;
  /** Number of output (completion) tokens generated */
  outputTokens: number;
  /**
   * Optional calling context hint for task type inference.
   * Examples: "brain", "chat", "embed", "planning", "code-review"
   */
  callingContext?: string;
  /** Whether the call succeeded or failed */
  success: boolean;
}
|
||||
|
||||
/**
|
||||
* Estimated token count from text length.
|
||||
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
|
||||
*/
|
||||
export function estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/** Map LLM provider type to telemetry Provider enum */
|
||||
export function mapProviderType(providerType: LlmProviderType): Provider {
|
||||
switch (providerType) {
|
||||
case "claude":
|
||||
return Provider.ANTHROPIC;
|
||||
case "openai":
|
||||
return Provider.OPENAI;
|
||||
case "ollama":
|
||||
return Provider.OLLAMA;
|
||||
default:
|
||||
return Provider.UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
/** Map LLM provider type to telemetry Harness enum */
|
||||
export function mapHarness(providerType: LlmProviderType): Harness {
|
||||
switch (providerType) {
|
||||
case "ollama":
|
||||
return Harness.OLLAMA_LOCAL;
|
||||
default:
|
||||
return Harness.API_DIRECT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Infer the task type from calling context and operation.
|
||||
*
|
||||
* @param operation - The LLM operation (chat, chatStream, embed)
|
||||
* @param callingContext - Optional hint about the caller's purpose
|
||||
* @returns Inferred TaskType
|
||||
*/
|
||||
export function inferTaskType(
|
||||
operation: "chat" | "chatStream" | "embed",
|
||||
callingContext?: string
|
||||
): TaskType {
|
||||
// Embedding operations are typically for indexing/search
|
||||
if (operation === "embed") {
|
||||
return TaskType.IMPLEMENTATION;
|
||||
}
|
||||
|
||||
if (!callingContext) {
|
||||
return TaskType.UNKNOWN;
|
||||
}
|
||||
|
||||
const ctx = callingContext.toLowerCase();
|
||||
|
||||
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
|
||||
return TaskType.PLANNING;
|
||||
}
|
||||
if (ctx.includes("review") || ctx.includes("code-review")) {
|
||||
return TaskType.CODE_REVIEW;
|
||||
}
|
||||
if (ctx.includes("test")) {
|
||||
return TaskType.TESTING;
|
||||
}
|
||||
if (ctx.includes("debug")) {
|
||||
return TaskType.DEBUGGING;
|
||||
}
|
||||
if (ctx.includes("refactor")) {
|
||||
return TaskType.REFACTORING;
|
||||
}
|
||||
if (ctx.includes("doc")) {
|
||||
return TaskType.DOCUMENTATION;
|
||||
}
|
||||
if (ctx.includes("config")) {
|
||||
return TaskType.CONFIGURATION;
|
||||
}
|
||||
if (ctx.includes("security") || ctx.includes("audit")) {
|
||||
return TaskType.SECURITY_AUDIT;
|
||||
}
|
||||
if (ctx.includes("chat") || ctx.includes("implement")) {
|
||||
return TaskType.IMPLEMENTATION;
|
||||
}
|
||||
|
||||
return TaskType.UNKNOWN;
|
||||
}
|
||||
|
||||
/**
|
||||
* LLM Telemetry Tracker Service
|
||||
*
|
||||
* Builds and submits telemetry events for LLM completions.
|
||||
* All tracking is non-blocking and fire-and-forget; telemetry errors
|
||||
* never propagate to the caller.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* // After a successful chat completion
|
||||
* this.telemetryTracker.trackLlmCompletion({
|
||||
* model: "claude-sonnet-4-5-20250929",
|
||||
* providerType: "claude",
|
||||
* operation: "chat",
|
||||
* durationMs: 1200,
|
||||
* inputTokens: 150,
|
||||
* outputTokens: 300,
|
||||
* callingContext: "chat",
|
||||
* success: true,
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
@Injectable()
|
||||
export class LlmTelemetryTrackerService {
|
||||
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
|
||||
|
||||
constructor(private readonly telemetry: MosaicTelemetryService) {}
|
||||
|
||||
/**
|
||||
* Track an LLM completion event via Mosaic Telemetry.
|
||||
*
|
||||
* This method is intentionally fire-and-forget. It catches all errors
|
||||
* internally and logs them without propagating to the caller.
|
||||
*
|
||||
* @param params - LLM completion parameters
|
||||
*/
|
||||
trackLlmCompletion(params: LlmCompletionParams): void {
|
||||
try {
|
||||
const builder = this.telemetry.eventBuilder;
|
||||
if (!builder) {
|
||||
// Telemetry is disabled — silently skip
|
||||
return;
|
||||
}
|
||||
|
||||
const costMicrodollars = calculateCostMicrodollars(
|
||||
params.model,
|
||||
params.inputTokens,
|
||||
params.outputTokens
|
||||
);
|
||||
|
||||
const event = builder.build({
|
||||
task_duration_ms: params.durationMs,
|
||||
task_type: inferTaskType(params.operation, params.callingContext),
|
||||
complexity: Complexity.LOW,
|
||||
harness: mapHarness(params.providerType),
|
||||
model: params.model,
|
||||
provider: mapProviderType(params.providerType),
|
||||
estimated_input_tokens: params.inputTokens,
|
||||
estimated_output_tokens: params.outputTokens,
|
||||
actual_input_tokens: params.inputTokens,
|
||||
actual_output_tokens: params.outputTokens,
|
||||
estimated_cost_usd_micros: costMicrodollars,
|
||||
actual_cost_usd_micros: costMicrodollars,
|
||||
quality_gate_passed: true,
|
||||
quality_gates_run: [],
|
||||
quality_gates_failed: [],
|
||||
context_compactions: 0,
|
||||
context_rotations: 0,
|
||||
context_utilization_final: 0,
|
||||
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
|
||||
retry_count: 0,
|
||||
});
|
||||
|
||||
this.telemetry.trackTaskCompletion(event);
|
||||
} catch (error: unknown) {
|
||||
// Never let telemetry errors propagate
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user