- llm-cost-table.ts: Add undefined guard for MODEL_COSTS lookup
- llm-telemetry-tracker.service.ts: Allow undefined in callingContext for exactOptionalPropertyTypes compatibility

Refs #371
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

225 lines · 6.8 KiB · TypeScript
import { Injectable, Logger, Optional } from "@nestjs/common";
|
|
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
|
import { PredictionService } from "../mosaic-telemetry/prediction.service";
|
|
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
|
import type { LlmProviderType } from "./providers/llm-provider.interface";
|
|
import { calculateCostMicrodollars } from "./llm-cost-table";
|
|
|
|
/**
 * Parameters for tracking an LLM completion event.
 *
 * Passed to {@link LlmTelemetryTrackerService.trackLlmCompletion}; all
 * fields describe a single completed (or failed) LLM call.
 */
export interface LlmCompletionParams {
  /** Full model name (e.g. "claude-sonnet-4-5-20250929") */
  model: string;
  /** Provider type discriminator */
  providerType: LlmProviderType;
  /** Operation type that was performed */
  operation: "chat" | "chatStream" | "embed";
  /** Duration of the LLM call in milliseconds */
  durationMs: number;
  /** Number of input (prompt) tokens consumed */
  inputTokens: number;
  /** Number of output (completion) tokens generated */
  outputTokens: number;
  /**
   * Optional calling context hint for task type inference.
   * Examples: "brain", "chat", "embed", "planning", "code-review"
   *
   * Declared `string | undefined` (not just optional) so callers compiled
   * with `exactOptionalPropertyTypes` may pass an explicitly undefined
   * value as well as omit the property entirely.
   */
  callingContext?: string | undefined;
  /** Whether the call succeeded or failed */
  success: boolean;
}
|
|
|
|
/**
|
|
* Estimated token count from text length.
|
|
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
|
|
*/
|
|
export function estimateTokens(text: string): number {
|
|
return Math.ceil(text.length / 4);
|
|
}
|
|
|
|
/** Map LLM provider type to telemetry Provider enum */
|
|
export function mapProviderType(providerType: LlmProviderType): Provider {
|
|
switch (providerType) {
|
|
case "claude":
|
|
return Provider.ANTHROPIC;
|
|
case "openai":
|
|
return Provider.OPENAI;
|
|
case "ollama":
|
|
return Provider.OLLAMA;
|
|
default:
|
|
return Provider.UNKNOWN;
|
|
}
|
|
}
|
|
|
|
/** Map LLM provider type to telemetry Harness enum */
|
|
export function mapHarness(providerType: LlmProviderType): Harness {
|
|
switch (providerType) {
|
|
case "ollama":
|
|
return Harness.OLLAMA_LOCAL;
|
|
default:
|
|
return Harness.API_DIRECT;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Infer the task type from calling context and operation.
|
|
*
|
|
* @param operation - The LLM operation (chat, chatStream, embed)
|
|
* @param callingContext - Optional hint about the caller's purpose
|
|
* @returns Inferred TaskType
|
|
*/
|
|
export function inferTaskType(
|
|
operation: "chat" | "chatStream" | "embed",
|
|
callingContext?: string
|
|
): TaskType {
|
|
// Embedding operations are typically for indexing/search
|
|
if (operation === "embed") {
|
|
return TaskType.IMPLEMENTATION;
|
|
}
|
|
|
|
if (!callingContext) {
|
|
return TaskType.UNKNOWN;
|
|
}
|
|
|
|
const ctx = callingContext.toLowerCase();
|
|
|
|
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
|
|
return TaskType.PLANNING;
|
|
}
|
|
if (ctx.includes("review") || ctx.includes("code-review")) {
|
|
return TaskType.CODE_REVIEW;
|
|
}
|
|
if (ctx.includes("test")) {
|
|
return TaskType.TESTING;
|
|
}
|
|
if (ctx.includes("debug")) {
|
|
return TaskType.DEBUGGING;
|
|
}
|
|
if (ctx.includes("refactor")) {
|
|
return TaskType.REFACTORING;
|
|
}
|
|
if (ctx.includes("doc")) {
|
|
return TaskType.DOCUMENTATION;
|
|
}
|
|
if (ctx.includes("config")) {
|
|
return TaskType.CONFIGURATION;
|
|
}
|
|
if (ctx.includes("security") || ctx.includes("audit")) {
|
|
return TaskType.SECURITY_AUDIT;
|
|
}
|
|
if (ctx.includes("chat") || ctx.includes("implement")) {
|
|
return TaskType.IMPLEMENTATION;
|
|
}
|
|
|
|
return TaskType.UNKNOWN;
|
|
}
|
|
|
|
/**
|
|
* LLM Telemetry Tracker Service
|
|
*
|
|
* Builds and submits telemetry events for LLM completions.
|
|
* All tracking is non-blocking and fire-and-forget; telemetry errors
|
|
* never propagate to the caller.
|
|
*
|
|
* @example
|
|
* ```typescript
|
|
* // After a successful chat completion
|
|
* this.telemetryTracker.trackLlmCompletion({
|
|
* model: "claude-sonnet-4-5-20250929",
|
|
* providerType: "claude",
|
|
* operation: "chat",
|
|
* durationMs: 1200,
|
|
* inputTokens: 150,
|
|
* outputTokens: 300,
|
|
* callingContext: "chat",
|
|
* success: true,
|
|
* });
|
|
* ```
|
|
*/
|
|
@Injectable()
|
|
export class LlmTelemetryTrackerService {
|
|
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
|
|
|
|
constructor(
|
|
private readonly telemetry: MosaicTelemetryService,
|
|
@Optional() private readonly predictionService?: PredictionService
|
|
) {}
|
|
|
|
/**
|
|
* Track an LLM completion event via Mosaic Telemetry.
|
|
*
|
|
* This method is intentionally fire-and-forget. It catches all errors
|
|
* internally and logs them without propagating to the caller.
|
|
*
|
|
* @param params - LLM completion parameters
|
|
*/
|
|
trackLlmCompletion(params: LlmCompletionParams): void {
|
|
try {
|
|
const builder = this.telemetry.eventBuilder;
|
|
if (!builder) {
|
|
// Telemetry is disabled — silently skip
|
|
return;
|
|
}
|
|
|
|
const taskType = inferTaskType(params.operation, params.callingContext);
|
|
const provider = mapProviderType(params.providerType);
|
|
|
|
const costMicrodollars = calculateCostMicrodollars(
|
|
params.model,
|
|
params.inputTokens,
|
|
params.outputTokens
|
|
);
|
|
|
|
// Query predictions for estimated fields (graceful degradation)
|
|
let estimatedInputTokens = 0;
|
|
let estimatedOutputTokens = 0;
|
|
let estimatedCostMicros = 0;
|
|
|
|
if (this.predictionService) {
|
|
const prediction = this.predictionService.getEstimate(
|
|
taskType,
|
|
params.model,
|
|
provider,
|
|
Complexity.LOW
|
|
);
|
|
|
|
if (prediction?.prediction && prediction.metadata.confidence !== "none") {
|
|
estimatedInputTokens = prediction.prediction.input_tokens.median;
|
|
estimatedOutputTokens = prediction.prediction.output_tokens.median;
|
|
estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 0;
|
|
}
|
|
}
|
|
|
|
const event = builder.build({
|
|
task_duration_ms: params.durationMs,
|
|
task_type: taskType,
|
|
complexity: Complexity.LOW,
|
|
harness: mapHarness(params.providerType),
|
|
model: params.model,
|
|
provider,
|
|
estimated_input_tokens: estimatedInputTokens,
|
|
estimated_output_tokens: estimatedOutputTokens,
|
|
actual_input_tokens: params.inputTokens,
|
|
actual_output_tokens: params.outputTokens,
|
|
estimated_cost_usd_micros: estimatedCostMicros,
|
|
actual_cost_usd_micros: costMicrodollars,
|
|
quality_gate_passed: true,
|
|
quality_gates_run: [],
|
|
quality_gates_failed: [],
|
|
context_compactions: 0,
|
|
context_rotations: 0,
|
|
context_utilization_final: 0,
|
|
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
|
|
retry_count: 0,
|
|
});
|
|
|
|
this.telemetry.trackTaskCompletion(event);
|
|
} catch (error: unknown) {
|
|
// Never let telemetry errors propagate
|
|
const msg = error instanceof Error ? error.message : String(error);
|
|
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
|
|
}
|
|
}
|
|
}
|