feat(#371): track LLM task completions via Mosaic Telemetry

- Create LlmTelemetryTrackerService for non-blocking event emission
- Normalize token usage across Anthropic, OpenAI, Ollama providers
- Add cost table with per-token pricing in microdollars
- Instrument chat, chatStream, and embed methods
- Infer task type from calling context
- Aggregate streaming tokens after stream ends with fallback estimation
- Add 69 unit tests for tracker service, cost table, and LLM service

Refs #371

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 01:44:29 -06:00
parent 24c21f45b3
commit fcecf3654b
6 changed files with 1103 additions and 8 deletions

llm-cost-table.ts

@@ -0,0 +1,106 @@
/**
* LLM Cost Table
*
* Maps model names to per-token costs in microdollars (USD * 1,000,000).
* For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token.
*
* Costs are split into input (prompt) and output (completion) pricing.
* Ollama models run locally and are free (0 cost).
*/
/**
* Per-token cost in microdollars for a single model.
*/
export interface ModelCost {
/** Cost per input token in microdollars */
inputPerToken: number;
/** Cost per output token in microdollars */
outputPerToken: number;
}
/**
* Cost table mapping model name prefixes to per-token pricing.
*
* Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929".
* More specific prefixes are checked first (longest match wins).
*
* Prices sourced from provider pricing pages as of 2026-02.
*/
const MODEL_COSTS: Record<string, ModelCost> = {
// Anthropic Claude models (per-token microdollars)
// claude-sonnet-4-5: $3/M input, $15/M output
"claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
// claude-opus-4: $15/M input, $75/M output
"claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
// claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output
"claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 },
"claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
// claude-3-5-sonnet: $3/M input, $15/M output
"claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
// claude-3-opus: $15/M input, $75/M output
"claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
// claude-3-sonnet: $3/M input, $15/M output
"claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
// claude-3-haiku: $0.25/M input, $1.25/M output
"claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },
// OpenAI models (per-token microdollars)
// gpt-4o-mini: $0.15/M input, $0.60/M output
"gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
// gpt-4o: $2.50/M input, $10/M output
"gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
// gpt-4-turbo: $10/M input, $30/M output
"gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
// gpt-4: $30/M input, $60/M output
"gpt-4": { inputPerToken: 30, outputPerToken: 60 },
// gpt-3.5-turbo: $0.50/M input, $1.50/M output
"gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },
// Ollama / local models are free: any model not matched above falls through
// to getModelCost's zero-cost default.
};
/**
* Sorted model prefixes from longest to shortest for greedy prefix matching.
* Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku".
*/
const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length);
/**
* Look up per-token cost for a given model name.
*
* Uses longest-prefix matching: the model name is compared against known
* prefixes from longest to shortest. If no prefix matches, returns zero cost
* (assumes local/free model).
*
* @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o")
* @returns Per-token cost in microdollars
*/
export function getModelCost(modelName: string): ModelCost {
const normalized = modelName.toLowerCase();
for (const prefix of SORTED_PREFIXES) {
if (normalized.startsWith(prefix)) {
return MODEL_COSTS[prefix];
}
}
// Unknown or local model — assume free
return { inputPerToken: 0, outputPerToken: 0 };
}
/**
* Calculate total cost in microdollars for a given model and token counts.
*
* @param modelName - Full model name
* @param inputTokens - Number of input (prompt) tokens
* @param outputTokens - Number of output (completion) tokens
* @returns Total cost in microdollars (USD * 1,000,000)
*/
export function calculateCostMicrodollars(
modelName: string,
inputTokens: number,
outputTokens: number
): number {
const cost = getModelCost(modelName);
return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens);
}
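A quick sanity check of the prefix matching and microdollar arithmetic above, as a standalone sketch (the dated model names are for illustration only):

```typescript
import { calculateCostMicrodollars, getModelCost } from "./llm-cost-table";

// Longest prefix wins: a dated mini model resolves to the "gpt-4o-mini"
// entry, not the shorter "gpt-4o" prefix.
const mini = getModelCost("gpt-4o-mini-2024-07-18");
// -> { inputPerToken: 0.15, outputPerToken: 0.6 }

// claude-sonnet-4-5 at $3/M input, $15/M output:
// 2000 * 3 + 800 * 15 = 6000 + 12000 = 18000 microdollars ($0.018)
const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 2000, 800);
console.log(mini, cost); // 18000
```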

llm-telemetry-tracker.service.spec.ts

@@ -0,0 +1,491 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import {
TaskType,
Complexity,
Harness,
Provider,
Outcome,
} from "@mosaicstack/telemetry-client";
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import {
LlmTelemetryTrackerService,
estimateTokens,
mapProviderType,
mapHarness,
inferTaskType,
} from "./llm-telemetry-tracker.service";
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";
// ---------- Cost Table Tests ----------
describe("llm-cost-table", () => {
describe("getModelCost", () => {
it("should return cost for claude-sonnet-4-5 models", () => {
const cost = getModelCost("claude-sonnet-4-5-20250929");
expect(cost.inputPerToken).toBe(3);
expect(cost.outputPerToken).toBe(15);
});
it("should return cost for claude-opus-4 models", () => {
const cost = getModelCost("claude-opus-4-6");
expect(cost.inputPerToken).toBe(15);
expect(cost.outputPerToken).toBe(75);
});
it("should return cost for claude-haiku-4-5 models", () => {
const cost = getModelCost("claude-haiku-4-5-20251001");
expect(cost.inputPerToken).toBe(0.8);
expect(cost.outputPerToken).toBe(4);
});
it("should return cost for gpt-4o", () => {
const cost = getModelCost("gpt-4o");
expect(cost.inputPerToken).toBe(2.5);
expect(cost.outputPerToken).toBe(10);
});
it("should return cost for gpt-4o-mini (longer prefix matches first)", () => {
const cost = getModelCost("gpt-4o-mini");
expect(cost.inputPerToken).toBe(0.15);
expect(cost.outputPerToken).toBe(0.6);
});
it("should return zero cost for unknown/local models", () => {
const cost = getModelCost("llama3.2");
expect(cost.inputPerToken).toBe(0);
expect(cost.outputPerToken).toBe(0);
});
it("should return zero cost for ollama models", () => {
const cost = getModelCost("mistral:7b");
expect(cost.inputPerToken).toBe(0);
expect(cost.outputPerToken).toBe(0);
});
it("should be case-insensitive", () => {
const cost = getModelCost("Claude-Sonnet-4-5-20250929");
expect(cost.inputPerToken).toBe(3);
});
});
describe("calculateCostMicrodollars", () => {
it("should calculate cost for claude-sonnet-4-5 with token counts", () => {
// 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500);
expect(cost).toBe(10500);
});
it("should return 0 for local models", () => {
const cost = calculateCostMicrodollars("llama3.2", 1000, 500);
expect(cost).toBe(0);
});
it("should return 0 when token counts are 0", () => {
const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0);
expect(cost).toBe(0);
});
it("should round the result to integer microdollars", () => {
// gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7);
expect(cost).toBe(5);
});
});
});
// ---------- Helper Function Tests ----------
describe("helper functions", () => {
describe("estimateTokens", () => {
it("should estimate ~1 token per 4 characters", () => {
expect(estimateTokens("abcd")).toBe(1);
expect(estimateTokens("abcdefgh")).toBe(2);
});
it("should round up for partial tokens", () => {
expect(estimateTokens("abc")).toBe(1);
expect(estimateTokens("abcde")).toBe(2);
});
it("should return 0 for empty string", () => {
expect(estimateTokens("")).toBe(0);
});
});
describe("mapProviderType", () => {
it("should map claude to ANTHROPIC", () => {
expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC);
});
it("should map openai to OPENAI", () => {
expect(mapProviderType("openai")).toBe(Provider.OPENAI);
});
it("should map ollama to OLLAMA", () => {
expect(mapProviderType("ollama")).toBe(Provider.OLLAMA);
});
});
describe("mapHarness", () => {
it("should map ollama to OLLAMA_LOCAL", () => {
expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL);
});
it("should map claude to API_DIRECT", () => {
expect(mapHarness("claude")).toBe(Harness.API_DIRECT);
});
it("should map openai to API_DIRECT", () => {
expect(mapHarness("openai")).toBe(Harness.API_DIRECT);
});
});
describe("inferTaskType", () => {
it("should return IMPLEMENTATION for embed operation", () => {
expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION);
});
it("should return UNKNOWN when no context provided for chat", () => {
expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN);
});
it("should return PLANNING for brain context", () => {
expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING);
});
it("should return PLANNING for planning context", () => {
expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING);
});
it("should return CODE_REVIEW for review context", () => {
expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW);
});
it("should return TESTING for test context", () => {
expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING);
});
it("should return DEBUGGING for debug context", () => {
expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING);
});
it("should return REFACTORING for refactor context", () => {
expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING);
});
it("should return DOCUMENTATION for doc context", () => {
expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION);
});
it("should return CONFIGURATION for config context", () => {
expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION);
});
it("should return SECURITY_AUDIT for security context", () => {
expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT);
});
it("should return IMPLEMENTATION for chat context", () => {
expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION);
});
it("should be case-insensitive", () => {
expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING);
});
it("should return UNKNOWN for unrecognized context", () => {
expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN);
});
});
});
// ---------- LlmTelemetryTrackerService Tests ----------
describe("LlmTelemetryTrackerService", () => {
let service: LlmTelemetryTrackerService;
let mockTelemetryService: {
eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
trackTaskCompletion: ReturnType<typeof vi.fn>;
isEnabled: boolean;
};
const mockEvent: TaskCompletionEvent = {
instance_id: "test-instance",
event_id: "test-event",
schema_version: "1.0.0",
timestamp: new Date().toISOString(),
task_duration_ms: 1000,
task_type: TaskType.IMPLEMENTATION,
complexity: Complexity.LOW,
harness: Harness.API_DIRECT,
model: "claude-sonnet-4-5-20250929",
provider: Provider.ANTHROPIC,
estimated_input_tokens: 100,
estimated_output_tokens: 200,
actual_input_tokens: 100,
actual_output_tokens: 200,
estimated_cost_usd_micros: 3300,
actual_cost_usd_micros: 3300,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0,
outcome: Outcome.SUCCESS,
retry_count: 0,
};
beforeEach(async () => {
mockTelemetryService = {
eventBuilder: {
build: vi.fn().mockReturnValue(mockEvent),
},
trackTaskCompletion: vi.fn(),
isEnabled: true,
};
const module: TestingModule = await Test.createTestingModule({
providers: [
LlmTelemetryTrackerService,
{
provide: MosaicTelemetryService,
useValue: mockTelemetryService,
},
],
}).compile();
service = module.get<LlmTelemetryTrackerService>(LlmTelemetryTrackerService);
});
it("should be defined", () => {
expect(service).toBeDefined();
});
describe("trackLlmCompletion", () => {
const baseParams: LlmCompletionParams = {
model: "claude-sonnet-4-5-20250929",
providerType: "claude",
operation: "chat",
durationMs: 1200,
inputTokens: 150,
outputTokens: 300,
callingContext: "chat",
success: true,
};
it("should build and track a telemetry event for Anthropic provider", () => {
service.trackLlmCompletion(baseParams);
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
task_duration_ms: 1200,
task_type: TaskType.IMPLEMENTATION,
complexity: Complexity.LOW,
harness: Harness.API_DIRECT,
model: "claude-sonnet-4-5-20250929",
provider: Provider.ANTHROPIC,
actual_input_tokens: 150,
actual_output_tokens: 300,
outcome: Outcome.SUCCESS,
}),
);
expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent);
});
it("should build and track a telemetry event for OpenAI provider", () => {
service.trackLlmCompletion({
...baseParams,
model: "gpt-4o",
providerType: "openai",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
model: "gpt-4o",
provider: Provider.OPENAI,
harness: Harness.API_DIRECT,
}),
);
});
it("should build and track a telemetry event for Ollama provider", () => {
service.trackLlmCompletion({
...baseParams,
model: "llama3.2",
providerType: "ollama",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
provider: Provider.OLLAMA,
harness: Harness.OLLAMA_LOCAL,
}),
);
});
it("should calculate cost in microdollars correctly", () => {
service.trackLlmCompletion(baseParams);
// claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
const expectedCost = 4950;
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
estimated_cost_usd_micros: expectedCost,
actual_cost_usd_micros: expectedCost,
}),
);
});
it("should calculate zero cost for ollama models", () => {
service.trackLlmCompletion({
...baseParams,
model: "llama3.2",
providerType: "ollama",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
estimated_cost_usd_micros: 0,
actual_cost_usd_micros: 0,
}),
);
});
it("should track FAILURE outcome when success is false", () => {
service.trackLlmCompletion({
...baseParams,
success: false,
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
outcome: Outcome.FAILURE,
}),
);
});
it("should infer task type from calling context", () => {
service.trackLlmCompletion({
...baseParams,
callingContext: "brain",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
task_type: TaskType.PLANNING,
}),
);
});
it("should set empty quality gates arrays for direct LLM calls", () => {
service.trackLlmCompletion(baseParams);
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
}),
);
});
it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
mockTelemetryService.eventBuilder = null;
// Should not throw
service.trackLlmCompletion(baseParams);
expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled();
});
it("should not throw when eventBuilder.build throws an error", () => {
mockTelemetryService.eventBuilder = {
build: vi.fn().mockImplementation(() => {
throw new Error("Build failed");
}),
};
// Should not throw
expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
});
it("should not throw when trackTaskCompletion throws an error", () => {
mockTelemetryService.trackTaskCompletion.mockImplementation(() => {
throw new Error("Track failed");
});
// Should not throw
expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
});
it("should handle streaming operation with estimated tokens", () => {
service.trackLlmCompletion({
...baseParams,
operation: "chatStream",
inputTokens: 50,
outputTokens: 100,
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
actual_input_tokens: 50,
actual_output_tokens: 100,
estimated_input_tokens: 50,
estimated_output_tokens: 100,
}),
);
});
it("should handle embed operation", () => {
service.trackLlmCompletion({
...baseParams,
operation: "embed",
outputTokens: 0,
callingContext: undefined,
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
task_type: TaskType.IMPLEMENTATION,
actual_output_tokens: 0,
}),
);
});
it("should pass all required EventBuilderParams fields", () => {
service.trackLlmCompletion(baseParams);
const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>).mock
.calls[0][0] as EventBuilderParams;
// Verify all required fields are present
expect(buildCall).toHaveProperty("task_duration_ms");
expect(buildCall).toHaveProperty("task_type");
expect(buildCall).toHaveProperty("complexity");
expect(buildCall).toHaveProperty("harness");
expect(buildCall).toHaveProperty("model");
expect(buildCall).toHaveProperty("provider");
expect(buildCall).toHaveProperty("estimated_input_tokens");
expect(buildCall).toHaveProperty("estimated_output_tokens");
expect(buildCall).toHaveProperty("actual_input_tokens");
expect(buildCall).toHaveProperty("actual_output_tokens");
expect(buildCall).toHaveProperty("estimated_cost_usd_micros");
expect(buildCall).toHaveProperty("actual_cost_usd_micros");
expect(buildCall).toHaveProperty("quality_gate_passed");
expect(buildCall).toHaveProperty("quality_gates_run");
expect(buildCall).toHaveProperty("quality_gates_failed");
expect(buildCall).toHaveProperty("context_compactions");
expect(buildCall).toHaveProperty("context_rotations");
expect(buildCall).toHaveProperty("context_utilization_final");
expect(buildCall).toHaveProperty("outcome");
expect(buildCall).toHaveProperty("retry_count");
});
});
});

llm-telemetry-tracker.service.ts

@@ -0,0 +1,197 @@
import { Injectable, Logger } from "@nestjs/common";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
import type { LlmProviderType } from "./providers/llm-provider.interface";
import { calculateCostMicrodollars } from "./llm-cost-table";
/**
* Parameters for tracking an LLM completion event.
*/
export interface LlmCompletionParams {
/** Full model name (e.g. "claude-sonnet-4-5-20250929") */
model: string;
/** Provider type discriminator */
providerType: LlmProviderType;
/** Operation type that was performed */
operation: "chat" | "chatStream" | "embed";
/** Duration of the LLM call in milliseconds */
durationMs: number;
/** Number of input (prompt) tokens consumed */
inputTokens: number;
/** Number of output (completion) tokens generated */
outputTokens: number;
/**
* Optional calling context hint for task type inference.
* Examples: "brain", "chat", "embed", "planning", "code-review"
*/
callingContext?: string;
/** Whether the call succeeded or failed */
success: boolean;
}
/**
* Estimated token count from text length.
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
*/
export function estimateTokens(text: string): number {
return Math.ceil(text.length / 4);
}
/** Map LLM provider type to telemetry Provider enum */
export function mapProviderType(providerType: LlmProviderType): Provider {
switch (providerType) {
case "claude":
return Provider.ANTHROPIC;
case "openai":
return Provider.OPENAI;
case "ollama":
return Provider.OLLAMA;
default:
return Provider.UNKNOWN;
}
}
/** Map LLM provider type to telemetry Harness enum */
export function mapHarness(providerType: LlmProviderType): Harness {
switch (providerType) {
case "ollama":
return Harness.OLLAMA_LOCAL;
default:
return Harness.API_DIRECT;
}
}
/**
* Infer the task type from calling context and operation.
*
* @param operation - The LLM operation (chat, chatStream, embed)
* @param callingContext - Optional hint about the caller's purpose
* @returns Inferred TaskType
*/
export function inferTaskType(
operation: "chat" | "chatStream" | "embed",
callingContext?: string
): TaskType {
// Embedding operations are typically for indexing/search
if (operation === "embed") {
return TaskType.IMPLEMENTATION;
}
if (!callingContext) {
return TaskType.UNKNOWN;
}
const ctx = callingContext.toLowerCase();
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
return TaskType.PLANNING;
}
if (ctx.includes("review") || ctx.includes("code-review")) {
return TaskType.CODE_REVIEW;
}
if (ctx.includes("test")) {
return TaskType.TESTING;
}
if (ctx.includes("debug")) {
return TaskType.DEBUGGING;
}
if (ctx.includes("refactor")) {
return TaskType.REFACTORING;
}
if (ctx.includes("doc")) {
return TaskType.DOCUMENTATION;
}
if (ctx.includes("config")) {
return TaskType.CONFIGURATION;
}
if (ctx.includes("security") || ctx.includes("audit")) {
return TaskType.SECURITY_AUDIT;
}
if (ctx.includes("chat") || ctx.includes("implement")) {
return TaskType.IMPLEMENTATION;
}
return TaskType.UNKNOWN;
}
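Because the keyword checks run top to bottom, a context matching several keywords resolves to the first branch hit; a small illustration:

```typescript
import { inferTaskType } from "./llm-telemetry-tracker.service";

// Branch order decides ties: "test-plan" contains both "plan" and "test",
// but the planning check runs first.
inferTaskType("chat", "test-plan");   // TaskType.PLANNING, not TESTING
inferTaskType("chatStream", "debug"); // TaskType.DEBUGGING
inferTaskType("embed", "anything");   // TaskType.IMPLEMENTATION (embed short-circuits)
```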
/**
* LLM Telemetry Tracker Service
*
* Builds and submits telemetry events for LLM completions.
* All tracking is non-blocking and fire-and-forget; telemetry errors
* never propagate to the caller.
*
* @example
* ```typescript
* // After a successful chat completion
* this.telemetryTracker.trackLlmCompletion({
* model: "claude-sonnet-4-5-20250929",
* providerType: "claude",
* operation: "chat",
* durationMs: 1200,
* inputTokens: 150,
* outputTokens: 300,
* callingContext: "chat",
* success: true,
* });
* ```
*/
@Injectable()
export class LlmTelemetryTrackerService {
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
constructor(private readonly telemetry: MosaicTelemetryService) {}
/**
* Track an LLM completion event via Mosaic Telemetry.
*
* This method is intentionally fire-and-forget. It catches all errors
* internally and logs them without propagating to the caller.
*
* @param params - LLM completion parameters
*/
trackLlmCompletion(params: LlmCompletionParams): void {
try {
const builder = this.telemetry.eventBuilder;
if (!builder) {
// Telemetry is disabled — silently skip
return;
}
const costMicrodollars = calculateCostMicrodollars(
params.model,
params.inputTokens,
params.outputTokens
);
const event = builder.build({
task_duration_ms: params.durationMs,
task_type: inferTaskType(params.operation, params.callingContext),
complexity: Complexity.LOW,
harness: mapHarness(params.providerType),
model: params.model,
provider: mapProviderType(params.providerType),
estimated_input_tokens: params.inputTokens,
estimated_output_tokens: params.outputTokens,
actual_input_tokens: params.inputTokens,
actual_output_tokens: params.outputTokens,
estimated_cost_usd_micros: costMicrodollars,
actual_cost_usd_micros: costMicrodollars,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0,
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
retry_count: 0,
});
this.telemetry.trackTaskCompletion(event);
} catch (error: unknown) {
// Never let telemetry errors propagate
const msg = error instanceof Error ? error.message : String(error);
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
}
}
}

llm.module.ts

@@ -3,13 +3,14 @@ import { LlmController } from "./llm.controller";
import { LlmProviderAdminController } from "./llm-provider-admin.controller";
import { LlmService } from "./llm.service";
import { LlmManagerService } from "./llm-manager.service";
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
import { PrismaModule } from "../prisma/prisma.module";
import { LlmUsageModule } from "../llm-usage/llm-usage.module";
@Module({
imports: [PrismaModule, LlmUsageModule],
controllers: [LlmController, LlmProviderAdminController],
providers: [LlmService, LlmManagerService],
providers: [LlmService, LlmManagerService, LlmTelemetryTrackerService],
exports: [LlmService, LlmManagerService],
})
export class LlmModule {}
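For context, a minimal sketch of a consumer (BrainService and BrainModule are hypothetical, and the ChatRequestDto shape is assumed): importing LlmModule is enough to get the telemetry-instrumented LlmService, and the context hint drives task-type inference:

```typescript
import { Injectable, Module } from "@nestjs/common";
import { LlmModule } from "../llm/llm.module";
import { LlmService } from "../llm/llm.service";

@Injectable()
export class BrainService {
  constructor(private readonly llm: LlmService) {}

  async plan(prompt: string) {
    // The "brain" hint makes the tracker infer TaskType.PLANNING.
    return this.llm.chat(
      { model: "claude-sonnet-4-5-20250929", messages: [{ role: "user", content: prompt }] },
      "brain"
    );
  }
}

@Module({
  imports: [LlmModule], // LlmService is exported by LlmModule
  providers: [BrainService],
})
export class BrainModule {}
```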

llm.service.spec.ts

@@ -3,6 +3,7 @@ import { Test, TestingModule } from "@nestjs/testing";
import { ServiceUnavailableException } from "@nestjs/common";
import { LlmService } from "./llm.service";
import { LlmManagerService } from "./llm-manager.service";
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
import type { ChatRequestDto, EmbedRequestDto, ChatResponseDto, EmbedResponseDto } from "./dto";
import type {
LlmProviderInterface,
@@ -14,6 +15,9 @@ describe("LlmService", () => {
let mockManagerService: {
getDefaultProvider: ReturnType<typeof vi.fn>;
};
let mockTelemetryTracker: {
trackLlmCompletion: ReturnType<typeof vi.fn>;
};
let mockProvider: {
chat: ReturnType<typeof vi.fn>;
chatStream: ReturnType<typeof vi.fn>;
@@ -41,6 +45,11 @@ describe("LlmService", () => {
getDefaultProvider: vi.fn().mockResolvedValue(mockProvider),
};
// Create mock telemetry tracker
mockTelemetryTracker = {
trackLlmCompletion: vi.fn(),
};
const module: TestingModule = await Test.createTestingModule({
providers: [
LlmService,
@@ -48,6 +57,10 @@ describe("LlmService", () => {
provide: LlmManagerService,
useValue: mockManagerService,
},
{
provide: LlmTelemetryTrackerService,
useValue: mockTelemetryTracker,
},
],
}).compile();
@@ -135,6 +148,45 @@ describe("LlmService", () => {
expect(result).toEqual(response);
});
it("should track telemetry on successful chat", async () => {
const response: ChatResponseDto = {
model: "llama3.2",
message: { role: "assistant", content: "Hello" },
done: true,
promptEvalCount: 10,
evalCount: 20,
};
mockProvider.chat.mockResolvedValue(response);
await service.chat(request, "chat");
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
providerType: "ollama",
operation: "chat",
inputTokens: 10,
outputTokens: 20,
callingContext: "chat",
success: true,
}),
);
});
it("should track telemetry on failed chat", async () => {
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
await expect(service.chat(request)).rejects.toThrow(ServiceUnavailableException);
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
operation: "chat",
success: false,
}),
);
});
it("should throw ServiceUnavailableException on error", async () => {
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
@@ -177,6 +229,94 @@ describe("LlmService", () => {
expect(chunks[1].message.content).toBe(" world");
});
it("should track telemetry after stream completes", async () => {
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
yield {
model: "llama3.2",
message: { role: "assistant", content: "Hello" },
done: false,
};
yield {
model: "llama3.2",
message: { role: "assistant", content: " world" },
done: true,
promptEvalCount: 5,
evalCount: 10,
};
}
mockProvider.chatStream.mockReturnValue(mockGenerator());
const chunks: ChatResponseDto[] = [];
for await (const chunk of service.chatStream(request, "brain")) {
chunks.push(chunk);
}
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
providerType: "ollama",
operation: "chatStream",
inputTokens: 5,
outputTokens: 10,
callingContext: "brain",
success: true,
}),
);
});
it("should estimate tokens when provider does not return counts in stream", async () => {
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
yield {
model: "llama3.2",
message: { role: "assistant", content: "Hello world" },
done: false,
};
yield {
model: "llama3.2",
message: { role: "assistant", content: "" },
done: true,
};
}
mockProvider.chatStream.mockReturnValue(mockGenerator());
const chunks: ChatResponseDto[] = [];
for await (const chunk of service.chatStream(request)) {
chunks.push(chunk);
}
// Should use estimated tokens since no actual counts provided
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
operation: "chatStream",
success: true,
// Input estimated from "Hi" -> ceil(2/4) = 1
inputTokens: 1,
// Output estimated from "Hello world" -> ceil(11/4) = 3
outputTokens: 3,
}),
);
});
it("should track telemetry on stream failure", async () => {
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
throw new Error("Stream failed");
}
mockProvider.chatStream.mockReturnValue(errorGenerator());
const generator = service.chatStream(request);
await expect(generator.next()).rejects.toThrow(ServiceUnavailableException);
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
operation: "chatStream",
success: false,
}),
);
});
it("should throw ServiceUnavailableException on error", async () => {
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
throw new Error("Stream failed");
@@ -210,6 +350,41 @@ describe("LlmService", () => {
expect(result).toEqual(response);
});
it("should track telemetry on successful embed", async () => {
const response: EmbedResponseDto = {
model: "llama3.2",
embeddings: [[0.1, 0.2, 0.3]],
totalDuration: 500,
};
mockProvider.embed.mockResolvedValue(response);
await service.embed(request, "embed");
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
providerType: "ollama",
operation: "embed",
outputTokens: 0,
callingContext: "embed",
success: true,
}),
);
});
it("should track telemetry on failed embed", async () => {
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));
await expect(service.embed(request)).rejects.toThrow(ServiceUnavailableException);
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
operation: "embed",
success: false,
}),
);
});
it("should throw ServiceUnavailableException on error", async () => {
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));

llm.service.ts

@@ -1,13 +1,15 @@
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
import { LlmManagerService } from "./llm-manager.service";
import { LlmTelemetryTrackerService, estimateTokens } from "./llm-telemetry-tracker.service";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface";
import type { LlmProviderHealthStatus, LlmProviderType } from "./providers/llm-provider.interface";
/**
* LLM Service
*
* High-level service for LLM operations. Delegates to providers via LlmManagerService.
* Maintains backward compatibility with the original API while supporting multiple providers.
* Automatically tracks completions via Mosaic Telemetry (non-blocking).
*
* @example
* ```typescript
@@ -33,7 +35,10 @@ import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface
export class LlmService implements OnModuleInit {
private readonly logger = new Logger(LlmService.name);
constructor(private readonly llmManager: LlmManagerService) {
constructor(
private readonly llmManager: LlmManagerService,
private readonly telemetryTracker: LlmTelemetryTrackerService
) {
this.logger.log("LLM service initialized");
}
@@ -91,14 +96,45 @@ export class LlmService implements OnModuleInit {
* Perform a synchronous chat completion.
*
* @param request - Chat request with messages and configuration
* @param callingContext - Optional context hint for telemetry task type inference
* @returns Complete chat response
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
*/
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
async chat(request: ChatRequestDto, callingContext?: string): Promise<ChatResponseDto> {
const startTime = Date.now();
let providerType: LlmProviderType = "ollama";
try {
const provider = await this.llmManager.getDefaultProvider();
return await provider.chat(request);
providerType = provider.type;
const response = await provider.chat(request);
// Fire-and-forget telemetry tracking
this.telemetryTracker.trackLlmCompletion({
model: response.model,
providerType,
operation: "chat",
durationMs: Date.now() - startTime,
inputTokens: response.promptEvalCount ?? 0,
outputTokens: response.evalCount ?? 0,
callingContext,
success: true,
});
return response;
} catch (error: unknown) {
// Track failure (fire-and-forget)
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "chat",
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
callingContext,
success: false,
});
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Chat failed: ${errorMessage}`);
throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`);
@@ -107,20 +143,75 @@ export class LlmService implements OnModuleInit {
/**
* Perform a streaming chat completion.
* Yields response chunks as they arrive from the provider.
* Aggregates token usage and tracks telemetry after the stream ends.
*
* @param request - Chat request with messages and configuration
* @param callingContext - Optional context hint for telemetry task type inference
* @yields Chat response chunks
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
*/
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto, void, unknown> {
async *chatStream(
request: ChatRequestDto,
callingContext?: string
): AsyncGenerator<ChatResponseDto, void, unknown> {
const startTime = Date.now();
let providerType: LlmProviderType = "ollama";
let aggregatedContent = "";
let lastChunkInputTokens = 0;
let lastChunkOutputTokens = 0;
try {
const provider = await this.llmManager.getDefaultProvider();
providerType = provider.type;
const stream = provider.chatStream(request);
for await (const chunk of stream) {
// Accumulate content for token estimation
aggregatedContent += chunk.message.content;
// Some providers include token counts on the final chunk
if (chunk.promptEvalCount !== undefined) {
lastChunkInputTokens = chunk.promptEvalCount;
}
if (chunk.evalCount !== undefined) {
lastChunkOutputTokens = chunk.evalCount;
}
yield chunk;
}
// After stream completes, track telemetry
// Use actual token counts if available, otherwise estimate from content length
const inputTokens =
lastChunkInputTokens > 0
? lastChunkInputTokens
: estimateTokens(request.messages.map((m) => m.content).join(" "));
const outputTokens =
lastChunkOutputTokens > 0 ? lastChunkOutputTokens : estimateTokens(aggregatedContent);
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "chatStream",
durationMs: Date.now() - startTime,
inputTokens,
outputTokens,
callingContext,
success: true,
});
} catch (error: unknown) {
// Track failure (fire-and-forget)
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "chatStream",
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
callingContext,
success: false,
});
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Stream failed: ${errorMessage}`);
throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`);
@@ -130,14 +221,48 @@ export class LlmService implements OnModuleInit {
* Generate embeddings for the given input texts.
*
* @param request - Embedding request with model and input texts
* @param callingContext - Optional context hint for telemetry task type inference
* @returns Embeddings response with vector arrays
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
*/
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
async embed(request: EmbedRequestDto, callingContext?: string): Promise<EmbedResponseDto> {
const startTime = Date.now();
let providerType: LlmProviderType = "ollama";
try {
const provider = await this.llmManager.getDefaultProvider();
return await provider.embed(request);
providerType = provider.type;
const response = await provider.embed(request);
// Estimate input tokens from the input text
const inputTokens = estimateTokens(request.input.join(" "));
// Fire-and-forget telemetry tracking
this.telemetryTracker.trackLlmCompletion({
model: response.model,
providerType,
operation: "embed",
durationMs: Date.now() - startTime,
inputTokens,
outputTokens: 0, // Embeddings don't produce output tokens
callingContext,
success: true,
});
return response;
} catch (error: unknown) {
// Track failure (fire-and-forget)
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "embed",
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
callingContext,
success: false,
});
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Embed failed: ${errorMessage}`);
throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);
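Taken together, all three entry points are instrumented the same way, and callers only pass the optional context hint. A usage sketch, assuming llmService, chatRequest, and embedRequest are in scope:

```typescript
// chat: tracked with the provider's actual token counts when available.
await llmService.chat(chatRequest, "code-review"); // inferred as CODE_REVIEW

// chatStream: tokens are aggregated and tracked after the stream ends,
// falling back to the ~4-chars-per-token estimate if the provider
// reports no counts.
for await (const chunk of llmService.chatStream(chatRequest, "brain")) {
  process.stdout.write(chunk.message.content);
}

// embed: input tokens estimated from the input text; output is always 0.
await llmService.embed(embedRequest, "embed");
```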