Files
stack/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts
Jason Woltje ed23293e1a feat(#373): prediction integration for cost estimation
- Create PredictionService for pre-task cost/token estimates
- Refresh common predictions on startup
- Integrate predictions into LLM telemetry tracker
- Add GET /api/telemetry/estimate endpoint
- Graceful degradation when no prediction data available
- Add unit tests for prediction service

Refs #373

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:10:22 -06:00

494 lines
16 KiB
TypeScript

import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import {
TaskType,
Complexity,
Harness,
Provider,
Outcome,
} from "@mosaicstack/telemetry-client";
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import {
LlmTelemetryTrackerService,
estimateTokens,
mapProviderType,
mapHarness,
inferTaskType,
} from "./llm-telemetry-tracker.service";
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";
// ---------- Cost Table Tests ----------
// Checks the cost table's per-model rate lookup (getModelCost) and the cost
// computed from token counts (calculateCostMicrodollars).
describe("llm-cost-table", () => {
  describe("getModelCost", () => {
    // Rows: [test title, model id, expected inputPerToken, expected outputPerToken].
    const rateCases: ReadonlyArray<readonly [string, string, number, number]> = [
      ["should return cost for claude-sonnet-4-5 models", "claude-sonnet-4-5-20250929", 3, 15],
      ["should return cost for claude-opus-4 models", "claude-opus-4-6", 15, 75],
      ["should return cost for claude-haiku-4-5 models", "claude-haiku-4-5-20251001", 0.8, 4],
      ["should return cost for gpt-4o", "gpt-4o", 2.5, 10],
      [
        "should return cost for gpt-4o-mini (longer prefix matches first)",
        "gpt-4o-mini",
        0.15,
        0.6,
      ],
      ["should return zero cost for unknown/local models", "llama3.2", 0, 0],
      ["should return zero cost for ollama models", "mistral:7b", 0, 0],
    ];

    for (const [title, modelId, expectedInputRate, expectedOutputRate] of rateCases) {
      it(title, () => {
        const pricing = getModelCost(modelId);
        expect(pricing.inputPerToken).toBe(expectedInputRate);
        expect(pricing.outputPerToken).toBe(expectedOutputRate);
      });
    }

    it("should be case-insensitive", () => {
      // Mixed-case model id; only the input rate is asserted here.
      const pricing = getModelCost("Claude-Sonnet-4-5-20250929");
      expect(pricing.inputPerToken).toBe(3);
    });
  });

  describe("calculateCostMicrodollars", () => {
    const costCases = [
      {
        title: "should calculate cost for claude-sonnet-4-5 with token counts",
        model: "claude-sonnet-4-5-20250929",
        inputTokens: 1000,
        outputTokens: 500,
        // 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
        expected: 10500,
      },
      {
        title: "should return 0 for local models",
        model: "llama3.2",
        inputTokens: 1000,
        outputTokens: 500,
        expected: 0,
      },
      {
        title: "should return 0 when token counts are 0",
        model: "claude-opus-4-6",
        inputTokens: 0,
        outputTokens: 0,
        expected: 0,
      },
      {
        title: "should round the result to integer microdollars",
        model: "gpt-4o-mini",
        inputTokens: 3,
        outputTokens: 7,
        // gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
        expected: 5,
      },
    ];

    for (const { title, model, inputTokens, outputTokens, expected } of costCases) {
      it(title, () => {
        expect(calculateCostMicrodollars(model, inputTokens, outputTokens)).toBe(expected);
      });
    }
  });
});
// ---------- Helper Function Tests ----------
// Covers the standalone helpers exported next to the tracker service:
// estimateTokens, mapProviderType, mapHarness and inferTaskType.
describe("helper functions", () => {
  describe("estimateTokens", () => {
    // Rows: [test title, list of [input text, expected token estimate]].
    const estimateCases: ReadonlyArray<
      readonly [string, ReadonlyArray<readonly [string, number]>]
    > = [
      [
        "should estimate ~1 token per 4 characters",
        [
          ["abcd", 1],
          ["abcdefgh", 2],
        ],
      ],
      [
        "should round up for partial tokens",
        [
          ["abc", 1],
          ["abcde", 2],
        ],
      ],
      ["should return 0 for empty string", [["", 0]]],
    ];

    for (const [title, samples] of estimateCases) {
      it(title, () => {
        for (const [text, expectedTokens] of samples) {
          expect(estimateTokens(text)).toBe(expectedTokens);
        }
      });
    }
  });

  describe("mapProviderType", () => {
    // `as const` keeps the provider strings as literal types for the call below.
    const providerCases = [
      ["should map claude to ANTHROPIC", "claude", Provider.ANTHROPIC],
      ["should map openai to OPENAI", "openai", Provider.OPENAI],
      ["should map ollama to OLLAMA", "ollama", Provider.OLLAMA],
    ] as const;

    for (const [title, providerType, expectedProvider] of providerCases) {
      it(title, () => {
        expect(mapProviderType(providerType)).toBe(expectedProvider);
      });
    }
  });

  describe("mapHarness", () => {
    const harnessCases = [
      ["should map ollama to OLLAMA_LOCAL", "ollama", Harness.OLLAMA_LOCAL],
      ["should map claude to API_DIRECT", "claude", Harness.API_DIRECT],
      ["should map openai to API_DIRECT", "openai", Harness.API_DIRECT],
    ] as const;

    for (const [title, providerType, expectedHarness] of harnessCases) {
      it(title, () => {
        expect(mapHarness(providerType)).toBe(expectedHarness);
      });
    }
  });

  describe("inferTaskType", () => {
    // The two single-argument calls stay explicit so the context argument is
    // genuinely omitted rather than passed as undefined.
    it("should return IMPLEMENTATION for embed operation", () => {
      const inferred = inferTaskType("embed");
      expect(inferred).toBe(TaskType.IMPLEMENTATION);
    });

    it("should return UNKNOWN when no context provided for chat", () => {
      const inferred = inferTaskType("chat");
      expect(inferred).toBe(TaskType.UNKNOWN);
    });

    // Rows: [test title, operation, calling context, expected task type].
    const contextCases = [
      ["should return PLANNING for brain context", "chat", "brain", TaskType.PLANNING],
      ["should return PLANNING for planning context", "chat", "planning", TaskType.PLANNING],
      ["should return CODE_REVIEW for review context", "chat", "code-review", TaskType.CODE_REVIEW],
      ["should return TESTING for test context", "chat", "test-generation", TaskType.TESTING],
      [
        "should return DEBUGGING for debug context",
        "chatStream",
        "debug-session",
        TaskType.DEBUGGING,
      ],
      ["should return REFACTORING for refactor context", "chat", "refactor", TaskType.REFACTORING],
      [
        "should return DOCUMENTATION for doc context",
        "chat",
        "documentation",
        TaskType.DOCUMENTATION,
      ],
      [
        "should return CONFIGURATION for config context",
        "chat",
        "config-update",
        TaskType.CONFIGURATION,
      ],
      [
        "should return SECURITY_AUDIT for security context",
        "chat",
        "security-check",
        TaskType.SECURITY_AUDIT,
      ],
      ["should return IMPLEMENTATION for chat context", "chat", "chat", TaskType.IMPLEMENTATION],
      ["should be case-insensitive", "chat", "BRAIN", TaskType.PLANNING],
      ["should return UNKNOWN for unrecognized context", "chat", "something-else", TaskType.UNKNOWN],
    ] as const;

    for (const [title, operation, callingContext, expectedTaskType] of contextCases) {
      it(title, () => {
        expect(inferTaskType(operation, callingContext)).toBe(expectedTaskType);
      });
    }
  });
});
// ---------- LlmTelemetryTrackerService Tests ----------
// Exercises LlmTelemetryTrackerService with MosaicTelemetryService replaced by
// a stub, checking the params handed to eventBuilder.build and the event
// forwarded to trackTaskCompletion.
describe("LlmTelemetryTrackerService", () => {
  // Shape of the stub registered in place of MosaicTelemetryService.
  type TelemetryServiceStub = {
    eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
    trackTaskCompletion: ReturnType<typeof vi.fn>;
    isEnabled: boolean;
  };

  let service: LlmTelemetryTrackerService;
  let mockTelemetryService: TelemetryServiceStub;

  // Canned event returned by the stubbed builder.
  const mockEvent: TaskCompletionEvent = {
    instance_id: "test-instance",
    event_id: "test-event",
    schema_version: "1.0.0",
    timestamp: new Date().toISOString(),
    task_duration_ms: 1000,
    task_type: TaskType.IMPLEMENTATION,
    complexity: Complexity.LOW,
    harness: Harness.API_DIRECT,
    model: "claude-sonnet-4-5-20250929",
    provider: Provider.ANTHROPIC,
    estimated_input_tokens: 100,
    estimated_output_tokens: 200,
    actual_input_tokens: 100,
    actual_output_tokens: 200,
    estimated_cost_usd_micros: 3300,
    actual_cost_usd_micros: 3300,
    quality_gate_passed: true,
    quality_gates_run: [],
    quality_gates_failed: [],
    context_compactions: 0,
    context_rotations: 0,
    context_utilization_final: 0,
    outcome: Outcome.SUCCESS,
    retry_count: 0,
  };

  // Asserts that the stubbed builder was called with params containing `expectedSubset`.
  const expectBuildCalledWith = (expectedSubset: Record<string, unknown>): void => {
    expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
      expect.objectContaining(expectedSubset),
    );
  };

  beforeEach(async () => {
    // Fresh stub per test so call history and overrides never leak between tests.
    const buildStub = vi.fn().mockReturnValue(mockEvent);
    mockTelemetryService = {
      eventBuilder: { build: buildStub },
      trackTaskCompletion: vi.fn(),
      isEnabled: true,
    };

    const telemetryProvider = {
      provide: MosaicTelemetryService,
      useValue: mockTelemetryService,
    };
    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [LlmTelemetryTrackerService, telemetryProvider],
    }).compile();

    service = testingModule.get(LlmTelemetryTrackerService);
  });

  it("should be defined", () => {
    expect(service).toBeDefined();
  });

  describe("trackLlmCompletion", () => {
    // Baseline call parameters; individual tests override fields via spread.
    const baseParams: LlmCompletionParams = {
      model: "claude-sonnet-4-5-20250929",
      providerType: "claude",
      operation: "chat",
      durationMs: 1200,
      inputTokens: 150,
      outputTokens: 300,
      callingContext: "chat",
      success: true,
    };

    it("should build and track a telemetry event for Anthropic provider", () => {
      service.trackLlmCompletion(baseParams);

      expectBuildCalledWith({
        task_duration_ms: 1200,
        task_type: TaskType.IMPLEMENTATION,
        complexity: Complexity.LOW,
        harness: Harness.API_DIRECT,
        model: "claude-sonnet-4-5-20250929",
        provider: Provider.ANTHROPIC,
        actual_input_tokens: 150,
        actual_output_tokens: 300,
        outcome: Outcome.SUCCESS,
      });
      expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent);
    });

    it("should build and track a telemetry event for OpenAI provider", () => {
      const openAiParams: LlmCompletionParams = {
        ...baseParams,
        model: "gpt-4o",
        providerType: "openai",
      };
      service.trackLlmCompletion(openAiParams);

      expectBuildCalledWith({
        model: "gpt-4o",
        provider: Provider.OPENAI,
        harness: Harness.API_DIRECT,
      });
    });

    it("should build and track a telemetry event for Ollama provider", () => {
      const ollamaParams: LlmCompletionParams = {
        ...baseParams,
        model: "llama3.2",
        providerType: "ollama",
      };
      service.trackLlmCompletion(ollamaParams);

      expectBuildCalledWith({
        model: "llama3.2",
        provider: Provider.OLLAMA,
        harness: Harness.OLLAMA_LOCAL,
      });
    });

    it("should calculate cost in microdollars correctly", () => {
      service.trackLlmCompletion(baseParams);

      // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
      const expectedActualCost = 4950;
      expectBuildCalledWith({
        // Estimated values are 0 when no PredictionService is injected
        estimated_cost_usd_micros: 0,
        actual_cost_usd_micros: expectedActualCost,
      });
    });

    it("should calculate zero cost for ollama models", () => {
      const ollamaParams: LlmCompletionParams = {
        ...baseParams,
        model: "llama3.2",
        providerType: "ollama",
      };
      service.trackLlmCompletion(ollamaParams);

      expectBuildCalledWith({
        estimated_cost_usd_micros: 0,
        actual_cost_usd_micros: 0,
      });
    });

    it("should track FAILURE outcome when success is false", () => {
      service.trackLlmCompletion({ ...baseParams, success: false });

      expectBuildCalledWith({ outcome: Outcome.FAILURE });
    });

    it("should infer task type from calling context", () => {
      service.trackLlmCompletion({ ...baseParams, callingContext: "brain" });

      expectBuildCalledWith({ task_type: TaskType.PLANNING });
    });

    it("should set empty quality gates arrays for direct LLM calls", () => {
      service.trackLlmCompletion(baseParams);

      expectBuildCalledWith({
        quality_gate_passed: true,
        quality_gates_run: [],
        quality_gates_failed: [],
      });
    });

    it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
      mockTelemetryService.eventBuilder = null;

      // Should not throw
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled();
    });

    it("should not throw when eventBuilder.build throws an error", () => {
      const throwingBuild = vi.fn().mockImplementation(() => {
        throw new Error("Build failed");
      });
      mockTelemetryService.eventBuilder = { build: throwingBuild };

      // Should not throw
      expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
    });

    it("should not throw when trackTaskCompletion throws an error", () => {
      mockTelemetryService.trackTaskCompletion.mockImplementation(() => {
        throw new Error("Track failed");
      });

      // Should not throw
      expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
    });

    it("should handle streaming operation with estimated tokens", () => {
      const streamParams: LlmCompletionParams = {
        ...baseParams,
        operation: "chatStream",
        inputTokens: 50,
        outputTokens: 100,
      };
      service.trackLlmCompletion(streamParams);

      expectBuildCalledWith({
        actual_input_tokens: 50,
        actual_output_tokens: 100,
        // Estimated values are 0 when no PredictionService is injected
        estimated_input_tokens: 0,
        estimated_output_tokens: 0,
      });
    });

    it("should handle embed operation", () => {
      const embedParams: LlmCompletionParams = {
        ...baseParams,
        operation: "embed",
        outputTokens: 0,
        callingContext: undefined,
      };
      service.trackLlmCompletion(embedParams);

      expectBuildCalledWith({
        task_type: TaskType.IMPLEMENTATION,
        actual_output_tokens: 0,
      });
    });

    it("should pass all required EventBuilderParams fields", () => {
      service.trackLlmCompletion(baseParams);

      // First argument of the first recorded build() call.
      const buildMock = mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>;
      const buildCall = buildMock.mock.calls[0][0] as EventBuilderParams;

      // Verify all required fields are present
      const requiredFields = [
        "task_duration_ms",
        "task_type",
        "complexity",
        "harness",
        "model",
        "provider",
        "estimated_input_tokens",
        "estimated_output_tokens",
        "actual_input_tokens",
        "actual_output_tokens",
        "estimated_cost_usd_micros",
        "actual_cost_usd_micros",
        "quality_gate_passed",
        "quality_gates_run",
        "quality_gates_failed",
        "context_compactions",
        "context_rotations",
        "context_utilization_final",
        "outcome",
        "retry_count",
      ];
      for (const fieldName of requiredFields) {
        expect(buildCall).toHaveProperty(fieldName);
      }
    });
  });
});