- Create PredictionService for pre-task cost/token estimates
- Refresh common predictions on startup
- Integrate predictions into LLM telemetry tracker
- Add GET /api/telemetry/estimate endpoint
- Graceful degradation when no prediction data available
- Add unit tests for prediction service

Refs #373

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
494 lines
16 KiB
TypeScript
494 lines
16 KiB
TypeScript
import { describe, it, expect, beforeEach, vi } from "vitest";
|
|
import { Test, TestingModule } from "@nestjs/testing";
|
|
import {
|
|
TaskType,
|
|
Complexity,
|
|
Harness,
|
|
Provider,
|
|
Outcome,
|
|
} from "@mosaicstack/telemetry-client";
|
|
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
|
|
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
|
import {
|
|
LlmTelemetryTrackerService,
|
|
estimateTokens,
|
|
mapProviderType,
|
|
mapHarness,
|
|
inferTaskType,
|
|
} from "./llm-telemetry-tracker.service";
|
|
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
|
|
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";
|
|
|
|
// ---------- Cost Table Tests ----------
|
|
|
|
describe("llm-cost-table", () => {
|
|
describe("getModelCost", () => {
|
|
it("should return cost for claude-sonnet-4-5 models", () => {
|
|
const cost = getModelCost("claude-sonnet-4-5-20250929");
|
|
expect(cost.inputPerToken).toBe(3);
|
|
expect(cost.outputPerToken).toBe(15);
|
|
});
|
|
|
|
it("should return cost for claude-opus-4 models", () => {
|
|
const cost = getModelCost("claude-opus-4-6");
|
|
expect(cost.inputPerToken).toBe(15);
|
|
expect(cost.outputPerToken).toBe(75);
|
|
});
|
|
|
|
it("should return cost for claude-haiku-4-5 models", () => {
|
|
const cost = getModelCost("claude-haiku-4-5-20251001");
|
|
expect(cost.inputPerToken).toBe(0.8);
|
|
expect(cost.outputPerToken).toBe(4);
|
|
});
|
|
|
|
it("should return cost for gpt-4o", () => {
|
|
const cost = getModelCost("gpt-4o");
|
|
expect(cost.inputPerToken).toBe(2.5);
|
|
expect(cost.outputPerToken).toBe(10);
|
|
});
|
|
|
|
it("should return cost for gpt-4o-mini (longer prefix matches first)", () => {
|
|
const cost = getModelCost("gpt-4o-mini");
|
|
expect(cost.inputPerToken).toBe(0.15);
|
|
expect(cost.outputPerToken).toBe(0.6);
|
|
});
|
|
|
|
it("should return zero cost for unknown/local models", () => {
|
|
const cost = getModelCost("llama3.2");
|
|
expect(cost.inputPerToken).toBe(0);
|
|
expect(cost.outputPerToken).toBe(0);
|
|
});
|
|
|
|
it("should return zero cost for ollama models", () => {
|
|
const cost = getModelCost("mistral:7b");
|
|
expect(cost.inputPerToken).toBe(0);
|
|
expect(cost.outputPerToken).toBe(0);
|
|
});
|
|
|
|
it("should be case-insensitive", () => {
|
|
const cost = getModelCost("Claude-Sonnet-4-5-20250929");
|
|
expect(cost.inputPerToken).toBe(3);
|
|
});
|
|
});
|
|
|
|
describe("calculateCostMicrodollars", () => {
|
|
it("should calculate cost for claude-sonnet-4-5 with token counts", () => {
|
|
// 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
|
|
const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500);
|
|
expect(cost).toBe(10500);
|
|
});
|
|
|
|
it("should return 0 for local models", () => {
|
|
const cost = calculateCostMicrodollars("llama3.2", 1000, 500);
|
|
expect(cost).toBe(0);
|
|
});
|
|
|
|
it("should return 0 when token counts are 0", () => {
|
|
const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0);
|
|
expect(cost).toBe(0);
|
|
});
|
|
|
|
it("should round the result to integer microdollars", () => {
|
|
// gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
|
|
const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7);
|
|
expect(cost).toBe(5);
|
|
});
|
|
});
|
|
});
|
|
|
|
// ---------- Helper Function Tests ----------
|
|
|
|
describe("helper functions", () => {
|
|
describe("estimateTokens", () => {
|
|
it("should estimate ~1 token per 4 characters", () => {
|
|
expect(estimateTokens("abcd")).toBe(1);
|
|
expect(estimateTokens("abcdefgh")).toBe(2);
|
|
});
|
|
|
|
it("should round up for partial tokens", () => {
|
|
expect(estimateTokens("abc")).toBe(1);
|
|
expect(estimateTokens("abcde")).toBe(2);
|
|
});
|
|
|
|
it("should return 0 for empty string", () => {
|
|
expect(estimateTokens("")).toBe(0);
|
|
});
|
|
});
|
|
|
|
describe("mapProviderType", () => {
|
|
it("should map claude to ANTHROPIC", () => {
|
|
expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC);
|
|
});
|
|
|
|
it("should map openai to OPENAI", () => {
|
|
expect(mapProviderType("openai")).toBe(Provider.OPENAI);
|
|
});
|
|
|
|
it("should map ollama to OLLAMA", () => {
|
|
expect(mapProviderType("ollama")).toBe(Provider.OLLAMA);
|
|
});
|
|
});
|
|
|
|
describe("mapHarness", () => {
|
|
it("should map ollama to OLLAMA_LOCAL", () => {
|
|
expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL);
|
|
});
|
|
|
|
it("should map claude to API_DIRECT", () => {
|
|
expect(mapHarness("claude")).toBe(Harness.API_DIRECT);
|
|
});
|
|
|
|
it("should map openai to API_DIRECT", () => {
|
|
expect(mapHarness("openai")).toBe(Harness.API_DIRECT);
|
|
});
|
|
});
|
|
|
|
describe("inferTaskType", () => {
|
|
it("should return IMPLEMENTATION for embed operation", () => {
|
|
expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION);
|
|
});
|
|
|
|
it("should return UNKNOWN when no context provided for chat", () => {
|
|
expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN);
|
|
});
|
|
|
|
it("should return PLANNING for brain context", () => {
|
|
expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING);
|
|
});
|
|
|
|
it("should return PLANNING for planning context", () => {
|
|
expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING);
|
|
});
|
|
|
|
it("should return CODE_REVIEW for review context", () => {
|
|
expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW);
|
|
});
|
|
|
|
it("should return TESTING for test context", () => {
|
|
expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING);
|
|
});
|
|
|
|
it("should return DEBUGGING for debug context", () => {
|
|
expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING);
|
|
});
|
|
|
|
it("should return REFACTORING for refactor context", () => {
|
|
expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING);
|
|
});
|
|
|
|
it("should return DOCUMENTATION for doc context", () => {
|
|
expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION);
|
|
});
|
|
|
|
it("should return CONFIGURATION for config context", () => {
|
|
expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION);
|
|
});
|
|
|
|
it("should return SECURITY_AUDIT for security context", () => {
|
|
expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT);
|
|
});
|
|
|
|
it("should return IMPLEMENTATION for chat context", () => {
|
|
expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION);
|
|
});
|
|
|
|
it("should be case-insensitive", () => {
|
|
expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING);
|
|
});
|
|
|
|
it("should return UNKNOWN for unrecognized context", () => {
|
|
expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN);
|
|
});
|
|
});
|
|
});
|
|
|
|
// ---------- LlmTelemetryTrackerService Tests ----------
|
|
|
|
describe("LlmTelemetryTrackerService", () => {
|
|
let service: LlmTelemetryTrackerService;
|
|
let mockTelemetryService: {
|
|
eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
|
|
trackTaskCompletion: ReturnType<typeof vi.fn>;
|
|
isEnabled: boolean;
|
|
};
|
|
|
|
const mockEvent: TaskCompletionEvent = {
|
|
instance_id: "test-instance",
|
|
event_id: "test-event",
|
|
schema_version: "1.0.0",
|
|
timestamp: new Date().toISOString(),
|
|
task_duration_ms: 1000,
|
|
task_type: TaskType.IMPLEMENTATION,
|
|
complexity: Complexity.LOW,
|
|
harness: Harness.API_DIRECT,
|
|
model: "claude-sonnet-4-5-20250929",
|
|
provider: Provider.ANTHROPIC,
|
|
estimated_input_tokens: 100,
|
|
estimated_output_tokens: 200,
|
|
actual_input_tokens: 100,
|
|
actual_output_tokens: 200,
|
|
estimated_cost_usd_micros: 3300,
|
|
actual_cost_usd_micros: 3300,
|
|
quality_gate_passed: true,
|
|
quality_gates_run: [],
|
|
quality_gates_failed: [],
|
|
context_compactions: 0,
|
|
context_rotations: 0,
|
|
context_utilization_final: 0,
|
|
outcome: Outcome.SUCCESS,
|
|
retry_count: 0,
|
|
};
|
|
|
|
beforeEach(async () => {
|
|
mockTelemetryService = {
|
|
eventBuilder: {
|
|
build: vi.fn().mockReturnValue(mockEvent),
|
|
},
|
|
trackTaskCompletion: vi.fn(),
|
|
isEnabled: true,
|
|
};
|
|
|
|
const module: TestingModule = await Test.createTestingModule({
|
|
providers: [
|
|
LlmTelemetryTrackerService,
|
|
{
|
|
provide: MosaicTelemetryService,
|
|
useValue: mockTelemetryService,
|
|
},
|
|
],
|
|
}).compile();
|
|
|
|
service = module.get<LlmTelemetryTrackerService>(LlmTelemetryTrackerService);
|
|
});
|
|
|
|
it("should be defined", () => {
|
|
expect(service).toBeDefined();
|
|
});
|
|
|
|
describe("trackLlmCompletion", () => {
|
|
const baseParams: LlmCompletionParams = {
|
|
model: "claude-sonnet-4-5-20250929",
|
|
providerType: "claude",
|
|
operation: "chat",
|
|
durationMs: 1200,
|
|
inputTokens: 150,
|
|
outputTokens: 300,
|
|
callingContext: "chat",
|
|
success: true,
|
|
};
|
|
|
|
it("should build and track a telemetry event for Anthropic provider", () => {
|
|
service.trackLlmCompletion(baseParams);
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
task_duration_ms: 1200,
|
|
task_type: TaskType.IMPLEMENTATION,
|
|
complexity: Complexity.LOW,
|
|
harness: Harness.API_DIRECT,
|
|
model: "claude-sonnet-4-5-20250929",
|
|
provider: Provider.ANTHROPIC,
|
|
actual_input_tokens: 150,
|
|
actual_output_tokens: 300,
|
|
outcome: Outcome.SUCCESS,
|
|
}),
|
|
);
|
|
|
|
expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent);
|
|
});
|
|
|
|
it("should build and track a telemetry event for OpenAI provider", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
model: "gpt-4o",
|
|
providerType: "openai",
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
model: "gpt-4o",
|
|
provider: Provider.OPENAI,
|
|
harness: Harness.API_DIRECT,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should build and track a telemetry event for Ollama provider", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
model: "llama3.2",
|
|
providerType: "ollama",
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
model: "llama3.2",
|
|
provider: Provider.OLLAMA,
|
|
harness: Harness.OLLAMA_LOCAL,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should calculate cost in microdollars correctly", () => {
|
|
service.trackLlmCompletion(baseParams);
|
|
|
|
// claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
|
|
const expectedActualCost = 4950;
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
// Estimated values are 0 when no PredictionService is injected
|
|
estimated_cost_usd_micros: 0,
|
|
actual_cost_usd_micros: expectedActualCost,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should calculate zero cost for ollama models", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
model: "llama3.2",
|
|
providerType: "ollama",
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
estimated_cost_usd_micros: 0,
|
|
actual_cost_usd_micros: 0,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should track FAILURE outcome when success is false", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
success: false,
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
outcome: Outcome.FAILURE,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should infer task type from calling context", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
callingContext: "brain",
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
task_type: TaskType.PLANNING,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should set empty quality gates arrays for direct LLM calls", () => {
|
|
service.trackLlmCompletion(baseParams);
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
quality_gate_passed: true,
|
|
quality_gates_run: [],
|
|
quality_gates_failed: [],
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
|
|
mockTelemetryService.eventBuilder = null;
|
|
|
|
// Should not throw
|
|
service.trackLlmCompletion(baseParams);
|
|
|
|
expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it("should not throw when eventBuilder.build throws an error", () => {
|
|
mockTelemetryService.eventBuilder = {
|
|
build: vi.fn().mockImplementation(() => {
|
|
throw new Error("Build failed");
|
|
}),
|
|
};
|
|
|
|
// Should not throw
|
|
expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
|
|
});
|
|
|
|
it("should not throw when trackTaskCompletion throws an error", () => {
|
|
mockTelemetryService.trackTaskCompletion.mockImplementation(() => {
|
|
throw new Error("Track failed");
|
|
});
|
|
|
|
// Should not throw
|
|
expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
|
|
});
|
|
|
|
it("should handle streaming operation with estimated tokens", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
operation: "chatStream",
|
|
inputTokens: 50,
|
|
outputTokens: 100,
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
actual_input_tokens: 50,
|
|
actual_output_tokens: 100,
|
|
// Estimated values are 0 when no PredictionService is injected
|
|
estimated_input_tokens: 0,
|
|
estimated_output_tokens: 0,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should handle embed operation", () => {
|
|
service.trackLlmCompletion({
|
|
...baseParams,
|
|
operation: "embed",
|
|
outputTokens: 0,
|
|
callingContext: undefined,
|
|
});
|
|
|
|
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
|
expect.objectContaining({
|
|
task_type: TaskType.IMPLEMENTATION,
|
|
actual_output_tokens: 0,
|
|
}),
|
|
);
|
|
});
|
|
|
|
it("should pass all required EventBuilderParams fields", () => {
|
|
service.trackLlmCompletion(baseParams);
|
|
|
|
const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>).mock
|
|
.calls[0][0] as EventBuilderParams;
|
|
|
|
// Verify all required fields are present
|
|
expect(buildCall).toHaveProperty("task_duration_ms");
|
|
expect(buildCall).toHaveProperty("task_type");
|
|
expect(buildCall).toHaveProperty("complexity");
|
|
expect(buildCall).toHaveProperty("harness");
|
|
expect(buildCall).toHaveProperty("model");
|
|
expect(buildCall).toHaveProperty("provider");
|
|
expect(buildCall).toHaveProperty("estimated_input_tokens");
|
|
expect(buildCall).toHaveProperty("estimated_output_tokens");
|
|
expect(buildCall).toHaveProperty("actual_input_tokens");
|
|
expect(buildCall).toHaveProperty("actual_output_tokens");
|
|
expect(buildCall).toHaveProperty("estimated_cost_usd_micros");
|
|
expect(buildCall).toHaveProperty("actual_cost_usd_micros");
|
|
expect(buildCall).toHaveProperty("quality_gate_passed");
|
|
expect(buildCall).toHaveProperty("quality_gates_run");
|
|
expect(buildCall).toHaveProperty("quality_gates_failed");
|
|
expect(buildCall).toHaveProperty("context_compactions");
|
|
expect(buildCall).toHaveProperty("context_rotations");
|
|
expect(buildCall).toHaveProperty("context_utilization_final");
|
|
expect(buildCall).toHaveProperty("outcome");
|
|
expect(buildCall).toHaveProperty("retry_count");
|
|
});
|
|
});
|
|
});
|