feat(#373): prediction integration for cost estimation
- Create PredictionService for pre-task cost/token estimates
- Refresh common predictions on startup
- Integrate predictions into LLM telemetry tracker
- Add GET /api/telemetry/estimate endpoint
- Degrade gracefully when no prediction data is available
- Add unit tests for prediction service

Refs #373

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -333,12 +333,13 @@ describe("LlmTelemetryTrackerService", () => {
|
||||
service.trackLlmCompletion(baseParams);
|
||||
|
||||
// claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
|
||||
const expectedCost = 4950;
|
||||
const expectedActualCost = 4950;
|
||||
|
||||
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
estimated_cost_usd_micros: expectedCost,
|
||||
actual_cost_usd_micros: expectedCost,
|
||||
// Estimated values are 0 when no PredictionService is injected
|
||||
estimated_cost_usd_micros: 0,
|
||||
actual_cost_usd_micros: expectedActualCost,
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -437,8 +438,9 @@ describe("LlmTelemetryTrackerService", () => {
|
||||
expect.objectContaining({
|
||||
actual_input_tokens: 50,
|
||||
actual_output_tokens: 100,
|
||||
estimated_input_tokens: 50,
|
||||
estimated_output_tokens: 100,
|
||||
// Estimated values are 0 when no PredictionService is injected
|
||||
estimated_input_tokens: 0,
|
||||
estimated_output_tokens: 0,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import { Injectable, Logger, Optional } from "@nestjs/common";
|
||||
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
||||
import { PredictionService } from "../mosaic-telemetry/prediction.service";
|
||||
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
||||
import type { LlmProviderType } from "./providers/llm-provider.interface";
|
||||
import { calculateCostMicrodollars } from "./llm-cost-table";
|
||||
@@ -140,7 +141,10 @@ export function inferTaskType(
|
||||
export class LlmTelemetryTrackerService {
|
||||
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
|
||||
|
||||
constructor(private readonly telemetry: MosaicTelemetryService) {}
|
||||
constructor(
|
||||
private readonly telemetry: MosaicTelemetryService,
|
||||
@Optional() private readonly predictionService?: PredictionService
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Track an LLM completion event via Mosaic Telemetry.
|
||||
@@ -158,24 +162,47 @@ export class LlmTelemetryTrackerService {
|
||||
return;
|
||||
}
|
||||
|
||||
const taskType = inferTaskType(params.operation, params.callingContext);
|
||||
const provider = mapProviderType(params.providerType);
|
||||
|
||||
const costMicrodollars = calculateCostMicrodollars(
|
||||
params.model,
|
||||
params.inputTokens,
|
||||
params.outputTokens
|
||||
);
|
||||
|
||||
// Query predictions for estimated fields (graceful degradation)
|
||||
let estimatedInputTokens = 0;
|
||||
let estimatedOutputTokens = 0;
|
||||
let estimatedCostMicros = 0;
|
||||
|
||||
if (this.predictionService) {
|
||||
const prediction = this.predictionService.getEstimate(
|
||||
taskType,
|
||||
params.model,
|
||||
provider,
|
||||
Complexity.LOW
|
||||
);
|
||||
|
||||
if (prediction?.prediction && prediction.metadata.confidence !== "none") {
|
||||
estimatedInputTokens = prediction.prediction.input_tokens.median;
|
||||
estimatedOutputTokens = prediction.prediction.output_tokens.median;
|
||||
estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 0;
|
||||
}
|
||||
}
|
||||
|
||||
const event = builder.build({
|
||||
task_duration_ms: params.durationMs,
|
||||
task_type: inferTaskType(params.operation, params.callingContext),
|
||||
task_type: taskType,
|
||||
complexity: Complexity.LOW,
|
||||
harness: mapHarness(params.providerType),
|
||||
model: params.model,
|
||||
provider: mapProviderType(params.providerType),
|
||||
estimated_input_tokens: params.inputTokens,
|
||||
estimated_output_tokens: params.outputTokens,
|
||||
provider,
|
||||
estimated_input_tokens: estimatedInputTokens,
|
||||
estimated_output_tokens: estimatedOutputTokens,
|
||||
actual_input_tokens: params.inputTokens,
|
||||
actual_output_tokens: params.outputTokens,
|
||||
estimated_cost_usd_micros: costMicrodollars,
|
||||
estimated_cost_usd_micros: estimatedCostMicros,
|
||||
actual_cost_usd_micros: costMicrodollars,
|
||||
quality_gate_passed: true,
|
||||
quality_gates_run: [],
|
||||
|
||||
Reference in New Issue
Block a user