Files
stack/apps/api/src/mosaic-telemetry/prediction.service.ts
Jason Woltje ed23293e1a feat(#373): prediction integration for cost estimation
- Create PredictionService for pre-task cost/token estimates
- Refresh common predictions on startup
- Integrate predictions into LLM telemetry tracker
- Add GET /api/telemetry/estimate endpoint
- Graceful degradation when no prediction data available
- Add unit tests for prediction service

Refs #373

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:10:22 -06:00

162 lines
5.0 KiB
TypeScript

import { Injectable, Logger, OnModuleInit } from "@nestjs/common";
import {
TaskType,
Complexity,
Provider,
type PredictionQuery,
type PredictionResponse,
} from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
/**
 * Common model-provider combinations used for pre-fetching predictions.
 * These represent the most frequently used LLM configurations.
 *
 * Declared as a deeply readonly array: this list is module-level shared
 * state, and marking it readonly prevents accidental mutation at runtime.
 */
const COMMON_MODELS: readonly { readonly model: string; readonly provider: Provider }[] = [
  { model: "claude-sonnet-4-5", provider: Provider.ANTHROPIC },
  { model: "claude-opus-4", provider: Provider.ANTHROPIC },
  { model: "claude-haiku-4-5", provider: Provider.ANTHROPIC },
  { model: "gpt-4o", provider: Provider.OPENAI },
  { model: "gpt-4o-mini", provider: Provider.OPENAI },
];
/**
 * Common task types to pre-fetch predictions for.
 *
 * Readonly: shared module-level constant that must never be mutated.
 */
const COMMON_TASK_TYPES: readonly TaskType[] = [
  TaskType.IMPLEMENTATION,
  TaskType.PLANNING,
  TaskType.CODE_REVIEW,
];
/**
 * Common complexity levels to pre-fetch predictions for.
 *
 * Readonly: shared module-level constant that must never be mutated.
 * HIGH is deliberately excluded from pre-fetching (fetched on demand).
 */
const COMMON_COMPLEXITIES: readonly Complexity[] = [Complexity.LOW, Complexity.MEDIUM];
/**
 * PredictionService
 *
 * Provides pre-task cost and token estimates using crowd-sourced prediction data
 * from the Mosaic Telemetry server. Predictions are cached by the underlying SDK
 * with a 6-hour TTL.
 *
 * This service is intentionally non-blocking: if predictions are unavailable
 * (telemetry disabled, server unreachable, no data), all methods return null
 * without throwing errors. Task execution should never be blocked by prediction
 * failures.
 *
 * @example
 * ```typescript
 * const estimate = this.predictionService.getEstimate(
 *   TaskType.IMPLEMENTATION,
 *   "claude-sonnet-4-5",
 *   Provider.ANTHROPIC,
 *   Complexity.LOW,
 * );
 * if (estimate?.prediction) {
 *   console.log(`Estimated cost: ${estimate.prediction.cost_usd_micros}`);
 * }
 * ```
 */
@Injectable()
export class PredictionService implements OnModuleInit {
  private readonly logger = new Logger(PredictionService.name);

  constructor(private readonly telemetry: MosaicTelemetryService) {}

  /**
   * Extract a human-readable message from an unknown thrown value.
   *
   * Centralizes the `instanceof Error` narrowing that was previously
   * duplicated in every catch block of this service.
   */
  private static errorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Refresh common predictions on startup.
   * Runs asynchronously and never blocks module initialization.
   */
  onModuleInit(): void {
    if (!this.telemetry.isEnabled) {
      this.logger.log("Telemetry disabled - skipping prediction refresh");
      return;
    }
    // Fire-and-forget: refresh in the background. A rejection is logged as a
    // warning and never propagated — startup must not depend on the server.
    this.refreshCommonPredictions().catch((error: unknown) => {
      const msg = PredictionService.errorMessage(error);
      this.logger.warn(`Failed to refresh common predictions on startup: ${msg}`);
    });
  }

  /**
   * Get a cost/token estimate for a given task configuration.
   *
   * Returns the cached prediction from the SDK, or null if:
   * - Telemetry is disabled
   * - No prediction data exists for this combination
   * - The prediction has expired
   *
   * @param taskType - The type of task to estimate
   * @param model - The model name (e.g. "claude-sonnet-4-5")
   * @param provider - The provider enum value
   * @param complexity - The complexity level
   * @returns Prediction response with estimates and confidence, or null
   */
  getEstimate(
    taskType: TaskType,
    model: string,
    provider: Provider,
    complexity: Complexity
  ): PredictionResponse | null {
    try {
      const query: PredictionQuery = {
        task_type: taskType,
        model,
        provider,
        complexity,
      };
      return this.telemetry.getPrediction(query);
    } catch (error: unknown) {
      // Never let a prediction lookup failure surface to the caller.
      const msg = PredictionService.errorMessage(error);
      this.logger.warn(`Failed to get prediction estimate: ${msg}`);
      return null;
    }
  }

  /**
   * Refresh predictions for commonly used (taskType, model, provider, complexity) combinations.
   *
   * Generates the cross-product of common models, task types, and complexities,
   * then batch-refreshes them from the telemetry server. The SDK caches the
   * results with a 6-hour TTL.
   *
   * This method is safe to call at any time. If telemetry is disabled or the
   * server is unreachable, it completes without error.
   */
  async refreshCommonPredictions(): Promise<void> {
    if (!this.telemetry.isEnabled) {
      return;
    }
    // Cross-product of COMMON_MODELS x COMMON_TASK_TYPES x COMMON_COMPLEXITIES.
    const queries: PredictionQuery[] = [];
    for (const { model, provider } of COMMON_MODELS) {
      for (const taskType of COMMON_TASK_TYPES) {
        for (const complexity of COMMON_COMPLEXITIES) {
          queries.push({
            task_type: taskType,
            model,
            provider,
            complexity,
          });
        }
      }
    }
    this.logger.log(`Refreshing ${String(queries.length)} common prediction queries...`);
    try {
      await this.telemetry.refreshPredictions(queries);
      this.logger.log(`Successfully refreshed ${String(queries.length)} predictions`);
    } catch (error: unknown) {
      // Batch refresh is best-effort; log and continue.
      const msg = PredictionService.errorMessage(error);
      this.logger.warn(`Failed to refresh predictions: ${msg}`);
    }
  }
}