- Create PredictionService for pre-task cost/token estimates
- Refresh common predictions on startup
- Integrate predictions into LLM telemetry tracker
- Add GET /api/telemetry/estimate endpoint
- Graceful degradation when no prediction data available
- Add unit tests for prediction service

Refs #373

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
162 lines
5.0 KiB
TypeScript
162 lines
5.0 KiB
TypeScript
import { Injectable, Logger, OnModuleInit } from "@nestjs/common";
|
|
import {
|
|
TaskType,
|
|
Complexity,
|
|
Provider,
|
|
type PredictionQuery,
|
|
type PredictionResponse,
|
|
} from "@mosaicstack/telemetry-client";
|
|
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
|
|
|
|
/**
 * Model/provider pairs whose predictions are pre-fetched by
 * `refreshCommonPredictions`. Entries are grouped by provider; the resulting
 * order is Anthropic models first, then OpenAI models.
 */
const COMMON_MODELS: { model: string; provider: Provider }[] = [
  ...["claude-sonnet-4-5", "claude-opus-4", "claude-haiku-4-5"].map((model) => ({
    model,
    provider: Provider.ANTHROPIC,
  })),
  ...["gpt-4o", "gpt-4o-mini"].map((model) => ({
    model,
    provider: Provider.OPENAI,
  })),
];
|
|
|
|
/**
 * Task types included in the pre-fetch cross-product built by
 * `refreshCommonPredictions`.
 */
const COMMON_TASK_TYPES: TaskType[] = [TaskType.IMPLEMENTATION, TaskType.PLANNING, TaskType.CODE_REVIEW];
|
|
|
|
/**
 * Complexity levels included in the pre-fetch cross-product built by
 * `refreshCommonPredictions`. Only LOW and MEDIUM are pre-fetched.
 */
const COMMON_COMPLEXITIES: Complexity[] = [
  Complexity.LOW,
  Complexity.MEDIUM,
];
|
|
|
|
/**
 * Turns an arbitrary thrown value into text suitable for a log line:
 * the `message` of an `Error`, otherwise the value's string conversion.
 */
function describeFailure(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}

/**
 * PredictionService
 *
 * Thin wrapper around MosaicTelemetryService that exposes pre-task cost and
 * token estimates and warms up predictions for common configurations when
 * the module starts.
 *
 * Errors raised by the telemetry service while fetching or refreshing
 * predictions are caught and logged as warnings, so callers receive `null`
 * (or a resolved promise) instead of an exception. Prediction failures are
 * not meant to block task execution.
 *
 * NOTE(review): prediction caching is expected to happen inside the
 * telemetry SDK (previously documented as a 6-hour TTL); nothing in this
 * class enforces or configures that - confirm against the SDK.
 *
 * @example
 * ```typescript
 * const estimate = this.predictionService.getEstimate(
 *   TaskType.IMPLEMENTATION,
 *   "claude-sonnet-4-5",
 *   Provider.ANTHROPIC,
 *   Complexity.LOW,
 * );
 * if (estimate?.prediction) {
 *   console.log(`Estimated cost: ${estimate.prediction.cost_usd_micros}`);
 * }
 * ```
 */
@Injectable()
export class PredictionService implements OnModuleInit {
  private readonly logger = new Logger(PredictionService.name);

  constructor(private readonly telemetry: MosaicTelemetryService) {}

  /**
   * Nest lifecycle hook: starts a background refresh of the common
   * predictions when telemetry is enabled, otherwise just logs that the
   * refresh is skipped. The refresh promise is not awaited, so this hook
   * returns immediately.
   */
  onModuleInit(): void {
    if (this.telemetry.isEnabled) {
      // Fire-and-forget; a rejection is reduced to a warning.
      void this.refreshCommonPredictions().catch((error: unknown) => {
        this.logger.warn(
          `Failed to refresh common predictions on startup: ${describeFailure(error)}`
        );
      });
      return;
    }

    this.logger.log("Telemetry disabled - skipping prediction refresh");
  }

  /**
   * Look up the prediction for one task configuration.
   *
   * Delegates to `MosaicTelemetryService.getPrediction`; whatever that call
   * returns (a prediction or null) is passed through unchanged. If the call
   * throws, the error is logged as a warning and null is returned.
   *
   * @param taskType - The type of task to estimate
   * @param model - The model name (e.g. "claude-sonnet-4-5")
   * @param provider - The provider enum value
   * @param complexity - The complexity level
   * @returns The prediction response from the telemetry service, or null
   */
  getEstimate(
    taskType: TaskType,
    model: string,
    provider: Provider,
    complexity: Complexity
  ): PredictionResponse | null {
    const lookup: PredictionQuery = { task_type: taskType, model, provider, complexity };

    try {
      return this.telemetry.getPrediction(lookup);
    } catch (error: unknown) {
      this.logger.warn(`Failed to get prediction estimate: ${describeFailure(error)}`);
      return null;
    }
  }

  /**
   * Refresh predictions for every combination of COMMON_MODELS,
   * COMMON_TASK_TYPES and COMMON_COMPLEXITIES in a single
   * `refreshPredictions` call.
   *
   * Does nothing when telemetry is disabled. A failure of the refresh call
   * is logged as a warning rather than propagated.
   */
  async refreshCommonPredictions(): Promise<void> {
    if (!this.telemetry.isEnabled) {
      return;
    }

    // Cross-product, iterated models -> task types -> complexities.
    const queries: PredictionQuery[] = COMMON_MODELS.flatMap(({ model, provider }) =>
      COMMON_TASK_TYPES.flatMap((taskType) =>
        COMMON_COMPLEXITIES.map(
          (complexity): PredictionQuery => ({
            task_type: taskType,
            model,
            provider,
            complexity,
          })
        )
      )
    );
    const total = String(queries.length);

    this.logger.log(`Refreshing ${total} common prediction queries...`);

    try {
      await this.telemetry.refreshPredictions(queries);
      this.logger.log(`Successfully refreshed ${total} predictions`);
    } catch (error: unknown) {
      this.logger.warn(`Failed to refresh predictions: ${describeFailure(error)}`);
    }
  }
}
|