feat(#371): track LLM task completions via Mosaic Telemetry

- Create LlmTelemetryTrackerService for non-blocking event emission
- Normalize token usage across Anthropic, OpenAI, Ollama providers
- Add cost table with per-token pricing in microdollars
- Instrument chat, chatStream, and embed methods
- Infer task type from calling context
- Aggregate streaming tokens after stream ends with fallback estimation
- Add 69 unit tests for tracker service, cost table, and LLM service

Refs #371

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 01:44:29 -06:00
parent 24c21f45b3
commit fcecf3654b
6 changed files with 1103 additions and 8 deletions

View File

@@ -0,0 +1,106 @@
/**
* LLM Cost Table
*
* Maps model names to per-token costs in microdollars (USD * 1,000,000).
* For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token.
*
* Costs are split into input (prompt) and output (completion) pricing.
* Ollama models run locally and are free (0 cost).
*/
/**
* Per-token price of a single model, expressed in microdollars
* (USD * 1,000,000). Values may be fractional, e.g. 0.8 microdollars per token.
*/
export interface ModelCost {
/** Cost of one input (prompt) token, in microdollars */
inputPerToken: number;
/** Cost of one output (completion) token, in microdollars */
outputPerToken: number;
}
/**
 * Cost table mapping model-name prefixes to per-token pricing in microdollars.
 *
 * A price of $N per million tokens equals N microdollars per token, so every
 * number below can be read directly as "USD per 1M tokens".
 *
 * Model matching is prefix-based: "claude-sonnet-4-5" matches
 * "claude-sonnet-4-5-20250929". When several prefixes match, the longest one
 * wins, so a newer model that shares a prefix with an older, differently
 * priced one needs its own longer entry (e.g. "claude-opus-4-5" alongside
 * "claude-opus-4").
 *
 * Local models (e.g. served through Ollama) are intentionally not listed: a
 * name that matches no prefix is treated as free by getModelCost.
 *
 * Prices sourced from provider pricing pages as of 2026-02.
 */
const MODEL_COSTS: Record<string, ModelCost> = {
  // ---- Anthropic Claude models ----
  // claude-sonnet-4-5: $3/M input, $15/M output
  "claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
  // claude-opus-4-5 / claude-opus-4-6: $5/M input, $25/M output.
  // Listed explicitly so they are not caught by the pricier "claude-opus-4" prefix.
  "claude-opus-4-6": { inputPerToken: 5, outputPerToken: 25 },
  "claude-opus-4-5": { inputPerToken: 5, outputPerToken: 25 },
  // claude-opus-4 (incl. 4.1): $15/M input, $75/M output
  "claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
  // claude-haiku-4-5: $1/M input, $5/M output
  "claude-haiku-4-5": { inputPerToken: 1, outputPerToken: 5 },
  // claude-3-5-haiku: $0.80/M input, $4/M output
  "claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
  // claude-3-5-sonnet: $3/M input, $15/M output
  "claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
  // claude-3-opus: $15/M input, $75/M output
  "claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
  // claude-3-sonnet: $3/M input, $15/M output
  "claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
  // claude-3-haiku: $0.25/M input, $1.25/M output
  "claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },

  // ---- OpenAI models ----
  // gpt-4o-mini: $0.15/M input, $0.60/M output
  "gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
  // gpt-4o: $2.50/M input, $10/M output
  "gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
  // gpt-4-turbo: $10/M input, $30/M output
  "gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
  // gpt-4: $30/M input, $60/M output
  "gpt-4": { inputPerToken: 30, outputPerToken: 60 },
  // gpt-3.5-turbo: $0.50/M input, $1.50/M output
  "gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },

  // Ollama / local models are free and need no entries here: anything not
  // matched above falls through to the zero-cost default in getModelCost.
};
/**
 * Every prefix from the cost table, ordered longest-first.
 *
 * Walking this list and stopping at the first hit yields the longest matching
 * prefix, so "gpt-4o-mini" is tried before "gpt-4o" and "gpt-4-turbo" before
 * "gpt-4". Computed once at module load.
 */
const SORTED_PREFIXES = (() => {
  const prefixes = Object.keys(MODEL_COSTS);
  prefixes.sort((left, right) => right.length - left.length);
  return prefixes;
})();
/**
 * Look up per-token cost for a given model name.
 *
 * The name is trimmed and lower-cased, then compared against the known
 * prefixes from longest to shortest; the first prefix the name starts with
 * wins. Matching is purely textual, so a short prefix such as "gpt-4" also
 * matches names like "gpt-4.1" unless the table holds a longer, more specific
 * prefix for them.
 *
 * If no prefix matches, zero cost is returned (assumes a local/free model).
 *
 * The result is always a fresh object, so callers may keep or modify it
 * without affecting the shared pricing table.
 *
 * @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o")
 * @returns Per-token cost in microdollars
 */
export function getModelCost(modelName: string): ModelCost {
  // Stray whitespace would defeat startsWith and silently report a paid model as free.
  const normalized = modelName.trim().toLowerCase();

  const matchedPrefix = SORTED_PREFIXES.find((prefix) => normalized.startsWith(prefix));
  const known = matchedPrefix === undefined ? undefined : MODEL_COSTS[matchedPrefix];

  if (known === undefined) {
    // Unknown or local model — assume free
    return { inputPerToken: 0, outputPerToken: 0 };
  }

  // Copy instead of handing out the table entry itself.
  return { ...known };
}
/**
 * Calculate total cost in microdollars for a given model and token counts.
 *
 * Each token count is multiplied by the model's per-token price and the sum is
 * rounded to the nearest whole microdollar. Token counts that are not finite
 * positive numbers (NaN, Infinity, negatives) are treated as 0 so that a bad
 * usage report can never yield a NaN, infinite, or negative cost.
 *
 * @param modelName - Full model name
 * @param inputTokens - Number of input (prompt) tokens
 * @param outputTokens - Number of output (completion) tokens
 * @returns Total cost in microdollars (USD * 1,000,000), a non-negative integer
 */
export function calculateCostMicrodollars(
  modelName: string,
  inputTokens: number,
  outputTokens: number
): number {
  // Anything that is not a finite, positive count contributes no cost.
  const billable = (count: number): number =>
    Number.isFinite(count) && count > 0 ? count : 0;

  const { inputPerToken, outputPerToken } = getModelCost(modelName);
  const total = inputPerToken * billable(inputTokens) + outputPerToken * billable(outputTokens);

  return Math.round(total);
}