feat(#371): track LLM task completions via Mosaic Telemetry
- Create LlmTelemetryTrackerService for non-blocking event emission
- Normalize token usage across Anthropic, OpenAI, Ollama providers
- Add cost table with per-token pricing in microdollars
- Instrument chat, chatStream, and embed methods
- Infer task type from calling context
- Aggregate streaming tokens after stream ends with fallback estimation
- Add 69 unit tests for tracker service, cost table, and LLM service

Refs #371

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
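Note on the normalization and fallback items above: each provider reports usage under different field names (Anthropic: input_tokens / output_tokens, OpenAI: prompt_tokens / completion_tokens, Ollama: prompt_eval_count / eval_count). The sketch below only illustrates that idea; the names NormalizedUsage, normalizeUsage, and estimateTokens are hypothetical and not part of this commit, and the roughly-4-characters-per-token fallback is an assumption, not necessarily the estimation the tracker uses.

// Hypothetical sketch, not part of this diff: normalize provider-specific usage
// fields into one shape, and estimate tokens when a stream ends without usage data.
interface NormalizedUsage {
  inputTokens: number;
  outputTokens: number;
}

function normalizeUsage(
  provider: "anthropic" | "openai" | "ollama",
  raw: Record<string, number | undefined>
): NormalizedUsage {
  if (provider === "anthropic") {
    return { inputTokens: raw.input_tokens ?? 0, outputTokens: raw.output_tokens ?? 0 };
  }
  if (provider === "openai") {
    return { inputTokens: raw.prompt_tokens ?? 0, outputTokens: raw.completion_tokens ?? 0 };
  }
  // ollama
  return { inputTokens: raw.prompt_eval_count ?? 0, outputTokens: raw.eval_count ?? 0 };
}

// Rough fallback when no usage is reported: assume about 4 characters per token.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}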
apps/api/src/llm/llm-cost-table.ts (new file, 106 lines added)
@@ -0,0 +1,106 @@
/**
 * LLM Cost Table
 *
 * Maps model names to per-token costs in microdollars (USD * 1,000,000).
 * For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token.
 *
 * Costs are split into input (prompt) and output (completion) pricing.
 * Ollama models run locally and are free (0 cost).
 */

/**
 * Per-token cost in microdollars for a single model.
 */
export interface ModelCost {
  /** Cost per input token in microdollars */
  inputPerToken: number;
  /** Cost per output token in microdollars */
  outputPerToken: number;
}

/**
 * Cost table mapping model name prefixes to per-token pricing.
 *
 * Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929".
 * More specific prefixes are checked first (longest match wins).
 *
 * Prices sourced from provider pricing pages as of 2026-02.
 */
const MODEL_COSTS: Record<string, ModelCost> = {
  // Anthropic Claude models (per-token microdollars)
  // claude-sonnet-4-5: $3/M input, $15/M output
  "claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
  // claude-opus-4: $15/M input, $75/M output
  "claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
  // claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output
  "claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 },
  "claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
  // claude-3-5-sonnet: $3/M input, $15/M output
  "claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
  // claude-3-opus: $15/M input, $75/M output
  "claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
  // claude-3-sonnet: $3/M input, $15/M output
  "claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
  // claude-3-haiku: $0.25/M input, $1.25/M output
  "claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },

  // OpenAI models (per-token microdollars)
  // gpt-4o-mini: $0.15/M input, $0.60/M output; gpt-4o: $2.50/M input, $10/M output
  "gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
  "gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
  // gpt-4-turbo: $10/M input, $30/M output
  "gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
  // gpt-4: $30/M input, $60/M output
  "gpt-4": { inputPerToken: 30, outputPerToken: 60 },
  // gpt-3.5-turbo: $0.50/M input, $1.50/M output
  "gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },

  // Ollama / local models: free
  // No explicit entries are needed; any model not matched above falls through
  // to the zero-cost default in getModelCost.
};

/**
 * Sorted model prefixes from longest to shortest for greedy prefix matching.
 * Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku".
 */
const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length);

/**
 * Look up per-token cost for a given model name.
 *
 * Uses longest-prefix matching: the model name is compared against known
 * prefixes from longest to shortest. If no prefix matches, returns zero cost
 * (assumes local/free model).
 *
 * @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o")
 * @returns Per-token cost in microdollars
 */
export function getModelCost(modelName: string): ModelCost {
  const normalized = modelName.toLowerCase();

  for (const prefix of SORTED_PREFIXES) {
    if (normalized.startsWith(prefix)) {
      return MODEL_COSTS[prefix];
    }
  }

  // Unknown or local model: assume free
  return { inputPerToken: 0, outputPerToken: 0 };
}

/**
 * Calculate total cost in microdollars for a given model and token counts.
 *
 * @param modelName - Full model name
 * @param inputTokens - Number of input (prompt) tokens
 * @param outputTokens - Number of output (completion) tokens
 * @returns Total cost in microdollars (USD * 1,000,000)
 */
export function calculateCostMicrodollars(
  modelName: string,
  inputTokens: number,
  outputTokens: number
): number {
  const cost = getModelCost(modelName);
  return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens);
}
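For reference (not part of the diff itself), a quick usage sketch of the two exported helpers; the model names are only examples:

import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";

// Prefix matching: the dated release name resolves to the "claude-sonnet-4-5" entry.
const sonnet = getModelCost("claude-sonnet-4-5-20250929");
console.log(sonnet.inputPerToken, sonnet.outputPerToken); // 3 15 (microdollars per token)

// 1,200 input tokens and 350 output tokens on gpt-4o-mini:
// 0.15 * 1200 + 0.6 * 350 = 180 + 210 = 390 microdollars (about $0.00039).
console.log(calculateCostMicrodollars("gpt-4o-mini", 1200, 350)); // 390

// Unknown or local (Ollama) models fall back to zero cost.
console.log(calculateCostMicrodollars("llama3.1:8b", 5000, 800)); // 0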