- llm-cost-table.ts: Add undefined guard for MODEL_COSTS lookup
- llm-telemetry-tracker.service.ts: Allow undefined in callingContext for exactOptionalPropertyTypes compatibility

Refs #371
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

225 lines · 6.8 KiB · TypeScript
import { Injectable, Logger, Optional } from "@nestjs/common";
|
|
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
|
import { PredictionService } from "../mosaic-telemetry/prediction.service";
|
|
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
|
import type { LlmProviderType } from "./providers/llm-provider.interface";
|
|
import { calculateCostMicrodollars } from "./llm-cost-table";
|
|
|
|
/**
 * Parameters for tracking an LLM completion event.
 *
 * Passed to {@link LlmTelemetryTrackerService.trackLlmCompletion}; all
 * fields describe a single completed (or failed) LLM call.
 */
export interface LlmCompletionParams {
  /** Full model name (e.g. "claude-sonnet-4-5-20250929") */
  model: string;
  /** Provider type discriminator */
  providerType: LlmProviderType;
  /** Operation type that was performed */
  operation: "chat" | "chatStream" | "embed";
  /** Duration of the LLM call in milliseconds */
  durationMs: number;
  /** Number of input (prompt) tokens consumed */
  inputTokens: number;
  /** Number of output (completion) tokens generated */
  outputTokens: number;
  /**
   * Optional calling context hint for task type inference.
   * Examples: "brain", "chat", "embed", "planning", "code-review"
   *
   * Declared `string | undefined` (not just optional) so callers compiled
   * with `exactOptionalPropertyTypes` may pass an explicitly undefined
   * value as well as omit the property entirely.
   */
  callingContext?: string | undefined;
  /** Whether the call succeeded or failed */
  success: boolean;
}
|
|
|
|
/**
|
|
* Estimated token count from text length.
|
|
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
|
|
*/
|
|
export function estimateTokens(text: string): number {
|
|
return Math.ceil(text.length / 4);
|
|
}
|
|
|
|
/** Map LLM provider type to telemetry Provider enum */
|
|
export function mapProviderType(providerType: LlmProviderType): Provider {
|
|
switch (providerType) {
|
|
case "claude":
|
|
return Provider.ANTHROPIC;
|
|
case "openai":
|
|
return Provider.OPENAI;
|
|
case "ollama":
|
|
return Provider.OLLAMA;
|
|
default:
|
|
return Provider.UNKNOWN;
|
|
}
|
|
}
|
|
|
|
/** Map LLM provider type to telemetry Harness enum */
|
|
export function mapHarness(providerType: LlmProviderType): Harness {
|
|
switch (providerType) {
|
|
case "ollama":
|
|
return Harness.OLLAMA_LOCAL;
|
|
default:
|
|
return Harness.API_DIRECT;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Infer the task type from calling context and operation.
|
|
*
|
|
* @param operation - The LLM operation (chat, chatStream, embed)
|
|
* @param callingContext - Optional hint about the caller's purpose
|
|
* @returns Inferred TaskType
|
|
*/
|
|
export function inferTaskType(
|
|
operation: "chat" | "chatStream" | "embed",
|
|
callingContext?: string
|
|
): TaskType {
|
|
// Embedding operations are typically for indexing/search
|
|
if (operation === "embed") {
|
|
return TaskType.IMPLEMENTATION;
|
|
}
|
|
|
|
if (!callingContext) {
|
|
return TaskType.UNKNOWN;
|
|
}
|
|
|
|
const ctx = callingContext.toLowerCase();
|
|
|
|
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
|
|
return TaskType.PLANNING;
|
|
}
|
|
if (ctx.includes("review") || ctx.includes("code-review")) {
|
|
return TaskType.CODE_REVIEW;
|
|
}
|
|
if (ctx.includes("test")) {
|
|
return TaskType.TESTING;
|
|
}
|
|
if (ctx.includes("debug")) {
|
|
return TaskType.DEBUGGING;
|
|
}
|
|
if (ctx.includes("refactor")) {
|
|
return TaskType.REFACTORING;
|
|
}
|
|
if (ctx.includes("doc")) {
|
|
return TaskType.DOCUMENTATION;
|
|
}
|
|
if (ctx.includes("config")) {
|
|
return TaskType.CONFIGURATION;
|
|
}
|
|
if (ctx.includes("security") || ctx.includes("audit")) {
|
|
return TaskType.SECURITY_AUDIT;
|
|
}
|
|
if (ctx.includes("chat") || ctx.includes("implement")) {
|
|
return TaskType.IMPLEMENTATION;
|
|
}
|
|
|
|
return TaskType.UNKNOWN;
|
|
}
|
|
|
|
/**
|
|
* LLM Telemetry Tracker Service
|
|
*
|
|
* Builds and submits telemetry events for LLM completions.
|
|
* All tracking is non-blocking and fire-and-forget; telemetry errors
|
|
* never propagate to the caller.
|
|
*
|
|
* @example
|
|
* ```typescript
|
|
* // After a successful chat completion
|
|
* this.telemetryTracker.trackLlmCompletion({
|
|
* model: "claude-sonnet-4-5-20250929",
|
|
* providerType: "claude",
|
|
* operation: "chat",
|
|
* durationMs: 1200,
|
|
* inputTokens: 150,
|
|
* outputTokens: 300,
|
|
* callingContext: "chat",
|
|
* success: true,
|
|
* });
|
|
* ```
|
|
*/
|
|
@Injectable()
|
|
export class LlmTelemetryTrackerService {
|
|
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
|
|
|
|
constructor(
|
|
private readonly telemetry: MosaicTelemetryService,
|
|
@Optional() private readonly predictionService?: PredictionService
|
|
) {}
|
|
|
|
/**
|
|
* Track an LLM completion event via Mosaic Telemetry.
|
|
*
|
|
* This method is intentionally fire-and-forget. It catches all errors
|
|
* internally and logs them without propagating to the caller.
|
|
*
|
|
* @param params - LLM completion parameters
|
|
*/
|
|
trackLlmCompletion(params: LlmCompletionParams): void {
|
|
try {
|
|
const builder = this.telemetry.eventBuilder;
|
|
if (!builder) {
|
|
// Telemetry is disabled — silently skip
|
|
return;
|
|
}
|
|
|
|
const taskType = inferTaskType(params.operation, params.callingContext);
|
|
const provider = mapProviderType(params.providerType);
|
|
|
|
const costMicrodollars = calculateCostMicrodollars(
|
|
params.model,
|
|
params.inputTokens,
|
|
params.outputTokens
|
|
);
|
|
|
|
// Query predictions for estimated fields (graceful degradation)
|
|
let estimatedInputTokens = 0;
|
|
let estimatedOutputTokens = 0;
|
|
let estimatedCostMicros = 0;
|
|
|
|
if (this.predictionService) {
|
|
const prediction = this.predictionService.getEstimate(
|
|
taskType,
|
|
params.model,
|
|
provider,
|
|
Complexity.LOW
|
|
);
|
|
|
|
if (prediction?.prediction && prediction.metadata.confidence !== "none") {
|
|
estimatedInputTokens = prediction.prediction.input_tokens.median;
|
|
estimatedOutputTokens = prediction.prediction.output_tokens.median;
|
|
estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 0;
|
|
}
|
|
}
|
|
|
|
const event = builder.build({
|
|
task_duration_ms: params.durationMs,
|
|
task_type: taskType,
|
|
complexity: Complexity.LOW,
|
|
harness: mapHarness(params.providerType),
|
|
model: params.model,
|
|
provider,
|
|
estimated_input_tokens: estimatedInputTokens,
|
|
estimated_output_tokens: estimatedOutputTokens,
|
|
actual_input_tokens: params.inputTokens,
|
|
actual_output_tokens: params.outputTokens,
|
|
estimated_cost_usd_micros: estimatedCostMicros,
|
|
actual_cost_usd_micros: costMicrodollars,
|
|
quality_gate_passed: true,
|
|
quality_gates_run: [],
|
|
quality_gates_failed: [],
|
|
context_compactions: 0,
|
|
context_rotations: 0,
|
|
context_utilization_final: 0,
|
|
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
|
|
retry_count: 0,
|
|
});
|
|
|
|
this.telemetry.trackTaskCompletion(event);
|
|
} catch (error: unknown) {
|
|
// Never let telemetry errors propagate
|
|
const msg = error instanceof Error ? error.message : String(error);
|
|
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
|
|
}
|
|
}
|
|
}
|