// stack/apps/api/src/llm/llm-telemetry-tracker.service.spec.ts

import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import {
  TaskType,
  Complexity,
  Harness,
  Provider,
  Outcome,
} from "@mosaicstack/telemetry-client";
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import {
  LlmTelemetryTrackerService,
  estimateTokens,
  mapProviderType,
  mapHarness,
  inferTaskType,
} from "./llm-telemetry-tracker.service";
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";

// ---------- Cost Table Tests ----------

describe("llm-cost-table", () => {
  describe("getModelCost", () => {
    // Rows are [test title, model id, expected inputPerToken, expected outputPerToken].
    // One test is registered per row, in the order listed here.
    const rateCases: ReadonlyArray<readonly [string, string, number, number]> = [
      ["should return cost for claude-sonnet-4-5 models", "claude-sonnet-4-5-20250929", 3, 15],
      ["should return cost for claude-opus-4 models", "claude-opus-4-6", 15, 75],
      ["should return cost for claude-haiku-4-5 models", "claude-haiku-4-5-20251001", 0.8, 4],
      ["should return cost for gpt-4o", "gpt-4o", 2.5, 10],
      [
        "should return cost for gpt-4o-mini (longer prefix matches first)",
        "gpt-4o-mini",
        0.15,
        0.6,
      ],
      ["should return zero cost for unknown/local models", "llama3.2", 0, 0],
      ["should return zero cost for ollama models", "mistral:7b", 0, 0],
    ];

    for (const [title, modelId, expectedInput, expectedOutput] of rateCases) {
      it(title, () => {
        const rates = getModelCost(modelId);
        expect(rates.inputPerToken).toBe(expectedInput);
        expect(rates.outputPerToken).toBe(expectedOutput);
      });
    }

    it("should be case-insensitive", () => {
      // Mixed-case spelling of the sonnet model id; only the input rate is checked here.
      const { inputPerToken } = getModelCost("Claude-Sonnet-4-5-20250929");
      expect(inputPerToken).toBe(3);
    });
  });

  describe("calculateCostMicrodollars", () => {
    it("should calculate cost for claude-sonnet-4-5 with token counts", () => {
      // 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
      expect(calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500)).toBe(10500);
    });

    it("should return 0 for local models", () => {
      expect(calculateCostMicrodollars("llama3.2", 1000, 500)).toBe(0);
    });

    it("should return 0 when token counts are 0", () => {
      expect(calculateCostMicrodollars("claude-opus-4-6", 0, 0)).toBe(0);
    });

    it("should round the result to integer microdollars", () => {
      // gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
      expect(calculateCostMicrodollars("gpt-4o-mini", 3, 7)).toBe(5);
    });
  });
});

// ---------- Helper Function Tests ----------

describe("helper functions", () => {
  describe("estimateTokens", () => {
    it("should estimate ~1 token per 4 characters", () => {
      // Inputs whose length is an exact multiple of four.
      const exactCases = [
        ["abcd", 1],
        ["abcdefgh", 2],
      ] as const;
      for (const [text, tokenCount] of exactCases) {
        expect(estimateTokens(text)).toBe(tokenCount);
      }
    });

    it("should round up for partial tokens", () => {
      // Lengths 3 and 5 are not multiples of four and are expected to round up.
      const partialCases = [
        ["abc", 1],
        ["abcde", 2],
      ] as const;
      for (const [text, tokenCount] of partialCases) {
        expect(estimateTokens(text)).toBe(tokenCount);
      }
    });

    it("should return 0 for empty string", () => {
      const emptyEstimate = estimateTokens("");
      expect(emptyEstimate).toBe(0);
    });
  });

  describe("mapProviderType", () => {
    // Rows are [test title, provider type passed in, expected Provider value].
    const providerCases: ReadonlyArray<
      readonly [string, Parameters<typeof mapProviderType>[0], ReturnType<typeof mapProviderType>]
    > = [
      ["should map claude to ANTHROPIC", "claude", Provider.ANTHROPIC],
      ["should map openai to OPENAI", "openai", Provider.OPENAI],
      ["should map ollama to OLLAMA", "ollama", Provider.OLLAMA],
    ];

    for (const [title, providerType, expectedProvider] of providerCases) {
      it(title, () => {
        expect(mapProviderType(providerType)).toBe(expectedProvider);
      });
    }
  });

  describe("mapHarness", () => {
    // Rows are [test title, provider type passed in, expected Harness value].
    const harnessCases: ReadonlyArray<
      readonly [string, Parameters<typeof mapHarness>[0], ReturnType<typeof mapHarness>]
    > = [
      ["should map ollama to OLLAMA_LOCAL", "ollama", Harness.OLLAMA_LOCAL],
      ["should map claude to API_DIRECT", "claude", Harness.API_DIRECT],
      ["should map openai to API_DIRECT", "openai", Harness.API_DIRECT],
    ];

    for (const [title, providerType, expectedHarness] of harnessCases) {
      it(title, () => {
        expect(mapHarness(providerType)).toBe(expectedHarness);
      });
    }
  });

  describe("inferTaskType", () => {
    // Rows are [test title, argument list for inferTaskType, expected TaskType].
    // The argument list is spread into the call, so single-argument rows invoke
    // the function with exactly one argument.
    const taskTypeCases: ReadonlyArray<
      readonly [string, Parameters<typeof inferTaskType>, ReturnType<typeof inferTaskType>]
    > = [
      ["should return IMPLEMENTATION for embed operation", ["embed"], TaskType.IMPLEMENTATION],
      ["should return UNKNOWN when no context provided for chat", ["chat"], TaskType.UNKNOWN],
      ["should return PLANNING for brain context", ["chat", "brain"], TaskType.PLANNING],
      ["should return PLANNING for planning context", ["chat", "planning"], TaskType.PLANNING],
      ["should return CODE_REVIEW for review context", ["chat", "code-review"], TaskType.CODE_REVIEW],
      ["should return TESTING for test context", ["chat", "test-generation"], TaskType.TESTING],
      [
        "should return DEBUGGING for debug context",
        ["chatStream", "debug-session"],
        TaskType.DEBUGGING,
      ],
      ["should return REFACTORING for refactor context", ["chat", "refactor"], TaskType.REFACTORING],
      [
        "should return DOCUMENTATION for doc context",
        ["chat", "documentation"],
        TaskType.DOCUMENTATION,
      ],
      [
        "should return CONFIGURATION for config context",
        ["chat", "config-update"],
        TaskType.CONFIGURATION,
      ],
      [
        "should return SECURITY_AUDIT for security context",
        ["chat", "security-check"],
        TaskType.SECURITY_AUDIT,
      ],
      ["should return IMPLEMENTATION for chat context", ["chat", "chat"], TaskType.IMPLEMENTATION],
      ["should be case-insensitive", ["chat", "BRAIN"], TaskType.PLANNING],
      [
        "should return UNKNOWN for unrecognized context",
        ["chat", "something-else"],
        TaskType.UNKNOWN,
      ],
    ];

    for (const [title, callArgs, expectedTaskType] of taskTypeCases) {
      it(title, () => {
        expect(inferTaskType(...callArgs)).toBe(expectedTaskType);
      });
    }
  });
});

// ---------- LlmTelemetryTrackerService Tests ----------

describe("LlmTelemetryTrackerService", () => {
  // Shape of the stand-in supplied in place of MosaicTelemetryService.
  // `eventBuilder` is nullable so individual tests can null it out.
  type TelemetryMock = {
    eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
    trackTaskCompletion: ReturnType<typeof vi.fn>;
    isEnabled: boolean;
  };

  let tracker: LlmTelemetryTrackerService;
  let telemetryMock: TelemetryMock;

  // Canned event returned by the mocked eventBuilder.build().
  const builtEvent: TaskCompletionEvent = {
    instance_id: "test-instance",
    event_id: "test-event",
    schema_version: "1.0.0",
    timestamp: new Date().toISOString(),
    task_duration_ms: 1000,
    task_type: TaskType.IMPLEMENTATION,
    complexity: Complexity.LOW,
    harness: Harness.API_DIRECT,
    model: "claude-sonnet-4-5-20250929",
    provider: Provider.ANTHROPIC,
    estimated_input_tokens: 100,
    estimated_output_tokens: 200,
    actual_input_tokens: 100,
    actual_output_tokens: 200,
    estimated_cost_usd_micros: 3300,
    actual_cost_usd_micros: 3300,
    quality_gate_passed: true,
    quality_gates_run: [],
    quality_gates_failed: [],
    context_compactions: 0,
    context_rotations: 0,
    context_utilization_final: 0,
    outcome: Outcome.SUCCESS,
    retry_count: 0,
  };

  // Fresh mocks and a fresh Nest testing module for every test.
  beforeEach(async () => {
    telemetryMock = {
      eventBuilder: { build: vi.fn().mockReturnValue(builtEvent) },
      trackTaskCompletion: vi.fn(),
      isEnabled: true,
    };

    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        LlmTelemetryTrackerService,
        { provide: MosaicTelemetryService, useValue: telemetryMock },
      ],
    }).compile();

    tracker = testingModule.get<LlmTelemetryTrackerService>(LlmTelemetryTrackerService);
  });

  /**
   * Asserts that the mocked eventBuilder.build was called with an argument
   * containing at least the given fields.
   */
  const expectBuildCalledWith = (expectedFields: Record<string, unknown>): void => {
    expect(telemetryMock.eventBuilder?.build).toHaveBeenCalledWith(
      expect.objectContaining(expectedFields),
    );
  };

  it("should be defined", () => {
    expect(tracker).toBeDefined();
  });

  describe("trackLlmCompletion", () => {
    // Baseline parameters; individual tests spread this and override fields.
    const defaultParams: LlmCompletionParams = {
      model: "claude-sonnet-4-5-20250929",
      providerType: "claude",
      operation: "chat",
      durationMs: 1200,
      inputTokens: 150,
      outputTokens: 300,
      callingContext: "chat",
      success: true,
    };

    it("should build and track a telemetry event for Anthropic provider", () => {
      tracker.trackLlmCompletion(defaultParams);

      expectBuildCalledWith({
        task_duration_ms: 1200,
        task_type: TaskType.IMPLEMENTATION,
        complexity: Complexity.LOW,
        harness: Harness.API_DIRECT,
        model: "claude-sonnet-4-5-20250929",
        provider: Provider.ANTHROPIC,
        actual_input_tokens: 150,
        actual_output_tokens: 300,
        outcome: Outcome.SUCCESS,
      });

      // The event produced by the builder is what gets forwarded for tracking.
      expect(telemetryMock.trackTaskCompletion).toHaveBeenCalledWith(builtEvent);
    });

    it("should build and track a telemetry event for OpenAI provider", () => {
      tracker.trackLlmCompletion({
        ...defaultParams,
        model: "gpt-4o",
        providerType: "openai",
      });

      expectBuildCalledWith({
        model: "gpt-4o",
        provider: Provider.OPENAI,
        harness: Harness.API_DIRECT,
      });
    });

    it("should build and track a telemetry event for Ollama provider", () => {
      tracker.trackLlmCompletion({
        ...defaultParams,
        model: "llama3.2",
        providerType: "ollama",
      });

      expectBuildCalledWith({
        model: "llama3.2",
        provider: Provider.OLLAMA,
        harness: Harness.OLLAMA_LOCAL,
      });
    });

    it("should calculate cost in microdollars correctly", () => {
      tracker.trackLlmCompletion(defaultParams);

      expectBuildCalledWith({
        // Estimated values are 0 when no PredictionService is injected
        estimated_cost_usd_micros: 0,
        // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
        actual_cost_usd_micros: 4950,
      });
    });

    it("should calculate zero cost for ollama models", () => {
      tracker.trackLlmCompletion({
        ...defaultParams,
        model: "llama3.2",
        providerType: "ollama",
      });

      expectBuildCalledWith({
        estimated_cost_usd_micros: 0,
        actual_cost_usd_micros: 0,
      });
    });

    it("should track FAILURE outcome when success is false", () => {
      tracker.trackLlmCompletion({ ...defaultParams, success: false });

      expectBuildCalledWith({ outcome: Outcome.FAILURE });
    });

    it("should infer task type from calling context", () => {
      tracker.trackLlmCompletion({ ...defaultParams, callingContext: "brain" });

      expectBuildCalledWith({ task_type: TaskType.PLANNING });
    });

    it("should set empty quality gates arrays for direct LLM calls", () => {
      tracker.trackLlmCompletion(defaultParams);

      expectBuildCalledWith({
        quality_gate_passed: true,
        quality_gates_run: [],
        quality_gates_failed: [],
      });
    });

    it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
      telemetryMock.eventBuilder = null;

      // A throw here would fail the test, so this call doubles as the no-throw check.
      tracker.trackLlmCompletion(defaultParams);

      expect(telemetryMock.trackTaskCompletion).not.toHaveBeenCalled();
    });

    it("should not throw when eventBuilder.build throws an error", () => {
      const throwingBuild = vi.fn().mockImplementation(() => {
        throw new Error("Build failed");
      });
      telemetryMock.eventBuilder = { build: throwingBuild };

      // The builder error must not propagate to the caller.
      expect(() => tracker.trackLlmCompletion(defaultParams)).not.toThrow();
    });

    it("should not throw when trackTaskCompletion throws an error", () => {
      telemetryMock.trackTaskCompletion.mockImplementation(() => {
        throw new Error("Track failed");
      });

      // The tracking error must not propagate to the caller.
      expect(() => tracker.trackLlmCompletion(defaultParams)).not.toThrow();
    });

    it("should handle streaming operation with estimated tokens", () => {
      tracker.trackLlmCompletion({
        ...defaultParams,
        operation: "chatStream",
        inputTokens: 50,
        outputTokens: 100,
      });

      expectBuildCalledWith({
        actual_input_tokens: 50,
        actual_output_tokens: 100,
        // Estimated values are 0 when no PredictionService is injected
        estimated_input_tokens: 0,
        estimated_output_tokens: 0,
      });
    });

    it("should handle embed operation", () => {
      tracker.trackLlmCompletion({
        ...defaultParams,
        operation: "embed",
        outputTokens: 0,
        callingContext: undefined,
      });

      expectBuildCalledWith({
        task_type: TaskType.IMPLEMENTATION,
        actual_output_tokens: 0,
      });
    });

    it("should pass all required EventBuilderParams fields", () => {
      tracker.trackLlmCompletion(defaultParams);

      // First argument of the first build() call.
      const buildSpy = telemetryMock.eventBuilder?.build as ReturnType<typeof vi.fn>;
      const builderArgs = buildSpy.mock.calls[0][0] as EventBuilderParams;

      // Verify all required fields are present
      const requiredFields = [
        "task_duration_ms",
        "task_type",
        "complexity",
        "harness",
        "model",
        "provider",
        "estimated_input_tokens",
        "estimated_output_tokens",
        "actual_input_tokens",
        "actual_output_tokens",
        "estimated_cost_usd_micros",
        "actual_cost_usd_micros",
        "quality_gate_passed",
        "quality_gates_run",
        "quality_gates_failed",
        "context_compactions",
        "context_rotations",
        "context_utilization_final",
        "outcome",
        "retry_count",
      ];
      for (const fieldName of requiredFields) {
        expect(builderArgs).toHaveProperty(fieldName);
      }
    });
  });
});