merge: resolve conflicts with develop (telemetry + lockfile)
Keep both Mosaic Telemetry section (from develop) and Matrix Dev Environment section (from feature branch) in .env.example. Regenerate pnpm-lock.yaml with both dependency trees merged. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,7 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.72.1",
|
||||
"@mosaic/shared": "workspace:*",
|
||||
"@mosaicstack/telemetry-client": "^0.1.0",
|
||||
"@nestjs/axios": "^4.0.1",
|
||||
"@nestjs/bullmq": "^11.0.4",
|
||||
"@nestjs/common": "^11.1.12",
|
||||
|
||||
@@ -37,6 +37,7 @@ import { JobStepsModule } from "./job-steps/job-steps.module";
|
||||
import { CoordinatorIntegrationModule } from "./coordinator-integration/coordinator-integration.module";
|
||||
import { FederationModule } from "./federation/federation.module";
|
||||
import { CredentialsModule } from "./credentials/credentials.module";
|
||||
import { MosaicTelemetryModule } from "./mosaic-telemetry";
|
||||
import { RlsContextInterceptor } from "./common/interceptors/rls-context.interceptor";
|
||||
|
||||
@Module({
|
||||
@@ -97,6 +98,7 @@ import { RlsContextInterceptor } from "./common/interceptors/rls-context.interce
|
||||
CoordinatorIntegrationModule,
|
||||
FederationModule,
|
||||
CredentialsModule,
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
controllers: [AppController, CsrfController],
|
||||
providers: [
|
||||
|
||||
109
apps/api/src/llm/llm-cost-table.ts
Normal file
109
apps/api/src/llm/llm-cost-table.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* LLM Cost Table
|
||||
*
|
||||
* Maps model names to per-token costs in microdollars (USD * 1,000,000).
|
||||
* For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token.
|
||||
*
|
||||
* Costs are split into input (prompt) and output (completion) pricing.
|
||||
* Ollama models run locally and are free (0 cost).
|
||||
*/
|
||||
|
||||
/**
|
||||
* Per-token cost in microdollars for a single model.
|
||||
*/
|
||||
export interface ModelCost {
|
||||
/** Cost per input token in microdollars */
|
||||
inputPerToken: number;
|
||||
/** Cost per output token in microdollars */
|
||||
outputPerToken: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cost table mapping model name prefixes to per-token pricing.
|
||||
*
|
||||
* Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929".
|
||||
* More specific prefixes are checked first (longest match wins).
|
||||
*
|
||||
* Prices sourced from provider pricing pages as of 2026-02.
|
||||
*/
|
||||
const MODEL_COSTS: Record<string, ModelCost> = {
|
||||
// Anthropic Claude models (per-token microdollars)
|
||||
// claude-sonnet-4-5: $3/M input, $15/M output
|
||||
"claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
|
||||
// claude-opus-4: $15/M input, $75/M output
|
||||
"claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
|
||||
// claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output
|
||||
"claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 },
|
||||
"claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
|
||||
// claude-3-5-sonnet: $3/M input, $15/M output
|
||||
"claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
|
||||
// claude-3-opus: $15/M input, $75/M output
|
||||
"claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
|
||||
// claude-3-sonnet: $3/M input, $15/M output
|
||||
"claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
|
||||
// claude-3-haiku: $0.25/M input, $1.25/M output
|
||||
"claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },
|
||||
|
||||
// OpenAI models (per-token microdollars)
|
||||
// gpt-4o: $2.50/M input, $10/M output
|
||||
"gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
|
||||
"gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
|
||||
// gpt-4-turbo: $10/M input, $30/M output
|
||||
"gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
|
||||
// gpt-4: $30/M input, $60/M output
|
||||
"gpt-4": { inputPerToken: 30, outputPerToken: 60 },
|
||||
// gpt-3.5-turbo: $0.50/M input, $1.50/M output
|
||||
"gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },
|
||||
|
||||
// Ollama / local models: free
|
||||
// These are catch-all entries; any model not matched above falls through to getModelCost default
|
||||
};
|
||||
|
||||
/**
|
||||
* Sorted model prefixes from longest to shortest for greedy prefix matching.
|
||||
* Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku".
|
||||
*/
|
||||
const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length);
|
||||
|
||||
/**
|
||||
* Look up per-token cost for a given model name.
|
||||
*
|
||||
* Uses longest-prefix matching: the model name is compared against known
|
||||
* prefixes from longest to shortest. If no prefix matches, returns zero cost
|
||||
* (assumes local/free model).
|
||||
*
|
||||
* @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o")
|
||||
* @returns Per-token cost in microdollars
|
||||
*/
|
||||
export function getModelCost(modelName: string): ModelCost {
|
||||
const normalized = modelName.toLowerCase();
|
||||
|
||||
for (const prefix of SORTED_PREFIXES) {
|
||||
if (normalized.startsWith(prefix)) {
|
||||
const cost = MODEL_COSTS[prefix];
|
||||
if (cost !== undefined) {
|
||||
return cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown or local model — assume free
|
||||
return { inputPerToken: 0, outputPerToken: 0 };
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate total cost in microdollars for a given model and token counts.
|
||||
*
|
||||
* @param modelName - Full model name
|
||||
* @param inputTokens - Number of input (prompt) tokens
|
||||
* @param outputTokens - Number of output (completion) tokens
|
||||
* @returns Total cost in microdollars (USD * 1,000,000)
|
||||
*/
|
||||
export function calculateCostMicrodollars(
|
||||
modelName: string,
|
||||
inputTokens: number,
|
||||
outputTokens: number
|
||||
): number {
|
||||
const cost = getModelCost(modelName);
|
||||
return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens);
|
||||
}
|
||||
487
apps/api/src/llm/llm-telemetry-tracker.service.spec.ts
Normal file
487
apps/api/src/llm/llm-telemetry-tracker.service.spec.ts
Normal file
@@ -0,0 +1,487 @@
|
||||
/**
 * Unit tests for the LLM telemetry pipeline:
 * - llm-cost-table: prefix-based pricing lookup and microdollar cost math
 * - helper functions: token estimation, provider/harness mapping, task-type inference
 * - LlmTelemetryTrackerService: event building, cost fields, and error isolation
 *
 * MosaicTelemetryService is replaced with a hand-rolled mock; no network or
 * real telemetry client is exercised.
 */
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import {
  LlmTelemetryTrackerService,
  estimateTokens,
  mapProviderType,
  mapHarness,
  inferTaskType,
} from "./llm-telemetry-tracker.service";
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";

// ---------- Cost Table Tests ----------

describe("llm-cost-table", () => {
  describe("getModelCost", () => {
    it("should return cost for claude-sonnet-4-5 models", () => {
      const cost = getModelCost("claude-sonnet-4-5-20250929");
      expect(cost.inputPerToken).toBe(3);
      expect(cost.outputPerToken).toBe(15);
    });

    it("should return cost for claude-opus-4 models", () => {
      const cost = getModelCost("claude-opus-4-6");
      expect(cost.inputPerToken).toBe(15);
      expect(cost.outputPerToken).toBe(75);
    });

    it("should return cost for claude-haiku-4-5 models", () => {
      const cost = getModelCost("claude-haiku-4-5-20251001");
      expect(cost.inputPerToken).toBe(0.8);
      expect(cost.outputPerToken).toBe(4);
    });

    it("should return cost for gpt-4o", () => {
      const cost = getModelCost("gpt-4o");
      expect(cost.inputPerToken).toBe(2.5);
      expect(cost.outputPerToken).toBe(10);
    });

    it("should return cost for gpt-4o-mini (longer prefix matches first)", () => {
      const cost = getModelCost("gpt-4o-mini");
      expect(cost.inputPerToken).toBe(0.15);
      expect(cost.outputPerToken).toBe(0.6);
    });

    it("should return zero cost for unknown/local models", () => {
      const cost = getModelCost("llama3.2");
      expect(cost.inputPerToken).toBe(0);
      expect(cost.outputPerToken).toBe(0);
    });

    it("should return zero cost for ollama models", () => {
      const cost = getModelCost("mistral:7b");
      expect(cost.inputPerToken).toBe(0);
      expect(cost.outputPerToken).toBe(0);
    });

    it("should be case-insensitive", () => {
      const cost = getModelCost("Claude-Sonnet-4-5-20250929");
      expect(cost.inputPerToken).toBe(3);
    });
  });

  describe("calculateCostMicrodollars", () => {
    it("should calculate cost for claude-sonnet-4-5 with token counts", () => {
      // 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
      const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500);
      expect(cost).toBe(10500);
    });

    it("should return 0 for local models", () => {
      const cost = calculateCostMicrodollars("llama3.2", 1000, 500);
      expect(cost).toBe(0);
    });

    it("should return 0 when token counts are 0", () => {
      const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0);
      expect(cost).toBe(0);
    });

    it("should round the result to integer microdollars", () => {
      // gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
      const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7);
      expect(cost).toBe(5);
    });
  });
});

// ---------- Helper Function Tests ----------

describe("helper functions", () => {
  describe("estimateTokens", () => {
    it("should estimate ~1 token per 4 characters", () => {
      expect(estimateTokens("abcd")).toBe(1);
      expect(estimateTokens("abcdefgh")).toBe(2);
    });

    it("should round up for partial tokens", () => {
      expect(estimateTokens("abc")).toBe(1);
      expect(estimateTokens("abcde")).toBe(2);
    });

    it("should return 0 for empty string", () => {
      expect(estimateTokens("")).toBe(0);
    });
  });

  describe("mapProviderType", () => {
    it("should map claude to ANTHROPIC", () => {
      expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC);
    });

    it("should map openai to OPENAI", () => {
      expect(mapProviderType("openai")).toBe(Provider.OPENAI);
    });

    it("should map ollama to OLLAMA", () => {
      expect(mapProviderType("ollama")).toBe(Provider.OLLAMA);
    });
  });

  describe("mapHarness", () => {
    it("should map ollama to OLLAMA_LOCAL", () => {
      expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL);
    });

    it("should map claude to API_DIRECT", () => {
      expect(mapHarness("claude")).toBe(Harness.API_DIRECT);
    });

    it("should map openai to API_DIRECT", () => {
      expect(mapHarness("openai")).toBe(Harness.API_DIRECT);
    });
  });

  describe("inferTaskType", () => {
    it("should return IMPLEMENTATION for embed operation", () => {
      expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION);
    });

    it("should return UNKNOWN when no context provided for chat", () => {
      expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN);
    });

    it("should return PLANNING for brain context", () => {
      expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING);
    });

    it("should return PLANNING for planning context", () => {
      expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING);
    });

    it("should return CODE_REVIEW for review context", () => {
      expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW);
    });

    it("should return TESTING for test context", () => {
      expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING);
    });

    it("should return DEBUGGING for debug context", () => {
      expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING);
    });

    it("should return REFACTORING for refactor context", () => {
      expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING);
    });

    it("should return DOCUMENTATION for doc context", () => {
      expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION);
    });

    it("should return CONFIGURATION for config context", () => {
      expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION);
    });

    it("should return SECURITY_AUDIT for security context", () => {
      expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT);
    });

    it("should return IMPLEMENTATION for chat context", () => {
      expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION);
    });

    it("should be case-insensitive", () => {
      expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING);
    });

    it("should return UNKNOWN for unrecognized context", () => {
      expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN);
    });
  });
});

// ---------- LlmTelemetryTrackerService Tests ----------

describe("LlmTelemetryTrackerService", () => {
  let service: LlmTelemetryTrackerService;
  // Structural mock of MosaicTelemetryService — only the members the tracker
  // touches (eventBuilder, trackTaskCompletion, isEnabled).
  let mockTelemetryService: {
    eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
    trackTaskCompletion: ReturnType<typeof vi.fn>;
    isEnabled: boolean;
  };

  // Fully-populated canned event returned by the mocked builder; field values
  // are arbitrary but schema-complete.
  const mockEvent: TaskCompletionEvent = {
    instance_id: "test-instance",
    event_id: "test-event",
    schema_version: "1.0.0",
    timestamp: new Date().toISOString(),
    task_duration_ms: 1000,
    task_type: TaskType.IMPLEMENTATION,
    complexity: Complexity.LOW,
    harness: Harness.API_DIRECT,
    model: "claude-sonnet-4-5-20250929",
    provider: Provider.ANTHROPIC,
    estimated_input_tokens: 100,
    estimated_output_tokens: 200,
    actual_input_tokens: 100,
    actual_output_tokens: 200,
    estimated_cost_usd_micros: 3300,
    actual_cost_usd_micros: 3300,
    quality_gate_passed: true,
    quality_gates_run: [],
    quality_gates_failed: [],
    context_compactions: 0,
    context_rotations: 0,
    context_utilization_final: 0,
    outcome: Outcome.SUCCESS,
    retry_count: 0,
  };

  beforeEach(async () => {
    mockTelemetryService = {
      eventBuilder: {
        build: vi.fn().mockReturnValue(mockEvent),
      },
      trackTaskCompletion: vi.fn(),
      isEnabled: true,
    };

    // No PredictionService provider is registered, so the tracker's optional
    // dependency resolves to undefined in every test below.
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        LlmTelemetryTrackerService,
        {
          provide: MosaicTelemetryService,
          useValue: mockTelemetryService,
        },
      ],
    }).compile();

    service = module.get<LlmTelemetryTrackerService>(LlmTelemetryTrackerService);
  });

  it("should be defined", () => {
    expect(service).toBeDefined();
  });

  describe("trackLlmCompletion", () => {
    const baseParams: LlmCompletionParams = {
      model: "claude-sonnet-4-5-20250929",
      providerType: "claude",
      operation: "chat",
      durationMs: 1200,
      inputTokens: 150,
      outputTokens: 300,
      callingContext: "chat",
      success: true,
    };

    it("should build and track a telemetry event for Anthropic provider", () => {
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          task_duration_ms: 1200,
          task_type: TaskType.IMPLEMENTATION,
          complexity: Complexity.LOW,
          harness: Harness.API_DIRECT,
          model: "claude-sonnet-4-5-20250929",
          provider: Provider.ANTHROPIC,
          actual_input_tokens: 150,
          actual_output_tokens: 300,
          outcome: Outcome.SUCCESS,
        })
      );

      expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent);
    });

    it("should build and track a telemetry event for OpenAI provider", () => {
      service.trackLlmCompletion({
        ...baseParams,
        model: "gpt-4o",
        providerType: "openai",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          model: "gpt-4o",
          provider: Provider.OPENAI,
          harness: Harness.API_DIRECT,
        })
      );
    });

    it("should build and track a telemetry event for Ollama provider", () => {
      service.trackLlmCompletion({
        ...baseParams,
        model: "llama3.2",
        providerType: "ollama",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          model: "llama3.2",
          provider: Provider.OLLAMA,
          harness: Harness.OLLAMA_LOCAL,
        })
      );
    });

    it("should calculate cost in microdollars correctly", () => {
      service.trackLlmCompletion(baseParams);

      // claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
      const expectedActualCost = 4950;

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          // Estimated values are 0 when no PredictionService is injected
          estimated_cost_usd_micros: 0,
          actual_cost_usd_micros: expectedActualCost,
        })
      );
    });

    it("should calculate zero cost for ollama models", () => {
      service.trackLlmCompletion({
        ...baseParams,
        model: "llama3.2",
        providerType: "ollama",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          estimated_cost_usd_micros: 0,
          actual_cost_usd_micros: 0,
        })
      );
    });

    it("should track FAILURE outcome when success is false", () => {
      service.trackLlmCompletion({
        ...baseParams,
        success: false,
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          outcome: Outcome.FAILURE,
        })
      );
    });

    it("should infer task type from calling context", () => {
      service.trackLlmCompletion({
        ...baseParams,
        callingContext: "brain",
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          task_type: TaskType.PLANNING,
        })
      );
    });

    it("should set empty quality gates arrays for direct LLM calls", () => {
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          quality_gate_passed: true,
          quality_gates_run: [],
          quality_gates_failed: [],
        })
      );
    });

    it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
      mockTelemetryService.eventBuilder = null;

      // Should not throw
      service.trackLlmCompletion(baseParams);

      expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled();
    });

    it("should not throw when eventBuilder.build throws an error", () => {
      mockTelemetryService.eventBuilder = {
        build: vi.fn().mockImplementation(() => {
          throw new Error("Build failed");
        }),
      };

      // Should not throw
      expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
    });

    it("should not throw when trackTaskCompletion throws an error", () => {
      mockTelemetryService.trackTaskCompletion.mockImplementation(() => {
        throw new Error("Track failed");
      });

      // Should not throw
      expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
    });

    it("should handle streaming operation with estimated tokens", () => {
      service.trackLlmCompletion({
        ...baseParams,
        operation: "chatStream",
        inputTokens: 50,
        outputTokens: 100,
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          actual_input_tokens: 50,
          actual_output_tokens: 100,
          // Estimated values are 0 when no PredictionService is injected
          estimated_input_tokens: 0,
          estimated_output_tokens: 0,
        })
      );
    });

    it("should handle embed operation", () => {
      service.trackLlmCompletion({
        ...baseParams,
        operation: "embed",
        outputTokens: 0,
        callingContext: undefined,
      });

      expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
        expect.objectContaining({
          task_type: TaskType.IMPLEMENTATION,
          actual_output_tokens: 0,
        })
      );
    });

    it("should pass all required EventBuilderParams fields", () => {
      service.trackLlmCompletion(baseParams);

      const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>).mock
        .calls[0][0] as EventBuilderParams;

      // Verify all required fields are present
      expect(buildCall).toHaveProperty("task_duration_ms");
      expect(buildCall).toHaveProperty("task_type");
      expect(buildCall).toHaveProperty("complexity");
      expect(buildCall).toHaveProperty("harness");
      expect(buildCall).toHaveProperty("model");
      expect(buildCall).toHaveProperty("provider");
      expect(buildCall).toHaveProperty("estimated_input_tokens");
      expect(buildCall).toHaveProperty("estimated_output_tokens");
      expect(buildCall).toHaveProperty("actual_input_tokens");
      expect(buildCall).toHaveProperty("actual_output_tokens");
      expect(buildCall).toHaveProperty("estimated_cost_usd_micros");
      expect(buildCall).toHaveProperty("actual_cost_usd_micros");
      expect(buildCall).toHaveProperty("quality_gate_passed");
      expect(buildCall).toHaveProperty("quality_gates_run");
      expect(buildCall).toHaveProperty("quality_gates_failed");
      expect(buildCall).toHaveProperty("context_compactions");
      expect(buildCall).toHaveProperty("context_rotations");
      expect(buildCall).toHaveProperty("context_utilization_final");
      expect(buildCall).toHaveProperty("outcome");
      expect(buildCall).toHaveProperty("retry_count");
    });
  });
});
|
||||
224
apps/api/src/llm/llm-telemetry-tracker.service.ts
Normal file
224
apps/api/src/llm/llm-telemetry-tracker.service.ts
Normal file
@@ -0,0 +1,224 @@
|
||||
import { Injectable, Logger, Optional } from "@nestjs/common";
|
||||
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
|
||||
import { PredictionService } from "../mosaic-telemetry/prediction.service";
|
||||
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
||||
import type { LlmProviderType } from "./providers/llm-provider.interface";
|
||||
import { calculateCostMicrodollars } from "./llm-cost-table";
|
||||
|
||||
/**
 * Parameters for tracking an LLM completion event.
 *
 * Consumed by LlmTelemetryTrackerService.trackLlmCompletion, which maps these
 * raw call facts into a TaskCompletionEvent.
 */
export interface LlmCompletionParams {
  /** Full model name (e.g. "claude-sonnet-4-5-20250929") */
  model: string;
  /** Provider type discriminator */
  providerType: LlmProviderType;
  /** Operation type that was performed */
  operation: "chat" | "chatStream" | "embed";
  /** Duration of the LLM call in milliseconds */
  durationMs: number;
  /** Number of input (prompt) tokens consumed */
  inputTokens: number;
  /** Number of output (completion) tokens generated */
  outputTokens: number;
  /**
   * Optional calling context hint for task type inference.
   * Examples: "brain", "chat", "embed", "planning", "code-review"
   */
  callingContext?: string | undefined;
  /** Whether the call succeeded or failed */
  success: boolean;
}
|
||||
|
||||
/**
|
||||
* Estimated token count from text length.
|
||||
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
|
||||
*/
|
||||
export function estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/** Map LLM provider type to telemetry Provider enum */
|
||||
export function mapProviderType(providerType: LlmProviderType): Provider {
|
||||
switch (providerType) {
|
||||
case "claude":
|
||||
return Provider.ANTHROPIC;
|
||||
case "openai":
|
||||
return Provider.OPENAI;
|
||||
case "ollama":
|
||||
return Provider.OLLAMA;
|
||||
default:
|
||||
return Provider.UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
/** Map LLM provider type to telemetry Harness enum */
|
||||
export function mapHarness(providerType: LlmProviderType): Harness {
|
||||
switch (providerType) {
|
||||
case "ollama":
|
||||
return Harness.OLLAMA_LOCAL;
|
||||
default:
|
||||
return Harness.API_DIRECT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Infer the task type from calling context and operation.
|
||||
*
|
||||
* @param operation - The LLM operation (chat, chatStream, embed)
|
||||
* @param callingContext - Optional hint about the caller's purpose
|
||||
* @returns Inferred TaskType
|
||||
*/
|
||||
export function inferTaskType(
|
||||
operation: "chat" | "chatStream" | "embed",
|
||||
callingContext?: string
|
||||
): TaskType {
|
||||
// Embedding operations are typically for indexing/search
|
||||
if (operation === "embed") {
|
||||
return TaskType.IMPLEMENTATION;
|
||||
}
|
||||
|
||||
if (!callingContext) {
|
||||
return TaskType.UNKNOWN;
|
||||
}
|
||||
|
||||
const ctx = callingContext.toLowerCase();
|
||||
|
||||
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
|
||||
return TaskType.PLANNING;
|
||||
}
|
||||
if (ctx.includes("review") || ctx.includes("code-review")) {
|
||||
return TaskType.CODE_REVIEW;
|
||||
}
|
||||
if (ctx.includes("test")) {
|
||||
return TaskType.TESTING;
|
||||
}
|
||||
if (ctx.includes("debug")) {
|
||||
return TaskType.DEBUGGING;
|
||||
}
|
||||
if (ctx.includes("refactor")) {
|
||||
return TaskType.REFACTORING;
|
||||
}
|
||||
if (ctx.includes("doc")) {
|
||||
return TaskType.DOCUMENTATION;
|
||||
}
|
||||
if (ctx.includes("config")) {
|
||||
return TaskType.CONFIGURATION;
|
||||
}
|
||||
if (ctx.includes("security") || ctx.includes("audit")) {
|
||||
return TaskType.SECURITY_AUDIT;
|
||||
}
|
||||
if (ctx.includes("chat") || ctx.includes("implement")) {
|
||||
return TaskType.IMPLEMENTATION;
|
||||
}
|
||||
|
||||
return TaskType.UNKNOWN;
|
||||
}
|
||||
|
||||
/**
 * LLM Telemetry Tracker Service
 *
 * Builds and submits telemetry events for LLM completions.
 * All tracking is non-blocking and fire-and-forget; telemetry errors
 * never propagate to the caller.
 *
 * @example
 * ```typescript
 * // After a successful chat completion
 * this.telemetryTracker.trackLlmCompletion({
 *   model: "claude-sonnet-4-5-20250929",
 *   providerType: "claude",
 *   operation: "chat",
 *   durationMs: 1200,
 *   inputTokens: 150,
 *   outputTokens: 300,
 *   callingContext: "chat",
 *   success: true,
 * });
 * ```
 */
@Injectable()
export class LlmTelemetryTrackerService {
  private readonly logger = new Logger(LlmTelemetryTrackerService.name);

  /**
   * @param telemetry - Provides the event builder and event submission.
   * @param predictionService - Optional; when absent, all estimated_* fields
   *   in emitted events are 0 (see trackLlmCompletion).
   */
  constructor(
    private readonly telemetry: MosaicTelemetryService,
    @Optional() private readonly predictionService?: PredictionService
  ) {}

  /**
   * Track an LLM completion event via Mosaic Telemetry.
   *
   * This method is intentionally fire-and-forget. It catches all errors
   * internally and logs them without propagating to the caller.
   *
   * @param params - LLM completion parameters
   */
  trackLlmCompletion(params: LlmCompletionParams): void {
    try {
      // A null/undefined eventBuilder is how MosaicTelemetryService signals
      // that telemetry is disabled — NOTE(review): confirmed only by the
      // mock in the spec file; verify against the real service.
      const builder = this.telemetry.eventBuilder;
      if (!builder) {
        // Telemetry is disabled — silently skip
        return;
      }

      const taskType = inferTaskType(params.operation, params.callingContext);
      const provider = mapProviderType(params.providerType);

      // Actual cost is derived from the static cost table and the real token
      // counts reported by the provider.
      const costMicrodollars = calculateCostMicrodollars(
        params.model,
        params.inputTokens,
        params.outputTokens
      );

      // Query predictions for estimated fields (graceful degradation)
      let estimatedInputTokens = 0;
      let estimatedOutputTokens = 0;
      let estimatedCostMicros = 0;

      if (this.predictionService) {
        const prediction = this.predictionService.getEstimate(
          taskType,
          params.model,
          provider,
          Complexity.LOW
        );

        // Only trust the prediction when the service reports some confidence.
        if (prediction?.prediction && prediction.metadata.confidence !== "none") {
          estimatedInputTokens = prediction.prediction.input_tokens.median;
          estimatedOutputTokens = prediction.prediction.output_tokens.median;
          estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 0;
        }
      }

      // Complexity is hardcoded to LOW and quality-gate/context fields are
      // zeroed for direct LLM calls — NOTE(review): confirm this is the
      // intended contract for all three operation kinds.
      const event = builder.build({
        task_duration_ms: params.durationMs,
        task_type: taskType,
        complexity: Complexity.LOW,
        harness: mapHarness(params.providerType),
        model: params.model,
        provider,
        estimated_input_tokens: estimatedInputTokens,
        estimated_output_tokens: estimatedOutputTokens,
        actual_input_tokens: params.inputTokens,
        actual_output_tokens: params.outputTokens,
        estimated_cost_usd_micros: estimatedCostMicros,
        actual_cost_usd_micros: costMicrodollars,
        quality_gate_passed: true,
        quality_gates_run: [],
        quality_gates_failed: [],
        context_compactions: 0,
        context_rotations: 0,
        context_utilization_final: 0,
        outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
        retry_count: 0,
      });

      this.telemetry.trackTaskCompletion(event);
    } catch (error: unknown) {
      // Never let telemetry errors propagate
      const msg = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
    }
  }
}
|
||||
@@ -3,13 +3,14 @@ import { LlmController } from "./llm.controller";
|
||||
import { LlmProviderAdminController } from "./llm-provider-admin.controller";
|
||||
import { LlmService } from "./llm.service";
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
|
||||
import { PrismaModule } from "../prisma/prisma.module";
|
||||
import { LlmUsageModule } from "../llm-usage/llm-usage.module";
|
||||
|
||||
@Module({
|
||||
imports: [PrismaModule, LlmUsageModule],
|
||||
controllers: [LlmController, LlmProviderAdminController],
|
||||
providers: [LlmService, LlmManagerService],
|
||||
providers: [LlmService, LlmManagerService, LlmTelemetryTrackerService],
|
||||
exports: [LlmService, LlmManagerService],
|
||||
})
|
||||
export class LlmModule {}
|
||||
|
||||
@@ -3,6 +3,7 @@ import { Test, TestingModule } from "@nestjs/testing";
|
||||
import { ServiceUnavailableException } from "@nestjs/common";
|
||||
import { LlmService } from "./llm.service";
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
|
||||
import type { ChatRequestDto, EmbedRequestDto, ChatResponseDto, EmbedResponseDto } from "./dto";
|
||||
import type {
|
||||
LlmProviderInterface,
|
||||
@@ -14,6 +15,9 @@ describe("LlmService", () => {
|
||||
let mockManagerService: {
|
||||
getDefaultProvider: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
let mockTelemetryTracker: {
|
||||
trackLlmCompletion: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
let mockProvider: {
|
||||
chat: ReturnType<typeof vi.fn>;
|
||||
chatStream: ReturnType<typeof vi.fn>;
|
||||
@@ -41,6 +45,11 @@ describe("LlmService", () => {
|
||||
getDefaultProvider: vi.fn().mockResolvedValue(mockProvider),
|
||||
};
|
||||
|
||||
// Create mock telemetry tracker
|
||||
mockTelemetryTracker = {
|
||||
trackLlmCompletion: vi.fn(),
|
||||
};
|
||||
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
LlmService,
|
||||
@@ -48,6 +57,10 @@ describe("LlmService", () => {
|
||||
provide: LlmManagerService,
|
||||
useValue: mockManagerService,
|
||||
},
|
||||
{
|
||||
provide: LlmTelemetryTrackerService,
|
||||
useValue: mockTelemetryTracker,
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
@@ -135,6 +148,45 @@ describe("LlmService", () => {
|
||||
expect(result).toEqual(response);
|
||||
});
|
||||
|
||||
it("should track telemetry on successful chat", async () => {
|
||||
const response: ChatResponseDto = {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "Hello" },
|
||||
done: true,
|
||||
promptEvalCount: 10,
|
||||
evalCount: 20,
|
||||
};
|
||||
mockProvider.chat.mockResolvedValue(response);
|
||||
|
||||
await service.chat(request, "chat");
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
providerType: "ollama",
|
||||
operation: "chat",
|
||||
inputTokens: 10,
|
||||
outputTokens: 20,
|
||||
callingContext: "chat",
|
||||
success: true,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should track telemetry on failed chat", async () => {
|
||||
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
|
||||
|
||||
await expect(service.chat(request)).rejects.toThrow(ServiceUnavailableException);
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
operation: "chat",
|
||||
success: false,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw ServiceUnavailableException on error", async () => {
|
||||
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
|
||||
|
||||
@@ -177,6 +229,94 @@ describe("LlmService", () => {
|
||||
expect(chunks[1].message.content).toBe(" world");
|
||||
});
|
||||
|
||||
it("should track telemetry after stream completes", async () => {
|
||||
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "Hello" },
|
||||
done: false,
|
||||
};
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: " world" },
|
||||
done: true,
|
||||
promptEvalCount: 5,
|
||||
evalCount: 10,
|
||||
};
|
||||
}
|
||||
|
||||
mockProvider.chatStream.mockReturnValue(mockGenerator());
|
||||
|
||||
const chunks: ChatResponseDto[] = [];
|
||||
for await (const chunk of service.chatStream(request, "brain")) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
providerType: "ollama",
|
||||
operation: "chatStream",
|
||||
inputTokens: 5,
|
||||
outputTokens: 10,
|
||||
callingContext: "brain",
|
||||
success: true,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should estimate tokens when provider does not return counts in stream", async () => {
|
||||
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "Hello world" },
|
||||
done: false,
|
||||
};
|
||||
yield {
|
||||
model: "llama3.2",
|
||||
message: { role: "assistant", content: "" },
|
||||
done: true,
|
||||
};
|
||||
}
|
||||
|
||||
mockProvider.chatStream.mockReturnValue(mockGenerator());
|
||||
|
||||
const chunks: ChatResponseDto[] = [];
|
||||
for await (const chunk of service.chatStream(request)) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
// Should use estimated tokens since no actual counts provided
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
operation: "chatStream",
|
||||
success: true,
|
||||
// Input estimated from "Hi" -> ceil(2/4) = 1
|
||||
inputTokens: 1,
|
||||
// Output estimated from "Hello world" -> ceil(11/4) = 3
|
||||
outputTokens: 3,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should track telemetry on stream failure", async () => {
|
||||
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
throw new Error("Stream failed");
|
||||
}
|
||||
|
||||
mockProvider.chatStream.mockReturnValue(errorGenerator());
|
||||
|
||||
const generator = service.chatStream(request);
|
||||
await expect(generator.next()).rejects.toThrow(ServiceUnavailableException);
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
operation: "chatStream",
|
||||
success: false,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw ServiceUnavailableException on error", async () => {
|
||||
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
|
||||
throw new Error("Stream failed");
|
||||
@@ -210,6 +350,41 @@ describe("LlmService", () => {
|
||||
expect(result).toEqual(response);
|
||||
});
|
||||
|
||||
it("should track telemetry on successful embed", async () => {
|
||||
const response: EmbedResponseDto = {
|
||||
model: "llama3.2",
|
||||
embeddings: [[0.1, 0.2, 0.3]],
|
||||
totalDuration: 500,
|
||||
};
|
||||
mockProvider.embed.mockResolvedValue(response);
|
||||
|
||||
await service.embed(request, "embed");
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: "llama3.2",
|
||||
providerType: "ollama",
|
||||
operation: "embed",
|
||||
outputTokens: 0,
|
||||
callingContext: "embed",
|
||||
success: true,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should track telemetry on failed embed", async () => {
|
||||
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));
|
||||
|
||||
await expect(service.embed(request)).rejects.toThrow(ServiceUnavailableException);
|
||||
|
||||
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
operation: "embed",
|
||||
success: false,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw ServiceUnavailableException on error", async () => {
|
||||
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import { LlmTelemetryTrackerService, estimateTokens } from "./llm-telemetry-tracker.service";
|
||||
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
|
||||
import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface";
|
||||
import type { LlmProviderHealthStatus, LlmProviderType } from "./providers/llm-provider.interface";
|
||||
|
||||
/**
|
||||
* LLM Service
|
||||
*
|
||||
* High-level service for LLM operations. Delegates to providers via LlmManagerService.
|
||||
* Maintains backward compatibility with the original API while supporting multiple providers.
|
||||
* Automatically tracks completions via Mosaic Telemetry (non-blocking).
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
@@ -33,7 +35,10 @@ import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface
|
||||
export class LlmService implements OnModuleInit {
|
||||
private readonly logger = new Logger(LlmService.name);
|
||||
|
||||
constructor(private readonly llmManager: LlmManagerService) {
|
||||
constructor(
|
||||
private readonly llmManager: LlmManagerService,
|
||||
private readonly telemetryTracker: LlmTelemetryTrackerService
|
||||
) {
|
||||
this.logger.log("LLM service initialized");
|
||||
}
|
||||
|
||||
@@ -91,14 +96,45 @@ export class LlmService implements OnModuleInit {
|
||||
* Perform a synchronous chat completion.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @param callingContext - Optional context hint for telemetry task type inference
|
||||
* @returns Complete chat response
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
|
||||
async chat(request: ChatRequestDto, callingContext?: string): Promise<ChatResponseDto> {
|
||||
const startTime = Date.now();
|
||||
let providerType: LlmProviderType = "ollama";
|
||||
|
||||
try {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.chat(request);
|
||||
providerType = provider.type;
|
||||
const response = await provider.chat(request);
|
||||
|
||||
// Fire-and-forget telemetry tracking
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: response.model,
|
||||
providerType,
|
||||
operation: "chat",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: response.promptEvalCount ?? 0,
|
||||
outputTokens: response.evalCount ?? 0,
|
||||
callingContext,
|
||||
success: true,
|
||||
});
|
||||
|
||||
return response;
|
||||
} catch (error: unknown) {
|
||||
// Track failure (fire-and-forget)
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "chat",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
callingContext,
|
||||
success: false,
|
||||
});
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Chat failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`);
|
||||
@@ -107,20 +143,75 @@ export class LlmService implements OnModuleInit {
|
||||
/**
|
||||
* Perform a streaming chat completion.
|
||||
* Yields response chunks as they arrive from the provider.
|
||||
* Aggregates token usage and tracks telemetry after the stream ends.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @param callingContext - Optional context hint for telemetry task type inference
|
||||
* @yields Chat response chunks
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto, void, unknown> {
|
||||
async *chatStream(
|
||||
request: ChatRequestDto,
|
||||
callingContext?: string
|
||||
): AsyncGenerator<ChatResponseDto, void, unknown> {
|
||||
const startTime = Date.now();
|
||||
let providerType: LlmProviderType = "ollama";
|
||||
let aggregatedContent = "";
|
||||
let lastChunkInputTokens = 0;
|
||||
let lastChunkOutputTokens = 0;
|
||||
|
||||
try {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
providerType = provider.type;
|
||||
const stream = provider.chatStream(request);
|
||||
|
||||
for await (const chunk of stream) {
|
||||
// Accumulate content for token estimation
|
||||
aggregatedContent += chunk.message.content;
|
||||
|
||||
// Some providers include token counts on the final chunk
|
||||
if (chunk.promptEvalCount !== undefined) {
|
||||
lastChunkInputTokens = chunk.promptEvalCount;
|
||||
}
|
||||
if (chunk.evalCount !== undefined) {
|
||||
lastChunkOutputTokens = chunk.evalCount;
|
||||
}
|
||||
|
||||
yield chunk;
|
||||
}
|
||||
|
||||
// After stream completes, track telemetry
|
||||
// Use actual token counts if available, otherwise estimate from content length
|
||||
const inputTokens =
|
||||
lastChunkInputTokens > 0
|
||||
? lastChunkInputTokens
|
||||
: estimateTokens(request.messages.map((m) => m.content).join(" "));
|
||||
const outputTokens =
|
||||
lastChunkOutputTokens > 0 ? lastChunkOutputTokens : estimateTokens(aggregatedContent);
|
||||
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "chatStream",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
callingContext,
|
||||
success: true,
|
||||
});
|
||||
} catch (error: unknown) {
|
||||
// Track failure (fire-and-forget)
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "chatStream",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
callingContext,
|
||||
success: false,
|
||||
});
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Stream failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`);
|
||||
@@ -130,14 +221,48 @@ export class LlmService implements OnModuleInit {
|
||||
* Generate embeddings for the given input texts.
|
||||
*
|
||||
* @param request - Embedding request with model and input texts
|
||||
* @param callingContext - Optional context hint for telemetry task type inference
|
||||
* @returns Embeddings response with vector arrays
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
|
||||
async embed(request: EmbedRequestDto, callingContext?: string): Promise<EmbedResponseDto> {
|
||||
const startTime = Date.now();
|
||||
let providerType: LlmProviderType = "ollama";
|
||||
|
||||
try {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.embed(request);
|
||||
providerType = provider.type;
|
||||
const response = await provider.embed(request);
|
||||
|
||||
// Estimate input tokens from the input text
|
||||
const inputTokens = estimateTokens(request.input.join(" "));
|
||||
|
||||
// Fire-and-forget telemetry tracking
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: response.model,
|
||||
providerType,
|
||||
operation: "embed",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens,
|
||||
outputTokens: 0, // Embeddings don't produce output tokens
|
||||
callingContext,
|
||||
success: true,
|
||||
});
|
||||
|
||||
return response;
|
||||
} catch (error: unknown) {
|
||||
// Track failure (fire-and-forget)
|
||||
this.telemetryTracker.trackLlmCompletion({
|
||||
model: request.model,
|
||||
providerType,
|
||||
operation: "embed",
|
||||
durationMs: Date.now() - startTime,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
callingContext,
|
||||
success: false,
|
||||
});
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Embed failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);
|
||||
|
||||
17
apps/api/src/mosaic-telemetry/index.ts
Normal file
17
apps/api/src/mosaic-telemetry/index.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* Mosaic Telemetry module — task completion tracking and crowd-sourced predictions.
|
||||
*
|
||||
* **Not to be confused with the OpenTelemetry (OTEL) TelemetryModule** at
|
||||
* `src/telemetry/`, which handles distributed request tracing.
|
||||
*
|
||||
* @module mosaic-telemetry
|
||||
*/
|
||||
|
||||
export { MosaicTelemetryModule } from "./mosaic-telemetry.module";
|
||||
export { MosaicTelemetryService } from "./mosaic-telemetry.service";
|
||||
export {
|
||||
loadMosaicTelemetryConfig,
|
||||
toSdkConfig,
|
||||
MOSAIC_TELEMETRY_ENV,
|
||||
type MosaicTelemetryModuleConfig,
|
||||
} from "./mosaic-telemetry.config";
|
||||
78
apps/api/src/mosaic-telemetry/mosaic-telemetry.config.ts
Normal file
78
apps/api/src/mosaic-telemetry/mosaic-telemetry.config.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import type { ConfigService } from "@nestjs/config";
|
||||
import type { TelemetryConfig } from "@mosaicstack/telemetry-client";
|
||||
|
||||
/**
|
||||
* Configuration interface for the Mosaic Telemetry module.
|
||||
* Maps environment variables to SDK configuration.
|
||||
*/
|
||||
export interface MosaicTelemetryModuleConfig {
|
||||
/** Whether telemetry collection is enabled. Default: true */
|
||||
enabled: boolean;
|
||||
/** Base URL of the telemetry server */
|
||||
serverUrl: string;
|
||||
/** API key for authentication (64-char hex string) */
|
||||
apiKey: string;
|
||||
/** Instance UUID for this client */
|
||||
instanceId: string;
|
||||
/** If true, log events instead of sending them. Default: false */
|
||||
dryRun: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Environment variable names used by the Mosaic Telemetry module.
|
||||
*/
|
||||
export const MOSAIC_TELEMETRY_ENV = {
|
||||
ENABLED: "MOSAIC_TELEMETRY_ENABLED",
|
||||
SERVER_URL: "MOSAIC_TELEMETRY_SERVER_URL",
|
||||
API_KEY: "MOSAIC_TELEMETRY_API_KEY",
|
||||
INSTANCE_ID: "MOSAIC_TELEMETRY_INSTANCE_ID",
|
||||
DRY_RUN: "MOSAIC_TELEMETRY_DRY_RUN",
|
||||
} as const;
|
||||
|
||||
/**
|
||||
* Read Mosaic Telemetry configuration from environment variables via NestJS ConfigService.
|
||||
*
|
||||
* @param configService - NestJS ConfigService instance
|
||||
* @returns Parsed module configuration
|
||||
*/
|
||||
export function loadMosaicTelemetryConfig(
|
||||
configService: ConfigService
|
||||
): MosaicTelemetryModuleConfig {
|
||||
const enabledRaw = configService.get<string>(MOSAIC_TELEMETRY_ENV.ENABLED, "true");
|
||||
const dryRunRaw = configService.get<string>(MOSAIC_TELEMETRY_ENV.DRY_RUN, "false");
|
||||
|
||||
return {
|
||||
enabled: enabledRaw.toLowerCase() === "true",
|
||||
serverUrl: configService.get<string>(MOSAIC_TELEMETRY_ENV.SERVER_URL, ""),
|
||||
apiKey: configService.get<string>(MOSAIC_TELEMETRY_ENV.API_KEY, ""),
|
||||
instanceId: configService.get<string>(MOSAIC_TELEMETRY_ENV.INSTANCE_ID, ""),
|
||||
dryRun: dryRunRaw.toLowerCase() === "true",
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert module config to SDK TelemetryConfig format.
|
||||
* Includes the onError callback for NestJS Logger integration.
|
||||
*
|
||||
* @param config - Module configuration
|
||||
* @param onError - Error callback (typically NestJS Logger)
|
||||
* @returns SDK-compatible TelemetryConfig
|
||||
*/
|
||||
export function toSdkConfig(
|
||||
config: MosaicTelemetryModuleConfig,
|
||||
onError?: (error: Error) => void
|
||||
): TelemetryConfig {
|
||||
const sdkConfig: TelemetryConfig = {
|
||||
serverUrl: config.serverUrl,
|
||||
apiKey: config.apiKey,
|
||||
instanceId: config.instanceId,
|
||||
enabled: config.enabled,
|
||||
dryRun: config.dryRun,
|
||||
};
|
||||
|
||||
if (onError) {
|
||||
sdkConfig.onError = onError;
|
||||
}
|
||||
|
||||
return sdkConfig;
|
||||
}
|
||||
92
apps/api/src/mosaic-telemetry/mosaic-telemetry.controller.ts
Normal file
92
apps/api/src/mosaic-telemetry/mosaic-telemetry.controller.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import { Controller, Get, Query, UseGuards, BadRequestException } from "@nestjs/common";
|
||||
import { AuthGuard } from "../auth/guards/auth.guard";
|
||||
import { PredictionService } from "./prediction.service";
|
||||
import {
|
||||
TaskType,
|
||||
Complexity,
|
||||
Provider,
|
||||
type PredictionResponse,
|
||||
} from "@mosaicstack/telemetry-client";
|
||||
|
||||
/**
|
||||
* Valid values for query parameter validation.
|
||||
*/
|
||||
const VALID_TASK_TYPES = new Set<string>(Object.values(TaskType));
|
||||
const VALID_COMPLEXITIES = new Set<string>(Object.values(Complexity));
|
||||
const VALID_PROVIDERS = new Set<string>(Object.values(Provider));
|
||||
|
||||
/**
|
||||
* Response DTO for the estimate endpoint.
|
||||
*/
|
||||
interface EstimateResponseDto {
|
||||
data: PredictionResponse | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mosaic Telemetry Controller
|
||||
*
|
||||
* Provides API endpoints for accessing telemetry prediction data.
|
||||
* All endpoints require authentication via AuthGuard.
|
||||
*
|
||||
* This controller is intentionally lightweight - it delegates to PredictionService
|
||||
* for the actual prediction logic and returns results directly to the frontend.
|
||||
*/
|
||||
@Controller("telemetry")
|
||||
@UseGuards(AuthGuard)
|
||||
export class MosaicTelemetryController {
|
||||
constructor(private readonly predictionService: PredictionService) {}
|
||||
|
||||
/**
|
||||
* GET /api/telemetry/estimate
|
||||
*
|
||||
* Get a cost/token estimate for a given task configuration.
|
||||
* Returns prediction data including confidence level, or null if
|
||||
* no prediction is available.
|
||||
*
|
||||
* @param taskType - Task type enum value (e.g. "implementation", "planning")
|
||||
* @param model - Model name (e.g. "claude-sonnet-4-5")
|
||||
* @param provider - Provider enum value (e.g. "anthropic", "openai")
|
||||
* @param complexity - Complexity level (e.g. "low", "medium", "high")
|
||||
* @returns Prediction response with estimates and confidence
|
||||
*/
|
||||
@Get("estimate")
|
||||
getEstimate(
|
||||
@Query("taskType") taskType: string,
|
||||
@Query("model") model: string,
|
||||
@Query("provider") provider: string,
|
||||
@Query("complexity") complexity: string
|
||||
): EstimateResponseDto {
|
||||
if (!taskType || !model || !provider || !complexity) {
|
||||
throw new BadRequestException(
|
||||
"Missing query parameters. Required: taskType, model, provider, complexity"
|
||||
);
|
||||
}
|
||||
|
||||
if (!VALID_TASK_TYPES.has(taskType)) {
|
||||
throw new BadRequestException(
|
||||
`Invalid taskType "${taskType}". Valid values: ${[...VALID_TASK_TYPES].join(", ")}`
|
||||
);
|
||||
}
|
||||
|
||||
if (!VALID_PROVIDERS.has(provider)) {
|
||||
throw new BadRequestException(
|
||||
`Invalid provider "${provider}". Valid values: ${[...VALID_PROVIDERS].join(", ")}`
|
||||
);
|
||||
}
|
||||
|
||||
if (!VALID_COMPLEXITIES.has(complexity)) {
|
||||
throw new BadRequestException(
|
||||
`Invalid complexity "${complexity}". Valid values: ${[...VALID_COMPLEXITIES].join(", ")}`
|
||||
);
|
||||
}
|
||||
|
||||
const prediction = this.predictionService.getEstimate(
|
||||
taskType as TaskType,
|
||||
model,
|
||||
provider as Provider,
|
||||
complexity as Complexity
|
||||
);
|
||||
|
||||
return { data: prediction };
|
||||
}
|
||||
}
|
||||
212
apps/api/src/mosaic-telemetry/mosaic-telemetry.module.spec.ts
Normal file
212
apps/api/src/mosaic-telemetry/mosaic-telemetry.module.spec.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { Test, TestingModule } from "@nestjs/testing";
|
||||
import { ConfigModule } from "@nestjs/config";
|
||||
import { MosaicTelemetryModule } from "./mosaic-telemetry.module";
|
||||
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
|
||||
|
||||
// Mock the telemetry client to avoid real HTTP calls
|
||||
vi.mock("@mosaicstack/telemetry-client", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("@mosaicstack/telemetry-client")>();
|
||||
|
||||
class MockTelemetryClient {
|
||||
private _isRunning = false;
|
||||
|
||||
constructor(_config: unknown) {
|
||||
// no-op
|
||||
}
|
||||
|
||||
get eventBuilder() {
|
||||
return { build: vi.fn().mockReturnValue({ event_id: "test-event-id" }) };
|
||||
}
|
||||
|
||||
start(): void {
|
||||
this._isRunning = true;
|
||||
}
|
||||
|
||||
async stop(): Promise<void> {
|
||||
this._isRunning = false;
|
||||
}
|
||||
|
||||
track(_event: unknown): void {
|
||||
// no-op
|
||||
}
|
||||
|
||||
getPrediction(_query: unknown): unknown {
|
||||
return null;
|
||||
}
|
||||
|
||||
async refreshPredictions(_queries: unknown): Promise<void> {
|
||||
// no-op
|
||||
}
|
||||
|
||||
get queueSize(): number {
|
||||
return 0;
|
||||
}
|
||||
|
||||
get isRunning(): boolean {
|
||||
return this._isRunning;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...actual,
|
||||
TelemetryClient: MockTelemetryClient,
|
||||
};
|
||||
});
|
||||
|
||||
describe("MosaicTelemetryModule", () => {
|
||||
let module: TestingModule;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("module initialization", () => {
|
||||
it("should compile the module successfully", async () => {
|
||||
module = await Test.createTestingModule({
|
||||
imports: [
|
||||
ConfigModule.forRoot({
|
||||
isGlobal: true,
|
||||
envFilePath: [],
|
||||
load: [
|
||||
() => ({
|
||||
MOSAIC_TELEMETRY_ENABLED: "false",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
}).compile();
|
||||
|
||||
expect(module).toBeDefined();
|
||||
await module.close();
|
||||
});
|
||||
|
||||
it("should provide MosaicTelemetryService", async () => {
|
||||
module = await Test.createTestingModule({
|
||||
imports: [
|
||||
ConfigModule.forRoot({
|
||||
isGlobal: true,
|
||||
envFilePath: [],
|
||||
load: [
|
||||
() => ({
|
||||
MOSAIC_TELEMETRY_ENABLED: "false",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
}).compile();
|
||||
|
||||
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
|
||||
expect(service).toBeDefined();
|
||||
expect(service).toBeInstanceOf(MosaicTelemetryService);
|
||||
|
||||
await module.close();
|
||||
});
|
||||
|
||||
it("should export MosaicTelemetryService for injection in other modules", async () => {
|
||||
module = await Test.createTestingModule({
|
||||
imports: [
|
||||
ConfigModule.forRoot({
|
||||
isGlobal: true,
|
||||
envFilePath: [],
|
||||
load: [
|
||||
() => ({
|
||||
MOSAIC_TELEMETRY_ENABLED: "false",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
}).compile();
|
||||
|
||||
const service = module.get(MosaicTelemetryService);
|
||||
expect(service).toBeDefined();
|
||||
|
||||
await module.close();
|
||||
});
|
||||
});
|
||||
|
||||
describe("lifecycle integration", () => {
|
||||
it("should initialize service on module init when enabled", async () => {
|
||||
module = await Test.createTestingModule({
|
||||
imports: [
|
||||
ConfigModule.forRoot({
|
||||
isGlobal: true,
|
||||
envFilePath: [],
|
||||
load: [
|
||||
() => ({
|
||||
MOSAIC_TELEMETRY_ENABLED: "true",
|
||||
MOSAIC_TELEMETRY_SERVER_URL: "https://tel.test.local",
|
||||
MOSAIC_TELEMETRY_API_KEY: "a".repeat(64),
|
||||
MOSAIC_TELEMETRY_INSTANCE_ID: "550e8400-e29b-41d4-a716-446655440000",
|
||||
MOSAIC_TELEMETRY_DRY_RUN: "false",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
}).compile();
|
||||
|
||||
await module.init();
|
||||
|
||||
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
|
||||
expect(service.isEnabled).toBe(true);
|
||||
|
||||
await module.close();
|
||||
});
|
||||
|
||||
it("should not start client when disabled via env", async () => {
|
||||
module = await Test.createTestingModule({
|
||||
imports: [
|
||||
ConfigModule.forRoot({
|
||||
isGlobal: true,
|
||||
envFilePath: [],
|
||||
load: [
|
||||
() => ({
|
||||
MOSAIC_TELEMETRY_ENABLED: "false",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
}).compile();
|
||||
|
||||
await module.init();
|
||||
|
||||
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
|
||||
expect(service.isEnabled).toBe(false);
|
||||
|
||||
await module.close();
|
||||
});
|
||||
|
||||
it("should cleanly shut down on module destroy", async () => {
|
||||
module = await Test.createTestingModule({
|
||||
imports: [
|
||||
ConfigModule.forRoot({
|
||||
isGlobal: true,
|
||||
envFilePath: [],
|
||||
load: [
|
||||
() => ({
|
||||
MOSAIC_TELEMETRY_ENABLED: "true",
|
||||
MOSAIC_TELEMETRY_SERVER_URL: "https://tel.test.local",
|
||||
MOSAIC_TELEMETRY_API_KEY: "a".repeat(64),
|
||||
MOSAIC_TELEMETRY_INSTANCE_ID: "550e8400-e29b-41d4-a716-446655440000",
|
||||
MOSAIC_TELEMETRY_DRY_RUN: "false",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
MosaicTelemetryModule,
|
||||
],
|
||||
}).compile();
|
||||
|
||||
await module.init();
|
||||
|
||||
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
|
||||
expect(service.isEnabled).toBe(true);
|
||||
|
||||
await expect(module.close()).resolves.not.toThrow();
|
||||
});
|
||||
});
|
||||
});
|
||||
41
apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts
Normal file
41
apps/api/src/mosaic-telemetry/mosaic-telemetry.module.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
import { Module, Global } from "@nestjs/common";
|
||||
import { ConfigModule } from "@nestjs/config";
|
||||
import { AuthModule } from "../auth/auth.module";
|
||||
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
|
||||
import { PredictionService } from "./prediction.service";
|
||||
import { MosaicTelemetryController } from "./mosaic-telemetry.controller";
|
||||
|
||||
/**
|
||||
* Global module providing Mosaic Telemetry integration via @mosaicstack/telemetry-client.
|
||||
*
|
||||
* Tracks task completion events and provides crowd-sourced predictions for
|
||||
* token usage, cost estimation, and quality metrics.
|
||||
*
|
||||
* **This is separate from the OpenTelemetry (OTEL) TelemetryModule** which
|
||||
* handles distributed request tracing. This module is specifically for
|
||||
* Mosaic Stack's own telemetry aggregation service.
|
||||
*
|
||||
* Configuration via environment variables:
|
||||
* - MOSAIC_TELEMETRY_ENABLED (boolean, default: true)
|
||||
* - MOSAIC_TELEMETRY_SERVER_URL (string)
|
||||
* - MOSAIC_TELEMETRY_API_KEY (string, 64-char hex)
|
||||
* - MOSAIC_TELEMETRY_INSTANCE_ID (string, UUID)
|
||||
* - MOSAIC_TELEMETRY_DRY_RUN (boolean, default: false)
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* // In any service (no need to import module — it's global):
|
||||
* @Injectable()
|
||||
* export class MyService {
|
||||
* constructor(private readonly telemetry: MosaicTelemetryService) {}
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
@Global()
|
||||
@Module({
|
||||
imports: [ConfigModule, AuthModule],
|
||||
controllers: [MosaicTelemetryController],
|
||||
providers: [MosaicTelemetryService, PredictionService],
|
||||
exports: [MosaicTelemetryService, PredictionService],
|
||||
})
|
||||
export class MosaicTelemetryModule {}
|
||||
504
apps/api/src/mosaic-telemetry/mosaic-telemetry.service.spec.ts
Normal file
504
apps/api/src/mosaic-telemetry/mosaic-telemetry.service.spec.ts
Normal file
@@ -0,0 +1,504 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { ConfigService } from "@nestjs/config";
|
||||
import { MOSAIC_TELEMETRY_ENV } from "./mosaic-telemetry.config";
|
||||
import type {
|
||||
TaskCompletionEvent,
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
} from "@mosaicstack/telemetry-client";
|
||||
import { TaskType, Complexity, Provider, Outcome } from "@mosaicstack/telemetry-client";
|
||||
|
||||
// Track mock instances created during tests
|
||||
const mockStartFn = vi.fn();
|
||||
const mockStopFn = vi.fn().mockResolvedValue(undefined);
|
||||
const mockTrackFn = vi.fn();
|
||||
const mockGetPredictionFn = vi.fn().mockReturnValue(null);
|
||||
const mockRefreshPredictionsFn = vi.fn().mockResolvedValue(undefined);
|
||||
const mockBuildFn = vi.fn().mockReturnValue({ event_id: "test-event-id" });
|
||||
|
||||
vi.mock("@mosaicstack/telemetry-client", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("@mosaicstack/telemetry-client")>();
|
||||
|
||||
class MockTelemetryClient {
|
||||
private _isRunning = false;
|
||||
|
||||
constructor(_config: unknown) {
|
||||
// no-op
|
||||
}
|
||||
|
||||
get eventBuilder() {
|
||||
return { build: mockBuildFn };
|
||||
}
|
||||
|
||||
start(): void {
|
||||
this._isRunning = true;
|
||||
mockStartFn();
|
||||
}
|
||||
|
||||
async stop(): Promise<void> {
|
||||
this._isRunning = false;
|
||||
await mockStopFn();
|
||||
}
|
||||
|
||||
track(event: unknown): void {
|
||||
mockTrackFn(event);
|
||||
}
|
||||
|
||||
getPrediction(query: unknown): unknown {
|
||||
return mockGetPredictionFn(query);
|
||||
}
|
||||
|
||||
async refreshPredictions(queries: unknown): Promise<void> {
|
||||
await mockRefreshPredictionsFn(queries);
|
||||
}
|
||||
|
||||
get queueSize(): number {
|
||||
return 0;
|
||||
}
|
||||
|
||||
get isRunning(): boolean {
|
||||
return this._isRunning;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...actual,
|
||||
TelemetryClient: MockTelemetryClient,
|
||||
};
|
||||
});
|
||||
|
||||
// Lazy-import the service after the mock is in place
|
||||
const { MosaicTelemetryService } = await import("./mosaic-telemetry.service");
|
||||
|
||||
/**
|
||||
* Create a ConfigService mock that returns environment values from the provided map.
|
||||
*/
|
||||
function createConfigService(envMap: Record<string, string | undefined> = {}): ConfigService {
|
||||
const configService = {
|
||||
get: vi.fn((key: string, defaultValue?: string): string => {
|
||||
const value = envMap[key];
|
||||
if (value !== undefined) {
|
||||
return value;
|
||||
}
|
||||
return defaultValue ?? "";
|
||||
}),
|
||||
} as unknown as ConfigService;
|
||||
return configService;
|
||||
}
|
||||
|
||||
/**
 * Baseline environment map for a fully-enabled telemetry service.
 * Individual tests spread this object and override single keys to probe
 * specific misconfiguration paths (missing URL, missing key, etc.).
 */
const ENABLED_CONFIG: Record<string, string> = {
  [MOSAIC_TELEMETRY_ENV.ENABLED]: "true",
  [MOSAIC_TELEMETRY_ENV.SERVER_URL]: "https://tel.test.local",
  // 64-character hex string, matching the documented API-key format
  [MOSAIC_TELEMETRY_ENV.API_KEY]: "a".repeat(64),
  [MOSAIC_TELEMETRY_ENV.INSTANCE_ID]: "550e8400-e29b-41d4-a716-446655440000",
  [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "false",
};
||||
|
||||
/**
|
||||
* Create a minimal TaskCompletionEvent for testing.
|
||||
*/
|
||||
function createTestEvent(): TaskCompletionEvent {
|
||||
return {
|
||||
schema_version: "1.0.0",
|
||||
event_id: "test-event-123",
|
||||
timestamp: new Date().toISOString(),
|
||||
instance_id: "550e8400-e29b-41d4-a716-446655440000",
|
||||
task_duration_ms: 5000,
|
||||
task_type: TaskType.FEATURE,
|
||||
complexity: Complexity.MEDIUM,
|
||||
harness: "claude-code" as TaskCompletionEvent["harness"],
|
||||
model: "claude-sonnet-4-20250514",
|
||||
provider: Provider.ANTHROPIC,
|
||||
estimated_input_tokens: 1000,
|
||||
estimated_output_tokens: 500,
|
||||
actual_input_tokens: 1100,
|
||||
actual_output_tokens: 450,
|
||||
estimated_cost_usd_micros: 5000,
|
||||
actual_cost_usd_micros: 4800,
|
||||
quality_gate_passed: true,
|
||||
quality_gates_run: [],
|
||||
quality_gates_failed: [],
|
||||
context_compactions: 0,
|
||||
context_rotations: 0,
|
||||
context_utilization_final: 0.45,
|
||||
outcome: Outcome.SUCCESS,
|
||||
retry_count: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Unit tests for MosaicTelemetryService. The SDK's TelemetryClient is replaced
// by the vi.mock factory above, so assertions target the shared spy functions
// (mockStartFn, mockStopFn, mockTrackFn, ...) rather than real network activity.
describe("MosaicTelemetryService", () => {
  let service: InstanceType<typeof MosaicTelemetryService>;

  afterEach(async () => {
    // Always tear the client down so background state never leaks between tests.
    if (service) {
      await service.onModuleDestroy();
    }
    vi.clearAllMocks();
  });

  // ---------- lifecycle: startup ----------
  describe("onModuleInit", () => {
    it("should initialize the client when enabled with valid config", () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);

      service.onModuleInit();

      expect(mockStartFn).toHaveBeenCalledOnce();
      expect(service.isEnabled).toBe(true);
    });

    it("should not initialize client when disabled", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);

      service.onModuleInit();

      expect(mockStartFn).not.toHaveBeenCalled();
      expect(service.isEnabled).toBe(false);
    });

    it("should disable when server URL is missing", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.SERVER_URL]: "",
      });
      service = new MosaicTelemetryService(configService);

      service.onModuleInit();

      expect(service.isEnabled).toBe(false);
    });

    it("should disable when API key is missing", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.API_KEY]: "",
      });
      service = new MosaicTelemetryService(configService);

      service.onModuleInit();

      expect(service.isEnabled).toBe(false);
    });

    it("should disable when instance ID is missing", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.INSTANCE_ID]: "",
      });
      service = new MosaicTelemetryService(configService);

      service.onModuleInit();

      expect(service.isEnabled).toBe(false);
    });

    it("should log dry-run mode when configured", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true",
      });
      service = new MosaicTelemetryService(configService);

      service.onModuleInit();

      expect(mockStartFn).toHaveBeenCalledOnce();
    });
  });

  // ---------- lifecycle: shutdown ----------
  describe("onModuleDestroy", () => {
    it("should stop the client on shutdown", async () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      await service.onModuleDestroy();

      expect(mockStopFn).toHaveBeenCalledOnce();
    });

    it("should not throw when client is not initialized (disabled)", async () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      await expect(service.onModuleDestroy()).resolves.not.toThrow();
    });

    it("should not throw when called multiple times", async () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      await service.onModuleDestroy();
      await expect(service.onModuleDestroy()).resolves.not.toThrow();
    });
  });

  // ---------- event tracking ----------
  describe("trackTaskCompletion", () => {
    it("should queue event via client.track() when enabled", () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      const event = createTestEvent();
      service.trackTaskCompletion(event);

      expect(mockTrackFn).toHaveBeenCalledWith(event);
    });

    it("should be a no-op when disabled", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      const event = createTestEvent();
      service.trackTaskCompletion(event);

      expect(mockTrackFn).not.toHaveBeenCalled();
    });
  });

  // ---------- prediction reads ----------
  describe("getPrediction", () => {
    const testQuery: PredictionQuery = {
      task_type: TaskType.FEATURE,
      model: "claude-sonnet-4-20250514",
      provider: Provider.ANTHROPIC,
      complexity: Complexity.MEDIUM,
    };

    it("should return cached prediction when available", () => {
      const mockPrediction: PredictionResponse = {
        prediction: {
          input_tokens: { p10: 100, p25: 200, median: 300, p75: 400, p90: 500 },
          output_tokens: { p10: 50, p25: 100, median: 150, p75: 200, p90: 250 },
          cost_usd_micros: { median: 5000 },
          duration_ms: { median: 10000 },
          correction_factors: { input: 1.0, output: 1.0 },
          quality: { gate_pass_rate: 0.95, success_rate: 0.9 },
        },
        metadata: {
          sample_size: 100,
          fallback_level: 0,
          confidence: "high",
          last_updated: new Date().toISOString(),
          cache_hit: true,
        },
      };

      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      mockGetPredictionFn.mockReturnValueOnce(mockPrediction);

      const result = service.getPrediction(testQuery);

      expect(result).toEqual(mockPrediction);
      expect(mockGetPredictionFn).toHaveBeenCalledWith(testQuery);
    });

    it("should return null when disabled", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      const result = service.getPrediction(testQuery);

      expect(result).toBeNull();
    });

    it("should return null when no cached prediction exists", () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      mockGetPredictionFn.mockReturnValueOnce(null);

      const result = service.getPrediction(testQuery);

      expect(result).toBeNull();
    });
  });

  // ---------- prediction refresh ----------
  describe("refreshPredictions", () => {
    const testQueries: PredictionQuery[] = [
      {
        task_type: TaskType.FEATURE,
        model: "claude-sonnet-4-20250514",
        provider: Provider.ANTHROPIC,
        complexity: Complexity.MEDIUM,
      },
    ];

    it("should call client.refreshPredictions when enabled", async () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      await service.refreshPredictions(testQueries);

      expect(mockRefreshPredictionsFn).toHaveBeenCalledWith(testQueries);
    });

    it("should be a no-op when disabled", async () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      await service.refreshPredictions(testQueries);

      expect(mockRefreshPredictionsFn).not.toHaveBeenCalled();
    });
  });

  // ---------- accessors ----------
  describe("eventBuilder", () => {
    it("should return EventBuilder when enabled", () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      const builder = service.eventBuilder;

      expect(builder).toBeDefined();
      expect(builder).not.toBeNull();
      expect(typeof builder?.build).toBe("function");
    });

    it("should return null when disabled", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      const builder = service.eventBuilder;

      expect(builder).toBeNull();
    });
  });

  describe("isEnabled", () => {
    it("should return true when client is running", () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      expect(service.isEnabled).toBe(true);
    });

    it("should return false when disabled", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      expect(service.isEnabled).toBe(false);
    });
  });

  describe("queueSize", () => {
    it("should return 0 when disabled", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      expect(service.queueSize).toBe(0);
    });

    it("should delegate to client.queueSize when enabled", () => {
      const configService = createConfigService(ENABLED_CONFIG);
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      // The mocked client always reports an empty queue.
      expect(service.queueSize).toBe(0);
    });
  });

  // ---------- disabled-mode safety net ----------
  describe("disabled mode (comprehensive)", () => {
    beforeEach(() => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();
    });

    it("should not make any HTTP calls when disabled", () => {
      const event = createTestEvent();
      service.trackTaskCompletion(event);

      expect(mockTrackFn).not.toHaveBeenCalled();
      expect(mockStartFn).not.toHaveBeenCalled();
    });

    it("should safely handle all method calls when disabled", async () => {
      expect(() => service.trackTaskCompletion(createTestEvent())).not.toThrow();
      expect(
        service.getPrediction({
          task_type: TaskType.FEATURE,
          model: "test",
          provider: Provider.ANTHROPIC,
          complexity: Complexity.LOW,
        })
      ).toBeNull();
      await expect(service.refreshPredictions([])).resolves.not.toThrow();
      expect(service.eventBuilder).toBeNull();
      expect(service.isEnabled).toBe(false);
      expect(service.queueSize).toBe(0);
    });
  });

  // ---------- dry-run mode ----------
  describe("dry-run mode", () => {
    it("should create client in dry-run mode", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      expect(mockStartFn).toHaveBeenCalledOnce();
      expect(service.isEnabled).toBe(true);
    });

    it("should accept events in dry-run mode", () => {
      const configService = createConfigService({
        ...ENABLED_CONFIG,
        [MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true",
      });
      service = new MosaicTelemetryService(configService);
      service.onModuleInit();

      const event = createTestEvent();
      service.trackTaskCompletion(event);

      expect(mockTrackFn).toHaveBeenCalledWith(event);
    });
  });
});
164
apps/api/src/mosaic-telemetry/mosaic-telemetry.service.ts
Normal file
164
apps/api/src/mosaic-telemetry/mosaic-telemetry.service.ts
Normal file
@@ -0,0 +1,164 @@
|
||||
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from "@nestjs/common";
|
||||
import { ConfigService } from "@nestjs/config";
|
||||
import {
|
||||
TelemetryClient,
|
||||
type TaskCompletionEvent,
|
||||
type PredictionQuery,
|
||||
type PredictionResponse,
|
||||
type EventBuilder,
|
||||
} from "@mosaicstack/telemetry-client";
|
||||
import {
|
||||
loadMosaicTelemetryConfig,
|
||||
toSdkConfig,
|
||||
type MosaicTelemetryModuleConfig,
|
||||
} from "./mosaic-telemetry.config";
|
||||
|
||||
/**
|
||||
* NestJS service wrapping the @mosaicstack/telemetry-client SDK.
|
||||
*
|
||||
* Provides convenience methods for tracking task completions and reading
|
||||
* crowd-sourced predictions. When telemetry is disabled via
|
||||
* MOSAIC_TELEMETRY_ENABLED=false, all methods are safe no-ops.
|
||||
*
|
||||
* This service is provided globally by MosaicTelemetryModule — any service
|
||||
* can inject it without importing the module explicitly.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* @Injectable()
|
||||
* export class TasksService {
|
||||
* constructor(private readonly telemetry: MosaicTelemetryService) {}
|
||||
*
|
||||
* async completeTask(taskId: string): Promise<void> {
|
||||
* // ... complete the task ...
|
||||
* const event = this.telemetry.eventBuilder.build({ ... });
|
||||
* this.telemetry.trackTaskCompletion(event);
|
||||
* }
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
@Injectable()
|
||||
export class MosaicTelemetryService implements OnModuleInit, OnModuleDestroy {
|
||||
private readonly logger = new Logger(MosaicTelemetryService.name);
|
||||
private client: TelemetryClient | null = null;
|
||||
private config: MosaicTelemetryModuleConfig | null = null;
|
||||
|
||||
constructor(private readonly configService: ConfigService) {}
|
||||
|
||||
/**
|
||||
* Initialize the telemetry client on module startup.
|
||||
* Reads configuration from environment variables and starts background submission.
|
||||
*/
|
||||
onModuleInit(): void {
|
||||
this.config = loadMosaicTelemetryConfig(this.configService);
|
||||
|
||||
if (!this.config.enabled) {
|
||||
this.logger.log("Mosaic Telemetry is disabled");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.config.serverUrl || !this.config.apiKey || !this.config.instanceId) {
|
||||
this.logger.warn(
|
||||
"Mosaic Telemetry is enabled but missing configuration " +
|
||||
"(MOSAIC_TELEMETRY_SERVER_URL, MOSAIC_TELEMETRY_API_KEY, or MOSAIC_TELEMETRY_INSTANCE_ID). " +
|
||||
"Telemetry will remain disabled."
|
||||
);
|
||||
this.config = { ...this.config, enabled: false };
|
||||
return;
|
||||
}
|
||||
|
||||
const sdkConfig = toSdkConfig(this.config, (error: Error) => {
|
||||
this.logger.error(`Telemetry client error: ${error.message}`, error.stack);
|
||||
});
|
||||
|
||||
this.client = new TelemetryClient(sdkConfig);
|
||||
this.client.start();
|
||||
|
||||
const mode = this.config.dryRun ? "dry-run" : "live";
|
||||
this.logger.log(`Mosaic Telemetry client started (${mode}) -> ${this.config.serverUrl}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the telemetry client on module shutdown.
|
||||
* Flushes any remaining queued events before stopping.
|
||||
*/
|
||||
async onModuleDestroy(): Promise<void> {
|
||||
if (this.client) {
|
||||
this.logger.log("Stopping Mosaic Telemetry client...");
|
||||
await this.client.stop();
|
||||
this.client = null;
|
||||
this.logger.log("Mosaic Telemetry client stopped");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Queue a task completion event for batch submission.
|
||||
* No-op when telemetry is disabled.
|
||||
*
|
||||
* @param event - The task completion event to track
|
||||
*/
|
||||
trackTaskCompletion(event: TaskCompletionEvent): void {
|
||||
if (!this.client) {
|
||||
return;
|
||||
}
|
||||
this.client.track(event);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a cached prediction for the given query.
|
||||
* Returns null when telemetry is disabled or if not cached/expired.
|
||||
*
|
||||
* @param query - The prediction query parameters
|
||||
* @returns Cached prediction response, or null
|
||||
*/
|
||||
getPrediction(query: PredictionQuery): PredictionResponse | null {
|
||||
if (!this.client) {
|
||||
return null;
|
||||
}
|
||||
return this.client.getPrediction(query);
|
||||
}
|
||||
|
||||
/**
|
||||
* Force-refresh predictions from the telemetry server.
|
||||
* No-op when telemetry is disabled.
|
||||
*
|
||||
* @param queries - Array of prediction queries to refresh
|
||||
*/
|
||||
async refreshPredictions(queries: PredictionQuery[]): Promise<void> {
|
||||
if (!this.client) {
|
||||
return;
|
||||
}
|
||||
await this.client.refreshPredictions(queries);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the EventBuilder for constructing TaskCompletionEvent objects.
|
||||
* Returns null when telemetry is disabled.
|
||||
*
|
||||
* @returns EventBuilder instance, or null if disabled
|
||||
*/
|
||||
get eventBuilder(): EventBuilder | null {
|
||||
if (!this.client) {
|
||||
return null;
|
||||
}
|
||||
return this.client.eventBuilder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the telemetry client is currently active and running.
|
||||
*/
|
||||
get isEnabled(): boolean {
|
||||
return this.client?.isRunning ?? false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of events currently queued for submission.
|
||||
* Returns 0 when telemetry is disabled.
|
||||
*/
|
||||
get queueSize(): number {
|
||||
if (!this.client) {
|
||||
return 0;
|
||||
}
|
||||
return this.client.queueSize;
|
||||
}
|
||||
}
|
||||
297
apps/api/src/mosaic-telemetry/prediction.service.spec.ts
Normal file
297
apps/api/src/mosaic-telemetry/prediction.service.spec.ts
Normal file
@@ -0,0 +1,297 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { Test, TestingModule } from "@nestjs/testing";
|
||||
import { TaskType, Complexity, Provider } from "@mosaicstack/telemetry-client";
|
||||
import type { PredictionResponse, PredictionQuery } from "@mosaicstack/telemetry-client";
|
||||
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
|
||||
import { PredictionService } from "./prediction.service";
|
||||
|
||||
// Unit tests for PredictionService. MosaicTelemetryService is replaced by a
// plain object of vi.fn spies so these tests exercise only the query-building
// and error-shielding logic of PredictionService itself.
describe("PredictionService", () => {
  let service: PredictionService;
  let mockTelemetryService: {
    isEnabled: boolean;
    getPrediction: ReturnType<typeof vi.fn>;
    refreshPredictions: ReturnType<typeof vi.fn>;
  };

  // A high-confidence response with every percentile band populated.
  const mockPredictionResponse: PredictionResponse = {
    prediction: {
      input_tokens: {
        p10: 50,
        p25: 80,
        median: 120,
        p75: 200,
        p90: 350,
      },
      output_tokens: {
        p10: 100,
        p25: 150,
        median: 250,
        p75: 400,
        p90: 600,
      },
      cost_usd_micros: {
        p10: 500,
        p25: 800,
        median: 1200,
        p75: 2000,
        p90: 3500,
      },
      duration_ms: {
        p10: 200,
        p25: 400,
        median: 800,
        p75: 1500,
        p90: 3000,
      },
      correction_factors: {
        input: 1.0,
        output: 1.0,
      },
      quality: {
        gate_pass_rate: 0.95,
        success_rate: 0.92,
      },
    },
    metadata: {
      sample_size: 150,
      fallback_level: 0,
      confidence: "high",
      last_updated: "2026-02-15T00:00:00Z",
      cache_hit: true,
    },
  };

  // The "no data" shape the server returns when it has nothing to predict from.
  const nullPredictionResponse: PredictionResponse = {
    prediction: null,
    metadata: {
      sample_size: 0,
      fallback_level: 3,
      confidence: "none",
      last_updated: null,
      cache_hit: false,
    },
  };

  beforeEach(async () => {
    mockTelemetryService = {
      isEnabled: true,
      getPrediction: vi.fn().mockReturnValue(mockPredictionResponse),
      refreshPredictions: vi.fn().mockResolvedValue(undefined),
    };

    const module: TestingModule = await Test.createTestingModule({
      providers: [
        PredictionService,
        {
          provide: MosaicTelemetryService,
          useValue: mockTelemetryService,
        },
      ],
    }).compile();

    service = module.get<PredictionService>(PredictionService);
  });

  it("should be defined", () => {
    expect(service).toBeDefined();
  });

  // ---------- getEstimate ----------

  describe("getEstimate", () => {
    it("should return prediction response for valid query", () => {
      const result = service.getEstimate(
        TaskType.IMPLEMENTATION,
        "claude-sonnet-4-5",
        Provider.ANTHROPIC,
        Complexity.LOW
      );

      expect(result).toEqual(mockPredictionResponse);
      expect(mockTelemetryService.getPrediction).toHaveBeenCalledWith({
        task_type: TaskType.IMPLEMENTATION,
        model: "claude-sonnet-4-5",
        provider: Provider.ANTHROPIC,
        complexity: Complexity.LOW,
      });
    });

    it("should pass correct query parameters to telemetry service", () => {
      service.getEstimate(TaskType.CODE_REVIEW, "gpt-4o", Provider.OPENAI, Complexity.HIGH);

      expect(mockTelemetryService.getPrediction).toHaveBeenCalledWith({
        task_type: TaskType.CODE_REVIEW,
        model: "gpt-4o",
        provider: Provider.OPENAI,
        complexity: Complexity.HIGH,
      });
    });

    it("should return null when telemetry returns null", () => {
      mockTelemetryService.getPrediction.mockReturnValue(null);

      const result = service.getEstimate(
        TaskType.IMPLEMENTATION,
        "claude-sonnet-4-5",
        Provider.ANTHROPIC,
        Complexity.LOW
      );

      expect(result).toBeNull();
    });

    it("should return null prediction response when confidence is none", () => {
      mockTelemetryService.getPrediction.mockReturnValue(nullPredictionResponse);

      const result = service.getEstimate(
        TaskType.IMPLEMENTATION,
        "unknown-model",
        Provider.UNKNOWN,
        Complexity.LOW
      );

      expect(result).toEqual(nullPredictionResponse);
      expect(result?.metadata.confidence).toBe("none");
    });

    it("should return null and not throw when getPrediction throws", () => {
      mockTelemetryService.getPrediction.mockImplementation(() => {
        throw new Error("Prediction fetch failed");
      });

      const result = service.getEstimate(
        TaskType.IMPLEMENTATION,
        "claude-sonnet-4-5",
        Provider.ANTHROPIC,
        Complexity.LOW
      );

      expect(result).toBeNull();
    });

    it("should handle non-Error thrown objects gracefully", () => {
      mockTelemetryService.getPrediction.mockImplementation(() => {
        throw "string error";
      });

      const result = service.getEstimate(
        TaskType.IMPLEMENTATION,
        "claude-sonnet-4-5",
        Provider.ANTHROPIC,
        Complexity.LOW
      );

      expect(result).toBeNull();
    });
  });

  // ---------- refreshCommonPredictions ----------

  describe("refreshCommonPredictions", () => {
    it("should call refreshPredictions with multiple query combinations", async () => {
      await service.refreshCommonPredictions();

      expect(mockTelemetryService.refreshPredictions).toHaveBeenCalledTimes(1);

      const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];

      // Should have queries for cross-product of models, task types, and complexities
      expect(queries.length).toBeGreaterThan(0);

      // Verify all queries have valid structure
      for (const query of queries) {
        expect(query).toHaveProperty("task_type");
        expect(query).toHaveProperty("model");
        expect(query).toHaveProperty("provider");
        expect(query).toHaveProperty("complexity");
      }
    });

    it("should include Anthropic model predictions", async () => {
      await service.refreshCommonPredictions();

      const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];

      const anthropicQueries = queries.filter(
        (q: PredictionQuery) => q.provider === Provider.ANTHROPIC
      );
      expect(anthropicQueries.length).toBeGreaterThan(0);
    });

    it("should include OpenAI model predictions", async () => {
      await service.refreshCommonPredictions();

      const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];

      const openaiQueries = queries.filter((q: PredictionQuery) => q.provider === Provider.OPENAI);
      expect(openaiQueries.length).toBeGreaterThan(0);
    });

    it("should not call refreshPredictions when telemetry is disabled", async () => {
      mockTelemetryService.isEnabled = false;

      await service.refreshCommonPredictions();

      expect(mockTelemetryService.refreshPredictions).not.toHaveBeenCalled();
    });

    it("should not throw when refreshPredictions rejects", async () => {
      mockTelemetryService.refreshPredictions.mockRejectedValue(new Error("Server unreachable"));

      // Should not throw
      await expect(service.refreshCommonPredictions()).resolves.not.toThrow();
    });

    it("should include common task types in queries", async () => {
      await service.refreshCommonPredictions();

      const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];

      const taskTypes = new Set(queries.map((q: PredictionQuery) => q.task_type));

      expect(taskTypes.has(TaskType.IMPLEMENTATION)).toBe(true);
      expect(taskTypes.has(TaskType.PLANNING)).toBe(true);
      expect(taskTypes.has(TaskType.CODE_REVIEW)).toBe(true);
    });

    it("should include common complexity levels in queries", async () => {
      await service.refreshCommonPredictions();

      const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];

      const complexities = new Set(queries.map((q: PredictionQuery) => q.complexity));

      expect(complexities.has(Complexity.LOW)).toBe(true);
      expect(complexities.has(Complexity.MEDIUM)).toBe(true);
    });
  });

  // ---------- onModuleInit ----------

  describe("onModuleInit", () => {
    it("should trigger refreshCommonPredictions on init when telemetry is enabled", () => {
      // refreshPredictions is async, but onModuleInit fires it and forgets
      service.onModuleInit();

      // Give the promise microtask a chance to execute
      expect(mockTelemetryService.isEnabled).toBe(true);
      // refreshPredictions will be called asynchronously
    });

    it("should not refresh when telemetry is disabled", () => {
      mockTelemetryService.isEnabled = false;

      service.onModuleInit();

      // refreshPredictions should not be called since we returned early
      expect(mockTelemetryService.refreshPredictions).not.toHaveBeenCalled();
    });

    it("should not throw when refresh fails on init", () => {
      mockTelemetryService.refreshPredictions.mockRejectedValue(new Error("Connection refused"));

      // Should not throw
      expect(() => service.onModuleInit()).not.toThrow();
    });
  });
});
||||
161
apps/api/src/mosaic-telemetry/prediction.service.ts
Normal file
161
apps/api/src/mosaic-telemetry/prediction.service.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
import { Injectable, Logger, OnModuleInit } from "@nestjs/common";
|
||||
import {
|
||||
TaskType,
|
||||
Complexity,
|
||||
Provider,
|
||||
type PredictionQuery,
|
||||
type PredictionResponse,
|
||||
} from "@mosaicstack/telemetry-client";
|
||||
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
|
||||
|
||||
/**
|
||||
* Common model-provider combinations used for pre-fetching predictions.
|
||||
* These represent the most frequently used LLM configurations.
|
||||
*/
|
||||
const COMMON_MODELS: { model: string; provider: Provider }[] = [
|
||||
{ model: "claude-sonnet-4-5", provider: Provider.ANTHROPIC },
|
||||
{ model: "claude-opus-4", provider: Provider.ANTHROPIC },
|
||||
{ model: "claude-haiku-4-5", provider: Provider.ANTHROPIC },
|
||||
{ model: "gpt-4o", provider: Provider.OPENAI },
|
||||
{ model: "gpt-4o-mini", provider: Provider.OPENAI },
|
||||
];
|
||||
|
||||
/**
|
||||
* Common task types to pre-fetch predictions for.
|
||||
*/
|
||||
const COMMON_TASK_TYPES: TaskType[] = [
|
||||
TaskType.IMPLEMENTATION,
|
||||
TaskType.PLANNING,
|
||||
TaskType.CODE_REVIEW,
|
||||
];
|
||||
|
||||
/**
|
||||
* Common complexity levels to pre-fetch predictions for.
|
||||
*/
|
||||
const COMMON_COMPLEXITIES: Complexity[] = [Complexity.LOW, Complexity.MEDIUM];
|
||||
|
||||
/**
|
||||
* PredictionService
|
||||
*
|
||||
* Provides pre-task cost and token estimates using crowd-sourced prediction data
|
||||
* from the Mosaic Telemetry server. Predictions are cached by the underlying SDK
|
||||
* with a 6-hour TTL.
|
||||
*
|
||||
* This service is intentionally non-blocking: if predictions are unavailable
|
||||
* (telemetry disabled, server unreachable, no data), all methods return null
|
||||
* without throwing errors. Task execution should never be blocked by prediction
|
||||
* failures.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const estimate = this.predictionService.getEstimate(
|
||||
* TaskType.IMPLEMENTATION,
|
||||
* "claude-sonnet-4-5",
|
||||
* Provider.ANTHROPIC,
|
||||
* Complexity.LOW,
|
||||
* );
|
||||
* if (estimate?.prediction) {
|
||||
* console.log(`Estimated cost: ${estimate.prediction.cost_usd_micros}`);
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
@Injectable()
|
||||
export class PredictionService implements OnModuleInit {
|
||||
private readonly logger = new Logger(PredictionService.name);
|
||||
|
||||
constructor(private readonly telemetry: MosaicTelemetryService) {}
|
||||
|
||||
/**
|
||||
* Refresh common predictions on startup.
|
||||
* Runs asynchronously and never blocks module initialization.
|
||||
*/
|
||||
onModuleInit(): void {
|
||||
if (!this.telemetry.isEnabled) {
|
||||
this.logger.log("Telemetry disabled - skipping prediction refresh");
|
||||
return;
|
||||
}
|
||||
|
||||
// Fire-and-forget: refresh in the background
|
||||
this.refreshCommonPredictions().catch((error: unknown) => {
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
this.logger.warn(`Failed to refresh common predictions on startup: ${msg}`);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a cost/token estimate for a given task configuration.
|
||||
*
|
||||
* Returns the cached prediction from the SDK, or null if:
|
||||
* - Telemetry is disabled
|
||||
* - No prediction data exists for this combination
|
||||
* - The prediction has expired
|
||||
*
|
||||
* @param taskType - The type of task to estimate
|
||||
* @param model - The model name (e.g. "claude-sonnet-4-5")
|
||||
* @param provider - The provider enum value
|
||||
* @param complexity - The complexity level
|
||||
* @returns Prediction response with estimates and confidence, or null
|
||||
*/
|
||||
getEstimate(
|
||||
taskType: TaskType,
|
||||
model: string,
|
||||
provider: Provider,
|
||||
complexity: Complexity
|
||||
): PredictionResponse | null {
|
||||
try {
|
||||
const query: PredictionQuery = {
|
||||
task_type: taskType,
|
||||
model,
|
||||
provider,
|
||||
complexity,
|
||||
};
|
||||
|
||||
return this.telemetry.getPrediction(query);
|
||||
} catch (error: unknown) {
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
this.logger.warn(`Failed to get prediction estimate: ${msg}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh predictions for commonly used (taskType, model, provider, complexity) combinations.
|
||||
*
|
||||
* Generates the cross-product of common models, task types, and complexities,
|
||||
* then batch-refreshes them from the telemetry server. The SDK caches the
|
||||
* results with a 6-hour TTL.
|
||||
*
|
||||
* This method is safe to call at any time. If telemetry is disabled or the
|
||||
* server is unreachable, it completes without error.
|
||||
*/
|
||||
async refreshCommonPredictions(): Promise<void> {
|
||||
if (!this.telemetry.isEnabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
const queries: PredictionQuery[] = [];
|
||||
|
||||
for (const { model, provider } of COMMON_MODELS) {
|
||||
for (const taskType of COMMON_TASK_TYPES) {
|
||||
for (const complexity of COMMON_COMPLEXITIES) {
|
||||
queries.push({
|
||||
task_type: taskType,
|
||||
model,
|
||||
provider,
|
||||
complexity,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.log(`Refreshing ${String(queries.length)} common prediction queries...`);
|
||||
|
||||
try {
|
||||
await this.telemetry.refreshPredictions(queries);
|
||||
this.logger.log(`Successfully refreshed ${String(queries.length)} predictions`);
|
||||
} catch (error: unknown) {
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
this.logger.warn(`Failed to refresh predictions: ${msg}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,8 +9,9 @@ RUN apt-get update && \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy dependency files
|
||||
# Copy dependency files and private registry config
|
||||
COPY pyproject.toml .
|
||||
COPY pip.conf /etc/pip.conf
|
||||
|
||||
# Create virtual environment and install dependencies
|
||||
RUN python -m venv /opt/venv
|
||||
|
||||
2
apps/coordinator/pip.conf
Normal file
2
apps/coordinator/pip.conf
Normal file
@@ -0,0 +1,2 @@
|
||||
[global]
|
||||
extra-index-url = https://git.mosaicstack.dev/api/packages/mosaic/pypi/simple/
|
||||
@@ -15,6 +15,7 @@ dependencies = [
|
||||
"opentelemetry-sdk>=1.20.0",
|
||||
"opentelemetry-instrumentation-fastapi>=0.41b0",
|
||||
"opentelemetry-exporter-otlp>=1.20.0",
|
||||
"mosaicstack-telemetry>=0.1.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -2,12 +2,24 @@
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
TaskType,
|
||||
TelemetryClient,
|
||||
)
|
||||
|
||||
from src.circuit_breaker import CircuitBreaker, CircuitBreakerError
|
||||
from src.context_monitor import ContextMonitor
|
||||
from src.forced_continuation import ForcedContinuationService
|
||||
from src.models import ContextAction
|
||||
from src.mosaic_telemetry import build_task_event
|
||||
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
|
||||
from src.queue import QueueItem, QueueManager
|
||||
from src.tracing_decorators import trace_agent_operation
|
||||
@@ -17,6 +29,49 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Agent-name → telemetry-field mapping helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps assigned_agent strings to (model, Provider, Harness)
|
||||
_AGENT_TELEMETRY_MAP: dict[str, tuple[str, Provider, Harness]] = {
|
||||
"sonnet": ("claude-sonnet-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
|
||||
"opus": ("claude-opus-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
|
||||
"haiku": ("claude-haiku-3.5-20241022", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
|
||||
"glm": ("glm-4", Provider.CUSTOM, Harness.CUSTOM),
|
||||
"minimax": ("minimax", Provider.CUSTOM, Harness.CUSTOM),
|
||||
}
|
||||
|
||||
_DIFFICULTY_TO_COMPLEXITY: dict[str, Complexity] = {
|
||||
"easy": Complexity.LOW,
|
||||
"medium": Complexity.MEDIUM,
|
||||
"hard": Complexity.HIGH,
|
||||
}
|
||||
|
||||
_GATE_NAME_TO_ENUM: dict[str, QualityGate] = {
|
||||
"build": QualityGate.BUILD,
|
||||
"lint": QualityGate.LINT,
|
||||
"test": QualityGate.TEST,
|
||||
"coverage": QualityGate.COVERAGE,
|
||||
}
|
||||
|
||||
|
||||
def _resolve_agent_fields(
|
||||
assigned_agent: str,
|
||||
) -> tuple[str, Provider, Harness]:
|
||||
"""Resolve agent name to (model, provider, harness) for telemetry.
|
||||
|
||||
Args:
|
||||
assigned_agent: The agent name string from issue metadata.
|
||||
|
||||
Returns:
|
||||
Tuple of (model_name, Provider, Harness).
|
||||
"""
|
||||
return _AGENT_TELEMETRY_MAP.get(
|
||||
assigned_agent,
|
||||
("unknown", Provider.UNKNOWN, Harness.UNKNOWN),
|
||||
)
|
||||
|
||||
|
||||
class Coordinator:
|
||||
"""Main orchestration loop for processing the issue queue.
|
||||
@@ -41,6 +96,8 @@ class Coordinator:
|
||||
poll_interval: float = 5.0,
|
||||
circuit_breaker_threshold: int = 5,
|
||||
circuit_breaker_cooldown: float = 30.0,
|
||||
telemetry_client: TelemetryClient | None = None,
|
||||
instance_id: str = "",
|
||||
) -> None:
|
||||
"""Initialize the Coordinator.
|
||||
|
||||
@@ -49,12 +106,16 @@ class Coordinator:
|
||||
poll_interval: Seconds between queue polls (default: 5.0)
|
||||
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
|
||||
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
|
||||
telemetry_client: Optional Mosaic telemetry client for tracking task events
|
||||
instance_id: UUID identifying this coordinator instance for telemetry
|
||||
"""
|
||||
self.queue_manager = queue_manager
|
||||
self.poll_interval = poll_interval
|
||||
self._running = False
|
||||
self._stop_event: asyncio.Event | None = None
|
||||
self._active_agents: dict[int, dict[str, Any]] = {}
|
||||
self._telemetry_client = telemetry_client
|
||||
self._instance_id = instance_id
|
||||
|
||||
# Circuit breaker for preventing infinite retry loops (SEC-ORCH-7)
|
||||
self._circuit_breaker = CircuitBreaker(
|
||||
@@ -197,7 +258,8 @@ class Coordinator:
|
||||
"""Process the next ready item from the queue.
|
||||
|
||||
Gets the next ready item, spawns an agent to process it,
|
||||
and marks it complete on success.
|
||||
and marks it complete on success. Emits a Mosaic telemetry
|
||||
TaskCompletionEvent after each task attempt.
|
||||
|
||||
Returns:
|
||||
The QueueItem that was processed, or None if queue is empty
|
||||
@@ -218,6 +280,10 @@ class Coordinator:
|
||||
# Mark as in progress
|
||||
self.queue_manager.mark_in_progress(item.issue_number)
|
||||
|
||||
# Track timing for telemetry
|
||||
start_mono = time.monotonic()
|
||||
outcome = Outcome.FAILURE
|
||||
|
||||
# Spawn agent (stub implementation)
|
||||
try:
|
||||
success = await self.spawn_agent(item)
|
||||
@@ -225,6 +291,7 @@ class Coordinator:
|
||||
if success:
|
||||
# Mark as complete
|
||||
self.queue_manager.mark_complete(item.issue_number)
|
||||
outcome = Outcome.SUCCESS
|
||||
logger.info(f"Issue #{item.issue_number} completed successfully")
|
||||
else:
|
||||
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
|
||||
@@ -233,8 +300,81 @@ class Coordinator:
|
||||
logger.error(f"Error spawning agent for issue #{item.issue_number}: {e}")
|
||||
# Item remains in progress on error
|
||||
|
||||
finally:
|
||||
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
|
||||
self._emit_task_telemetry(item, outcome=outcome, duration_ms=elapsed_ms)
|
||||
|
||||
return item
|
||||
|
||||
def _emit_task_telemetry(
|
||||
self,
|
||||
item: QueueItem,
|
||||
*,
|
||||
outcome: Outcome,
|
||||
duration_ms: int,
|
||||
retry_count: int = 0,
|
||||
actual_input_tokens: int = 0,
|
||||
actual_output_tokens: int = 0,
|
||||
quality_passed: bool = False,
|
||||
quality_gates_run: list[QualityGate] | None = None,
|
||||
quality_gates_failed: list[QualityGate] | None = None,
|
||||
) -> None:
|
||||
"""Emit a Mosaic telemetry TaskCompletionEvent (non-blocking).
|
||||
|
||||
This method never raises; any telemetry errors are logged and swallowed
|
||||
so they do not interfere with task processing.
|
||||
|
||||
Args:
|
||||
item: The QueueItem that was processed.
|
||||
outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.).
|
||||
duration_ms: Wall-clock duration in milliseconds.
|
||||
retry_count: Number of retries before this attempt.
|
||||
actual_input_tokens: Actual input tokens consumed by the harness.
|
||||
actual_output_tokens: Actual output tokens consumed by the harness.
|
||||
quality_passed: Whether all quality gates passed.
|
||||
quality_gates_run: Quality gates that were executed.
|
||||
quality_gates_failed: Quality gates that failed.
|
||||
"""
|
||||
if self._telemetry_client is None or not self._instance_id:
|
||||
return
|
||||
|
||||
try:
|
||||
model, provider, harness = _resolve_agent_fields(
|
||||
item.metadata.assigned_agent,
|
||||
)
|
||||
complexity = _DIFFICULTY_TO_COMPLEXITY.get(
|
||||
item.metadata.difficulty, Complexity.MEDIUM
|
||||
)
|
||||
|
||||
event = build_task_event(
|
||||
instance_id=self._instance_id,
|
||||
task_type=TaskType.IMPLEMENTATION,
|
||||
complexity=complexity,
|
||||
outcome=outcome,
|
||||
duration_ms=duration_ms,
|
||||
model=model,
|
||||
provider=provider,
|
||||
harness=harness,
|
||||
actual_input_tokens=actual_input_tokens,
|
||||
actual_output_tokens=actual_output_tokens,
|
||||
estimated_input_tokens=item.metadata.estimated_context,
|
||||
quality_passed=quality_passed,
|
||||
quality_gates_run=quality_gates_run,
|
||||
quality_gates_failed=quality_gates_failed,
|
||||
retry_count=retry_count,
|
||||
)
|
||||
self._telemetry_client.track(event)
|
||||
logger.debug(
|
||||
"Telemetry event emitted for issue #%d (outcome=%s)",
|
||||
item.issue_number,
|
||||
outcome.value,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to emit telemetry for issue #%d (non-fatal)",
|
||||
item.issue_number,
|
||||
)
|
||||
|
||||
@trace_agent_operation(operation_name="spawn_agent")
|
||||
async def spawn_agent(self, item: QueueItem) -> bool:
|
||||
"""Spawn an agent to process the given item.
|
||||
@@ -294,6 +434,8 @@ class OrchestrationLoop:
|
||||
poll_interval: float = 5.0,
|
||||
circuit_breaker_threshold: int = 5,
|
||||
circuit_breaker_cooldown: float = 30.0,
|
||||
telemetry_client: TelemetryClient | None = None,
|
||||
instance_id: str = "",
|
||||
) -> None:
|
||||
"""Initialize the OrchestrationLoop.
|
||||
|
||||
@@ -305,6 +447,8 @@ class OrchestrationLoop:
|
||||
poll_interval: Seconds between queue polls (default: 5.0)
|
||||
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
|
||||
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
|
||||
telemetry_client: Optional Mosaic telemetry client for tracking task events
|
||||
instance_id: UUID identifying this coordinator instance for telemetry
|
||||
"""
|
||||
self.queue_manager = queue_manager
|
||||
self.quality_orchestrator = quality_orchestrator
|
||||
@@ -314,6 +458,11 @@ class OrchestrationLoop:
|
||||
self._running = False
|
||||
self._stop_event: asyncio.Event | None = None
|
||||
self._active_agents: dict[int, dict[str, Any]] = {}
|
||||
self._telemetry_client = telemetry_client
|
||||
self._instance_id = instance_id
|
||||
|
||||
# Per-issue retry tracking
|
||||
self._retry_counts: dict[int, int] = {}
|
||||
|
||||
# Metrics tracking
|
||||
self._processed_count = 0
|
||||
@@ -493,6 +642,7 @@ class OrchestrationLoop:
|
||||
3. Spawns an agent to process it
|
||||
4. Runs quality gates on completion
|
||||
5. Handles rejection with forced continuation or marks complete
|
||||
6. Emits a Mosaic telemetry TaskCompletionEvent
|
||||
|
||||
Returns:
|
||||
The QueueItem that was processed, or None if queue is empty
|
||||
@@ -524,12 +674,21 @@ class OrchestrationLoop:
|
||||
"status": "running",
|
||||
}
|
||||
|
||||
# Track timing for telemetry
|
||||
start_mono = time.monotonic()
|
||||
outcome = Outcome.FAILURE
|
||||
quality_passed = False
|
||||
gates_run: list[QualityGate] = []
|
||||
gates_failed: list[QualityGate] = []
|
||||
retry_count = self._retry_counts.get(item.issue_number, 0)
|
||||
|
||||
try:
|
||||
# Spawn agent (stub implementation)
|
||||
agent_success = await self._spawn_agent(item)
|
||||
|
||||
if not agent_success:
|
||||
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
|
||||
self._retry_counts[item.issue_number] = retry_count + 1
|
||||
return item
|
||||
|
||||
# Check context usage (stub - no real monitoring in Phase 0)
|
||||
@@ -538,24 +697,123 @@ class OrchestrationLoop:
|
||||
# Run quality gates on completion
|
||||
verification = await self._verify_quality(item)
|
||||
|
||||
# Map gate results for telemetry
|
||||
gates_run = [
|
||||
_GATE_NAME_TO_ENUM[name]
|
||||
for name in verification.gate_results
|
||||
if name in _GATE_NAME_TO_ENUM
|
||||
]
|
||||
gates_failed = [
|
||||
_GATE_NAME_TO_ENUM[name]
|
||||
for name, result in verification.gate_results.items()
|
||||
if name in _GATE_NAME_TO_ENUM and not result.passed
|
||||
]
|
||||
quality_passed = verification.all_passed
|
||||
|
||||
if verification.all_passed:
|
||||
# All gates passed - mark as complete
|
||||
self.queue_manager.mark_complete(item.issue_number)
|
||||
self._success_count += 1
|
||||
outcome = Outcome.SUCCESS
|
||||
# Clear retry counter on success
|
||||
self._retry_counts.pop(item.issue_number, None)
|
||||
logger.info(
|
||||
f"Issue #{item.issue_number} completed successfully - all gates passed"
|
||||
)
|
||||
else:
|
||||
# Gates failed - generate continuation prompt
|
||||
self._rejection_count += 1
|
||||
outcome = Outcome.FAILURE
|
||||
self._retry_counts[item.issue_number] = retry_count + 1
|
||||
await self._handle_rejection(item, verification)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing issue #{item.issue_number}: {e}")
|
||||
# Item remains in progress on error
|
||||
|
||||
finally:
|
||||
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
|
||||
self._emit_task_telemetry(
|
||||
item,
|
||||
outcome=outcome,
|
||||
duration_ms=elapsed_ms,
|
||||
retry_count=retry_count,
|
||||
quality_passed=quality_passed,
|
||||
quality_gates_run=gates_run,
|
||||
quality_gates_failed=gates_failed,
|
||||
)
|
||||
|
||||
return item
|
||||
|
||||
def _emit_task_telemetry(
|
||||
self,
|
||||
item: QueueItem,
|
||||
*,
|
||||
outcome: Outcome,
|
||||
duration_ms: int,
|
||||
retry_count: int = 0,
|
||||
actual_input_tokens: int = 0,
|
||||
actual_output_tokens: int = 0,
|
||||
quality_passed: bool = False,
|
||||
quality_gates_run: list[QualityGate] | None = None,
|
||||
quality_gates_failed: list[QualityGate] | None = None,
|
||||
) -> None:
|
||||
"""Emit a Mosaic telemetry TaskCompletionEvent (non-blocking).
|
||||
|
||||
This method never raises; any telemetry errors are logged and swallowed
|
||||
so they do not interfere with task processing.
|
||||
|
||||
Args:
|
||||
item: The QueueItem that was processed.
|
||||
outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.).
|
||||
duration_ms: Wall-clock duration in milliseconds.
|
||||
retry_count: Number of retries before this attempt.
|
||||
actual_input_tokens: Actual input tokens consumed by the harness.
|
||||
actual_output_tokens: Actual output tokens consumed by the harness.
|
||||
quality_passed: Whether all quality gates passed.
|
||||
quality_gates_run: Quality gates that were executed.
|
||||
quality_gates_failed: Quality gates that failed.
|
||||
"""
|
||||
if self._telemetry_client is None or not self._instance_id:
|
||||
return
|
||||
|
||||
try:
|
||||
model, provider, harness = _resolve_agent_fields(
|
||||
item.metadata.assigned_agent,
|
||||
)
|
||||
complexity = _DIFFICULTY_TO_COMPLEXITY.get(
|
||||
item.metadata.difficulty, Complexity.MEDIUM
|
||||
)
|
||||
|
||||
event = build_task_event(
|
||||
instance_id=self._instance_id,
|
||||
task_type=TaskType.IMPLEMENTATION,
|
||||
complexity=complexity,
|
||||
outcome=outcome,
|
||||
duration_ms=duration_ms,
|
||||
model=model,
|
||||
provider=provider,
|
||||
harness=harness,
|
||||
actual_input_tokens=actual_input_tokens,
|
||||
actual_output_tokens=actual_output_tokens,
|
||||
estimated_input_tokens=item.metadata.estimated_context,
|
||||
quality_passed=quality_passed,
|
||||
quality_gates_run=quality_gates_run,
|
||||
quality_gates_failed=quality_gates_failed,
|
||||
retry_count=retry_count,
|
||||
)
|
||||
self._telemetry_client.track(event)
|
||||
logger.debug(
|
||||
"Telemetry event emitted for issue #%d (outcome=%s)",
|
||||
item.issue_number,
|
||||
outcome.value,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to emit telemetry for issue #%d (non-fatal)",
|
||||
item.issue_number,
|
||||
)
|
||||
|
||||
async def _spawn_agent(self, item: QueueItem) -> bool:
|
||||
"""Spawn an agent to process the given item.
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
from mosaicstack_telemetry import TelemetryClient # type: ignore[import-untyped]
|
||||
from pydantic import BaseModel
|
||||
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||
from slowapi.errors import RateLimitExceeded
|
||||
@@ -18,6 +19,7 @@ from starlette.responses import Response
|
||||
|
||||
from .config import settings
|
||||
from .coordinator import Coordinator
|
||||
from .mosaic_telemetry import create_telemetry_config
|
||||
from .queue import QueueManager
|
||||
from .telemetry import TelemetryService, shutdown_telemetry
|
||||
from .webhook import router as webhook_router
|
||||
@@ -76,6 +78,18 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
|
||||
telemetry_service.initialize()
|
||||
logger.info("OpenTelemetry telemetry initialized")
|
||||
|
||||
# Initialize Mosaic telemetry client
|
||||
mosaic_telemetry_config = create_telemetry_config()
|
||||
mosaic_telemetry_client: TelemetryClient | None = None
|
||||
if mosaic_telemetry_config.enabled:
|
||||
mosaic_telemetry_client = TelemetryClient(mosaic_telemetry_config)
|
||||
await mosaic_telemetry_client.start_async()
|
||||
app.state.mosaic_telemetry = mosaic_telemetry_client
|
||||
logger.info("Mosaic telemetry client started")
|
||||
else:
|
||||
app.state.mosaic_telemetry = None
|
||||
logger.info("Mosaic telemetry disabled via configuration")
|
||||
|
||||
# Initialize queue manager
|
||||
queue_file = Path("queue.json")
|
||||
queue_manager = QueueManager(queue_file=queue_file)
|
||||
@@ -86,6 +100,8 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
|
||||
_coordinator = Coordinator(
|
||||
queue_manager=queue_manager,
|
||||
poll_interval=settings.coordinator_poll_interval,
|
||||
telemetry_client=mosaic_telemetry_client,
|
||||
instance_id=mosaic_telemetry_config.instance_id or "",
|
||||
)
|
||||
logger.info(
|
||||
f"Coordinator initialized (poll interval: {settings.coordinator_poll_interval}s, "
|
||||
@@ -115,6 +131,11 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
|
||||
pass
|
||||
logger.info("Coordinator stopped")
|
||||
|
||||
# Shutdown Mosaic telemetry client
|
||||
if mosaic_telemetry_client is not None:
|
||||
await mosaic_telemetry_client.stop_async()
|
||||
logger.info("Mosaic telemetry client stopped")
|
||||
|
||||
# Shutdown OpenTelemetry
|
||||
if telemetry_enabled:
|
||||
shutdown_telemetry()
|
||||
|
||||
157
apps/coordinator/src/mosaic_telemetry.py
Normal file
157
apps/coordinator/src/mosaic_telemetry.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Mosaic Stack telemetry integration for the Coordinator.
|
||||
|
||||
This module provides helpers for tracking task completion events using the
|
||||
mosaicstack-telemetry SDK. It is separate from the OpenTelemetry distributed
|
||||
tracing configured in telemetry.py.
|
||||
|
||||
Environment variables (auto-read by the SDK):
|
||||
MOSAIC_TELEMETRY_ENABLED: Enable/disable telemetry (default: true)
|
||||
MOSAIC_TELEMETRY_SERVER_URL: Telemetry server endpoint
|
||||
MOSAIC_TELEMETRY_API_KEY: API key for authentication
|
||||
MOSAIC_TELEMETRY_INSTANCE_ID: UUID identifying this coordinator instance
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
|
||||
Complexity,
|
||||
EventBuilder,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
TaskType,
|
||||
TelemetryClient,
|
||||
TelemetryConfig,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastapi import FastAPI
|
||||
from mosaicstack_telemetry import TaskCompletionEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_telemetry_client(app: FastAPI) -> TelemetryClient | None:
|
||||
"""Retrieve the Mosaic telemetry client from FastAPI app state.
|
||||
|
||||
Args:
|
||||
app: The FastAPI application instance.
|
||||
|
||||
Returns:
|
||||
The TelemetryClient if initialised and telemetry is enabled,
|
||||
or None if telemetry is disabled or not yet initialised.
|
||||
"""
|
||||
client: TelemetryClient | None = getattr(app.state, "mosaic_telemetry", None)
|
||||
return client
|
||||
|
||||
|
||||
def build_task_event(
|
||||
*,
|
||||
instance_id: str,
|
||||
task_type: TaskType = TaskType.IMPLEMENTATION,
|
||||
complexity: Complexity = Complexity.MEDIUM,
|
||||
outcome: Outcome = Outcome.SUCCESS,
|
||||
duration_ms: int = 0,
|
||||
model: str = "claude-sonnet-4-20250514",
|
||||
provider: Provider = Provider.ANTHROPIC,
|
||||
harness: Harness = Harness.CLAUDE_CODE,
|
||||
estimated_input_tokens: int = 0,
|
||||
estimated_output_tokens: int = 0,
|
||||
actual_input_tokens: int = 0,
|
||||
actual_output_tokens: int = 0,
|
||||
estimated_cost_micros: int = 0,
|
||||
actual_cost_micros: int = 0,
|
||||
quality_passed: bool = False,
|
||||
quality_gates_run: list[QualityGate] | None = None,
|
||||
quality_gates_failed: list[QualityGate] | None = None,
|
||||
context_compactions: int = 0,
|
||||
context_rotations: int = 0,
|
||||
context_utilization: float = 0.0,
|
||||
retry_count: int = 0,
|
||||
language: str | None = "typescript",
|
||||
) -> TaskCompletionEvent:
|
||||
"""Build a TaskCompletionEvent for a coordinator task.
|
||||
|
||||
Provides sensible defaults for the coordinator context (Claude Code harness,
|
||||
Anthropic provider, TypeScript language).
|
||||
|
||||
Args:
|
||||
instance_id: UUID identifying this coordinator instance.
|
||||
task_type: The kind of task that was performed.
|
||||
complexity: Complexity level of the task.
|
||||
outcome: Whether the task succeeded, failed, etc.
|
||||
duration_ms: Task duration in milliseconds.
|
||||
model: The AI model used.
|
||||
provider: The AI model provider.
|
||||
harness: The coding harness used.
|
||||
estimated_input_tokens: Estimated input token count.
|
||||
estimated_output_tokens: Estimated output token count.
|
||||
actual_input_tokens: Actual input token count.
|
||||
actual_output_tokens: Actual output token count.
|
||||
estimated_cost_micros: Estimated cost in USD micros.
|
||||
actual_cost_micros: Actual cost in USD micros.
|
||||
quality_passed: Whether all quality gates passed.
|
||||
quality_gates_run: List of quality gates that were executed.
|
||||
quality_gates_failed: List of quality gates that failed.
|
||||
context_compactions: Number of context compactions during the task.
|
||||
context_rotations: Number of context rotations during the task.
|
||||
context_utilization: Final context window utilization (0.0-1.0).
|
||||
retry_count: Number of retries before the task completed.
|
||||
language: Primary programming language (default: typescript).
|
||||
|
||||
Returns:
|
||||
A fully populated TaskCompletionEvent ready to be tracked.
|
||||
"""
|
||||
builder = (
|
||||
EventBuilder(instance_id=instance_id)
|
||||
.task_type(task_type)
|
||||
.complexity_level(complexity)
|
||||
.harness_type(harness)
|
||||
.model(model)
|
||||
.provider(provider)
|
||||
.duration_ms(duration_ms)
|
||||
.outcome_value(outcome)
|
||||
.tokens(
|
||||
estimated_in=estimated_input_tokens,
|
||||
estimated_out=estimated_output_tokens,
|
||||
actual_in=actual_input_tokens,
|
||||
actual_out=actual_output_tokens,
|
||||
)
|
||||
.cost(estimated=estimated_cost_micros, actual=actual_cost_micros)
|
||||
.quality(
|
||||
passed=quality_passed,
|
||||
gates_run=quality_gates_run or [],
|
||||
gates_failed=quality_gates_failed or [],
|
||||
)
|
||||
.context(
|
||||
compactions=context_compactions,
|
||||
rotations=context_rotations,
|
||||
utilization=context_utilization,
|
||||
)
|
||||
.retry_count(retry_count)
|
||||
.language(language)
|
||||
)
|
||||
return builder.build()
|
||||
|
||||
|
||||
def create_telemetry_config() -> TelemetryConfig:
|
||||
"""Create a TelemetryConfig instance.
|
||||
|
||||
The config reads from MOSAIC_TELEMETRY_* environment variables automatically.
|
||||
Validation warnings are logged but do not prevent creation.
|
||||
|
||||
Returns:
|
||||
A TelemetryConfig instance with env-var overrides applied.
|
||||
"""
|
||||
config = TelemetryConfig()
|
||||
errors = config.validate()
|
||||
if errors and config.enabled:
|
||||
logger.warning(
|
||||
"Mosaic telemetry config has validation issues (telemetry may not submit): %s",
|
||||
"; ".join(errors),
|
||||
)
|
||||
return config
|
||||
426
apps/coordinator/tests/test_mosaic_telemetry.py
Normal file
426
apps/coordinator/tests/test_mosaic_telemetry.py
Normal file
@@ -0,0 +1,426 @@
|
||||
"""Tests for Mosaic Stack telemetry integration (mosaic_telemetry module).
|
||||
|
||||
These tests cover the mosaicstack-telemetry SDK integration, NOT the
|
||||
OpenTelemetry distributed tracing (which is tested in test_telemetry.py).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from mosaicstack_telemetry import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
TelemetryClient,
|
||||
TelemetryConfig,
|
||||
)
|
||||
|
||||
from src.mosaic_telemetry import (
|
||||
build_task_event,
|
||||
create_telemetry_config,
|
||||
get_telemetry_client,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TelemetryConfig creation from environment variables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCreateTelemetryConfig:
    """Behaviour of the create_telemetry_config helper."""

    def test_config_reads_enabled_from_env(self) -> None:
        """MOSAIC_TELEMETRY_ENABLED=true should enable telemetry."""
        env = {"MOSAIC_TELEMETRY_ENABLED": "true"}
        with patch.dict("os.environ", env, clear=False):
            cfg = create_telemetry_config()
        assert cfg.enabled is True

    def test_config_disabled_from_env(self) -> None:
        """MOSAIC_TELEMETRY_ENABLED=false should disable telemetry."""
        env = {"MOSAIC_TELEMETRY_ENABLED": "false"}
        with patch.dict("os.environ", env, clear=False):
            cfg = create_telemetry_config()
        assert cfg.enabled is False

    def test_config_reads_server_url_from_env(self) -> None:
        """MOSAIC_TELEMETRY_SERVER_URL should populate server_url."""
        env = {"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com"}
        with patch.dict("os.environ", env, clear=False):
            cfg = create_telemetry_config()
        assert cfg.server_url == "https://telemetry.example.com"

    def test_config_reads_api_key_from_env(self) -> None:
        """MOSAIC_TELEMETRY_API_KEY should populate api_key."""
        api_key = "a" * 64  # 64-char hex string
        env = {"MOSAIC_TELEMETRY_API_KEY": api_key}
        with patch.dict("os.environ", env, clear=False):
            cfg = create_telemetry_config()
        assert cfg.api_key == api_key

    def test_config_reads_instance_id_from_env(self) -> None:
        """MOSAIC_TELEMETRY_INSTANCE_ID should populate instance_id."""
        instance_id = "12345678-1234-1234-1234-123456789abc"
        env = {"MOSAIC_TELEMETRY_INSTANCE_ID": instance_id}
        with patch.dict("os.environ", env, clear=False):
            cfg = create_telemetry_config()
        assert cfg.instance_id == instance_id

    def test_config_defaults_to_enabled(self) -> None:
        """With no relevant env vars set, telemetry defaults to enabled."""
        with patch.dict("os.environ", {}, clear=True):
            cfg = create_telemetry_config()
        assert cfg.enabled is True

    def test_config_logs_validation_warnings_when_enabled(self) -> None:
        """Validation errors should be logged as a warning when enabled."""
        with (
            patch.dict(
                "os.environ",
                {"MOSAIC_TELEMETRY_ENABLED": "true"},
                clear=True,
            ),
            patch("src.mosaic_telemetry.logger") as mock_logger,
        ):
            cfg = create_telemetry_config()
            # server_url, api_key, and instance_id are all empty = validation errors
            assert cfg.enabled is True
            mock_logger.warning.assert_called_once()
            warning_msg = mock_logger.warning.call_args[0][0]
            assert "validation issues" in warning_msg

    def test_config_no_warnings_when_disabled(self) -> None:
        """No warning should be logged when telemetry is disabled."""
        with (
            patch.dict(
                "os.environ",
                {"MOSAIC_TELEMETRY_ENABLED": "false"},
                clear=True,
            ),
            patch("src.mosaic_telemetry.logger") as mock_logger,
        ):
            create_telemetry_config()
        mock_logger.warning.assert_not_called()

    def test_config_strips_trailing_slashes(self) -> None:
        """Trailing slashes on the server URL should be stripped."""
        env = {"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com/"}
        with patch.dict("os.environ", env, clear=False):
            cfg = create_telemetry_config()
        assert cfg.server_url == "https://telemetry.example.com"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_telemetry_client from app state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetTelemetryClient:
    """Behaviour of the get_telemetry_client helper."""

    def test_returns_client_when_set(self) -> None:
        """The client stored on app.state should be returned as-is."""
        app = FastAPI()
        stub = MagicMock(spec=TelemetryClient)
        app.state.mosaic_telemetry = stub

        assert get_telemetry_client(app) is stub

    def test_returns_none_when_not_set(self) -> None:
        """A missing app.state attribute should yield None."""
        app = FastAPI()
        # app.state.mosaic_telemetry is deliberately never assigned.

        assert get_telemetry_client(app) is None

    def test_returns_none_when_explicitly_none(self) -> None:
        """An explicit None on app.state should be passed through unchanged."""
        app = FastAPI()
        app.state.mosaic_telemetry = None

        assert get_telemetry_client(app) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_task_event helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBuildTaskEvent:
    """Behaviour of the build_task_event helper."""

    VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"

    def test_builds_event_with_defaults(self) -> None:
        """Default arguments should yield a sensible baseline event."""
        event = build_task_event(instance_id=self.VALID_INSTANCE_ID)

        assert isinstance(event, TaskCompletionEvent)
        assert str(event.instance_id) == self.VALID_INSTANCE_ID
        assert event.task_type == TaskType.IMPLEMENTATION
        assert event.complexity == Complexity.MEDIUM
        assert event.outcome == Outcome.SUCCESS
        assert event.harness == Harness.CLAUDE_CODE
        assert event.provider == Provider.ANTHROPIC
        assert event.language == "typescript"

    def test_builds_event_with_custom_task_type(self) -> None:
        """An explicit task_type should override the default."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            task_type=TaskType.TESTING,
        )
        assert event.task_type == TaskType.TESTING

    def test_builds_event_with_custom_outcome(self) -> None:
        """An explicit outcome should override the default."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            outcome=Outcome.FAILURE,
        )
        assert event.outcome == Outcome.FAILURE

    def test_builds_event_with_duration(self) -> None:
        """duration_ms should map onto task_duration_ms."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            duration_ms=45000,
        )
        assert event.task_duration_ms == 45000

    def test_builds_event_with_token_counts(self) -> None:
        """Estimated and actual token counts should all be carried over."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            estimated_input_tokens=1000,
            estimated_output_tokens=500,
            actual_input_tokens=1100,
            actual_output_tokens=480,
        )
        assert event.estimated_input_tokens == 1000
        assert event.estimated_output_tokens == 500
        assert event.actual_input_tokens == 1100
        assert event.actual_output_tokens == 480

    def test_builds_event_with_cost(self) -> None:
        """Microdollar cost fields should map onto *_cost_usd_micros."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            estimated_cost_micros=50000,
            actual_cost_micros=48000,
        )
        assert event.estimated_cost_usd_micros == 50000
        assert event.actual_cost_usd_micros == 48000

    def test_builds_event_with_quality_gates(self) -> None:
        """Quality gate pass/fail details should be carried over."""
        gates_run = [QualityGate.LINT, QualityGate.TEST, QualityGate.BUILD]
        gates_failed = [QualityGate.TEST]
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            quality_passed=False,
            quality_gates_run=gates_run,
            quality_gates_failed=gates_failed,
        )
        assert event.quality_gate_passed is False
        assert event.quality_gates_run == gates_run
        assert event.quality_gates_failed == gates_failed

    def test_builds_event_with_context_info(self) -> None:
        """Context compaction/rotation/utilization should be carried over."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            context_compactions=2,
            context_rotations=1,
            context_utilization=0.75,
        )
        assert event.context_compactions == 2
        assert event.context_rotations == 1
        assert event.context_utilization_final == 0.75

    def test_builds_event_with_retry_count(self) -> None:
        """retry_count should be carried over unchanged."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            retry_count=3,
        )
        assert event.retry_count == 3

    def test_builds_event_with_custom_language(self) -> None:
        """An explicit language should override the default."""
        event = build_task_event(
            instance_id=self.VALID_INSTANCE_ID,
            language="python",
        )
        assert event.language == "python"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TelemetryClient lifecycle (disabled mode)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTelemetryDisabledMode:
    """Disabled telemetry must be a silent no-op (no HTTP calls)."""

    def test_disabled_client_does_not_start(self) -> None:
        """A disabled client never reports itself as running."""
        client = TelemetryClient(TelemetryConfig(enabled=False))
        # Constructing the client must not raise.
        assert client.is_running is False

    def test_disabled_client_track_is_noop(self) -> None:
        """Events tracked while disabled are silently dropped."""
        client = TelemetryClient(TelemetryConfig(enabled=False))

        event = build_task_event(
            instance_id="12345678-1234-1234-1234-123456789abc",
        )
        # Must not raise; the event must not be queued.
        client.track(event)
        assert client.queue_size == 0

    @pytest.mark.asyncio
    async def test_disabled_client_start_stop_async(self) -> None:
        """Async start/stop are safe no-ops when disabled."""
        client = TelemetryClient(TelemetryConfig(enabled=False))

        await client.start_async()
        assert client.is_running is False
        await client.stop_async()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lifespan integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLifespanIntegration:
    """Mosaic telemetry wiring inside the FastAPI lifespan."""

    @pytest.mark.asyncio
    async def test_lifespan_sets_mosaic_telemetry_on_app_state(self) -> None:
        """When enabled, lifespan should store a client on app.state."""
        env = {
            "GITEA_WEBHOOK_SECRET": "test-secret",
            "GITEA_URL": "https://git.mosaicstack.dev",
            "ANTHROPIC_API_KEY": "test-key",
            "MOSAIC_TELEMETRY_ENABLED": "true",
            "MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com",
            "MOSAIC_TELEMETRY_API_KEY": "a" * 64,
            "MOSAIC_TELEMETRY_INSTANCE_ID": "12345678-1234-1234-1234-123456789abc",
            "OTEL_ENABLED": "false",
            "COORDINATOR_ENABLED": "false",
        }
        with patch.dict("os.environ", env):
            # Reload config so module-level settings pick up the test env vars.
            import importlib

            from src import config

            importlib.reload(config)

            from src.main import lifespan

            app = FastAPI()
            async with lifespan(app) as _state:
                client = getattr(app.state, "mosaic_telemetry", None)
                assert client is not None
                assert isinstance(client, TelemetryClient)

    @pytest.mark.asyncio
    async def test_lifespan_sets_none_when_disabled(self) -> None:
        """When disabled, lifespan should set app.state.mosaic_telemetry to None."""
        env = {
            "GITEA_WEBHOOK_SECRET": "test-secret",
            "GITEA_URL": "https://git.mosaicstack.dev",
            "ANTHROPIC_API_KEY": "test-key",
            "MOSAIC_TELEMETRY_ENABLED": "false",
            "OTEL_ENABLED": "false",
            "COORDINATOR_ENABLED": "false",
        }
        with patch.dict("os.environ", env):
            import importlib

            from src import config

            importlib.reload(config)

            from src.main import lifespan

            app = FastAPI()
            async with lifespan(app) as _state:
                client = getattr(app.state, "mosaic_telemetry", None)
                assert client is None

    @pytest.mark.asyncio
    async def test_lifespan_stops_client_on_shutdown(self) -> None:
        """Lifespan shutdown should stop the telemetry client."""
        env = {
            "GITEA_WEBHOOK_SECRET": "test-secret",
            "GITEA_URL": "https://git.mosaicstack.dev",
            "ANTHROPIC_API_KEY": "test-key",
            "MOSAIC_TELEMETRY_ENABLED": "true",
            "MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com",
            "MOSAIC_TELEMETRY_API_KEY": "a" * 64,
            "MOSAIC_TELEMETRY_INSTANCE_ID": "12345678-1234-1234-1234-123456789abc",
            "OTEL_ENABLED": "false",
            "COORDINATOR_ENABLED": "false",
        }
        with patch.dict("os.environ", env):
            import importlib

            from src import config

            importlib.reload(config)

            from src.main import lifespan

            app = FastAPI()
            async with lifespan(app) as _state:
                client = app.state.mosaic_telemetry
                assert isinstance(client, TelemetryClient)
                # Client was started inside the context manager.

            # After lifespan exits, stop_async has run in the shutdown
            # section, so the client must no longer be running.
            assert not client.is_running
|
||||
796
apps/coordinator/tests/test_task_telemetry.py
Normal file
796
apps/coordinator/tests/test_task_telemetry.py
Normal file
@@ -0,0 +1,796 @@
|
||||
"""Tests for task completion telemetry instrumentation in the coordinator.
|
||||
|
||||
These tests verify that the Coordinator and OrchestrationLoop correctly
|
||||
emit TaskCompletionEvents via the Mosaic telemetry SDK after each task
|
||||
dispatch attempt.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
TelemetryClient,
|
||||
)
|
||||
|
||||
from src.coordinator import (
|
||||
_AGENT_TELEMETRY_MAP,
|
||||
_DIFFICULTY_TO_COMPLEXITY,
|
||||
_GATE_NAME_TO_ENUM,
|
||||
Coordinator,
|
||||
OrchestrationLoop,
|
||||
_resolve_agent_fields,
|
||||
)
|
||||
from src.gates.quality_gate import GateResult
|
||||
from src.models import IssueMetadata
|
||||
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
|
||||
from src.queue import QueueManager
|
||||
|
||||
VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def temp_queue_file() -> Generator[Path, None, None]:
    """Yield a throwaway JSON file path for queue persistence, removed on teardown."""
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as handle:
        path = Path(handle.name)
    yield path
    if path.exists():
        path.unlink()
|
||||
|
||||
|
||||
@pytest.fixture
def queue_manager(temp_queue_file: Path) -> QueueManager:
    """Provide a QueueManager persisting to the temporary queue file."""
    return QueueManager(queue_file=temp_queue_file)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_telemetry_client() -> MagicMock:
    """Provide a spec'd TelemetryClient mock with a recordable track()."""
    mock = MagicMock(spec=TelemetryClient)
    mock.track = MagicMock()
    return mock
|
||||
|
||||
|
||||
@pytest.fixture
def sonnet_metadata() -> IssueMetadata:
    """Issue metadata for a medium-difficulty task assigned to the sonnet agent."""
    return IssueMetadata(
        assigned_agent="sonnet",
        difficulty="medium",
        estimated_context=50000,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def opus_metadata() -> IssueMetadata:
    """Issue metadata for a hard-difficulty task assigned to the opus agent."""
    return IssueMetadata(
        assigned_agent="opus",
        difficulty="hard",
        estimated_context=120000,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resolve_agent_fields tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResolveAgentFields:
    """Behaviour of the _resolve_agent_fields helper."""

    def test_known_agent_sonnet(self) -> None:
        """'sonnet' maps to the Anthropic sonnet model via Claude Code."""
        model, provider, harness = _resolve_agent_fields("sonnet")
        assert model == "claude-sonnet-4-20250514"
        assert provider == Provider.ANTHROPIC
        assert harness == Harness.CLAUDE_CODE

    def test_known_agent_opus(self) -> None:
        """'opus' maps to the Anthropic opus model via Claude Code."""
        model, provider, harness = _resolve_agent_fields("opus")
        assert model == "claude-opus-4-20250514"
        assert provider == Provider.ANTHROPIC
        assert harness == Harness.CLAUDE_CODE

    def test_known_agent_haiku(self) -> None:
        """'haiku' maps to the Anthropic haiku model via Claude Code."""
        model, provider, harness = _resolve_agent_fields("haiku")
        assert model == "claude-haiku-3.5-20241022"
        assert provider == Provider.ANTHROPIC
        assert harness == Harness.CLAUDE_CODE

    def test_known_agent_glm(self) -> None:
        """'glm' (self-hosted) maps to glm-4 with CUSTOM provider/harness."""
        model, provider, harness = _resolve_agent_fields("glm")
        assert model == "glm-4"
        assert provider == Provider.CUSTOM
        assert harness == Harness.CUSTOM

    def test_known_agent_minimax(self) -> None:
        """'minimax' (self-hosted) maps to minimax with CUSTOM provider/harness."""
        model, provider, harness = _resolve_agent_fields("minimax")
        assert model == "minimax"
        assert provider == Provider.CUSTOM
        assert harness == Harness.CUSTOM

    def test_unknown_agent_returns_defaults(self) -> None:
        """Unrecognised agent names resolve to the unknown sentinels."""
        model, provider, harness = _resolve_agent_fields("nonexistent")
        assert model == "unknown"
        assert provider == Provider.UNKNOWN
        assert harness == Harness.UNKNOWN

    def test_all_map_entries_covered(self) -> None:
        """Every _AGENT_TELEMETRY_MAP entry resolves to a concrete model."""
        for agent_name in _AGENT_TELEMETRY_MAP:
            model, provider, harness = _resolve_agent_fields(agent_name)
            assert model != "unknown"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Coordinator telemetry emission tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCoordinatorTelemetry:
    """Telemetry emission from the Coordinator class."""

    @staticmethod
    def _build(
        queue_manager: QueueManager,
        telemetry_client: MagicMock | None,
        instance_id: str = VALID_INSTANCE_ID,
    ) -> Coordinator:
        """Construct a Coordinator wired to the given telemetry client."""
        return Coordinator(
            queue_manager=queue_manager,
            telemetry_client=telemetry_client,
            instance_id=instance_id,
        )

    @pytest.mark.asyncio
    async def test_emits_success_event_on_completion(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """A successful task produces exactly one SUCCESS event."""
        queue_manager.enqueue(100, sonnet_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client)

        await coordinator.process_queue()

        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert isinstance(event, TaskCompletionEvent)
        assert event.outcome == Outcome.SUCCESS
        assert event.task_type == TaskType.IMPLEMENTATION
        assert event.complexity == Complexity.MEDIUM
        assert event.provider == Provider.ANTHROPIC
        assert event.harness == Harness.CLAUDE_CODE
        assert str(event.instance_id) == VALID_INSTANCE_ID
        assert event.task_duration_ms >= 0

    @pytest.mark.asyncio
    async def test_emits_failure_event_when_agent_fails(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """spawn_agent returning False yields a FAILURE event."""
        queue_manager.enqueue(101, sonnet_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client)
        # Force the dispatch to report failure.
        coordinator.spawn_agent = AsyncMock(return_value=False)  # type: ignore[method-assign]

        await coordinator.process_queue()

        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.outcome == Outcome.FAILURE

    @pytest.mark.asyncio
    async def test_emits_failure_event_on_exception(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """spawn_agent raising yields a FAILURE event rather than propagating."""
        queue_manager.enqueue(102, sonnet_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client)
        coordinator.spawn_agent = AsyncMock(side_effect=RuntimeError("agent crashed"))  # type: ignore[method-assign]

        await coordinator.process_queue()

        mock_telemetry_client.track.assert_called_once()
        event = mock_telemetry_client.track.call_args[0][0]
        assert event.outcome == Outcome.FAILURE

    @pytest.mark.asyncio
    async def test_maps_difficulty_to_complexity(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        opus_metadata: IssueMetadata,
    ) -> None:
        """difficulty='hard' maps to Complexity.HIGH in the emitted event."""
        queue_manager.enqueue(103, opus_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client)

        await coordinator.process_queue()

        event = mock_telemetry_client.track.call_args[0][0]
        assert event.complexity == Complexity.HIGH

    @pytest.mark.asyncio
    async def test_maps_agent_to_model_and_provider(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        opus_metadata: IssueMetadata,
    ) -> None:
        """The 'opus' agent maps to an opus model and ANTHROPIC provider."""
        queue_manager.enqueue(104, opus_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client)

        await coordinator.process_queue()

        event = mock_telemetry_client.track.call_args[0][0]
        assert "opus" in event.model
        assert event.provider == Provider.ANTHROPIC
        assert event.harness == Harness.CLAUDE_CODE

    @pytest.mark.asyncio
    async def test_no_event_when_telemetry_disabled(
        self,
        queue_manager: QueueManager,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """A None telemetry client must be tolerated without tracking."""
        queue_manager.enqueue(105, sonnet_metadata)
        coordinator = self._build(queue_manager, None)

        # Must not raise even though no client exists.
        await coordinator.process_queue()

    @pytest.mark.asyncio
    async def test_no_event_when_instance_id_empty(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """An empty instance_id suppresses event emission."""
        queue_manager.enqueue(106, sonnet_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client, instance_id="")

        await coordinator.process_queue()
        mock_telemetry_client.track.assert_not_called()

    @pytest.mark.asyncio
    async def test_telemetry_exception_does_not_propagate(
        self,
        queue_manager: QueueManager,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """Telemetry failures must never break task processing."""
        queue_manager.enqueue(107, sonnet_metadata)

        bad_client = MagicMock(spec=TelemetryClient)
        bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))

        coordinator = self._build(queue_manager, bad_client)

        # Processing completes despite the telemetry failure.
        result = await coordinator.process_queue()
        assert result is not None
        assert result.issue_number == 107

    @pytest.mark.asyncio
    async def test_no_event_when_queue_empty(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
    ) -> None:
        """An empty queue emits nothing and returns None."""
        coordinator = self._build(queue_manager, mock_telemetry_client)

        result = await coordinator.process_queue()
        assert result is None
        mock_telemetry_client.track.assert_not_called()

    @pytest.mark.asyncio
    async def test_estimated_input_tokens_from_metadata(
        self,
        queue_manager: QueueManager,
        mock_telemetry_client: MagicMock,
        sonnet_metadata: IssueMetadata,
    ) -> None:
        """estimated_input_tokens is taken from the issue metadata."""
        queue_manager.enqueue(108, sonnet_metadata)
        coordinator = self._build(queue_manager, mock_telemetry_client)

        await coordinator.process_queue()

        event = mock_telemetry_client.track.call_args[0][0]
        assert event.estimated_input_tokens == 50000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OrchestrationLoop telemetry emission tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_orchestration_loop(
    queue_manager: QueueManager,
    telemetry_client: TelemetryClient | None = None,
    instance_id: str = VALID_INSTANCE_ID,
    quality_result: VerificationResult | None = None,
) -> OrchestrationLoop:
    """Create an OrchestrationLoop with mocked dependencies.

    Args:
        queue_manager: Queue manager instance.
        telemetry_client: Optional telemetry client.
        instance_id: Coordinator instance ID.
        quality_result: Override quality verification result; defaults to
            an all-gates-passed result.

    Returns:
        Configured OrchestrationLoop.
    """
    # Quality orchestrator mock: all gates pass unless overridden.
    quality = MagicMock(spec=QualityOrchestrator)
    verification = quality_result or VerificationResult(
        all_passed=True,
        gate_results={
            "build": GateResult(passed=True, message="Build OK"),
            "lint": GateResult(passed=True, message="Lint OK"),
            "test": GateResult(passed=True, message="Test OK"),
            "coverage": GateResult(passed=True, message="Coverage OK"),
        },
    )
    quality.verify_completion = AsyncMock(return_value=verification)

    # Forced-continuation service mock.
    from src.forced_continuation import ForcedContinuationService

    continuation = MagicMock(spec=ForcedContinuationService)
    continuation.generate_prompt = MagicMock(return_value="Fix: build failed")

    # Context monitor mock: always allows the task to continue.
    from src.context_monitor import ContextMonitor

    monitor = MagicMock(spec=ContextMonitor)
    monitor.determine_action = AsyncMock(return_value="continue")

    return OrchestrationLoop(
        queue_manager=queue_manager,
        quality_orchestrator=quality,
        continuation_service=continuation,
        context_monitor=monitor,
        telemetry_client=telemetry_client,
        instance_id=instance_id,
    )
|
||||
|
||||
|
||||
class TestOrchestrationLoopTelemetry:
|
||||
"""Tests for telemetry emission in the OrchestrationLoop class."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_emits_success_with_quality_gates(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Should emit SUCCESS event with quality gate details."""
|
||||
queue_manager.enqueue(200, sonnet_metadata)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
mock_telemetry_client.track.assert_called_once()
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.outcome == Outcome.SUCCESS
|
||||
assert event.quality_gate_passed is True
|
||||
assert set(event.quality_gates_run) == {
|
||||
QualityGate.BUILD,
|
||||
QualityGate.LINT,
|
||||
QualityGate.TEST,
|
||||
QualityGate.COVERAGE,
|
||||
}
|
||||
assert event.quality_gates_failed == []
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_emits_failure_with_failed_gates(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Should emit FAILURE event with failed gate details."""
|
||||
queue_manager.enqueue(201, sonnet_metadata)
|
||||
|
||||
failed_result = VerificationResult(
|
||||
all_passed=False,
|
||||
gate_results={
|
||||
"build": GateResult(passed=True, message="Build OK"),
|
||||
"lint": GateResult(passed=True, message="Lint OK"),
|
||||
"test": GateResult(passed=False, message="3 tests failed"),
|
||||
"coverage": GateResult(passed=False, message="Coverage 70% < 85%"),
|
||||
},
|
||||
)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager,
|
||||
telemetry_client=mock_telemetry_client,
|
||||
quality_result=failed_result,
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
mock_telemetry_client.track.assert_called_once()
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.outcome == Outcome.FAILURE
|
||||
assert event.quality_gate_passed is False
|
||||
assert set(event.quality_gates_failed) == {
|
||||
QualityGate.TEST,
|
||||
QualityGate.COVERAGE,
|
||||
}
|
||||
assert set(event.quality_gates_run) == {
|
||||
QualityGate.BUILD,
|
||||
QualityGate.LINT,
|
||||
QualityGate.TEST,
|
||||
QualityGate.COVERAGE,
|
||||
}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retry_count_starts_at_zero(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""First attempt should report retry_count=0."""
|
||||
queue_manager.enqueue(202, sonnet_metadata)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.retry_count == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retry_count_increments_on_failure(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Retry count should increment after a quality gate failure."""
|
||||
queue_manager.enqueue(203, sonnet_metadata)
|
||||
|
||||
failed_result = VerificationResult(
|
||||
all_passed=False,
|
||||
gate_results={
|
||||
"build": GateResult(passed=False, message="Build failed"),
|
||||
},
|
||||
)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager,
|
||||
telemetry_client=mock_telemetry_client,
|
||||
quality_result=failed_result,
|
||||
)
|
||||
|
||||
# First attempt
|
||||
await loop.process_next_issue()
|
||||
event1 = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event1.retry_count == 0
|
||||
|
||||
# Re-enqueue and process again (simulates retry)
|
||||
queue_manager.enqueue(203, sonnet_metadata)
|
||||
mock_telemetry_client.track.reset_mock()
|
||||
|
||||
await loop.process_next_issue()
|
||||
event2 = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event2.retry_count == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retry_count_clears_on_success(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Retry count should be cleared after a successful completion."""
|
||||
queue_manager.enqueue(204, sonnet_metadata)
|
||||
|
||||
# First: fail
|
||||
failed_result = VerificationResult(
|
||||
all_passed=False,
|
||||
gate_results={
|
||||
"build": GateResult(passed=False, message="Build failed"),
|
||||
},
|
||||
)
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager,
|
||||
telemetry_client=mock_telemetry_client,
|
||||
quality_result=failed_result,
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
assert loop._retry_counts.get(204) == 1
|
||||
|
||||
# Now succeed
|
||||
success_result = VerificationResult(
|
||||
all_passed=True,
|
||||
gate_results={
|
||||
"build": GateResult(passed=True, message="Build OK"),
|
||||
},
|
||||
)
|
||||
loop.quality_orchestrator.verify_completion = AsyncMock(return_value=success_result) # type: ignore[method-assign]
|
||||
queue_manager.enqueue(204, sonnet_metadata)
|
||||
mock_telemetry_client.track.reset_mock()
|
||||
|
||||
await loop.process_next_issue()
|
||||
assert 204 not in loop._retry_counts
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_emits_failure_when_agent_spawn_fails(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Should emit FAILURE when _spawn_agent returns False."""
|
||||
queue_manager.enqueue(205, sonnet_metadata)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
loop._spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign]
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
mock_telemetry_client.track.assert_called_once()
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.outcome == Outcome.FAILURE
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_event_when_telemetry_disabled(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Should not call track when telemetry_client is None."""
|
||||
queue_manager.enqueue(206, sonnet_metadata)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=None
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
result = await loop.process_next_issue()
|
||||
assert result is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_telemetry_exception_does_not_propagate(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Telemetry failures must never disrupt task processing."""
|
||||
queue_manager.enqueue(207, sonnet_metadata)
|
||||
|
||||
bad_client = MagicMock(spec=TelemetryClient)
|
||||
bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=bad_client
|
||||
)
|
||||
|
||||
result = await loop.process_next_issue()
|
||||
assert result is not None
|
||||
assert result.issue_number == 207
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_duration_is_positive(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Duration should be a non-negative integer."""
|
||||
queue_manager.enqueue(208, sonnet_metadata)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.task_duration_ms >= 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_maps_glm_agent_correctly(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
) -> None:
|
||||
"""Should map GLM (self-hosted) agent to CUSTOM provider/harness."""
|
||||
glm_meta = IssueMetadata(
|
||||
assigned_agent="glm",
|
||||
difficulty="medium",
|
||||
estimated_context=30000,
|
||||
)
|
||||
queue_manager.enqueue(209, glm_meta)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.model == "glm-4"
|
||||
assert event.provider == Provider.CUSTOM
|
||||
assert event.harness == Harness.CUSTOM
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_maps_easy_difficulty_to_low_complexity(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
) -> None:
|
||||
"""Should map difficulty='easy' to Complexity.LOW."""
|
||||
easy_meta = IssueMetadata(
|
||||
assigned_agent="haiku",
|
||||
difficulty="easy",
|
||||
estimated_context=10000,
|
||||
)
|
||||
queue_manager.enqueue(210, easy_meta)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert event.complexity == Complexity.LOW
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_event_when_queue_empty(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
) -> None:
|
||||
"""Should not emit an event when queue is empty."""
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager, telemetry_client=mock_telemetry_client
|
||||
)
|
||||
|
||||
result = await loop.process_next_issue()
|
||||
assert result is None
|
||||
mock_telemetry_client.track.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_gate_names_excluded(
|
||||
self,
|
||||
queue_manager: QueueManager,
|
||||
mock_telemetry_client: MagicMock,
|
||||
sonnet_metadata: IssueMetadata,
|
||||
) -> None:
|
||||
"""Gate names not in _GATE_NAME_TO_ENUM should be excluded from telemetry."""
|
||||
queue_manager.enqueue(211, sonnet_metadata)
|
||||
|
||||
result_with_unknown = VerificationResult(
|
||||
all_passed=False,
|
||||
gate_results={
|
||||
"build": GateResult(passed=True, message="Build OK"),
|
||||
"unknown_gate": GateResult(passed=False, message="Unknown gate"),
|
||||
},
|
||||
)
|
||||
|
||||
loop = _make_orchestration_loop(
|
||||
queue_manager,
|
||||
telemetry_client=mock_telemetry_client,
|
||||
quality_result=result_with_unknown,
|
||||
)
|
||||
|
||||
await loop.process_next_issue()
|
||||
|
||||
event = mock_telemetry_client.track.call_args[0][0]
|
||||
assert QualityGate.BUILD in event.quality_gates_run
|
||||
# unknown_gate should not appear
|
||||
assert len(event.quality_gates_run) == 1
|
||||
assert len(event.quality_gates_failed) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mapping dict completeness tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMappingCompleteness:
    """Verify the telemetry mapping dicts cover every value the coordinator uses."""

    def test_difficulty_map_covers_all_metadata_values(self) -> None:
        """Every valid difficulty level must have a Complexity mapping."""
        assert set(_DIFFICULTY_TO_COMPLEXITY) == {"easy", "medium", "hard"}

    def test_gate_name_map_covers_all_orchestrator_gates(self) -> None:
        """Every gate name emitted by QualityOrchestrator must be mappable."""
        assert set(_GATE_NAME_TO_ENUM) == {"build", "lint", "test", "coverage"}

    def test_agent_map_covers_all_configured_agents(self) -> None:
        """Every coordinator agent must have a telemetry mapping."""
        assert set(_AGENT_TELEMETRY_MAP) == {"sonnet", "opus", "haiku", "glm", "minimax"}
|
||||
@@ -33,6 +33,7 @@
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0",
|
||||
"react-grid-layout": "^2.2.2",
|
||||
"recharts": "^3.7.0",
|
||||
"socket.io-client": "^4.8.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
288
apps/web/src/app/(authenticated)/usage/page.test.tsx
Normal file
288
apps/web/src/app/(authenticated)/usage/page.test.tsx
Normal file
@@ -0,0 +1,288 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen, waitFor, fireEvent } from "@testing-library/react";
|
||||
import type { ReactNode } from "react";
|
||||
import UsagePage from "./page";
|
||||
|
||||
// ─── Component Prop Types ────────────────────────────────────────────
|
||||
|
||||
interface ChildrenProps {
|
||||
children: ReactNode;
|
||||
}
|
||||
|
||||
interface StyledChildrenProps extends ChildrenProps {
|
||||
className?: string;
|
||||
}
|
||||
|
||||
// ─── Mocks ───────────────────────────────────────────────────────────
|
||||
|
||||
// Mock @/components/ui/card — @mosaic/ui can't be resolved in vitest.
// Each stub renders a plain element, forwarding children (and className where
// the real component accepts one) so text queries and class assertions still work.
vi.mock("@/components/ui/card", () => ({
  Card: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
    <div className={className}>{children}</div>
  ),
  CardHeader: ({ children }: ChildrenProps): React.JSX.Element => <div>{children}</div>,
  CardContent: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
    <div className={className}>{children}</div>
  ),
  CardFooter: ({ children }: ChildrenProps): React.JSX.Element => <div>{children}</div>,
  CardTitle: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
    <h3 className={className}>{children}</h3>
  ),
  CardDescription: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
    <p className={className}>{children}</p>
  ),
}));
|
||||
|
||||
// Mock recharts — jsdom has no SVG layout engine, so we render stubs.
// Container components (LineChart/BarChart/PieChart) get a data-testid so the
// tests can assert which chart kind rendered; leaf components become empty divs.
vi.mock("recharts", () => ({
  LineChart: ({ children }: ChildrenProps): React.JSX.Element => (
    <div data-testid="recharts-line-chart">{children}</div>
  ),
  Line: (): React.JSX.Element => <div />,
  BarChart: ({ children }: ChildrenProps): React.JSX.Element => (
    <div data-testid="recharts-bar-chart">{children}</div>
  ),
  Bar: (): React.JSX.Element => <div />,
  PieChart: ({ children }: ChildrenProps): React.JSX.Element => (
    <div data-testid="recharts-pie-chart">{children}</div>
  ),
  Pie: (): React.JSX.Element => <div />,
  Cell: (): React.JSX.Element => <div />,
  XAxis: (): React.JSX.Element => <div />,
  YAxis: (): React.JSX.Element => <div />,
  CartesianGrid: (): React.JSX.Element => <div />,
  Tooltip: (): React.JSX.Element => <div />,
  ResponsiveContainer: ({ children }: ChildrenProps): React.JSX.Element => <div>{children}</div>,
  Legend: (): React.JSX.Element => <div />,
}));
|
||||
|
||||
// Mock the telemetry API module. Each fetch function is replaced with a bare
// vi.fn(); setupMocks() below installs resolved/rejected values per test.
vi.mock("@/lib/api/telemetry", () => ({
  fetchUsageSummary: vi.fn(),
  fetchTokenUsage: vi.fn(),
  fetchCostBreakdown: vi.fn(),
  fetchTaskOutcomes: vi.fn(),
}));
|
||||
|
||||
// Import mocked modules after vi.mock
|
||||
import {
|
||||
fetchUsageSummary,
|
||||
fetchTokenUsage,
|
||||
fetchCostBreakdown,
|
||||
fetchTaskOutcomes,
|
||||
} from "@/lib/api/telemetry";
|
||||
|
||||
// ─── Test Data ───────────────────────────────────────────────────────
|
||||
|
||||
// Fixture matching the UsageSummary shape; the rendered values asserted in the
// tests ("245.8K", "$3.42", "47", "87.0%") are derived from these numbers.
const mockSummary = {
  totalTokens: 245800,
  totalCost: 3.42,
  taskCount: 47,
  avgQualityGatePassRate: 0.87,
};

// Two daily TokenUsagePoint entries — enough for the line chart to render.
const mockTokenUsage = [
  { date: "2026-02-08", inputTokens: 10000, outputTokens: 5000, totalTokens: 15000 },
  { date: "2026-02-09", inputTokens: 12000, outputTokens: 6000, totalTokens: 18000 },
];

// Per-model CostBreakdownItem entries for the bar chart.
const mockCostBreakdown = [
  { model: "claude-sonnet-4-5", provider: "anthropic", cost: 18.5, taskCount: 124 },
  { model: "gpt-4o", provider: "openai", cost: 12.3, taskCount: 89 },
];

// TaskOutcomeItem entries; `color` feeds the pie slices.
const mockTaskOutcomes = [
  { outcome: "Success", count: 312, color: "#6EBF8B" },
  { outcome: "Partial", count: 48, color: "#F5C862" },
];
|
||||
|
||||
function setupMocks(overrides?: { empty?: boolean; error?: boolean }): void {
|
||||
if (overrides?.error) {
|
||||
vi.mocked(fetchUsageSummary).mockRejectedValue(new Error("Network error"));
|
||||
vi.mocked(fetchTokenUsage).mockRejectedValue(new Error("Network error"));
|
||||
vi.mocked(fetchCostBreakdown).mockRejectedValue(new Error("Network error"));
|
||||
vi.mocked(fetchTaskOutcomes).mockRejectedValue(new Error("Network error"));
|
||||
return;
|
||||
}
|
||||
|
||||
const summary = overrides?.empty ? { ...mockSummary, taskCount: 0 } : mockSummary;
|
||||
|
||||
vi.mocked(fetchUsageSummary).mockResolvedValue(summary);
|
||||
vi.mocked(fetchTokenUsage).mockResolvedValue(mockTokenUsage);
|
||||
vi.mocked(fetchCostBreakdown).mockResolvedValue(mockCostBreakdown);
|
||||
vi.mocked(fetchTaskOutcomes).mockResolvedValue(mockTaskOutcomes);
|
||||
}
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────
|
||||
|
||||
// Behavioral tests for the Usage page: loading, success, empty, and error
// states, plus the time-range selector. All network access goes through the
// mocked telemetry fetchers installed by setupMocks().
describe("UsagePage", (): void => {
  beforeEach((): void => {
    vi.clearAllMocks();
  });

  it("should render the page title and subtitle", (): void => {
    setupMocks();
    render(<UsagePage />);

    expect(screen.getByRole("heading", { level: 1 })).toHaveTextContent("Usage");
    expect(screen.getByText("Token usage and cost overview")).toBeInTheDocument();
  });

  it("should have proper layout structure", (): void => {
    setupMocks();
    const { container } = render(<UsagePage />);
    const main = container.querySelector("main");
    expect(main).toBeInTheDocument();
  });

  it("should show loading skeleton initially", (): void => {
    // Synchronous assertion: the skeleton is visible before fetches resolve.
    setupMocks();
    render(<UsagePage />);
    expect(screen.getByTestId("loading-skeleton")).toBeInTheDocument();
  });

  it("should render summary cards after loading", async (): Promise<void> => {
    setupMocks();
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
    });

    // Check summary card values
    expect(screen.getByText("Total Tokens")).toBeInTheDocument();
    expect(screen.getByText("245.8K")).toBeInTheDocument();
    expect(screen.getByText("Estimated Cost")).toBeInTheDocument();
    expect(screen.getByText("$3.42")).toBeInTheDocument();
    expect(screen.getByText("Task Count")).toBeInTheDocument();
    expect(screen.getByText("47")).toBeInTheDocument();
    expect(screen.getByText("Quality Gate Pass Rate")).toBeInTheDocument();
    expect(screen.getByText("87.0%")).toBeInTheDocument();
  });

  it("should render all chart sections after loading", async (): Promise<void> => {
    setupMocks();
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByTestId("token-usage-chart")).toBeInTheDocument();
      expect(screen.getByTestId("cost-breakdown-chart")).toBeInTheDocument();
      expect(screen.getByTestId("task-outcomes-chart")).toBeInTheDocument();
    });
  });

  it("should render the time range selector with three options", (): void => {
    setupMocks();
    render(<UsagePage />);

    expect(screen.getByText("7 Days")).toBeInTheDocument();
    expect(screen.getByText("30 Days")).toBeInTheDocument();
    expect(screen.getByText("90 Days")).toBeInTheDocument();
  });

  it("should have 30 Days selected by default", (): void => {
    setupMocks();
    render(<UsagePage />);

    const button30d = screen.getByText("30 Days");
    expect(button30d).toHaveAttribute("aria-pressed", "true");
  });

  it("should change time range when a different option is clicked", async (): Promise<void> => {
    setupMocks();
    render(<UsagePage />);

    // Wait for initial load
    await waitFor((): void => {
      expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
    });

    // Click 7 Days
    const button7d = screen.getByText("7 Days");
    fireEvent.click(button7d);

    // aria-pressed flips to the newly selected range.
    expect(button7d).toHaveAttribute("aria-pressed", "true");
    expect(screen.getByText("30 Days")).toHaveAttribute("aria-pressed", "false");
  });

  it("should refetch data when time range changes", async (): Promise<void> => {
    setupMocks();
    render(<UsagePage />);

    // Wait for initial load (30d default)
    await waitFor((): void => {
      expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
    });

    // Initial call was with "30d"
    expect(fetchUsageSummary).toHaveBeenCalledWith("30d");

    // Change to 7d
    fireEvent.click(screen.getByText("7 Days"));

    await waitFor((): void => {
      expect(fetchUsageSummary).toHaveBeenCalledWith("7d");
    });
  });

  it("should show empty state when no tasks exist", async (): Promise<void> => {
    setupMocks({ empty: true });
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByTestId("empty-state")).toBeInTheDocument();
    });

    expect(screen.getByText("No usage data yet")).toBeInTheDocument();
  });

  it("should show error state on fetch failure", async (): Promise<void> => {
    setupMocks({ error: true });
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByText("Network error")).toBeInTheDocument();
    });

    expect(screen.getByText("Try again")).toBeInTheDocument();
  });

  it("should retry loading when Try again button is clicked after error", async (): Promise<void> => {
    setupMocks({ error: true });
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByText("Try again")).toBeInTheDocument();
    });

    // Now set up success mocks and click retry
    setupMocks();
    fireEvent.click(screen.getByText("Try again"));

    await waitFor((): void => {
      expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
    });
  });

  it("should display chart section titles", async (): Promise<void> => {
    setupMocks();
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByText("Token Usage Over Time")).toBeInTheDocument();
      expect(screen.getByText("Cost by Model")).toBeInTheDocument();
      expect(screen.getByText("Task Outcomes")).toBeInTheDocument();
    });
  });

  it("should render recharts components within chart containers", async (): Promise<void> => {
    setupMocks();
    render(<UsagePage />);

    await waitFor((): void => {
      expect(screen.getByTestId("recharts-line-chart")).toBeInTheDocument();
      expect(screen.getByTestId("recharts-bar-chart")).toBeInTheDocument();
      expect(screen.getByTestId("recharts-pie-chart")).toBeInTheDocument();
    });
  });
});
|
||||
430
apps/web/src/app/(authenticated)/usage/page.tsx
Normal file
430
apps/web/src/app/(authenticated)/usage/page.tsx
Normal file
@@ -0,0 +1,430 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect, useCallback } from "react";
|
||||
import type { ReactElement } from "react";
|
||||
import {
|
||||
LineChart,
|
||||
Line,
|
||||
BarChart,
|
||||
Bar,
|
||||
PieChart,
|
||||
Pie,
|
||||
XAxis,
|
||||
YAxis,
|
||||
CartesianGrid,
|
||||
Tooltip,
|
||||
ResponsiveContainer,
|
||||
Legend,
|
||||
} from "recharts";
|
||||
import { Card, CardHeader, CardContent, CardTitle, CardDescription } from "@/components/ui/card";
|
||||
import {
|
||||
fetchUsageSummary,
|
||||
fetchTokenUsage,
|
||||
fetchCostBreakdown,
|
||||
fetchTaskOutcomes,
|
||||
} from "@/lib/api/telemetry";
|
||||
import type {
|
||||
TimeRange,
|
||||
UsageSummary,
|
||||
TokenUsagePoint,
|
||||
CostBreakdownItem,
|
||||
TaskOutcomeItem,
|
||||
} from "@/lib/api/telemetry";
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────
|
||||
|
||||
// Selectable reporting windows; `value` is the API query parameter, `label`
// is the button text. "30d" is the page default.
const TIME_RANGES: { value: TimeRange; label: string }[] = [
  { value: "7d", label: "7 Days" },
  { value: "30d", label: "30 Days" },
  { value: "90d", label: "90 Days" },
];

// Calm, PDA-friendly chart colors (no aggressive reds)
const CHART_COLORS = {
  inputTokens: "#6366F1", // Indigo
  outputTokens: "#38BDF8", // Sky blue
  grid: "#E2E8F0", // Slate 200
  barFill: "#818CF8", // Indigo 400
};
|
||||
|
||||
// ─── Helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
function formatNumber(value: number): string {
|
||||
if (value >= 1_000_000) {
|
||||
return `${(value / 1_000_000).toFixed(1)}M`;
|
||||
}
|
||||
if (value >= 1_000) {
|
||||
return `${(value / 1_000).toFixed(1)}K`;
|
||||
}
|
||||
return value.toFixed(0);
|
||||
}
|
||||
|
||||
function formatCurrency(value: number): string {
|
||||
return `$${value.toFixed(2)}`;
|
||||
}
|
||||
|
||||
function formatPercent(value: number): string {
|
||||
return `${(value * 100).toFixed(1)}%`;
|
||||
}
|
||||
|
||||
function formatDateLabel(dateStr: string): string {
|
||||
const date = new Date(dateStr + "T00:00:00");
|
||||
return date.toLocaleDateString("en-US", { month: "short", day: "numeric" });
|
||||
}
|
||||
|
||||
/**
|
||||
* Map TaskOutcomeItem[] to recharts-compatible data with `fill` property.
|
||||
* This replaces deprecated Cell component (removed in Recharts 4.0).
|
||||
*/
|
||||
function toFillData(
|
||||
outcomes: TaskOutcomeItem[]
|
||||
): { outcome: string; count: number; fill: string }[] {
|
||||
return outcomes.map((item) => ({
|
||||
outcome: item.outcome,
|
||||
count: item.count,
|
||||
fill: item.color,
|
||||
}));
|
||||
}
|
||||
|
||||
// ─── Sub-components ──────────────────────────────────────────────────
|
||||
|
||||
function SummaryCard({
|
||||
title,
|
||||
value,
|
||||
subtitle,
|
||||
}: {
|
||||
title: string;
|
||||
value: string;
|
||||
subtitle?: string;
|
||||
}): ReactElement {
|
||||
return (
|
||||
<Card>
|
||||
<CardContent className="pt-6">
|
||||
<p className="text-sm font-medium text-gray-500">{title}</p>
|
||||
<p className="text-2xl font-bold text-gray-900 mt-1">{value}</p>
|
||||
{subtitle ? <p className="text-xs text-gray-400 mt-1">{subtitle}</p> : null}
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Pulsing placeholder shown while the telemetry fetches are in flight.
 * Mirrors the loaded layout: a 4-card summary row and three chart cards
 * (the first spanning both columns). Index keys are fine here — the list
 * is static and never reorders.
 */
function LoadingSkeleton(): ReactElement {
  return (
    <div className="space-y-6" data-testid="loading-skeleton">
      {/* Summary cards skeleton */}
      <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
        {Array.from({ length: 4 }).map((_, i) => (
          <Card key={i}>
            <CardContent className="pt-6">
              <div className="h-4 bg-gray-200 rounded w-24 animate-pulse" />
              <div className="h-8 bg-gray-200 rounded w-16 mt-2 animate-pulse" />
            </CardContent>
          </Card>
        ))}
      </div>
      {/* Chart skeletons */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
        {Array.from({ length: 3 }).map((_, i) => (
          <Card key={i} className={i === 0 ? "lg:col-span-2" : ""}>
            <CardHeader>
              <div className="h-6 bg-gray-200 rounded w-40 animate-pulse" />
            </CardHeader>
            <CardContent>
              <div className="h-64 bg-gray-100 rounded animate-pulse" />
            </CardContent>
          </Card>
        ))}
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Friendly zero-data placeholder rendered when the summary reports no tasks.
 * Targeted by tests via data-testid="empty-state".
 */
function EmptyState(): ReactElement {
  return (
    <div
      className="flex flex-col items-center justify-center py-16 text-center"
      data-testid="empty-state"
    >
      <div className="text-4xl mb-4">📊</div>
      <h2 className="text-xl font-semibold text-gray-700 mb-2">No usage data yet</h2>
      <p className="text-gray-500 max-w-md">
        Once you start using AI-powered features, your token usage and cost data will appear here.
      </p>
    </div>
  );
}
|
||||
|
||||
// ─── Main Page Component ─────────────────────────────────────────────
|
||||
|
||||
export default function UsagePage(): ReactElement {
|
||||
const [timeRange, setTimeRange] = useState<TimeRange>("30d");
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
const [isEmpty, setIsEmpty] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const [summary, setSummary] = useState<UsageSummary | null>(null);
|
||||
const [tokenUsage, setTokenUsage] = useState<TokenUsagePoint[]>([]);
|
||||
const [costBreakdown, setCostBreakdown] = useState<CostBreakdownItem[]>([]);
|
||||
const [taskOutcomes, setTaskOutcomes] = useState<TaskOutcomeItem[]>([]);
|
||||
|
||||
const loadData = useCallback(async (range: TimeRange): Promise<void> => {
|
||||
setIsLoading(true);
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
const [summaryData, tokenData, costData, outcomeData] = await Promise.all([
|
||||
fetchUsageSummary(range),
|
||||
fetchTokenUsage(range),
|
||||
fetchCostBreakdown(range),
|
||||
fetchTaskOutcomes(range),
|
||||
]);
|
||||
|
||||
setSummary(summaryData);
|
||||
setTokenUsage(tokenData);
|
||||
setCostBreakdown(costData);
|
||||
setTaskOutcomes(outcomeData);
|
||||
|
||||
// Check if there's any meaningful data
|
||||
setIsEmpty(summaryData.taskCount === 0);
|
||||
} catch (err) {
|
||||
setError(
|
||||
err instanceof Error
|
||||
? err.message
|
||||
: "We had trouble loading usage data. Please try again when you're ready."
|
||||
);
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
void loadData(timeRange);
|
||||
}, [timeRange, loadData]);
|
||||
|
||||
function handleTimeRangeChange(range: TimeRange): void {
|
||||
setTimeRange(range);
|
||||
}
|
||||
|
||||
return (
|
||||
<main className="container mx-auto px-4 py-8">
|
||||
{/* Header */}
|
||||
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between mb-8 gap-4">
|
||||
<div>
|
||||
<h1 className="text-3xl font-bold text-gray-900">Usage</h1>
|
||||
<p className="text-gray-600 mt-1">Token usage and cost overview</p>
|
||||
</div>
|
||||
|
||||
{/* Time range selector */}
|
||||
<div className="flex gap-1 bg-gray-100 rounded-lg p-1" role="group" aria-label="Time range">
|
||||
{TIME_RANGES.map(({ value, label }) => (
|
||||
<button
|
||||
key={value}
|
||||
onClick={() => {
|
||||
handleTimeRangeChange(value);
|
||||
}}
|
||||
className={`px-4 py-2 text-sm font-medium rounded-md transition-colors ${
|
||||
timeRange === value
|
||||
? "bg-white text-gray-900 shadow-sm"
|
||||
: "text-gray-600 hover:text-gray-900"
|
||||
}`}
|
||||
aria-pressed={timeRange === value}
|
||||
>
|
||||
{label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Error state */}
|
||||
{error !== null ? (
|
||||
<div className="rounded-lg border border-amber-200 bg-amber-50 p-6 text-center">
|
||||
<p className="text-amber-800">{error}</p>
|
||||
<button
|
||||
onClick={() => void loadData(timeRange)}
|
||||
className="mt-4 rounded-md bg-amber-600 px-4 py-2 text-sm font-medium text-white hover:bg-amber-700 transition-colors"
|
||||
>
|
||||
Try again
|
||||
</button>
|
||||
</div>
|
||||
) : isLoading ? (
|
||||
<LoadingSkeleton />
|
||||
) : isEmpty ? (
|
||||
<EmptyState />
|
||||
) : (
|
||||
<div className="space-y-6">
|
||||
{/* Summary Cards */}
|
||||
<div
|
||||
className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4"
|
||||
data-testid="summary-cards"
|
||||
>
|
||||
<SummaryCard
|
||||
title="Total Tokens"
|
||||
value={summary ? formatNumber(summary.totalTokens) : "0"}
|
||||
subtitle="Input + Output"
|
||||
/>
|
||||
<SummaryCard
|
||||
title="Estimated Cost"
|
||||
value={summary ? formatCurrency(summary.totalCost) : "$0.00"}
|
||||
subtitle="Based on provider pricing"
|
||||
/>
|
||||
<SummaryCard
|
||||
title="Task Count"
|
||||
value={summary ? formatNumber(summary.taskCount) : "0"}
|
||||
subtitle="AI-assisted tasks"
|
||||
/>
|
||||
<SummaryCard
|
||||
title="Quality Gate Pass Rate"
|
||||
value={summary ? formatPercent(summary.avgQualityGatePassRate) : "0%"}
|
||||
subtitle="Build, lint, test, typecheck"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Charts */}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Token Usage Over Time — Full width */}
|
||||
<Card className="lg:col-span-2">
|
||||
<CardHeader>
|
||||
<CardTitle className="text-lg">Token Usage Over Time</CardTitle>
|
||||
<CardDescription>Input and output tokens by day</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="h-72" data-testid="token-usage-chart">
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<LineChart data={tokenUsage}>
|
||||
<CartesianGrid strokeDasharray="3 3" stroke={CHART_COLORS.grid} />
|
||||
<XAxis
|
||||
dataKey="date"
|
||||
tickFormatter={formatDateLabel}
|
||||
tick={{ fontSize: 12, fill: "#64748B" }}
|
||||
interval="preserveStartEnd"
|
||||
/>
|
||||
<YAxis
|
||||
tickFormatter={formatNumber}
|
||||
tick={{ fontSize: 12, fill: "#64748B" }}
|
||||
width={60}
|
||||
/>
|
||||
<Tooltip
|
||||
formatter={
|
||||
((value: number, name: string) => [
|
||||
formatNumber(value),
|
||||
name === "inputTokens" ? "Input Tokens" : "Output Tokens",
|
||||
]) as never
|
||||
}
|
||||
labelFormatter={((label: string) => formatDateLabel(label)) as never}
|
||||
contentStyle={{
|
||||
borderRadius: "8px",
|
||||
border: "1px solid #E2E8F0",
|
||||
boxShadow: "0 2px 8px rgba(0,0,0,0.08)",
|
||||
}}
|
||||
/>
|
||||
<Legend
|
||||
formatter={(value: string) =>
|
||||
value === "inputTokens" ? "Input Tokens" : "Output Tokens"
|
||||
}
|
||||
/>
|
||||
<Line
|
||||
type="monotone"
|
||||
dataKey="inputTokens"
|
||||
stroke={CHART_COLORS.inputTokens}
|
||||
strokeWidth={2}
|
||||
dot={false}
|
||||
activeDot={{ r: 4 }}
|
||||
/>
|
||||
<Line
|
||||
type="monotone"
|
||||
dataKey="outputTokens"
|
||||
stroke={CHART_COLORS.outputTokens}
|
||||
strokeWidth={2}
|
||||
dot={false}
|
||||
activeDot={{ r: 4 }}
|
||||
/>
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
{/* Cost Breakdown by Model */}
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-lg">Cost by Model</CardTitle>
|
||||
<CardDescription>Estimated cost breakdown</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="h-72" data-testid="cost-breakdown-chart">
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<BarChart data={costBreakdown} layout="vertical">
|
||||
<CartesianGrid strokeDasharray="3 3" stroke={CHART_COLORS.grid} />
|
||||
<XAxis
|
||||
type="number"
|
||||
tickFormatter={(v: number) => formatCurrency(v)}
|
||||
tick={{ fontSize: 12, fill: "#64748B" }}
|
||||
/>
|
||||
<YAxis
|
||||
type="category"
|
||||
dataKey="model"
|
||||
tick={{ fontSize: 11, fill: "#64748B" }}
|
||||
width={140}
|
||||
/>
|
||||
<Tooltip
|
||||
formatter={((value: number) => [formatCurrency(value), "Cost"]) as never}
|
||||
contentStyle={{
|
||||
borderRadius: "8px",
|
||||
border: "1px solid #E2E8F0",
|
||||
boxShadow: "0 2px 8px rgba(0,0,0,0.08)",
|
||||
}}
|
||||
/>
|
||||
<Bar dataKey="cost" fill={CHART_COLORS.barFill} radius={[0, 4, 4, 0]} />
|
||||
</BarChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
{/* Task Outcomes */}
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-lg">Task Outcomes</CardTitle>
|
||||
<CardDescription>Distribution of task completion results</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div
|
||||
className="h-72 flex items-center justify-center"
|
||||
data-testid="task-outcomes-chart"
|
||||
>
|
||||
<ResponsiveContainer width="100%" height="100%">
|
||||
<PieChart>
|
||||
<Pie
|
||||
data={toFillData(taskOutcomes)}
|
||||
cx="50%"
|
||||
cy="50%"
|
||||
innerRadius={60}
|
||||
outerRadius={100}
|
||||
paddingAngle={2}
|
||||
dataKey="count"
|
||||
nameKey="outcome"
|
||||
label={
|
||||
((props: { outcome?: string; count?: number }) =>
|
||||
`${props.outcome ?? ""}: ${String(props.count ?? 0)}`) as never
|
||||
}
|
||||
/>
|
||||
<Tooltip
|
||||
formatter={((value: number, name: string) => [value, name]) as never}
|
||||
contentStyle={{
|
||||
borderRadius: "8px",
|
||||
border: "1px solid #E2E8F0",
|
||||
boxShadow: "0 2px 8px rgba(0,0,0,0.08)",
|
||||
}}
|
||||
/>
|
||||
<Legend />
|
||||
</PieChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</main>
|
||||
);
|
||||
}
|
||||
@@ -16,6 +16,7 @@ export function Navigation(): React.JSX.Element {
|
||||
{ href: "/tasks", label: "Tasks" },
|
||||
{ href: "/calendar", label: "Calendar" },
|
||||
{ href: "/knowledge", label: "Knowledge" },
|
||||
{ href: "/usage", label: "Usage" },
|
||||
];
|
||||
|
||||
// Global keyboard shortcut for search (Cmd+K or Ctrl+K)
|
||||
|
||||
@@ -12,3 +12,4 @@ export * from "./knowledge";
|
||||
export * from "./domains";
|
||||
export * from "./teams";
|
||||
export * from "./personalities";
|
||||
export * from "./telemetry";
|
||||
|
||||
187
apps/web/src/lib/api/telemetry.ts
Normal file
187
apps/web/src/lib/api/telemetry.ts
Normal file
@@ -0,0 +1,187 @@
|
||||
/**
|
||||
* Telemetry API Client
|
||||
* Handles telemetry data fetching for the usage dashboard.
|
||||
*
|
||||
* NOTE: Currently returns mock/placeholder data since the telemetry API
|
||||
* aggregation endpoints don't exist yet. The important thing is the UI structure.
|
||||
* When the backend endpoints are ready, replace mock calls with real apiGet() calls.
|
||||
*/
|
||||
|
||||
import { apiGet, type ApiResponse } from "./client";
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────
|
||||
|
||||
/** Look-back windows selectable in the usage dashboard. */
export type TimeRange = "7d" | "30d" | "90d";

/** Aggregate usage figures for one time range (one card row in the UI). */
export interface UsageSummary {
  /** Input + output tokens combined. */
  totalTokens: number;
  /** Estimated spend; rendered as USD by the dashboard's formatCurrency. */
  totalCost: number;
  /** Number of AI-assisted tasks in the range; 0 triggers the empty state. */
  taskCount: number;
  /** Quality-gate pass rate as a fraction (mock data uses 0.87). */
  avgQualityGatePassRate: number;
}

/** One day's token totals for the time-series chart. */
export interface TokenUsagePoint {
  /** ISO date, YYYY-MM-DD. */
  date: string;
  inputTokens: number;
  outputTokens: number;
  /** Always inputTokens + outputTokens. */
  totalTokens: number;
}

/** Per-model cost row for the "Cost by Model" bar chart. */
export interface CostBreakdownItem {
  model: string;
  provider: string;
  /** Estimated cost for this model (0 for local/ollama models). */
  cost: number;
  taskCount: number;
}

/** One slice of the task-outcome donut chart. */
export interface TaskOutcomeItem {
  outcome: string;
  count: number;
  /** Hex color used for the chart slice. */
  color: string;
}

/** Query parameters for GET /api/telemetry/estimate. */
export interface EstimateParams {
  taskType: string;
  model: string;
  provider: string;
  complexity: string;
}

/**
 * Response shape of the estimate endpoint (TEL-006).
 * `prediction` is null when the backend has nothing to predict from —
 * presumably when sample_size is 0; confirm against the endpoint.
 */
export interface EstimateResponse {
  prediction: {
    /** Token estimates at median/p75/p90 percentiles. */
    input_tokens: { median: number; p75: number; p90: number };
    output_tokens: { median: number; p75: number; p90: number };
    /** Cost figures in USD microdollars; key shape not specified here — TODO confirm. */
    cost_usd_micros: Record<string, number>;
    quality: { gate_pass_rate: number; success_rate: number };
  } | null;
  metadata: {
    /** Number of historical samples behind the prediction. */
    sample_size: number;
    confidence: "none" | "low" | "medium" | "high";
  };
}
|
||||
|
||||
// ─── Mock Data Generators ────────────────────────────────────────────
|
||||
|
||||
function generateDateRange(range: TimeRange): string[] {
|
||||
const days = range === "7d" ? 7 : range === "30d" ? 30 : 90;
|
||||
const dates: string[] = [];
|
||||
const now = new Date();
|
||||
|
||||
for (let i = days - 1; i >= 0; i--) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() - i);
|
||||
dates.push(d.toISOString().split("T")[0] ?? "");
|
||||
}
|
||||
|
||||
return dates;
|
||||
}
|
||||
|
||||
function generateMockTokenUsage(range: TimeRange): TokenUsagePoint[] {
|
||||
const dates = generateDateRange(range);
|
||||
|
||||
return dates.map((date) => {
|
||||
const baseInput = 8000 + Math.floor(Math.random() * 12000);
|
||||
const baseOutput = 3000 + Math.floor(Math.random() * 7000);
|
||||
return {
|
||||
date,
|
||||
inputTokens: baseInput,
|
||||
outputTokens: baseOutput,
|
||||
totalTokens: baseInput + baseOutput,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function generateMockSummary(range: TimeRange): UsageSummary {
|
||||
const multiplier = range === "7d" ? 1 : range === "30d" ? 4 : 12;
|
||||
return {
|
||||
totalTokens: 245_800 * multiplier,
|
||||
totalCost: 3.42 * multiplier,
|
||||
taskCount: 47 * multiplier,
|
||||
avgQualityGatePassRate: 0.87,
|
||||
};
|
||||
}
|
||||
|
||||
function generateMockCostBreakdown(): CostBreakdownItem[] {
|
||||
return [
|
||||
{ model: "claude-sonnet-4-5", provider: "anthropic", cost: 18.5, taskCount: 124 },
|
||||
{ model: "gpt-4o", provider: "openai", cost: 12.3, taskCount: 89 },
|
||||
{ model: "claude-haiku-3.5", provider: "anthropic", cost: 4.2, taskCount: 156 },
|
||||
{ model: "llama-3.3-70b", provider: "ollama", cost: 0, taskCount: 67 },
|
||||
{ model: "gemini-2.0-flash", provider: "google", cost: 2.8, taskCount: 42 },
|
||||
];
|
||||
}
|
||||
|
||||
// PDA-friendly colors: calm, no aggressive reds
|
||||
function generateMockTaskOutcomes(): TaskOutcomeItem[] {
|
||||
return [
|
||||
{ outcome: "Success", count: 312, color: "#6EBF8B" },
|
||||
{ outcome: "Partial", count: 48, color: "#F5C862" },
|
||||
{ outcome: "Timeout", count: 18, color: "#94A3B8" },
|
||||
{ outcome: "Incomplete", count: 22, color: "#C4A5DE" },
|
||||
];
|
||||
}
|
||||
|
||||
// ─── API Functions ───────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Fetch usage summary data (total tokens, cost, task count, quality rate)
|
||||
*/
|
||||
export async function fetchUsageSummary(timeRange: TimeRange): Promise<UsageSummary> {
|
||||
// TODO: Replace with real API call when backend aggregation endpoints are ready
|
||||
// const response = await apiGet<ApiResponse<UsageSummary>>(`/api/telemetry/summary?range=${timeRange}`);
|
||||
// return response.data;
|
||||
void apiGet; // suppress unused import warning in the meantime
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
return generateMockSummary(timeRange);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch token usage time series for charts
|
||||
*/
|
||||
export async function fetchTokenUsage(timeRange: TimeRange): Promise<TokenUsagePoint[]> {
|
||||
// TODO: Replace with real API call
|
||||
// const response = await apiGet<ApiResponse<TokenUsagePoint[]>>(`/api/telemetry/tokens?range=${timeRange}`);
|
||||
// return response.data;
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
return generateMockTokenUsage(timeRange);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch cost breakdown by model
|
||||
*/
|
||||
export async function fetchCostBreakdown(timeRange: TimeRange): Promise<CostBreakdownItem[]> {
|
||||
// TODO: Replace with real API call
|
||||
// const response = await apiGet<ApiResponse<CostBreakdownItem[]>>(`/api/telemetry/costs?range=${timeRange}`);
|
||||
// return response.data;
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
void timeRange;
|
||||
return generateMockCostBreakdown();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch task outcome distribution
|
||||
*/
|
||||
export async function fetchTaskOutcomes(timeRange: TimeRange): Promise<TaskOutcomeItem[]> {
|
||||
// TODO: Replace with real API call
|
||||
// const response = await apiGet<ApiResponse<TaskOutcomeItem[]>>(`/api/telemetry/outcomes?range=${timeRange}`);
|
||||
// return response.data;
|
||||
await new Promise((resolve) => setTimeout(resolve, 150));
|
||||
void timeRange;
|
||||
return generateMockTaskOutcomes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch cost/token estimate for a given task configuration.
|
||||
* Uses the real GET /api/telemetry/estimate endpoint from TEL-006.
|
||||
*/
|
||||
export async function fetchEstimate(params: EstimateParams): Promise<EstimateResponse> {
|
||||
const query = new URLSearchParams({
|
||||
taskType: params.taskType,
|
||||
model: params.model,
|
||||
provider: params.provider,
|
||||
complexity: params.complexity,
|
||||
}).toString();
|
||||
|
||||
const response = await apiGet<ApiResponse<EstimateResponse>>(`/api/telemetry/estimate?${query}`);
|
||||
return response.data;
|
||||
}
|
||||
Reference in New Issue
Block a user