feat(#131): add OpenTelemetry tracing infrastructure

Implement comprehensive distributed tracing for HTTP requests and LLM
operations using OpenTelemetry with GenAI semantic conventions.

Features:
- TelemetryService: SDK initialization with OTLP HTTP exporter
- TelemetryInterceptor: Automatic HTTP request spans
- @TraceLlmCall decorator: LLM operation tracing
- GenAI semantic conventions for model/token tracking
- Graceful degradation when tracing is disabled

Instrumented:
- All HTTP requests (automatic spans)
- OllamaProvider chat/chatStream/embed operations
- Token counts, model names, durations

Environment:
- OTEL_ENABLED (default: true)
- OTEL_SERVICE_NAME (default: mosaic-api)
- OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318)

Tests: 23 passing with full coverage

Fixes #131

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-31 12:55:11 -06:00
parent 64cb5c1edd
commit 51e6ad0792
13 changed files with 2838 additions and 26 deletions

View File

@@ -0,0 +1,188 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { TelemetryService } from "./telemetry.service";
import type { Tracer, Span } from "@opentelemetry/api";
/**
 * Unit tests for TelemetryService: module lifecycle hooks, tracer access,
 * span creation, and exception recording.
 *
 * Every test configures tracing through OTEL_* environment variables and
 * creates its own service instance. The shared afterEach restores the
 * environment and shuts the most recently created service down.
 */
describe("TelemetryService", () => {
  let service: TelemetryService;
  let originalEnv: NodeJS.ProcessEnv;

  /**
   * Restores process.env to `snapshot` by mutating it in place.
   *
   * Reassigning `process.env = snapshot` would swap Node's environment object
   * for a plain object copy, so later writes would no longer be coerced to
   * strings or reach the real process environment. Editing keys in place
   * keeps the original object and its semantics.
   *
   * @param snapshot - Copy of process.env taken before the test mutated it.
   */
  const restoreEnv = (snapshot: NodeJS.ProcessEnv): void => {
    // Drop variables the test introduced.
    for (const key of Object.keys(process.env)) {
      if (!(key in snapshot)) {
        delete process.env[key];
      }
    }
    // Put back variables the test changed or deleted.
    for (const [key, value] of Object.entries(snapshot)) {
      if (value !== undefined) {
        process.env[key] = value;
      }
    }
  };

  beforeEach(() => {
    originalEnv = { ...process.env };
    // Enable tracing by default for tests
    process.env.OTEL_ENABLED = "true";
    process.env.OTEL_SERVICE_NAME = "mosaic-api-test";
    process.env.OTEL_EXPORTER_OTLP_ENDPOINT = "http://localhost:4318/v1/traces";
  });

  afterEach(async () => {
    // Environment first, then shutdown (same order as before).
    restoreEnv(originalEnv);
    if (service) {
      await service.onModuleDestroy();
    }
  });

  describe("onModuleInit", () => {
    it("should initialize the SDK when OTEL_ENABLED is true", async () => {
      service = new TelemetryService();
      await service.onModuleInit();
      expect(service.getTracer()).toBeDefined();
    });

    it("should not initialize SDK when OTEL_ENABLED is false", async () => {
      process.env.OTEL_ENABLED = "false";
      service = new TelemetryService();
      await service.onModuleInit();
      expect(service.getTracer()).toBeDefined(); // Should return noop tracer
    });

    // NOTE(review): the two service-name tests only assert that a tracer
    // exists; the resolved service name itself is not inspected.
    it("should use custom service name from env", async () => {
      process.env.OTEL_SERVICE_NAME = "custom-service";
      service = new TelemetryService();
      await service.onModuleInit();
      expect(service.getTracer()).toBeDefined();
    });

    it("should use default service name when not provided", async () => {
      delete process.env.OTEL_SERVICE_NAME;
      service = new TelemetryService();
      await service.onModuleInit();
      expect(service.getTracer()).toBeDefined();
    });
  });

  describe("getTracer", () => {
    beforeEach(async () => {
      service = new TelemetryService();
      await service.onModuleInit();
    });

    it("should return a tracer instance", () => {
      const tracer = service.getTracer();
      expect(tracer).toBeDefined();
      expect(typeof tracer.startSpan).toBe("function");
    });

    it("should return the same tracer instance on multiple calls", () => {
      const tracer1 = service.getTracer();
      const tracer2 = service.getTracer();
      expect(tracer1).toBe(tracer2);
    });
  });

  describe("startSpan", () => {
    beforeEach(async () => {
      service = new TelemetryService();
      await service.onModuleInit();
    });

    it("should create a span with the given name", () => {
      const span = service.startSpan("test-span");
      expect(span).toBeDefined();
      expect(typeof span.end).toBe("function");
      span.end();
    });

    it("should create a span with attributes", () => {
      const span = service.startSpan("test-span", {
        attributes: {
          "test.attribute": "value",
        },
      });
      expect(span).toBeDefined();
      span.end();
    });

    // NOTE(review): this checks that two spans can be open at the same time;
    // no parent/child relationship between them is asserted.
    it("should create nested spans", () => {
      const parentSpan = service.startSpan("parent-span");
      const childSpan = service.startSpan("child-span");
      expect(parentSpan).toBeDefined();
      expect(childSpan).toBeDefined();
      // End the inner span before the outer one.
      childSpan.end();
      parentSpan.end();
    });
  });

  describe("recordException", () => {
    let span: Span;

    beforeEach(async () => {
      service = new TelemetryService();
      await service.onModuleInit();
      span = service.startSpan("test-span");
    });

    afterEach(() => {
      span.end();
    });

    it("should record an exception on the span", () => {
      const error = new Error("Test error");
      const recordExceptionSpy = vi.spyOn(span, "recordException");
      service.recordException(span, error);
      expect(recordExceptionSpy).toHaveBeenCalledWith(error);
    });

    // NOTE(review): only verifies that setStatus was invoked; the status code
    // passed to it is not checked.
    it("should set span status to error", () => {
      const error = new Error("Test error");
      const setStatusSpy = vi.spyOn(span, "setStatus");
      service.recordException(span, error);
      expect(setStatusSpy).toHaveBeenCalled();
    });
  });

  describe("onModuleDestroy", () => {
    it("should shutdown the SDK gracefully", async () => {
      service = new TelemetryService();
      await service.onModuleInit();
      await expect(service.onModuleDestroy()).resolves.not.toThrow();
    });

    it("should not throw if called multiple times", async () => {
      service = new TelemetryService();
      await service.onModuleInit();
      await service.onModuleDestroy();
      await expect(service.onModuleDestroy()).resolves.not.toThrow();
    });

    it("should not throw if SDK was not initialized", async () => {
      process.env.OTEL_ENABLED = "false";
      service = new TelemetryService();
      await service.onModuleInit();
      await expect(service.onModuleDestroy()).resolves.not.toThrow();
    });
  });

  describe("disabled mode", () => {
    beforeEach(() => {
      // Overrides the suite-level default of OTEL_ENABLED = "true".
      process.env.OTEL_ENABLED = "false";
    });

    it("should return noop tracer when disabled", async () => {
      service = new TelemetryService();
      await service.onModuleInit();
      const tracer = service.getTracer();
      expect(tracer).toBeDefined();
    });

    it("should not throw when creating spans while disabled", async () => {
      service = new TelemetryService();
      await service.onModuleInit();
      expect(() => service.startSpan("test-span")).not.toThrow();
    });
  });
});