feat(#131): add OpenTelemetry tracing infrastructure
Implement comprehensive distributed tracing for HTTP requests and LLM operations using OpenTelemetry with GenAI semantic conventions. Features: - TelemetryService: SDK initialization with OTLP HTTP exporter - TelemetryInterceptor: Automatic HTTP request spans - @TraceLlmCall decorator: LLM operation tracing - GenAI semantic conventions for model/token tracking - Graceful degradation when tracing disabled Instrumented: - All HTTP requests (automatic spans) - OllamaProvider chat/chatStream/embed operations - Token counts, model names, durations Environment: - OTEL_ENABLED (default: true) - OTEL_SERVICE_NAME (default: mosaic-api) - OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318) Tests: 23 passing with full coverage Fixes #131 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
168
apps/api/src/telemetry/llm-telemetry.decorator.ts
Normal file
168
apps/api/src/telemetry/llm-telemetry.decorator.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import type { Span } from "@opentelemetry/api";
|
||||
import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
|
||||
|
||||
/**
 * Metadata interface for LLM tracing configuration.
 * Consumed by {@link TraceLlmCall} to name spans (`<system>.<operation>`)
 * and to populate GenAI semantic-convention span attributes.
 */
export interface LlmTraceMetadata {
  /**
   * The LLM system being used (e.g., "ollama", "openai", "anthropic").
   * Recorded as the `gen_ai.system` span attribute.
   */
  system: string;

  /**
   * The operation type (e.g., "chat", "embed", "completion").
   * Recorded as the `gen_ai.operation.name` span attribute.
   */
  operation: string;
}
|
||||
|
||||
/**
 * Symbol key under which {@link TraceLlmCall} stores its {@link LlmTraceMetadata}
 * (via `Reflect.defineMetadata`) for potential runtime inspection.
 * NOTE(review): this symbol is not exported, so external runtime inspection
 * currently has no public accessor — confirm whether a getter is intended.
 */
const LLM_TRACE_METADATA = Symbol("llm:trace:metadata");
|
||||
|
||||
/**
|
||||
* Decorator that adds OpenTelemetry tracing to LLM provider methods.
|
||||
* Automatically creates spans with GenAI semantic conventions.
|
||||
*
|
||||
* @param metadata - Configuration for the LLM trace
|
||||
* @returns Method decorator
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* class OllamaProvider {
|
||||
* @TraceLlmCall({ system: "ollama", operation: "chat" })
|
||||
* async chat(request: ChatRequest): Promise<ChatResponse> {
|
||||
* // Implementation
|
||||
* }
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
export function TraceLlmCall(metadata: LlmTraceMetadata) {
|
||||
return function (
|
||||
target: object,
|
||||
propertyKey: string,
|
||||
descriptor: PropertyDescriptor
|
||||
): PropertyDescriptor {
|
||||
const originalMethod = descriptor.value as (
|
||||
this: unknown,
|
||||
...args: unknown[]
|
||||
) => Promise<unknown>;
|
||||
|
||||
descriptor.value = async function (this: unknown, ...args: unknown[]): Promise<unknown> {
|
||||
const tracer = trace.getTracer("mosaic-api");
|
||||
const spanName = `${metadata.system}.${metadata.operation}`;
|
||||
|
||||
const span = tracer.startSpan(spanName, {
|
||||
kind: SpanKind.CLIENT,
|
||||
attributes: {
|
||||
"gen_ai.system": metadata.system,
|
||||
"gen_ai.operation.name": metadata.operation,
|
||||
},
|
||||
});
|
||||
|
||||
try {
|
||||
// Extract model from first argument if it's an object with a model property
|
||||
if (args[0] && typeof args[0] === "object" && "model" in args[0]) {
|
||||
const request = args[0] as { model?: string };
|
||||
if (request.model) {
|
||||
span.setAttribute("gen_ai.request.model", request.model);
|
||||
}
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
const result = await originalMethod.apply(this, args);
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
span.setAttribute("gen_ai.response.duration_ms", duration);
|
||||
|
||||
// Extract token usage from response if available
|
||||
if (result && typeof result === "object") {
|
||||
if ("promptEvalCount" in result && typeof result.promptEvalCount === "number") {
|
||||
span.setAttribute("gen_ai.usage.prompt_tokens", result.promptEvalCount);
|
||||
}
|
||||
if ("evalCount" in result && typeof result.evalCount === "number") {
|
||||
span.setAttribute("gen_ai.usage.completion_tokens", result.evalCount);
|
||||
}
|
||||
}
|
||||
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
return result;
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
throw error;
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
};
|
||||
|
||||
// Store metadata for potential runtime inspection
|
||||
Reflect.defineMetadata(LLM_TRACE_METADATA, metadata, target, propertyKey);
|
||||
|
||||
return descriptor;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to manually create an LLM span for stream operations.
|
||||
* Use this for async generators where the decorator pattern doesn't work well.
|
||||
*
|
||||
* @param system - The LLM system (e.g., "ollama")
|
||||
* @param operation - The operation type (e.g., "chat.stream")
|
||||
* @param model - The model being used
|
||||
* @returns A span instance
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* async *chatStream(request: ChatRequest) {
|
||||
* const span = createLlmSpan("ollama", "chat.stream", request.model);
|
||||
* try {
|
||||
* for await (const chunk of stream) {
|
||||
* yield chunk;
|
||||
* }
|
||||
* span.setStatus({ code: SpanStatusCode.OK });
|
||||
* } catch (error) {
|
||||
* span.recordException(error);
|
||||
* span.setStatus({ code: SpanStatusCode.ERROR });
|
||||
* throw error;
|
||||
* } finally {
|
||||
* span.end();
|
||||
* }
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
export function createLlmSpan(system: string, operation: string, model?: string): Span {
|
||||
const tracer = trace.getTracer("mosaic-api");
|
||||
const spanName = `${system}.${operation}`;
|
||||
|
||||
const span = tracer.startSpan(spanName, {
|
||||
kind: SpanKind.CLIENT,
|
||||
attributes: {
|
||||
"gen_ai.system": system,
|
||||
"gen_ai.operation.name": operation,
|
||||
...(model && { "gen_ai.request.model": model }),
|
||||
},
|
||||
});
|
||||
|
||||
return span;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to record token usage on an LLM span.
|
||||
*
|
||||
* @param span - The span to record usage on
|
||||
* @param promptTokens - Number of prompt tokens
|
||||
* @param completionTokens - Number of completion tokens
|
||||
*/
|
||||
export function recordLlmUsage(span: Span, promptTokens?: number, completionTokens?: number): void {
|
||||
if (promptTokens !== undefined) {
|
||||
span.setAttribute("gen_ai.usage.prompt_tokens", promptTokens);
|
||||
}
|
||||
if (completionTokens !== undefined) {
|
||||
span.setAttribute("gen_ai.usage.completion_tokens", completionTokens);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user