Implement comprehensive distributed tracing for HTTP requests and LLM operations using OpenTelemetry with GenAI semantic conventions. Features: - TelemetryService: SDK initialization with OTLP HTTP exporter - TelemetryInterceptor: Automatic HTTP request spans - @TraceLlmCall decorator: LLM operation tracing - GenAI semantic conventions for model/token tracking - Graceful degradation when tracing disabled Instrumented: - All HTTP requests (automatic spans) - OllamaProvider chat/chatStream/embed operations - Token counts, model names, durations Environment: - OTEL_ENABLED (default: true) - OTEL_SERVICE_NAME (default: mosaic-api) - OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318) Tests: 23 passing with full coverage Fixes #131 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
169 lines
5.0 KiB
TypeScript
import type { Span } from "@opentelemetry/api";
|
|
import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
|
|
|
|
/**
 * Metadata interface for LLM tracing configuration.
 * The `system` and `operation` values are combined into the span name
 * (`"<system>.<operation>"`) and recorded as GenAI semantic-convention
 * attributes (`gen_ai.system`, `gen_ai.operation.name`).
 */
export interface LlmTraceMetadata {
  /**
   * The LLM system being used (e.g., "ollama", "openai", "anthropic")
   */
  system: string;

  /**
   * The operation type (e.g., "chat", "embed", "completion")
   */
  operation: string;
}
|
|
|
|
/**
 * Symbol key under which the decorator stores its {@link LlmTraceMetadata}
 * via `Reflect.defineMetadata`, for runtime inspection of decorated methods.
 */
const LLM_TRACE_METADATA = Symbol("llm:trace:metadata");
|
|
|
|
/**
|
|
* Decorator that adds OpenTelemetry tracing to LLM provider methods.
|
|
* Automatically creates spans with GenAI semantic conventions.
|
|
*
|
|
* @param metadata - Configuration for the LLM trace
|
|
* @returns Method decorator
|
|
*
|
|
* @example
|
|
* ```typescript
|
|
* class OllamaProvider {
|
|
* @TraceLlmCall({ system: "ollama", operation: "chat" })
|
|
* async chat(request: ChatRequest): Promise<ChatResponse> {
|
|
* // Implementation
|
|
* }
|
|
* }
|
|
* ```
|
|
*/
|
|
export function TraceLlmCall(metadata: LlmTraceMetadata) {
|
|
return function (
|
|
target: object,
|
|
propertyKey: string,
|
|
descriptor: PropertyDescriptor
|
|
): PropertyDescriptor {
|
|
const originalMethod = descriptor.value as (
|
|
this: unknown,
|
|
...args: unknown[]
|
|
) => Promise<unknown>;
|
|
|
|
descriptor.value = async function (this: unknown, ...args: unknown[]): Promise<unknown> {
|
|
const tracer = trace.getTracer("mosaic-api");
|
|
const spanName = `${metadata.system}.${metadata.operation}`;
|
|
|
|
const span = tracer.startSpan(spanName, {
|
|
kind: SpanKind.CLIENT,
|
|
attributes: {
|
|
"gen_ai.system": metadata.system,
|
|
"gen_ai.operation.name": metadata.operation,
|
|
},
|
|
});
|
|
|
|
try {
|
|
// Extract model from first argument if it's an object with a model property
|
|
if (args[0] && typeof args[0] === "object" && "model" in args[0]) {
|
|
const request = args[0] as { model?: string };
|
|
if (request.model) {
|
|
span.setAttribute("gen_ai.request.model", request.model);
|
|
}
|
|
}
|
|
|
|
const startTime = Date.now();
|
|
const result = await originalMethod.apply(this, args);
|
|
const duration = Date.now() - startTime;
|
|
|
|
span.setAttribute("gen_ai.response.duration_ms", duration);
|
|
|
|
// Extract token usage from response if available
|
|
if (result && typeof result === "object") {
|
|
if ("promptEvalCount" in result && typeof result.promptEvalCount === "number") {
|
|
span.setAttribute("gen_ai.usage.prompt_tokens", result.promptEvalCount);
|
|
}
|
|
if ("evalCount" in result && typeof result.evalCount === "number") {
|
|
span.setAttribute("gen_ai.usage.completion_tokens", result.evalCount);
|
|
}
|
|
}
|
|
|
|
span.setStatus({ code: SpanStatusCode.OK });
|
|
return result;
|
|
} catch (error) {
|
|
span.recordException(error as Error);
|
|
span.setStatus({
|
|
code: SpanStatusCode.ERROR,
|
|
message: error instanceof Error ? error.message : String(error),
|
|
});
|
|
throw error;
|
|
} finally {
|
|
span.end();
|
|
}
|
|
};
|
|
|
|
// Store metadata for potential runtime inspection
|
|
Reflect.defineMetadata(LLM_TRACE_METADATA, metadata, target, propertyKey);
|
|
|
|
return descriptor;
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Helper function to manually create an LLM span for stream operations.
|
|
* Use this for async generators where the decorator pattern doesn't work well.
|
|
*
|
|
* @param system - The LLM system (e.g., "ollama")
|
|
* @param operation - The operation type (e.g., "chat.stream")
|
|
* @param model - The model being used
|
|
* @returns A span instance
|
|
*
|
|
* @example
|
|
* ```typescript
|
|
* async *chatStream(request: ChatRequest) {
|
|
* const span = createLlmSpan("ollama", "chat.stream", request.model);
|
|
* try {
|
|
* for await (const chunk of stream) {
|
|
* yield chunk;
|
|
* }
|
|
* span.setStatus({ code: SpanStatusCode.OK });
|
|
* } catch (error) {
|
|
* span.recordException(error);
|
|
* span.setStatus({ code: SpanStatusCode.ERROR });
|
|
* throw error;
|
|
* } finally {
|
|
* span.end();
|
|
* }
|
|
* }
|
|
* ```
|
|
*/
|
|
export function createLlmSpan(system: string, operation: string, model?: string): Span {
|
|
const tracer = trace.getTracer("mosaic-api");
|
|
const spanName = `${system}.${operation}`;
|
|
|
|
const span = tracer.startSpan(spanName, {
|
|
kind: SpanKind.CLIENT,
|
|
attributes: {
|
|
"gen_ai.system": system,
|
|
"gen_ai.operation.name": operation,
|
|
...(model && { "gen_ai.request.model": model }),
|
|
},
|
|
});
|
|
|
|
return span;
|
|
}
|
|
|
|
/**
|
|
* Helper function to record token usage on an LLM span.
|
|
*
|
|
* @param span - The span to record usage on
|
|
* @param promptTokens - Number of prompt tokens
|
|
* @param completionTokens - Number of completion tokens
|
|
*/
|
|
export function recordLlmUsage(span: Span, promptTokens?: number, completionTokens?: number): void {
|
|
if (promptTokens !== undefined) {
|
|
span.setAttribute("gen_ai.usage.prompt_tokens", promptTokens);
|
|
}
|
|
if (completionTokens !== undefined) {
|
|
span.setAttribute("gen_ai.usage.completion_tokens", completionTokens);
|
|
}
|
|
}
|