feat(#131): add OpenTelemetry tracing infrastructure

Implement comprehensive distributed tracing for HTTP requests and LLM
operations using OpenTelemetry with GenAI semantic conventions.

Features:
- TelemetryService: SDK initialization with OTLP HTTP exporter
- TelemetryInterceptor: Automatic HTTP request spans
- @TraceLlmCall decorator: LLM operation tracing
- GenAI semantic conventions for model/token tracking
- Graceful degradation when tracing disabled

Instrumented:
- All HTTP requests (automatic spans)
- OllamaProvider chat/chatStream/embed operations
- Token counts, model names, durations

Environment:
- OTEL_ENABLED (default: true)
- OTEL_SERVICE_NAME (default: mosaic-api)
- OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318)

Tests: 23 passing with full coverage

Fixes #131

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Commit metadata (from the repository viewer; containing branch not captured in this extraction):
- Date: 2026-01-31 12:55:11 -06:00
- Parent: 64cb5c1edd
- Commit: 51e6ad0792
- 13 changed files with 2838 additions and 26 deletions

View File

@@ -6,6 +6,8 @@ import type {
LlmProviderHealthStatus,
} from "./llm-provider.interface";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "../dto";
import { TraceLlmCall, createLlmSpan } from "../../telemetry";
import { SpanStatusCode } from "@opentelemetry/api";
/**
* Configuration for Ollama LLM provider.
@@ -137,6 +139,7 @@ export class OllamaProvider implements LlmProviderInterface {
* @returns Complete chat response
* @throws {Error} If the request fails
*/
@TraceLlmCall({ system: "ollama", operation: "chat" })
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
try {
const messages = this.buildMessages(request);
@@ -176,6 +179,8 @@ export class OllamaProvider implements LlmProviderInterface {
* @throws {Error} If the request fails
*/
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> {
const span = createLlmSpan("ollama", "chat.stream", request.model);
try {
const messages = this.buildMessages(request);
const options = this.buildChatOptions(request);
@@ -197,10 +202,21 @@ export class OllamaProvider implements LlmProviderInterface {
done: chunk.done,
};
}
span.setStatus({ code: SpanStatusCode.OK });
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Streaming failed: ${errorMessage}`);
span.recordException(error instanceof Error ? error : new Error(errorMessage));
span.setStatus({
code: SpanStatusCode.ERROR,
message: errorMessage,
});
throw new Error(`Streaming failed: ${errorMessage}`);
} finally {
span.end();
}
}
@@ -211,6 +227,7 @@ export class OllamaProvider implements LlmProviderInterface {
* @returns Embeddings response with vector arrays
* @throws {Error} If the request fails
*/
@TraceLlmCall({ system: "ollama", operation: "embed" })
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
try {
const response = await this.client.embed({