feat(#131): add OpenTelemetry tracing infrastructure
Implement comprehensive distributed tracing for HTTP requests and LLM operations using OpenTelemetry with GenAI semantic conventions.

Features:
- TelemetryService: SDK initialization with OTLP HTTP exporter
- TelemetryInterceptor: Automatic HTTP request spans
- @TraceLlmCall decorator: LLM operation tracing
- GenAI semantic conventions for model/token tracking
- Graceful degradation when tracing disabled

Instrumented:
- All HTTP requests (automatic spans)
- OllamaProvider chat/chatStream/embed operations
- Token counts, model names, durations

Environment:
- OTEL_ENABLED (default: true)
- OTEL_SERVICE_NAME (default: mosaic-api)
- OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318)

Tests: 23 passing with full coverage

Fixes #131

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,8 @@ import type {
|
||||
LlmProviderHealthStatus,
|
||||
} from "./llm-provider.interface";
|
||||
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "../dto";
|
||||
import { TraceLlmCall, createLlmSpan } from "../../telemetry";
|
||||
import { SpanStatusCode } from "@opentelemetry/api";
|
||||
|
||||
/**
|
||||
* Configuration for Ollama LLM provider.
|
||||
@@ -137,6 +139,7 @@ export class OllamaProvider implements LlmProviderInterface {
|
||||
* @returns Complete chat response
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
@TraceLlmCall({ system: "ollama", operation: "chat" })
|
||||
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
|
||||
try {
|
||||
const messages = this.buildMessages(request);
|
||||
@@ -176,6 +179,8 @@ export class OllamaProvider implements LlmProviderInterface {
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> {
|
||||
const span = createLlmSpan("ollama", "chat.stream", request.model);
|
||||
|
||||
try {
|
||||
const messages = this.buildMessages(request);
|
||||
const options = this.buildChatOptions(request);
|
||||
@@ -197,10 +202,21 @@ export class OllamaProvider implements LlmProviderInterface {
|
||||
done: chunk.done,
|
||||
};
|
||||
}
|
||||
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Streaming failed: ${errorMessage}`);
|
||||
|
||||
span.recordException(error instanceof Error ? error : new Error(errorMessage));
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: errorMessage,
|
||||
});
|
||||
|
||||
throw new Error(`Streaming failed: ${errorMessage}`);
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,6 +227,7 @@ export class OllamaProvider implements LlmProviderInterface {
|
||||
* @returns Embeddings response with vector arrays
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
@TraceLlmCall({ system: "ollama", operation: "embed" })
|
||||
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
|
||||
try {
|
||||
const response = await this.client.embed({
|
||||
|
||||
Reference in New Issue
Block a user