feat(#131): add OpenTelemetry tracing infrastructure

Implement comprehensive distributed tracing for HTTP requests and LLM
operations using OpenTelemetry with GenAI semantic conventions.

Features:
- TelemetryService: SDK initialization with OTLP HTTP exporter
- TelemetryInterceptor: Automatic HTTP request spans
- @TraceLlmCall decorator: LLM operation tracing
- GenAI semantic conventions for model/token tracking
- Graceful degradation when tracing disabled

Instrumented:
- All HTTP requests (automatic spans)
- OllamaProvider chat/chatStream/embed operations
- Token counts, model names, durations

Environment:
- OTEL_ENABLED (default: true)
- OTEL_SERVICE_NAME (default: mosaic-api)
- OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318)

Tests: 23 passing with full coverage

Fixes #131

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Commit metadata (from the repository viewer; containing branch not captured in this extraction):
- Date: 2026-01-31 12:55:11 -06:00
- Parent: 64cb5c1edd
- Commit: 51e6ad0792
- 13 changed files with 2838 additions and 26 deletions

View File

@@ -6,6 +6,8 @@ import type {
LlmProviderHealthStatus,
} from "./llm-provider.interface";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "../dto";
import { TraceLlmCall, createLlmSpan } from "../../telemetry";
import { SpanStatusCode } from "@opentelemetry/api";
/**
* Configuration for Ollama LLM provider.
@@ -137,6 +139,7 @@ export class OllamaProvider implements LlmProviderInterface {
* @returns Complete chat response
* @throws {Error} If the request fails
*/
@TraceLlmCall({ system: "ollama", operation: "chat" })
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
try {
const messages = this.buildMessages(request);
@@ -176,6 +179,8 @@ export class OllamaProvider implements LlmProviderInterface {
* @throws {Error} If the request fails
*/
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> {
const span = createLlmSpan("ollama", "chat.stream", request.model);
try {
const messages = this.buildMessages(request);
const options = this.buildChatOptions(request);
@@ -197,10 +202,21 @@ export class OllamaProvider implements LlmProviderInterface {
done: chunk.done,
};
}
span.setStatus({ code: SpanStatusCode.OK });
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Streaming failed: ${errorMessage}`);
span.recordException(error instanceof Error ? error : new Error(errorMessage));
span.setStatus({
code: SpanStatusCode.ERROR,
message: errorMessage,
});
throw new Error(`Streaming failed: ${errorMessage}`);
} finally {
span.end();
}
}
@@ -211,6 +227,7 @@ export class OllamaProvider implements LlmProviderInterface {
* @returns Embeddings response with vector arrays
* @throws {Error} If the request fails
*/
@TraceLlmCall({ system: "ollama", operation: "embed" })
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
try {
const response = await this.client.embed({