Files
stack/apps/api/src/llm/providers/ollama.provider.ts
Jason Woltje 51e6ad0792 feat(#131): add OpenTelemetry tracing infrastructure
Implement comprehensive distributed tracing for HTTP requests and LLM
operations using OpenTelemetry with GenAI semantic conventions.

Features:
- TelemetryService: SDK initialization with OTLP HTTP exporter
- TelemetryInterceptor: Automatic HTTP request spans
- @TraceLlmCall decorator: LLM operation tracing
- GenAI semantic conventions for model/token tracking
- Graceful degradation when tracing disabled

Instrumented:
- All HTTP requests (automatic spans)
- OllamaProvider chat/chatStream/embed operations
- Token counts, model names, durations

Environment:
- OTEL_ENABLED (default: true)
- OTEL_SERVICE_NAME (default: mosaic-api)
- OTEL_EXPORTER_OTLP_ENDPOINT (default: localhost:4318)

Tests: 23 passing with full coverage

Fixes #131

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 12:55:11 -06:00

313 lines
8.6 KiB
TypeScript

import { Logger } from "@nestjs/common";
import { Ollama, type Message } from "ollama";
import type {
LlmProviderInterface,
LlmProviderConfig,
LlmProviderHealthStatus,
} from "./llm-provider.interface";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "../dto";
import { TraceLlmCall, createLlmSpan } from "../../telemetry";
import { SpanStatusCode } from "@opentelemetry/api";
/**
 * Configuration for Ollama LLM provider.
 * Extends base LlmProviderConfig with Ollama-specific options.
 *
 * @example
 * ```typescript
 * const config: OllamaProviderConfig = {
 *   endpoint: "http://localhost:11434",
 *   timeout: 30000
 * };
 * ```
 */
export interface OllamaProviderConfig extends LlmProviderConfig {
/**
 * Ollama server endpoint URL (scheme + host + port), passed as `host`
 * to the Ollama client constructor.
 * @default "http://localhost:11434"
 */
endpoint: string;
/**
 * Request timeout in milliseconds. Defaulted to 30000 in the
 * OllamaProvider constructor when omitted.
 *
 * NOTE(review): this value is stored in config but does not appear to be
 * applied to the Ollama client (only `host` is passed) — confirm intended.
 * @default 30000
 */
timeout?: number;
}
/**
 * Ollama LLM provider implementation.
 * Provides integration with locally-hosted or remote Ollama instances.
 *
 * @example
 * ```typescript
 * const provider = new OllamaProvider({
 *   endpoint: "http://localhost:11434",
 *   timeout: 30000
 * });
 *
 * await provider.initialize();
 *
 * const response = await provider.chat({
 *   model: "llama2",
 *   messages: [{ role: "user", content: "Hello" }]
 * });
 * ```
 */
export class OllamaProvider implements LlmProviderInterface {
  readonly name = "Ollama";
  readonly type = "ollama" as const;
  private readonly logger = new Logger(OllamaProvider.name);
  private readonly client: Ollama;
  private readonly config: OllamaProviderConfig;

  /**
   * Creates a new Ollama provider instance.
   *
   * @param config - Ollama provider configuration
   */
  constructor(config: OllamaProviderConfig) {
    this.config = {
      ...config,
      timeout: config.timeout ?? 30000,
    };
    // NOTE(review): only `host` is passed to the client; the configured
    // timeout is stored but not wired into the Ollama client — confirm intended.
    this.client = new Ollama({ host: this.config.endpoint });
    this.logger.log(`Ollama provider initialized with endpoint: ${this.config.endpoint}`);
  }

  /**
   * Initialize the Ollama provider.
   * This is a no-op for Ollama as the client is initialized in the constructor.
   */
  async initialize(): Promise<void> {
    // Ollama client is initialized in constructor
    // No additional setup required
  }

  /**
   * Check if the Ollama server is healthy and reachable.
   * Never throws: failures are reported via the returned status object.
   *
   * @returns Health status with available models if healthy
   */
  async checkHealth(): Promise<LlmProviderHealthStatus> {
    try {
      const response = await this.client.list();
      const models = response.models.map((m) => m.name);
      return {
        healthy: true,
        provider: "ollama",
        endpoint: this.config.endpoint,
        models,
      };
    } catch (error: unknown) {
      const errorMessage = OllamaProvider.errorMessage(error);
      this.logger.warn(`Ollama health check failed: ${errorMessage}`);
      return {
        healthy: false,
        provider: "ollama",
        endpoint: this.config.endpoint,
        error: errorMessage,
      };
    }
  }

  /**
   * List all available models from the Ollama server.
   *
   * @returns Array of model names
   * @throws {Error} If the request fails
   */
  async listModels(): Promise<string[]> {
    try {
      const response = await this.client.list();
      return response.models.map((m) => m.name);
    } catch (error: unknown) {
      const errorMessage = OllamaProvider.errorMessage(error);
      this.logger.error(`Failed to list models: ${errorMessage}`);
      throw new Error(`Failed to list models: ${errorMessage}`);
    }
  }

  /**
   * Perform a synchronous chat completion.
   *
   * @param request - Chat request with messages and configuration
   * @returns Complete chat response
   * @throws {Error} If the request fails
   */
  @TraceLlmCall({ system: "ollama", operation: "chat" })
  async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
    try {
      const messages = this.buildMessages(request);
      const options = this.buildChatOptions(request);
      const response = await this.client.chat({
        model: request.model,
        messages,
        stream: false,
        options,
      });
      return {
        model: response.model,
        message: {
          // Ollama responds with the assistant role for chat completions
          role: response.message.role as "assistant",
          content: response.message.content,
        },
        done: response.done,
        totalDuration: response.total_duration,
        promptEvalCount: response.prompt_eval_count,
        evalCount: response.eval_count,
      };
    } catch (error: unknown) {
      const errorMessage = OllamaProvider.errorMessage(error);
      this.logger.error(`Chat completion failed: ${errorMessage}`);
      throw new Error(`Chat completion failed: ${errorMessage}`);
    }
  }

  /**
   * Perform a streaming chat completion.
   * Yields response chunks as they arrive from the Ollama server.
   *
   * Tracing is done manually with createLlmSpan because the @TraceLlmCall
   * decorator cannot wrap an async generator's full lifetime.
   *
   * @param request - Chat request with messages and configuration
   * @yields Chat response chunks
   * @throws {Error} If the request fails
   */
  async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> {
    const span = createLlmSpan("ollama", "chat.stream", request.model);
    try {
      const messages = this.buildMessages(request);
      const options = this.buildChatOptions(request);
      const stream = await this.client.chat({
        model: request.model,
        messages,
        stream: true,
        options,
      });
      for await (const chunk of stream) {
        yield {
          model: chunk.model,
          message: {
            role: chunk.message.role as "assistant",
            content: chunk.message.content,
          },
          done: chunk.done,
        };
      }
      span.setStatus({ code: SpanStatusCode.OK });
    } catch (error: unknown) {
      const errorMessage = OllamaProvider.errorMessage(error);
      this.logger.error(`Streaming failed: ${errorMessage}`);
      // recordException requires an Error instance; wrap non-Error throwables
      span.recordException(error instanceof Error ? error : new Error(errorMessage));
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: errorMessage,
      });
      throw new Error(`Streaming failed: ${errorMessage}`);
    } finally {
      // End the span whether the stream completed, failed, or was abandoned
      span.end();
    }
  }

  /**
   * Generate embeddings for the given input texts.
   *
   * @param request - Embedding request with model and input texts
   * @returns Embeddings response with vector arrays
   * @throws {Error} If the request fails
   */
  @TraceLlmCall({ system: "ollama", operation: "embed" })
  async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
    try {
      const response = await this.client.embed({
        model: request.model,
        input: request.input,
        // Truncate over-length inputs unless explicitly disabled with "none"
        truncate: request.truncate !== "none",
      });
      return {
        model: response.model,
        embeddings: response.embeddings,
        totalDuration: response.total_duration,
      };
    } catch (error: unknown) {
      const errorMessage = OllamaProvider.errorMessage(error);
      this.logger.error(`Embedding failed: ${errorMessage}`);
      throw new Error(`Embedding failed: ${errorMessage}`);
    }
  }

  /**
   * Get the current provider configuration.
   * Returns a shallow copy to prevent external modification.
   *
   * @returns Provider configuration object
   */
  getConfig(): OllamaProviderConfig {
    return { ...this.config };
  }

  /**
   * Build message array from chat request.
   * Prepends system prompt if provided and not already in messages.
   *
   * @param request - Chat request
   * @returns Array of messages for Ollama
   */
  private buildMessages(request: ChatRequestDto): Message[] {
    const messages: Message[] = [];
    // Add system prompt if provided and not already in messages
    if (request.systemPrompt && !request.messages.some((m) => m.role === "system")) {
      messages.push({
        role: "system",
        content: request.systemPrompt,
      });
    }
    // Add all request messages
    for (const message of request.messages) {
      messages.push({
        role: message.role,
        content: message.content,
      });
    }
    return messages;
  }

  /**
   * Build Ollama-specific chat options from request.
   * Only includes fields the caller explicitly set.
   *
   * @param request - Chat request
   * @returns Ollama options object
   */
  private buildChatOptions(request: ChatRequestDto): {
    temperature?: number;
    num_predict?: number;
  } {
    const options: { temperature?: number; num_predict?: number } = {};
    if (request.temperature !== undefined) {
      options.temperature = request.temperature;
    }
    if (request.maxTokens !== undefined) {
      // Ollama's name for the max-token cap is num_predict
      options.num_predict = request.maxTokens;
    }
    return options;
  }

  /**
   * Normalize an unknown thrown value to a human-readable message.
   * Shared by all catch blocks so error formatting stays consistent.
   *
   * @param error - Value caught from a try/catch
   * @returns The Error's message, or the value stringified
   */
  private static errorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}