import { Logger } from "@nestjs/common";
import { Ollama, type Message } from "ollama";
import type {
  LlmProviderInterface,
  LlmProviderConfig,
  LlmProviderHealthStatus,
} from "./llm-provider.interface";
import type {
  ChatRequestDto,
  ChatResponseDto,
  EmbedRequestDto,
  EmbedResponseDto,
} from "../dto";
import { TraceLlmCall, createLlmSpan } from "../../telemetry";
import { SpanStatusCode } from "@opentelemetry/api";

/**
 * Configuration for the Ollama LLM provider.
 * Extends the base LlmProviderConfig with Ollama-specific options.
 *
 * @example
 * ```typescript
 * const config: OllamaProviderConfig = {
 *   endpoint: "http://localhost:11434",
 *   timeout: 30000
 * };
 * ```
 */
export interface OllamaProviderConfig extends LlmProviderConfig {
  /**
   * Ollama server endpoint URL
   * @default "http://localhost:11434"
   */
  endpoint: string;

  /**
   * Request timeout in milliseconds
   * @default 30000
   */
  timeout?: number;
}

/**
 * Ollama LLM provider implementation.
 * Provides integration with locally-hosted or remote Ollama instances.
 *
 * @example
 * ```typescript
 * const provider = new OllamaProvider({
 *   endpoint: "http://localhost:11434",
 *   timeout: 30000
 * });
 *
 * await provider.initialize();
 *
 * const response = await provider.chat({
 *   model: "llama2",
 *   messages: [{ role: "user", content: "Hello" }]
 * });
 * ```
 */
export class OllamaProvider implements LlmProviderInterface {
  readonly name = "Ollama";
  readonly type = "ollama" as const;

  private readonly logger = new Logger(OllamaProvider.name);
  private readonly client: Ollama;
  private readonly config: OllamaProviderConfig;

  /**
   * Creates a new Ollama provider instance.
   *
   * @param config - Ollama provider configuration
   */
  constructor(config: OllamaProviderConfig) {
    this.config = {
      ...config,
      timeout: config.timeout ?? 30000,
    };
    this.client = new Ollama({ host: this.config.endpoint });
    this.logger.log(`Ollama provider initialized with endpoint: ${this.config.endpoint}`);
  }

  /**
   * Initialize the Ollama provider.
   * This is a no-op for Ollama, as the client is created in the constructor.
   */
  async initialize(): Promise<void> {
    // Ollama client is initialized in the constructor.
    // No additional setup required.
  }

  /**
   * Check if the Ollama server is healthy and reachable.
   *
   * @returns Health status with available models if healthy
   */
  async checkHealth(): Promise<LlmProviderHealthStatus> {
    try {
      const response = await this.client.list();
      const models = response.models.map((m) => m.name);

      return {
        healthy: true,
        provider: "ollama",
        endpoint: this.config.endpoint,
        models,
      };
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Ollama health check failed: ${errorMessage}`);

      return {
        healthy: false,
        provider: "ollama",
        endpoint: this.config.endpoint,
        error: errorMessage,
      };
    }
  }

  /**
   * List all available models from the Ollama server.
   *
   * @returns Array of model names
   * @throws {Error} If the request fails
   */
  async listModels(): Promise<string[]> {
    try {
      const response = await this.client.list();
      return response.models.map((m) => m.name);
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.error(`Failed to list models: ${errorMessage}`);
      throw new Error(`Failed to list models: ${errorMessage}`);
    }
  }

  /**
   * Perform a synchronous chat completion.
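   *
   * Illustrative usage sketch; the model name and prompt are placeholders,
   * and any model already pulled into the target Ollama instance will work:
   *
   * @example
   * ```typescript
   * const response = await provider.chat({
   *   model: "llama2",
   *   messages: [{ role: "user", content: "Why is the sky blue?" }],
   *   temperature: 0.7,
   * });
   * console.log(response.message.content);
   * ```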
   *
   * @param request - Chat request with messages and configuration
   * @returns Complete chat response
   * @throws {Error} If the request fails
   */
  @TraceLlmCall({ system: "ollama", operation: "chat" })
  async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
    try {
      const messages = this.buildMessages(request);
      const options = this.buildChatOptions(request);

      const response = await this.client.chat({
        model: request.model,
        messages,
        stream: false,
        options,
      });

      return {
        model: response.model,
        message: {
          role: response.message.role as "assistant",
          content: response.message.content,
        },
        done: response.done,
        totalDuration: response.total_duration,
        promptEvalCount: response.prompt_eval_count,
        evalCount: response.eval_count,
      };
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.error(`Chat completion failed: ${errorMessage}`);
      throw new Error(`Chat completion failed: ${errorMessage}`);
    }
  }

  /**
   * Perform a streaming chat completion.
   * Yields response chunks as they arrive from the Ollama server.
   *
   * @param request - Chat request with messages and configuration
   * @yields Chat response chunks
   * @throws {Error} If the request fails
   */
  async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> {
    const span = createLlmSpan("ollama", "chat.stream", request.model);

    try {
      const messages = this.buildMessages(request);
      const options = this.buildChatOptions(request);

      const stream = await this.client.chat({
        model: request.model,
        messages,
        stream: true,
        options,
      });

      for await (const chunk of stream) {
        yield {
          model: chunk.model,
          message: {
            role: chunk.message.role as "assistant",
            content: chunk.message.content,
          },
          done: chunk.done,
        };
      }

      span.setStatus({ code: SpanStatusCode.OK });
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.error(`Streaming failed: ${errorMessage}`);
      span.recordException(error instanceof Error ? error : new Error(errorMessage));
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: errorMessage,
      });
      throw new Error(`Streaming failed: ${errorMessage}`);
    } finally {
      span.end();
    }
  }

  /**
   * Generate embeddings for the given input texts.
   *
   * @param request - Embedding request with model and input texts
   * @returns Embeddings response with vector arrays
   * @throws {Error} If the request fails
   */
  @TraceLlmCall({ system: "ollama", operation: "embed" })
  async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
    try {
      const response = await this.client.embed({
        model: request.model,
        input: request.input,
        // Ollama expects a boolean; the DTO's "none" value means no truncation.
        truncate: request.truncate !== "none",
      });

      return {
        model: response.model,
        embeddings: response.embeddings,
        totalDuration: response.total_duration,
      };
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.error(`Embedding failed: ${errorMessage}`);
      throw new Error(`Embedding failed: ${errorMessage}`);
    }
  }

  /**
   * Get the current provider configuration.
   * Returns a copy to prevent external modification.
   *
   * @returns Provider configuration object
   */
  getConfig(): OllamaProviderConfig {
    return { ...this.config };
  }

  /**
   * Build the message array from a chat request.
   * Prepends the system prompt if one is provided and not already present in messages.
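   *
   * Illustrative sketch of the prepend behavior; the prompt and message
   * contents below are placeholders:
   *
   * @example
   * ```typescript
   * // systemPrompt: "Be concise", messages: [{ role: "user", content: "Hi" }]
   * // produces:
   * // [
   * //   { role: "system", content: "Be concise" },
   * //   { role: "user", content: "Hi" },
   * // ]
   * ```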
   *
   * @param request - Chat request
   * @returns Array of messages for Ollama
   */
  private buildMessages(request: ChatRequestDto): Message[] {
    const messages: Message[] = [];

    // Add system prompt if provided and not already in messages
    if (request.systemPrompt && !request.messages.some((m) => m.role === "system")) {
      messages.push({
        role: "system",
        content: request.systemPrompt,
      });
    }

    // Add all request messages
    for (const message of request.messages) {
      messages.push({
        role: message.role,
        content: message.content,
      });
    }

    return messages;
  }

  /**
   * Build Ollama-specific chat options from a request.
   *
   * @param request - Chat request
   * @returns Ollama options object
   */
  private buildChatOptions(request: ChatRequestDto): {
    temperature?: number;
    num_predict?: number;
  } {
    const options: { temperature?: number; num_predict?: number } = {};

    if (request.temperature !== undefined) {
      options.temperature = request.temperature;
    }

    if (request.maxTokens !== undefined) {
      options.num_predict = request.maxTokens;
    }

    return options;
  }
}
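
/*
 * Streaming usage sketch (illustrative; assumes a `provider` instance as shown
 * in the class-level example above and a locally available "llama2" model):
 *
 * for await (const chunk of provider.chatStream({
 *   model: "llama2",
 *   messages: [{ role: "user", content: "Tell me a short story" }],
 * })) {
 *   process.stdout.write(chunk.message.content);
 * }
 */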