import { Logger } from '@nestjs/common';
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
import type {
  CompletionEvent,
  CompletionParams,
  IProviderAdapter,
  ModelInfo,
  ProviderHealth,
} from '@mosaic/types';

/** Embedding models that Ollama ships with out of the box */
const OLLAMA_EMBEDDING_MODELS: ReadonlyArray<{
  id: string;
  contextWindow: number;
  dimensions: number;
}> = [
  { id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 },
  { id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 },
];

/** Comma-separated completion model IDs used when OLLAMA_MODELS is unset. */
const DEFAULT_OLLAMA_MODELS = 'llama3.2,codellama,mistral';

/** Context window advertised for every completion model registered by this adapter. */
const DEFAULT_CONTEXT_WINDOW = 8192;

/** Max output tokens advertised for every completion model registered by this adapter. */
const DEFAULT_MAX_TOKENS = 4096;

/** Environment variables consulted for the base URL, in priority order. */
const OLLAMA_URL_ENV_KEYS = ['OLLAMA_BASE_URL', 'OLLAMA_HOST'] as const;

/** Shape of the JSON body returned by Ollama's /api/embeddings endpoint. */
interface OllamaEmbeddingResponse {
  embedding?: number[];
}

/**
 * Build a fresh all-zero cost record.
 *
 * A new object is returned on every call so that registered models never share
 * one mutable cost instance.
 */
function zeroCost(): { input: number; output: number; cacheRead: number; cacheWrite: number } {
  return { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
}

/**
 * Resolve the Ollama base URL from the environment.
 *
 * Checks OLLAMA_BASE_URL first, then OLLAMA_HOST. A variable that is unset,
 * empty, or whitespace-only is skipped, so a blank OLLAMA_BASE_URL no longer
 * hides a valid OLLAMA_HOST. Trailing slashes are removed so that appending
 * `/v1` or `/api/embeddings` never produces a double slash.
 *
 * @returns The normalized base URL, or `undefined` when neither variable holds a value.
 */
function resolveOllamaBaseUrl(): string | undefined {
  for (const key of OLLAMA_URL_ENV_KEYS) {
    const value = process.env[key]?.trim();
    if (value) {
      return value.replace(/\/+$/, '');
    }
  }
  return undefined;
}

/**
 * Ollama provider adapter.
 *
 * Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
 * completions API. Also exposes embedding models and an `embed()` method for
 * vector generation (used by EmbeddingService / M3-009).
 *
 * Configuration is driven by environment variables:
 *   OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
 *   OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
 */
export class OllamaAdapter implements IProviderAdapter {
  readonly name = 'ollama';

  private readonly logger = new Logger(OllamaAdapter.name);
  private registeredModels: ModelInfo[] = [];

  constructor(private readonly registry: ModelRegistry) {}

  /**
   * Register the configured completion models with the Pi registry and record
   * both completion and embedding models for `listModels()`.
   *
   * Does nothing (beyond a debug log) when no base URL is configured.
   */
  async register(): Promise<void> {
    const ollamaUrl = resolveOllamaBaseUrl();
    if (!ollamaUrl) {
      this.logger.debug('Skipping Ollama provider registration: OLLAMA_BASE_URL not set');
      return;
    }

    const modelsEnv = process.env['OLLAMA_MODELS'] ?? DEFAULT_OLLAMA_MODELS;
    // Trim, drop blanks, and de-duplicate while preserving first-seen order.
    const modelIds = [
      ...new Set(
        modelsEnv
          .split(',')
          .map((id: string) => id.trim())
          .filter(Boolean),
      ),
    ];

    this.registry.registerProvider('ollama', {
      baseUrl: `${ollamaUrl}/v1`,
      apiKey: 'ollama',
      api: 'openai-completions' as never,
      models: modelIds.map((id) => ({
        id,
        name: id,
        reasoning: false,
        input: ['text'] as ('text' | 'image')[],
        cost: zeroCost(),
        contextWindow: DEFAULT_CONTEXT_WINDOW,
        maxTokens: DEFAULT_MAX_TOKENS,
      })),
    });

    // Chat / completion models
    const completionModels: ModelInfo[] = modelIds.map((id) => ({
      id,
      provider: 'ollama',
      name: id,
      reasoning: false,
      contextWindow: DEFAULT_CONTEXT_WINDOW,
      maxTokens: DEFAULT_MAX_TOKENS,
      inputTypes: ['text'] as ('text' | 'image')[],
      cost: zeroCost(),
    }));

    // Embedding models (tracked in registeredModels but not in Pi registry,
    // which only handles completion models)
    const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
      id: em.id,
      provider: 'ollama',
      name: em.id,
      reasoning: false,
      contextWindow: em.contextWindow,
      maxTokens: 0,
      inputTypes: ['text'] as ('text' | 'image')[],
      cost: zeroCost(),
    }));

    this.registeredModels = [...completionModels, ...embeddingModels];

    this.logger.log(
      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
        `and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
    );
  }

  /**
   * @returns The models recorded by the last successful `register()` call
   *   (empty until then).
   */
  listModels(): ModelInfo[] {
    return this.registeredModels;
  }

  /**
   * Probe the instance by issuing `GET {baseUrl}/v1/models` with a 5 second timeout.
   *
   * @returns `healthy` on a 2xx response, `degraded` on any other HTTP status,
   *   and `down` when no URL is configured or the request throws
   *   (network error, timeout).
   */
  async healthCheck(): Promise<ProviderHealth> {
    const ollamaUrl = resolveOllamaBaseUrl();
    if (!ollamaUrl) {
      return {
        status: 'down',
        lastChecked: new Date().toISOString(),
        error: 'OLLAMA_BASE_URL not configured',
      };
    }

    const checkUrl = `${ollamaUrl}/v1/models`;
    const start = Date.now();

    try {
      const res = await fetch(checkUrl, {
        method: 'GET',
        headers: { Accept: 'application/json' },
        signal: AbortSignal.timeout(5000),
      });
      const latencyMs = Date.now() - start;

      if (!res.ok) {
        return {
          status: 'degraded',
          latencyMs,
          lastChecked: new Date().toISOString(),
          error: `HTTP ${res.status}`,
        };
      }

      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
    } catch (err: unknown) {
      const latencyMs = Date.now() - start;
      const error = err instanceof Error ? err.message : String(err);
      return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
    }
  }

  /**
   * Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
   *
   * Defaults to 'nomic-embed-text' when no model is specified.
   * Intended for use by EmbeddingService (M3-009).
   *
   * @param text - The input text to embed.
   * @param model - Optional embedding model ID (default: 'nomic-embed-text').
   * @returns A float array representing the embedding vector.
   * @throws Error when no base URL is configured, the HTTP response is not OK,
   *   or the response body lacks an `embedding` array. Errors raised by `fetch`
   *   itself (network failure, 30 second timeout) propagate unchanged.
   */
  async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
    const ollamaUrl = resolveOllamaBaseUrl();
    if (!ollamaUrl) {
      throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
    }

    const embeddingUrl = `${ollamaUrl}/api/embeddings`;

    const res = await fetch(embeddingUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt: text }),
      signal: AbortSignal.timeout(30000),
    });

    if (!res.ok) {
      throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
    }

    const json = (await res.json()) as OllamaEmbeddingResponse;

    if (!Array.isArray(json.embedding)) {
      throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
    }

    return json.embedding;
  }

  /**
   * createCompletion is reserved for future direct-completion use.
   * The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
   *
   * @throws Error always, on first iteration of the returned iterable.
   */
  async *createCompletion(_params: CompletionParams): AsyncIterable<CompletionEvent> {
    throw new Error(
      'OllamaAdapter.createCompletion is not yet implemented. ' +
        'Use Pi SDK AgentSession for completions.',
    );
    // Satisfy the AsyncGenerator return type — unreachable but required for TypeScript.
    yield undefined as never;
  }
}