import { Injectable, Logger } from '@nestjs/common';
import type { EmbeddingProvider } from '@mosaicstack/memory';

// ---------------------------------------------------------------------------
// Environment-driven configuration
//
// EMBEDDING_PROVIDER   — 'ollama' | 'openai'; when unset the backend is inferred
//                        from the other variables (see EmbeddingService docs)
// EMBEDDING_MODEL      — model id, defaults differ per provider
// EMBEDDING_DIMENSIONS — positive integer, defaults differ per provider
// OLLAMA_BASE_URL      — base URL for Ollama (OLLAMA_HOST is read as a fallback)
// EMBEDDING_API_URL    — full base URL for OpenAI-compatible API
// OPENAI_API_KEY       — required for OpenAI provider
// ---------------------------------------------------------------------------

const OLLAMA_DEFAULT_MODEL = 'nomic-embed-text';
const OLLAMA_DEFAULT_DIMENSIONS = 768;

const OPENAI_DEFAULT_MODEL = 'text-embedding-3-small';
const OPENAI_DEFAULT_DIMENSIONS = 1536;

/** Known dimension mismatch: warn if pgvector column likely has wrong size */
const PGVECTOR_SCHEMA_DIMENSIONS = 1536;

type EmbeddingBackend = 'ollama' | 'openai';

interface OllamaEmbeddingResponse {
  embedding: number[];
}

interface OpenAIEmbeddingResponse {
  data: Array<{ embedding: number[]; index: number }>;
  model: string;
  usage: { prompt_tokens: number; total_tokens: number };
}

/**
 * Parses EMBEDDING_DIMENSIONS, falling back when the value is missing,
 * non-numeric, zero or negative (a negative length would make `new Array(n)` throw).
 */
function parseDimensions(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  return parsed > 0 ? parsed : fallback;
}

/** Removes trailing slashes so `${base}/path` never yields a double slash. */
function stripTrailingSlashes(url: string): string {
  return url.replace(/\/+$/, '');
}

/** Picks the embedding backend from the environment; first matching rule wins. */
function resolveBackend(
  provider: string | undefined,
  apiUrl: string | undefined,
  ollamaUrl: string | undefined,
  openaiKey: string | undefined,
): EmbeddingBackend {
  if (provider === 'openai' || provider === 'ollama') {
    return provider;
  }
  if (apiUrl) {
    // Legacy: explicit API URL configured → use openai-compat path
    return 'openai';
  }
  if (ollamaUrl) {
    // Ollama available and no explicit override → prefer Ollama
    return 'ollama';
  }
  if (openaiKey) {
    // OpenAI key present → use OpenAI
    return 'openai';
  }
  // Nothing configured — default to ollama (will return zeros when unavailable)
  return 'ollama';
}

/**
 * Provider-agnostic embedding service.
 *
 * Backend selection, in order of precedence:
 *   1. EMBEDDING_PROVIDER set to 'openai' or 'ollama' — used as given.
 *   2. EMBEDDING_API_URL set — OpenAI-compatible API.
 *   3. OLLAMA_BASE_URL (or OLLAMA_HOST) set — Ollama.
 *   4. OPENAI_API_KEY set — OpenAI-compatible API.
 *   5. Otherwise Ollama, which reports itself unavailable without a base URL.
 *
 * Ollama defaults to nomic-embed-text (768 dims); the OpenAI-compatible path
 * defaults to text-embedding-3-small (1536 dims).
 *
 * Dimension mismatch detection: if the configured dimensions differ from the
 * pgvector schema (1536), a warning is logged with re-embedding instructions.
 */
@Injectable()
export class EmbeddingService implements EmbeddingProvider {
  private readonly logger = new Logger(EmbeddingService.name);
  private readonly backend: EmbeddingBackend;
  private readonly model: string;
  readonly dimensions: number;

  // Ollama-specific
  private readonly ollamaBaseUrl: string | undefined;

  // OpenAI-compatible
  private readonly openaiApiKey: string | undefined;
  private readonly openaiBaseUrl: string;

  /** Reads all configuration from `process.env` once, at construction time. */
  constructor() {
    const env = process.env;
    const openaiKey = env['OPENAI_API_KEY'];
    const ollamaUrl = env['OLLAMA_BASE_URL'] ?? env['OLLAMA_HOST'];
    const apiUrl = env['EMBEDDING_API_URL'];

    this.backend = resolveBackend(env['EMBEDDING_PROVIDER'], apiUrl, ollamaUrl, openaiKey);

    // Set model and dimension defaults based on backend
    if (this.backend === 'ollama') {
      this.model = env['EMBEDDING_MODEL'] ?? OLLAMA_DEFAULT_MODEL;
      this.dimensions = parseDimensions(env['EMBEDDING_DIMENSIONS'], OLLAMA_DEFAULT_DIMENSIONS);
      this.ollamaBaseUrl = ollamaUrl === undefined ? undefined : stripTrailingSlashes(ollamaUrl);
      this.openaiApiKey = undefined;
      this.openaiBaseUrl = '';
    } else {
      this.model = env['EMBEDDING_MODEL'] ?? OPENAI_DEFAULT_MODEL;
      this.dimensions = parseDimensions(env['EMBEDDING_DIMENSIONS'], OPENAI_DEFAULT_DIMENSIONS);
      this.ollamaBaseUrl = undefined;
      this.openaiApiKey = openaiKey;
      this.openaiBaseUrl = stripTrailingSlashes(apiUrl ?? 'https://api.openai.com/v1');
    }

    // Warn on dimension mismatch with the current schema
    if (this.dimensions !== PGVECTOR_SCHEMA_DIMENSIONS) {
      this.logger.warn(
        `Embedding dimensions (${this.dimensions}) differ from pgvector schema (${PGVECTOR_SCHEMA_DIMENSIONS}). ` +
          `If insights already contain ${PGVECTOR_SCHEMA_DIMENSIONS}-dim vectors, similarity search will fail. ` +
          `To fix: truncate the insights table and re-embed, or run a migration to ALTER COLUMN embedding TYPE vector(${this.dimensions}).`,
      );
    }

    this.logger.log(
      `EmbeddingService initialized: backend=${this.backend}, model=${this.model}, dimensions=${this.dimensions}`,
    );
  }

  /**
   * Whether the selected backend has the configuration it needs:
   * a base URL for Ollama, an API key for the OpenAI-compatible path.
   */
  get available(): boolean {
    if (this.backend === 'ollama') {
      return !!this.ollamaBaseUrl;
    }
    return !!this.openaiApiKey;
  }

  /**
   * Embeds a single text.
   *
   * @throws Error if the backend request fails or yields no vector.
   */
  async embed(text: string): Promise<number[]> {
    const [vector] = await this.embedBatch([text]);
    if (vector === undefined) {
      throw new Error('Embedding backend returned no vector');
    }
    return vector;
  }

  /**
   * Embeds several texts, returning one vector per input in input order.
   *
   * When the backend is not configured (see `available`) a warning is logged
   * and zero vectors of length `dimensions` are returned instead of throwing.
   *
   * @throws Error if the backend responds with a non-2xx status or a malformed body.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    // Nothing to embed: skip the network round trip (the OpenAI-compatible
    // endpoint would otherwise be called with an empty `input` array).
    if (texts.length === 0) {
      return [];
    }

    if (!this.available) {
      const reason =
        this.backend === 'ollama'
          ? 'OLLAMA_BASE_URL not configured'
          : 'No OPENAI_API_KEY configured';
      this.logger.warn(`${reason} — returning zero vectors`);
      return texts.map(() => new Array<number>(this.dimensions).fill(0));
    }

    if (this.backend === 'ollama') {
      return this.embedBatchOllama(texts);
    }
    return this.embedBatchOpenAI(texts);
  }

  // ---------------------------------------------------------------------------
  // Ollama backend
  // ---------------------------------------------------------------------------

  /** Calls Ollama's `/api/embeddings` once per text, sequentially, preserving order. */
  private async embedBatchOllama(texts: string[]): Promise<number[][]> {
    const baseUrl = this.ollamaBaseUrl;
    if (!baseUrl) {
      // embedBatch checks `available` first; this guard replaces a non-null assertion.
      throw new Error('OLLAMA_BASE_URL not configured');
    }

    const results: number[][] = [];

    // Ollama's /api/embeddings endpoint processes one text at a time
    for (const text of texts) {
      const response = await fetch(`${baseUrl}/api/embeddings`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model: this.model, prompt: text }),
      });

      if (!response.ok) {
        const body = await response.text();
        this.logger.error(`Ollama embedding API error: ${response.status} ${body}`);
        throw new Error(`Ollama embedding API returned ${response.status}`);
      }

      const json = (await response.json()) as Partial<OllamaEmbeddingResponse> | null;
      const embedding = json?.embedding;
      if (!Array.isArray(embedding)) {
        // A 2xx body without an embedding would otherwise put `undefined` in the result.
        throw new Error('Ollama embedding API returned a response without an embedding');
      }
      results.push(embedding);
    }

    return results;
  }

  // ---------------------------------------------------------------------------
  // OpenAI-compatible backend
  // ---------------------------------------------------------------------------

  /** Sends all texts in one `/embeddings` request and orders the vectors by `index`. */
  private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
    const response = await fetch(`${this.openaiBaseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.openaiApiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
        input: texts,
        dimensions: this.dimensions,
      }),
    });

    if (!response.ok) {
      const body = await response.text();
      this.logger.error(`Embedding API error: ${response.status} ${body}`);
      throw new Error(`Embedding API returned ${response.status}`);
    }

    const json = (await response.json()) as Partial<OpenAIEmbeddingResponse> | null;
    const data = json?.data;
    if (!Array.isArray(data)) {
      throw new Error('Embedding API returned a response without a data array');
    }

    // Sort a copy so the parsed response object is not mutated.
    return [...data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
  }
}