feat(M3-007,M3-009): provider health check scheduler and Ollama embedding default (#308)

Co-authored-by: Jason Woltje <jason@diversecanvas.com> Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-21 21:30:15 +00:00
parent cfdd2b679c
commit 6c6bcbdb7f
3 changed files with 263 additions and 67 deletions
--- a/apps/gateway/src/memory/embedding.service.ts
+++ b/apps/gateway/src/memory/embedding.service.ts
@@ -1,36 +1,122 @@
 import { Injectable, Logger } from '@nestjs/common';
 import type { EmbeddingProvider } from '@mosaic/memory';

-const DEFAULT_MODEL = 'text-embedding-3-small';
-const DEFAULT_DIMENSIONS = 1536;
+// ---------------------------------------------------------------------------
+// Environment-driven configuration
+//
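+//   EMBEDDING_PROVIDER      — 'ollama' | 'openai'; when unset, inferred from the variables below (Ollama if none are set)
+//   EMBEDDING_MODEL         — model id, defaults differ per provider
+//   EMBEDDING_DIMENSIONS    — integer, defaults differ per provider (unparseable or zero values fall back to the default)
+//   OLLAMA_BASE_URL         — base URL for Ollama (used when provider=ollama); falls back to OLLAMA_HOST
+//   EMBEDDING_API_URL       — full base URL for OpenAI-compatible API; selects the OpenAI path when the provider is unset
+//   OPENAI_API_KEY          — required for OpenAI provider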
+// ---------------------------------------------------------------------------

-interface EmbeddingResponse {
+const OLLAMA_DEFAULT_MODEL = 'nomic-embed-text';
+const OLLAMA_DEFAULT_DIMENSIONS = 768;
+
+const OPENAI_DEFAULT_MODEL = 'text-embedding-3-small';
+const OPENAI_DEFAULT_DIMENSIONS = 1536;
+
+/** Vector size the pgvector schema is assumed to use; a warning is logged when the configured dimensions differ */
+const PGVECTOR_SCHEMA_DIMENSIONS = 1536;
+
+type EmbeddingBackend = 'ollama' | 'openai';
+
+interface OllamaEmbeddingResponse {
+  embedding: number[];
+}
+
+interface OpenAIEmbeddingResponse {
  data: Array<{ embedding: number[]; index: number }>;
  model: string;
  usage: { prompt_tokens: number; total_tokens: number };
 }

 /**
- * Generates embeddings via the OpenAI-compatible embeddings API.
- * Supports OpenAI, Azure OpenAI, and any provider with a compatible endpoint.
+ * Provider-agnostic embedding service.
+ *
+ * Defaults to Ollama's native embedding API using nomic-embed-text (768 dims).
+ * Uses the OpenAI-compatible API when EMBEDDING_PROVIDER=openai or, if that is unset, when EMBEDDING_API_URL
+ * is set, or when OPENAI_API_KEY is set and neither OLLAMA_BASE_URL nor OLLAMA_HOST is configured.
+ *
+ * Dimension mismatch detection: if the configured dimensions differ from the
+ * pgvector schema (1536), a warning is logged with re-embedding instructions.
 */
@Injectable()
 export class EmbeddingService implements EmbeddingProvider {
  private readonly logger = new Logger(EmbeddingService.name);
-  private readonly apiKey: string | undefined;
-  private readonly baseUrl: string;
+  private readonly backend: EmbeddingBackend;
  private readonly model: string;
+  readonly dimensions: number;

-  readonly dimensions = DEFAULT_DIMENSIONS;
+  // Ollama-specific
+  private readonly ollamaBaseUrl: string | undefined;
+
+  // OpenAI-compatible
+  private readonly openaiApiKey: string | undefined;
+  private readonly openaiBaseUrl: string;

  constructor() {
-    this.apiKey = process.env['OPENAI_API_KEY'];
-    this.baseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
-    this.model = process.env['EMBEDDING_MODEL'] ?? DEFAULT_MODEL;
+    // Determine backend
+    const providerEnv = process.env['EMBEDDING_PROVIDER'];
+    const openaiKey = process.env['OPENAI_API_KEY'];
+    const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
+
+    if (providerEnv === 'openai') {
+      this.backend = 'openai';
+    } else if (providerEnv === 'ollama') {
+      this.backend = 'ollama';
+    } else if (process.env['EMBEDDING_API_URL']) {
+      // Legacy: explicit API URL configured → use openai-compat path
+      this.backend = 'openai';
+    } else if (ollamaUrl) {
+      // Ollama available and no explicit override → prefer Ollama
+      this.backend = 'ollama';
+    } else if (openaiKey) {
+      // OpenAI key present → use OpenAI
+      this.backend = 'openai';
+    } else {
+      // Nothing configured — default to ollama (will return zeros when unavailable)
+      this.backend = 'ollama';
+    }
+
+    // Set model and dimension defaults based on backend
+    if (this.backend === 'ollama') {
+      this.model = process.env['EMBEDDING_MODEL'] ?? OLLAMA_DEFAULT_MODEL;
+      this.dimensions =
+        parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OLLAMA_DEFAULT_DIMENSIONS;
+      this.ollamaBaseUrl = ollamaUrl;
+      this.openaiApiKey = undefined;
+      this.openaiBaseUrl = '';
+    } else {
+      this.model = process.env['EMBEDDING_MODEL'] ?? OPENAI_DEFAULT_MODEL;
+      this.dimensions =
+        parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OPENAI_DEFAULT_DIMENSIONS;
+      this.ollamaBaseUrl = undefined;
+      this.openaiApiKey = openaiKey;
+      this.openaiBaseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
+    }
+
+    // Warn on dimension mismatch with the current schema
+    if (this.dimensions !== PGVECTOR_SCHEMA_DIMENSIONS) {
+      this.logger.warn(
+        `Embedding dimensions (${this.dimensions}) differ from pgvector schema (${PGVECTOR_SCHEMA_DIMENSIONS}). ` +
+          `If insights already contain ${PGVECTOR_SCHEMA_DIMENSIONS}-dim vectors, similarity search will fail. ` +
+          `To fix: truncate the insights table and re-embed, or run a migration to ALTER COLUMN embedding TYPE vector(${this.dimensions}).`,
+      );
+    }
+
+    this.logger.log(
+      `EmbeddingService initialized: backend=${this.backend}, model=${this.model}, dimensions=${this.dimensions}`,
+    );
  }

  get available(): boolean {
-    return !!this.apiKey;
+    if (this.backend === 'ollama') {
+      return !!this.ollamaBaseUrl;
+    }
+    return !!this.openaiApiKey;
  }

  async embed(text: string): Promise<number[]> {
@@ -39,16 +125,60 @@ export class EmbeddingService implements EmbeddingProvider {
  }

  async embedBatch(texts: string[]): Promise<number[][]> {
-    if (!this.apiKey) {
-      this.logger.warn('No OPENAI_API_KEY configured — returning zero vectors');
+    if (!this.available) {
+      const reason =
+        this.backend === 'ollama'
+          ? 'OLLAMA_BASE_URL not configured'
+          : 'No OPENAI_API_KEY configured';
+      this.logger.warn(`${reason} — returning zero vectors`);
      return texts.map(() => new Array<number>(this.dimensions).fill(0));
    }

-    const response = await fetch(`${this.baseUrl}/embeddings`, {
+    if (this.backend === 'ollama') {
+      return this.embedBatchOllama(texts);
+    }
+    return this.embedBatchOpenAI(texts);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Ollama backend
+  // ---------------------------------------------------------------------------
+
+  private async embedBatchOllama(texts: string[]): Promise<number[][]> { // resolves to one vector per input text, in input order
+    const baseUrl = this.ollamaBaseUrl!; // non-null asserted, not checked here; embedBatch tests `available` before dispatching
+    const results: number[][] = [];
+
+    // Ollama's /api/embeddings endpoint processes one text at a time, so each text gets its own sequentially awaited POST
+    for (const text of texts) {
+      const response = await fetch(`${baseUrl}/api/embeddings`, { // baseUrl is interpolated as-is (no trailing-slash normalization)
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ model: this.model, prompt: text }),
+      });
+
+      if (!response.ok) { // any non-2xx status aborts the whole batch; vectors collected so far are discarded
+        const body = await response.text();
+        this.logger.error(`Ollama embedding API error: ${response.status} ${body}`); // response body is logged only, not put in the thrown message
+        throw new Error(`Ollama embedding API returned ${response.status}`);
+      }
+
+      const json = (await response.json()) as OllamaEmbeddingResponse; // type assertion only; the payload shape is not validated at runtime
+      results.push(json.embedding); // NOTE(review): vector length is not compared against this.dimensions — confirm callers tolerate that
+    }
+
+    return results;
+  }
+
+  // ---------------------------------------------------------------------------
+  // OpenAI-compatible backend
+  // ---------------------------------------------------------------------------
+
+  private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
+    const response = await fetch(`${this.openaiBaseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
-        Authorization: `Bearer ${this.apiKey}`,
+        Authorization: `Bearer ${this.openaiApiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
@@ -63,7 +193,7 @@ export class EmbeddingService implements EmbeddingProvider {
      throw new Error(`Embedding API returned ${response.status}`);
    }

-    const json = (await response.json()) as EmbeddingResponse;
+    const json = (await response.json()) as OpenAIEmbeddingResponse;
    return json.data.sort((a, b) => a.index - b.index).map((d) => d.embedding);
  }
 }