feat(M3-004,M3-006): add OpenRouter adapter and Ollama embedding support
- M3-004: Implement OpenRouterAdapter using openai SDK with custom base URL (https://openrouter.ai/api/v1). Fetches model catalog on registration, gracefully degrades when OPENROUTER_API_KEY is absent, streams completions via OpenAI-compatible API. - M3-006: Enhance OllamaAdapter with embedding model registration (nomic-embed-text, mxbai-embed-large) and an embed(text, model?) method that calls Ollama's /api/embeddings endpoint. listModels() now returns both completion and embedding models. Prepares for M3-009 EmbeddingService. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,11 +8,28 @@ import type {
|
||||
ProviderHealth,
|
||||
} from '@mosaic/types';
|
||||
|
||||
/** Embedding models that Ollama ships with out of the box */
|
||||
const OLLAMA_EMBEDDING_MODELS: ReadonlyArray<{
|
||||
id: string;
|
||||
contextWindow: number;
|
||||
dimensions: number;
|
||||
}> = [
|
||||
{ id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 },
|
||||
{ id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 },
|
||||
];
|
||||
|
||||
/**
 * Shape of the JSON body returned by Ollama's /api/embeddings endpoint.
 *
 * `embedding` is optional so the call site is forced to validate it
 * (embed() checks Array.isArray before returning) rather than trusting
 * the remote response blindly.
 */
interface OllamaEmbeddingResponse {
  // Raw embedding vector as returned by Ollama; may be absent in a malformed response.
  embedding?: number[];
}
|
||||
|
||||
/**
|
||||
* Ollama provider adapter.
|
||||
*
|
||||
* Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
|
||||
* completions API. Configuration is driven by environment variables:
|
||||
* completions API. Also exposes embedding models and an `embed()` method for
|
||||
* vector generation (used by EmbeddingService / M3-009).
|
||||
*
|
||||
* Configuration is driven by environment variables:
|
||||
* OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
|
||||
* OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
|
||||
*/
|
||||
@@ -52,7 +69,8 @@ export class OllamaAdapter implements IProviderAdapter {
|
||||
})),
|
||||
});
|
||||
|
||||
this.registeredModels = modelIds.map((id) => ({
|
||||
// Chat / completion models
|
||||
const completionModels: ModelInfo[] = modelIds.map((id) => ({
|
||||
id,
|
||||
provider: 'ollama',
|
||||
name: id,
|
||||
@@ -63,8 +81,24 @@ export class OllamaAdapter implements IProviderAdapter {
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
}));
|
||||
|
||||
// Embedding models (tracked in registeredModels but not in Pi registry,
|
||||
// which only handles completion models)
|
||||
const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
|
||||
id: em.id,
|
||||
provider: 'ollama',
|
||||
name: em.id,
|
||||
reasoning: false,
|
||||
contextWindow: em.contextWindow,
|
||||
maxTokens: 0,
|
||||
inputTypes: ['text'] as ('text' | 'image')[],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
}));
|
||||
|
||||
this.registeredModels = [...completionModels, ...embeddingModels];
|
||||
|
||||
this.logger.log(
|
||||
`Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')}`,
|
||||
`Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
|
||||
`and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -110,6 +144,44 @@ export class OllamaAdapter implements IProviderAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
|
||||
*
|
||||
* Defaults to 'nomic-embed-text' when no model is specified.
|
||||
* Intended for use by EmbeddingService (M3-009).
|
||||
*
|
||||
* @param text - The input text to embed.
|
||||
* @param model - Optional embedding model ID (default: 'nomic-embed-text').
|
||||
* @returns A float array representing the embedding vector.
|
||||
*/
|
||||
async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
|
||||
const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
|
||||
if (!ollamaUrl) {
|
||||
throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
|
||||
}
|
||||
|
||||
const embeddingUrl = `${ollamaUrl}/api/embeddings`;
|
||||
|
||||
const res = await fetch(embeddingUrl, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ model, prompt: text }),
|
||||
signal: AbortSignal.timeout(30000),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
|
||||
}
|
||||
|
||||
const json = (await res.json()) as OllamaEmbeddingResponse;
|
||||
|
||||
if (!Array.isArray(json.embedding)) {
|
||||
throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
|
||||
}
|
||||
|
||||
return json.embedding;
|
||||
}
|
||||
|
||||
/**
|
||||
* createCompletion is reserved for future direct-completion use.
|
||||
* The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
|
||||
|
||||
Reference in New Issue
Block a user