feat(providers): OpenRouter adapter + Ollama embedding support — M3-004/006 (#311)

Co-authored-by: Jason Woltje <jason@diversecanvas.com> Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-21 21:38:09 +00:00
parent 08da6b76d1
commit 10761f3e47
4 changed files with 295 additions and 4 deletions
--- a/apps/gateway/src/agent/adapters/index.ts
+++ b/apps/gateway/src/agent/adapters/index.ts
@@ -1,3 +1,4 @@
 export { OllamaAdapter } from './ollama.adapter.js';
 export { AnthropicAdapter } from './anthropic.adapter.js';
 export { OpenAIAdapter } from './openai.adapter.js';
+export { OpenRouterAdapter } from './openrouter.adapter.js';
--- a/apps/gateway/src/agent/adapters/ollama.adapter.ts
+++ b/apps/gateway/src/agent/adapters/ollama.adapter.ts
@@ -8,11 +8,28 @@ import type {
  ProviderHealth,
 } from '@mosaic/types';

+/**
+ * Embedding models this adapter advertises alongside the completion models.
+ *
+ * `contextWindow` is copied into the registered `ModelInfo`; `dimensions` records the
+ * vector length of each model and is not read by the registration code in this file.
+ */
+const OLLAMA_EMBEDDING_MODELS: readonly {
+  id: string;
+  contextWindow: number;
+  dimensions: number;
+}[] = [
+  { id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 },
+  { id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 },
+];
+
+/** Subset of the `/api/embeddings` JSON response body that `embed()` reads. */
+interface OllamaEmbeddingResponse {
+  /** The embedding vector; optional because the payload is unvalidated until `embed()` checks it. */
+  embedding?: number[];
+}
+
 /**
 * Ollama provider adapter.
 *
 * Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
- * completions API. Configuration is driven by environment variables:
+ * completions API. Also exposes embedding models and an `embed()` method for
+ * vector generation (used by EmbeddingService / M3-009).
+ *
+ * Configuration is driven by environment variables:
 *   OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
 *   OLLAMA_MODELS              — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
 */
@@ -52,7 +69,8 @@ export class OllamaAdapter implements IProviderAdapter {
      })),
    });

-    this.registeredModels = modelIds.map((id) => ({
+    // Chat / completion models
+    const completionModels: ModelInfo[] = modelIds.map((id) => ({
      id,
      provider: 'ollama',
      name: id,
@@ -63,8 +81,24 @@ export class OllamaAdapter implements IProviderAdapter {
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    }));

+    // Embedding models (tracked in registeredModels but not in Pi registry,
+    // which only handles completion models)
+    const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
+      id: em.id,
+      provider: 'ollama',
+      name: em.id,
+      reasoning: false,
+      contextWindow: em.contextWindow,
+      maxTokens: 0,
+      inputTypes: ['text'] as ('text' | 'image')[],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    }));
+
+    this.registeredModels = [...completionModels, ...embeddingModels];
+
    this.logger.log(
-      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')}`,
+      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
+        `and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
    );
  }

@@ -110,6 +144,44 @@ export class OllamaAdapter implements IProviderAdapter {
    }
  }

+  /**
+   * Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
+   *
+   * The Ollama base URL is read from OLLAMA_BASE_URL (falling back to OLLAMA_HOST) on
+   * every call. The request is aborted after 30 seconds.
+   *
+   * Defaults to 'nomic-embed-text' when no model is specified.
+   * Intended for use by EmbeddingService (M3-009).
+   *
+   * @param text - The input text to embed.
+   * @param model - Optional embedding model ID (default: 'nomic-embed-text').
+   * @returns A float array representing the embedding vector.
+   * @throws Error when no base URL is configured, when the HTTP response is not OK,
+   *   or when the response body does not contain an `embedding` array. Network
+   *   failures and timeouts from `fetch` propagate unchanged.
+   */
+  async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
+    const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
+    if (!ollamaUrl) {
+      throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
+    }
+
+    // Strip trailing slashes so a base URL such as "http://host:11434/" does not
+    // yield "http://host:11434//api/embeddings".
+    const embeddingUrl = `${ollamaUrl.replace(/\/+$/, '')}/api/embeddings`;
+
+    const res = await fetch(embeddingUrl, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ model, prompt: text }),
+      signal: AbortSignal.timeout(30000),
+    });
+
+    if (!res.ok) {
+      throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
+    }
+
+    const json = (await res.json()) as OllamaEmbeddingResponse;
+
+    // The cast above is unchecked, so verify the one field we rely on at runtime.
+    if (!Array.isArray(json.embedding)) {
+      throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
+    }
+
+    return json.embedding;
+  }
+
  /**
   * createCompletion is reserved for future direct-completion use.
   * The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
--- a/apps/gateway/src/agent/adapters/openrouter.adapter.ts
+++ b/apps/gateway/src/agent/adapters/openrouter.adapter.ts
@@ -0,0 +1,212 @@
+import { Logger } from '@nestjs/common';
+import OpenAI from 'openai';
+import type {
+  CompletionEvent,
+  CompletionParams,
+  IProviderAdapter,
+  ModelInfo,
+  ProviderHealth,
+} from '@mosaic/types';
+
+/** Base URL of OpenRouter's OpenAI-compatible REST API (used for both the SDK client and raw `fetch` calls). */
+const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';
+
+/**
+ * Subset of a model entry returned by `GET {OPENROUTER_BASE_URL}/models`.
+ * Only the fields this adapter maps into `ModelInfo` are declared; all but `id` are optional.
+ */
+interface OpenRouterModel {
+  id: string;
+  /** Display name; the adapter falls back to `id` when absent. */
+  name?: string;
+  /** Mapped to `ModelInfo.contextWindow`. */
+  context_length?: number;
+  top_provider?: {
+    /** Mapped to `ModelInfo.maxTokens`. */
+    max_completion_tokens?: number;
+  };
+  /** Prices may arrive as strings or numbers; the adapter parses them with `parseFloat`. */
+  pricing?: {
+    prompt?: string | number;
+    completion?: string | number;
+  };
+  architecture?: {
+    /** The adapter only checks whether this list contains `'image'`. */
+    input_modalities?: string[];
+  };
+}
+
+/** Envelope of the `/models` response; a missing `data` is treated as an empty list. */
+interface OpenRouterModelsResponse {
+  data?: OpenRouterModel[];
+}
+
+/**
+ * OpenRouter provider adapter.
+ *
+ * Routes completions through OpenRouter's OpenAI-compatible API using the `openai`
+ * SDK pointed at OPENROUTER_BASE_URL. The model catalogue is discovered at
+ * registration time from the `/models` endpoint.
+ *
+ * Configuration is driven by the OPENROUTER_API_KEY environment variable. When it is
+ * unset, `register()` does nothing and `createCompletion()` throws.
+ */
+export class OpenRouterAdapter implements IProviderAdapter {
+  readonly name = 'openrouter';
+
+  private readonly logger = new Logger(OpenRouterAdapter.name);
+  private client: OpenAI | null = null;
+  private registeredModels: ModelInfo[] = [];
+
+  /**
+   * Create the SDK client and discover available models.
+   *
+   * Skips registration (debug log only) when OPENROUTER_API_KEY is not set. A model
+   * discovery failure is logged as a warning and leaves the model list empty; the
+   * client stays usable for completions.
+   */
+  async register(): Promise<void> {
+    const apiKey = process.env['OPENROUTER_API_KEY'];
+    if (!apiKey) {
+      this.logger.debug('Skipping OpenRouter provider registration: OPENROUTER_API_KEY not set');
+      return;
+    }
+
+    this.client = new OpenAI({
+      apiKey,
+      baseURL: OPENROUTER_BASE_URL,
+      defaultHeaders: {
+        'HTTP-Referer': 'https://mosaic.ai',
+        'X-Title': 'Mosaic',
+      },
+    });
+
+    try {
+      this.registeredModels = await this.fetchModels(apiKey);
+      this.logger.log(`OpenRouter provider registered with ${this.registeredModels.length} models`);
+    } catch (err) {
+      this.logger.warn(
+        `OpenRouter model discovery failed: ${err instanceof Error ? err.message : String(err)}. Registering with empty model list.`,
+      );
+      this.registeredModels = [];
+    }
+  }
+
+  /** Return the models discovered by the last successful `register()` call. */
+  listModels(): ModelInfo[] {
+    return this.registeredModels;
+  }
+
+  /**
+   * Probe the `/models` endpoint with a 5 second timeout.
+   *
+   * @returns `down` when the API key is missing or the request throws, `degraded`
+   *   on a non-OK HTTP status, `healthy` otherwise. `latencyMs` is included
+   *   whenever a request was attempted.
+   */
+  async healthCheck(): Promise<ProviderHealth> {
+    const apiKey = process.env['OPENROUTER_API_KEY'];
+    if (!apiKey) {
+      return {
+        status: 'down',
+        lastChecked: new Date().toISOString(),
+        error: 'OPENROUTER_API_KEY not configured',
+      };
+    }
+
+    const start = Date.now();
+    try {
+      const res = await fetch(`${OPENROUTER_BASE_URL}/models`, {
+        method: 'GET',
+        headers: {
+          Authorization: `Bearer ${apiKey}`,
+          Accept: 'application/json',
+        },
+        signal: AbortSignal.timeout(5000),
+      });
+      const latencyMs = Date.now() - start;
+
+      if (!res.ok) {
+        return {
+          status: 'degraded',
+          latencyMs,
+          lastChecked: new Date().toISOString(),
+          error: `HTTP ${res.status}`,
+        };
+      }
+
+      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
+    } catch (err) {
+      const latencyMs = Date.now() - start;
+      const error = err instanceof Error ? err.message : String(err);
+      return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
+    }
+  }
+
+  /**
+   * Stream a completion through OpenRouter's OpenAI-compatible API.
+   *
+   * Yields a `text_delta` event for every non-empty content delta, then a single
+   * `done` event carrying the token usage reported by the stream (zeros when the
+   * stream reported none).
+   *
+   * @throws Error when the adapter was never registered with an API key.
+   */
+  async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
+    if (!this.client) {
+      throw new Error('OpenRouterAdapter is not initialized. Ensure OPENROUTER_API_KEY is set.');
+    }
+
+    const stream = await this.client.chat.completions.create({
+      model: params.model,
+      messages: params.messages.map((m) => ({ role: m.role, content: m.content })),
+      temperature: params.temperature,
+      max_tokens: params.maxTokens,
+      stream: true,
+    });
+
+    let inputTokens = 0;
+    let outputTokens = 0;
+
+    for await (const chunk of stream) {
+      // Read usage before the choice guard: in the OpenAI streaming format usage can
+      // arrive on a trailing chunk whose `choices` array is empty, and it is not tied
+      // to a particular finish_reason.
+      const usage = (chunk as { usage?: { prompt_tokens?: number; completion_tokens?: number } })
+        .usage;
+      if (usage) {
+        inputTokens = usage.prompt_tokens ?? inputTokens;
+        outputTokens = usage.completion_tokens ?? outputTokens;
+      }
+
+      const choice = chunk.choices[0];
+      if (!choice) continue;
+
+      const delta = choice.delta;
+
+      if (delta.content) {
+        yield { type: 'text_delta', content: delta.content };
+      }
+    }
+
+    yield {
+      type: 'done',
+      usage: { inputTokens, outputTokens },
+    };
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private helpers
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Convert a raw pricing value from the models endpoint into a cost figure.
+   *
+   * The parsed value is multiplied by 1000, as the adapter has always done.
+   * NOTE(review): confirm this scale matches the unit `ModelInfo.cost` expects.
+   *
+   * Missing, non-numeric, and negative values map to 0 so that `NaN` or a negative
+   * cost never reaches consumers of `ModelInfo`.
+   */
+  private static scalePrice(raw: string | number | undefined): number {
+    if (raw === undefined) return 0;
+    const parsed = parseFloat(String(raw));
+    return Number.isFinite(parsed) && parsed > 0 ? parsed * 1000 : 0;
+  }
+
+  /**
+   * Fetch the OpenRouter model catalogue and map it to `ModelInfo` entries.
+   *
+   * @param apiKey - Bearer token sent in the Authorization header.
+   * @throws Error when the endpoint answers with a non-OK status; network failures
+   *   and the 10 second timeout propagate from `fetch`.
+   */
+  private async fetchModels(apiKey: string): Promise<ModelInfo[]> {
+    const res = await fetch(`${OPENROUTER_BASE_URL}/models`, {
+      method: 'GET',
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        Accept: 'application/json',
+      },
+      signal: AbortSignal.timeout(10000),
+    });
+
+    if (!res.ok) {
+      throw new Error(`OpenRouter models endpoint returned HTTP ${res.status}`);
+    }
+
+    const json = (await res.json()) as OpenRouterModelsResponse;
+    // The cast is unchecked; tolerate a missing or non-array `data` field.
+    const data = Array.isArray(json.data) ? json.data : [];
+
+    return data.map((model): ModelInfo => {
+      const inputModalities = model.architecture?.input_modalities ?? ['text'];
+      const inputTypes: ('text' | 'image')[] = inputModalities.includes('image')
+        ? ['text', 'image']
+        : ['text'];
+
+      return {
+        id: model.id,
+        provider: 'openrouter',
+        name: model.name ?? model.id,
+        reasoning: false,
+        contextWindow: model.context_length ?? 4096,
+        maxTokens: model.top_provider?.max_completion_tokens ?? 4096,
+        inputTypes,
+        cost: {
+          input: OpenRouterAdapter.scalePrice(model.pricing?.prompt),
+          output: OpenRouterAdapter.scalePrice(model.pricing?.completion),
+          cacheRead: 0,
+          cacheWrite: 0,
+        },
+      };
+    });
+  }
+}
--- a/apps/gateway/src/agent/provider.service.ts
+++ b/apps/gateway/src/agent/provider.service.ts
@@ -8,7 +8,12 @@ import type {
  ProviderHealth,
  ProviderInfo,
 } from '@mosaic/types';
-import { AnthropicAdapter, OllamaAdapter, OpenAIAdapter } from './adapters/index.js';
+import {
+  AnthropicAdapter,
+  OllamaAdapter,
+  OpenAIAdapter,
+  OpenRouterAdapter,
+} from './adapters/index.js';
 import type { TestConnectionResultDto } from './provider.dto.js';

 /** Default health check interval in seconds */
@@ -46,6 +51,7 @@ export class ProviderService implements OnModuleInit, OnModuleDestroy {
      new OllamaAdapter(this.registry),
      new AnthropicAdapter(this.registry),
      new OpenAIAdapter(this.registry),
+      new OpenRouterAdapter(),
    ];

    // Run all adapter registrations first (Ollama, Anthropic, and any future adapters)