diff --git a/apps/gateway/src/agent/adapters/index.ts b/apps/gateway/src/agent/adapters/index.ts
index 5bf88f7..6585363 100644
--- a/apps/gateway/src/agent/adapters/index.ts
+++ b/apps/gateway/src/agent/adapters/index.ts
@@ -1,3 +1,4 @@
 export { OllamaAdapter } from './ollama.adapter.js';
 export { AnthropicAdapter } from './anthropic.adapter.js';
 export { OpenAIAdapter } from './openai.adapter.js';
+export { OpenRouterAdapter } from './openrouter.adapter.js';
diff --git a/apps/gateway/src/agent/adapters/ollama.adapter.ts b/apps/gateway/src/agent/adapters/ollama.adapter.ts
index b71d520..1a5e52a 100644
--- a/apps/gateway/src/agent/adapters/ollama.adapter.ts
+++ b/apps/gateway/src/agent/adapters/ollama.adapter.ts
@@ -8,11 +8,28 @@ import type {
   ProviderHealth,
 } from '@mosaic/types';
 
+/** Embedding models that Ollama ships with out of the box */
+const OLLAMA_EMBEDDING_MODELS: ReadonlyArray<{
+  id: string;
+  contextWindow: number;
+  dimensions: number;
+}> = [
+  { id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 },
+  { id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 },
+];
+
+interface OllamaEmbeddingResponse {
+  embedding?: number[];
+}
+
 /**
  * Ollama provider adapter.
  *
  * Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
- * completions API. Configuration is driven by environment variables:
+ * completions API. Also exposes embedding models and an `embed()` method for
+ * vector generation (used by EmbeddingService / M3-009).
+ *
+ * Configuration is driven by environment variables:
  *   OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
  *   OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
  */
@@ -52,7 +69,8 @@ export class OllamaAdapter implements IProviderAdapter {
       })),
     });
 
-    this.registeredModels = modelIds.map((id) => ({
+    // Chat / completion models
+    const completionModels: ModelInfo[] = modelIds.map((id) => ({
       id,
       provider: 'ollama',
       name: id,
@@ -63,8 +81,24 @@ export class OllamaAdapter implements IProviderAdapter {
       cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
     }));
 
+    // Embedding models (tracked in registeredModels but not in Pi registry,
+    // which only handles completion models)
+    const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
+      id: em.id,
+      provider: 'ollama',
+      name: em.id,
+      reasoning: false,
+      contextWindow: em.contextWindow,
+      maxTokens: 0,
+      inputTypes: ['text'] as ('text' | 'image')[],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    }));
+
+    this.registeredModels = [...completionModels, ...embeddingModels];
+
     this.logger.log(
-      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')}`,
+      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
+        `and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
     );
   }
 
@@ -110,6 +144,44 @@ export class OllamaAdapter implements IProviderAdapter {
     }
   }
 
+  /**
+   * Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
+   *
+   * Defaults to 'nomic-embed-text' when no model is specified.
+   * Intended for use by EmbeddingService (M3-009).
+   *
+   * @param text - The input text to embed.
+   * @param model - Optional embedding model ID (default: 'nomic-embed-text').
+   * @returns A float array representing the embedding vector.
+   */
+  async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
+    const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
+    if (!ollamaUrl) {
+      throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
+    }
+
+    const embeddingUrl = `${ollamaUrl}/api/embeddings`;
+
+    const res = await fetch(embeddingUrl, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ model, prompt: text }),
+      signal: AbortSignal.timeout(30000),
+    });
+
+    if (!res.ok) {
+      throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
+    }
+
+    const json = (await res.json()) as OllamaEmbeddingResponse;
+
+    if (!Array.isArray(json.embedding)) {
+      throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
+    }
+
+    return json.embedding;
+  }
+
   /**
    * createCompletion is reserved for future direct-completion use.
    * The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
diff --git a/apps/gateway/src/agent/adapters/openrouter.adapter.ts b/apps/gateway/src/agent/adapters/openrouter.adapter.ts
new file mode 100644
index 0000000..36f1760
--- /dev/null
+++ b/apps/gateway/src/agent/adapters/openrouter.adapter.ts
@@ -0,0 +1,212 @@
+import { Logger } from '@nestjs/common';
+import OpenAI from 'openai';
+import type {
+  CompletionEvent,
+  CompletionParams,
+  IProviderAdapter,
+  ModelInfo,
+  ProviderHealth,
+} from '@mosaic/types';
+
+const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';
+
+interface OpenRouterModel {
+  id: string;
+  name?: string;
+  context_length?: number;
+  top_provider?: {
+    max_completion_tokens?: number;
+  };
+  pricing?: {
+    prompt?: string | number;
+    completion?: string | number;
+  };
+  architecture?: {
+    input_modalities?: string[];
+  };
+}
+
+interface OpenRouterModelsResponse {
+  data?: OpenRouterModel[];
+}
+
+/**
+ * OpenRouter provider adapter.
+ *
+ * Routes completions through OpenRouter's OpenAI-compatible API.
+ * Configuration is driven by the OPENROUTER_API_KEY environment variable.
+ */
+export class OpenRouterAdapter implements IProviderAdapter {
+  readonly name = 'openrouter';
+
+  private readonly logger = new Logger(OpenRouterAdapter.name);
+  private client: OpenAI | null = null;
+  private registeredModels: ModelInfo[] = [];
+
+  async register(): Promise<void> {
+    const apiKey = process.env['OPENROUTER_API_KEY'];
+    if (!apiKey) {
+      this.logger.debug('Skipping OpenRouter provider registration: OPENROUTER_API_KEY not set');
+      return;
+    }
+
+    this.client = new OpenAI({
+      apiKey,
+      baseURL: OPENROUTER_BASE_URL,
+      defaultHeaders: {
+        'HTTP-Referer': 'https://mosaic.ai',
+        'X-Title': 'Mosaic',
+      },
+    });
+
+    try {
+      this.registeredModels = await this.fetchModels(apiKey);
+      this.logger.log(`OpenRouter provider registered with ${this.registeredModels.length} models`);
+    } catch (err) {
+      this.logger.warn(
+        `OpenRouter model discovery failed: ${err instanceof Error ? err.message : String(err)}. Registering with empty model list.`,
+      );
+      this.registeredModels = [];
+    }
+  }
+
+  listModels(): ModelInfo[] {
+    return this.registeredModels;
+  }
+
+  async healthCheck(): Promise<ProviderHealth> {
+    const apiKey = process.env['OPENROUTER_API_KEY'];
+    if (!apiKey) {
+      return {
+        status: 'down',
+        lastChecked: new Date().toISOString(),
+        error: 'OPENROUTER_API_KEY not configured',
+      };
+    }
+
+    const start = Date.now();
+    try {
+      const res = await fetch(`${OPENROUTER_BASE_URL}/models`, {
+        method: 'GET',
+        headers: {
+          Authorization: `Bearer ${apiKey}`,
+          Accept: 'application/json',
+        },
+        signal: AbortSignal.timeout(5000),
+      });
+      const latencyMs = Date.now() - start;
+
+      if (!res.ok) {
+        return {
+          status: 'degraded',
+          latencyMs,
+          lastChecked: new Date().toISOString(),
+          error: `HTTP ${res.status}`,
+        };
+      }
+
+      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
+    } catch (err) {
+      const latencyMs = Date.now() - start;
+      const error = err instanceof Error ? err.message : String(err);
+      return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
+    }
+  }
+
+  /**
+   * Stream a completion through OpenRouter's OpenAI-compatible API.
+   */
+  async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
+    if (!this.client) {
+      throw new Error('OpenRouterAdapter is not initialized. Ensure OPENROUTER_API_KEY is set.');
+    }
+
+    const stream = await this.client.chat.completions.create({
+      model: params.model,
+      messages: params.messages.map((m) => ({ role: m.role, content: m.content })),
+      temperature: params.temperature,
+      max_tokens: params.maxTokens,
+      stream: true,
+    });
+
+    let inputTokens = 0;
+    let outputTokens = 0;
+
+    for await (const chunk of stream) {
+      const choice = chunk.choices[0];
+      if (!choice) continue;
+
+      const delta = choice.delta;
+
+      if (delta.content) {
+        yield { type: 'text_delta', content: delta.content };
+      }
+
+      if (choice.finish_reason === 'stop') {
+        const usage = (chunk as { usage?: { prompt_tokens?: number; completion_tokens?: number } })
+          .usage;
+        if (usage) {
+          inputTokens = usage.prompt_tokens ?? 0;
+          outputTokens = usage.completion_tokens ?? 0;
+        }
+      }
+    }
+
+    yield {
+      type: 'done',
+      usage: { inputTokens, outputTokens },
+    };
+  }
+
+  // ---------------------------------------------------------------------------
+  // Private helpers
+  // ---------------------------------------------------------------------------
+
+  private async fetchModels(apiKey: string): Promise<ModelInfo[]> {
+    const res = await fetch(`${OPENROUTER_BASE_URL}/models`, {
+      method: 'GET',
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        Accept: 'application/json',
+      },
+      signal: AbortSignal.timeout(10000),
+    });
+
+    if (!res.ok) {
+      throw new Error(`OpenRouter models endpoint returned HTTP ${res.status}`);
+    }
+
+    const json = (await res.json()) as OpenRouterModelsResponse;
+    const data = json.data ?? [];
+
+    return data.map((model): ModelInfo => {
+      const inputPrice = model.pricing?.prompt
+        ? parseFloat(String(model.pricing.prompt)) * 1000
+        : 0;
+      const outputPrice = model.pricing?.completion
+        ? parseFloat(String(model.pricing.completion)) * 1000
+        : 0;
+
+      const inputModalities = model.architecture?.input_modalities ?? ['text'];
+      const inputTypes = inputModalities.includes('image')
+        ? (['text', 'image'] as const)
+        : (['text'] as const);
+
+      return {
+        id: model.id,
+        provider: 'openrouter',
+        name: model.name ?? model.id,
+        reasoning: false,
+        contextWindow: model.context_length ?? 4096,
+        maxTokens: model.top_provider?.max_completion_tokens ?? 4096,
+        inputTypes: [...inputTypes],
+        cost: {
+          input: inputPrice,
+          output: outputPrice,
+          cacheRead: 0,
+          cacheWrite: 0,
+        },
+      };
+    });
+  }
+}
diff --git a/apps/gateway/src/agent/provider.service.ts b/apps/gateway/src/agent/provider.service.ts
index 39fb9af..1862a67 100644
--- a/apps/gateway/src/agent/provider.service.ts
+++ b/apps/gateway/src/agent/provider.service.ts
@@ -8,7 +8,12 @@ import type {
   ProviderHealth,
   ProviderInfo,
 } from '@mosaic/types';
-import { AnthropicAdapter, OllamaAdapter, OpenAIAdapter } from './adapters/index.js';
+import {
+  AnthropicAdapter,
+  OllamaAdapter,
+  OpenAIAdapter,
+  OpenRouterAdapter,
+} from './adapters/index.js';
 import type { TestConnectionResultDto } from './provider.dto.js';
 
 /** Default health check interval in seconds */
@@ -46,6 +51,7 @@ export class ProviderService implements OnModuleInit, OnModuleDestroy {
       new OllamaAdapter(this.registry),
       new AnthropicAdapter(this.registry),
       new OpenAIAdapter(this.registry),
+      new OpenRouterAdapter(),
     ];
 
     // Run all adapter registrations first (Ollama, Anthropic, and any future adapters)