diff --git a/apps/gateway/package.json b/apps/gateway/package.json index d8ef757..975f1a4 100644 --- a/apps/gateway/package.json +++ b/apps/gateway/package.json @@ -46,6 +46,7 @@ "dotenv": "^17.3.1", "fastify": "^5.0.0", "node-cron": "^4.2.1", + "openai": "^6.32.0", "reflect-metadata": "^0.2.0", "rxjs": "^7.8.0", "socket.io": "^4.8.0", diff --git a/apps/gateway/src/agent/adapters/index.ts b/apps/gateway/src/agent/adapters/index.ts index 0ab07b6..fc2385e 100644 --- a/apps/gateway/src/agent/adapters/index.ts +++ b/apps/gateway/src/agent/adapters/index.ts @@ -1 +1,2 @@ export { OllamaAdapter } from './ollama.adapter.js'; +export { OpenRouterAdapter } from './openrouter.adapter.js'; diff --git a/apps/gateway/src/agent/adapters/ollama.adapter.ts b/apps/gateway/src/agent/adapters/ollama.adapter.ts index b71d520..1a5e52a 100644 --- a/apps/gateway/src/agent/adapters/ollama.adapter.ts +++ b/apps/gateway/src/agent/adapters/ollama.adapter.ts @@ -8,11 +8,28 @@ import type { ProviderHealth, } from '@mosaic/types'; +/** Embedding models that Ollama ships with out of the box */ +const OLLAMA_EMBEDDING_MODELS: ReadonlyArray<{ + id: string; + contextWindow: number; + dimensions: number; +}> = [ + { id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 }, + { id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 }, +]; + +interface OllamaEmbeddingResponse { + embedding?: number[]; +} + /** * Ollama provider adapter. * * Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible - * completions API. Configuration is driven by environment variables: + * completions API. Also exposes embedding models and an `embed()` method for + * vector generation (used by EmbeddingService / M3-009). 
+ * + * Configuration is driven by environment variables: * OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance * OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral) */ @@ -52,7 +69,8 @@ export class OllamaAdapter implements IProviderAdapter { })), }); - this.registeredModels = modelIds.map((id) => ({ + // Chat / completion models + const completionModels: ModelInfo[] = modelIds.map((id) => ({ id, provider: 'ollama', name: id, @@ -63,8 +81,24 @@ export class OllamaAdapter implements IProviderAdapter { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, })); + // Embedding models (tracked in registeredModels but not in Pi registry, + // which only handles completion models) + const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({ + id: em.id, + provider: 'ollama', + name: em.id, + reasoning: false, + contextWindow: em.contextWindow, + maxTokens: 0, + inputTypes: ['text'] as ('text' | 'image')[], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + })); + + this.registeredModels = [...completionModels, ...embeddingModels]; + this.logger.log( - `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')}`, + `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` + + `and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`, ); } @@ -110,6 +144,44 @@ export class OllamaAdapter implements IProviderAdapter { } } + /** + * Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint. + * + * Defaults to 'nomic-embed-text' when no model is specified. + * Intended for use by EmbeddingService (M3-009). + * + * @param text - The input text to embed. + * @param model - Optional embedding model ID (default: 'nomic-embed-text'). + * @returns A float array representing the embedding vector. 
+ */ + async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> { + const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST']; + if (!ollamaUrl) { + throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured'); + } + + const embeddingUrl = `${ollamaUrl}/api/embeddings`; + + const res = await fetch(embeddingUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model, prompt: text }), + signal: AbortSignal.timeout(30000), + }); + + if (!res.ok) { + throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`); + } + + const json = (await res.json()) as OllamaEmbeddingResponse; + + if (!Array.isArray(json.embedding)) { + throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array'); + } + + return json.embedding; + } + /** * createCompletion is reserved for future direct-completion use. * The current integration routes completions through Pi SDK's ModelRegistry/AgentSession. diff --git a/apps/gateway/src/agent/adapters/openrouter.adapter.ts b/apps/gateway/src/agent/adapters/openrouter.adapter.ts new file mode 100644 index 0000000..36f1760 --- /dev/null +++ b/apps/gateway/src/agent/adapters/openrouter.adapter.ts @@ -0,0 +1,212 @@ +import { Logger } from '@nestjs/common'; +import OpenAI from 'openai'; +import type { + CompletionEvent, + CompletionParams, + IProviderAdapter, + ModelInfo, + ProviderHealth, +} from '@mosaic/types'; + +const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'; + +interface OpenRouterModel { + id: string; + name?: string; + context_length?: number; + top_provider?: { + max_completion_tokens?: number; + }; + pricing?: { + prompt?: string | number; + completion?: string | number; + }; + architecture?: { + input_modalities?: string[]; + }; +} + +interface OpenRouterModelsResponse { + data?: OpenRouterModel[]; +} + +/** + * OpenRouter provider adapter. + * + * Routes completions through OpenRouter's OpenAI-compatible API.
+ * Configuration is driven by the OPENROUTER_API_KEY environment variable. + */ +export class OpenRouterAdapter implements IProviderAdapter { + readonly name = 'openrouter'; + + private readonly logger = new Logger(OpenRouterAdapter.name); + private client: OpenAI | null = null; + private registeredModels: ModelInfo[] = []; + + async register(): Promise<void> { + const apiKey = process.env['OPENROUTER_API_KEY']; + if (!apiKey) { + this.logger.debug('Skipping OpenRouter provider registration: OPENROUTER_API_KEY not set'); + return; + } + + this.client = new OpenAI({ + apiKey, + baseURL: OPENROUTER_BASE_URL, + defaultHeaders: { + 'HTTP-Referer': 'https://mosaic.ai', + 'X-Title': 'Mosaic', + }, + }); + + try { + this.registeredModels = await this.fetchModels(apiKey); + this.logger.log(`OpenRouter provider registered with ${this.registeredModels.length} models`); + } catch (err) { + this.logger.warn( + `OpenRouter model discovery failed: ${err instanceof Error ? err.message : String(err)}. Registering with empty model list.`, + ); + this.registeredModels = []; + } + } + + listModels(): ModelInfo[] { + return this.registeredModels; + } + + async healthCheck(): Promise<ProviderHealth> { + const apiKey = process.env['OPENROUTER_API_KEY']; + if (!apiKey) { + return { + status: 'down', + lastChecked: new Date().toISOString(), + error: 'OPENROUTER_API_KEY not configured', + }; + } + + const start = Date.now(); + try { + const res = await fetch(`${OPENROUTER_BASE_URL}/models`, { + method: 'GET', + headers: { + Authorization: `Bearer ${apiKey}`, + Accept: 'application/json', + }, + signal: AbortSignal.timeout(5000), + }); + const latencyMs = Date.now() - start; + + if (!res.ok) { + return { + status: 'degraded', + latencyMs, + lastChecked: new Date().toISOString(), + error: `HTTP ${res.status}`, + }; + } + + return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() }; + } catch (err) { + const latencyMs = Date.now() - start; + const error = err instanceof Error ?
err.message : String(err); + return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error }; + } + } + + /** + * Stream a completion through OpenRouter's OpenAI-compatible API. + */ + async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> { + if (!this.client) { + throw new Error('OpenRouterAdapter is not initialized. Ensure OPENROUTER_API_KEY is set.'); + } + + const stream = await this.client.chat.completions.create({ + model: params.model, + messages: params.messages.map((m) => ({ role: m.role, content: m.content })), + temperature: params.temperature, + max_tokens: params.maxTokens, + stream: true, + }); + + let inputTokens = 0; + let outputTokens = 0; + + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; + + const delta = choice.delta; + + if (delta.content) { + yield { type: 'text_delta', content: delta.content }; + } + + if (choice.finish_reason === 'stop') { + const usage = (chunk as { usage?: { prompt_tokens?: number; completion_tokens?: number } }) + .usage; + if (usage) { + inputTokens = usage.prompt_tokens ?? 0; + outputTokens = usage.completion_tokens ?? 0; + } + } + } + + yield { + type: 'done', + usage: { inputTokens, outputTokens }, + }; + } + + // --------------------------------------------------------------------------- + // Private helpers + // --------------------------------------------------------------------------- + + private async fetchModels(apiKey: string): Promise<ModelInfo[]> { + const res = await fetch(`${OPENROUTER_BASE_URL}/models`, { + method: 'GET', + headers: { + Authorization: `Bearer ${apiKey}`, + Accept: 'application/json', + }, + signal: AbortSignal.timeout(10000), + }); + + if (!res.ok) { + throw new Error(`OpenRouter models endpoint returned HTTP ${res.status}`); + } + + const json = (await res.json()) as OpenRouterModelsResponse; + const data = json.data ?? []; + + return data.map((model): ModelInfo => { + const inputPrice = model.pricing?.prompt + ?
parseFloat(String(model.pricing.prompt)) * 1000 + : 0; + const outputPrice = model.pricing?.completion + ? parseFloat(String(model.pricing.completion)) * 1000 + : 0; + + const inputModalities = model.architecture?.input_modalities ?? ['text']; + const inputTypes = inputModalities.includes('image') + ? (['text', 'image'] as const) + : (['text'] as const); + + return { + id: model.id, + provider: 'openrouter', + name: model.name ?? model.id, + reasoning: false, + contextWindow: model.context_length ?? 4096, + maxTokens: model.top_provider?.max_completion_tokens ?? 4096, + inputTypes: [...inputTypes], + cost: { + input: inputPrice, + output: outputPrice, + cacheRead: 0, + cacheWrite: 0, + }, + }; + }); + } +} diff --git a/apps/gateway/src/agent/provider.service.ts b/apps/gateway/src/agent/provider.service.ts index 43cc6d1..44ec9f1 100644 --- a/apps/gateway/src/agent/provider.service.ts +++ b/apps/gateway/src/agent/provider.service.ts @@ -8,7 +8,7 @@ import type { ProviderHealth, ProviderInfo, } from '@mosaic/types'; -import { OllamaAdapter } from './adapters/index.js'; +import { OllamaAdapter, OpenRouterAdapter } from './adapters/index.js'; import type { TestConnectionResultDto } from './provider.dto.js'; /** DI injection token for the provider adapter array. */ @@ -31,13 +31,13 @@ export class ProviderService implements OnModuleInit { this.registry = new ModelRegistry(authStorage); // Build the default set of adapters that rely on the registry - this.adapters = [new OllamaAdapter(this.registry)]; + this.adapters = [new OllamaAdapter(this.registry), new OpenRouterAdapter()]; - // Run all adapter registrations first (Ollama, and any future adapters) + // Run all adapter registrations first (Ollama, OpenRouter, and any future adapters) await this.registerAll(); // Register API-key providers directly (Anthropic, OpenAI, Z.ai, custom) - // These do not yet have dedicated adapter classes (M3-002 through M3-005). 
+ // These do not yet have dedicated adapter classes (M3-002, M3-003, M3-005). this.registerAnthropicProvider(); this.registerOpenAIProvider(); this.registerZaiProvider(); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bc6d1a2..05446d6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -143,6 +143,9 @@ importers: node-cron: specifier: ^4.2.1 version: 4.2.1 + openai: + specifier: ^6.32.0 + version: 6.32.0(ws@8.19.0)(zod@4.3.6) reflect-metadata: specifier: ^0.2.0 version: 0.2.2 @@ -4879,6 +4882,18 @@ packages: zod: optional: true + openai@6.32.0: + resolution: {integrity: sha512-j3k+BjydAf8yQlcOI7WUQMQTbbF5GEIMAE2iZYCOzwwB3S2pCheaWYp+XZRNAch4jWVc52PMDGRRjutao3lLCg==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + optionator@0.9.4: resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==} engines: {node: '>= 0.8.0'} @@ -10621,6 +10636,11 @@ snapshots: ws: 8.19.0 zod: 4.3.6 + openai@6.32.0(ws@8.19.0)(zod@4.3.6): + optionalDependencies: + ws: 8.19.0 + zod: 4.3.6 + optionator@0.9.4: dependencies: deep-is: 0.1.4