feat(M3-004,M3-006): add OpenRouter adapter and Ollama embedding support
- M3-004: Implement OpenRouterAdapter using openai SDK with custom base URL (https://openrouter.ai/api/v1). Fetches model catalog on registration, gracefully degrades when OPENROUTER_API_KEY is absent, streams completions via OpenAI-compatible API. - M3-006: Enhance OllamaAdapter with embedding model registration (nomic-embed-text, mxbai-embed-large) and an embed(text, model?) method that calls Ollama's /api/embeddings endpoint. listModels() now returns both completion and embedding models. Prepares for M3-009 EmbeddingService. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,11 +8,28 @@ import type {
|
||||
ProviderHealth,
|
||||
} from '@mosaic/types';
|
||||
|
||||
/** Embedding models that Ollama ships with out of the box */
|
||||
const OLLAMA_EMBEDDING_MODELS: ReadonlyArray<{
|
||||
id: string;
|
||||
contextWindow: number;
|
||||
dimensions: number;
|
||||
}> = [
|
||||
{ id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 },
|
||||
{ id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 },
|
||||
];
|
||||
|
||||
/**
 * Shape of the JSON body returned by Ollama's /api/embeddings endpoint.
 *
 * `embedding` is optional so the call site is forced to validate it
 * (embed() checks Array.isArray before returning) rather than trusting
 * the remote response blindly.
 */
interface OllamaEmbeddingResponse {
  // Raw embedding vector as returned by Ollama; may be absent in a malformed response.
  embedding?: number[];
}
|
||||
|
||||
/**
|
||||
* Ollama provider adapter.
|
||||
*
|
||||
* Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
|
||||
* completions API. Configuration is driven by environment variables:
|
||||
* completions API. Also exposes embedding models and an `embed()` method for
|
||||
* vector generation (used by EmbeddingService / M3-009).
|
||||
*
|
||||
* Configuration is driven by environment variables:
|
||||
* OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
|
||||
* OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
|
||||
*/
|
||||
@@ -52,7 +69,8 @@ export class OllamaAdapter implements IProviderAdapter {
|
||||
})),
|
||||
});
|
||||
|
||||
this.registeredModels = modelIds.map((id) => ({
|
||||
// Chat / completion models
|
||||
const completionModels: ModelInfo[] = modelIds.map((id) => ({
|
||||
id,
|
||||
provider: 'ollama',
|
||||
name: id,
|
||||
@@ -63,8 +81,24 @@ export class OllamaAdapter implements IProviderAdapter {
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
}));
|
||||
|
||||
// Embedding models (tracked in registeredModels but not in Pi registry,
|
||||
// which only handles completion models)
|
||||
const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
|
||||
id: em.id,
|
||||
provider: 'ollama',
|
||||
name: em.id,
|
||||
reasoning: false,
|
||||
contextWindow: em.contextWindow,
|
||||
maxTokens: 0,
|
||||
inputTypes: ['text'] as ('text' | 'image')[],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
}));
|
||||
|
||||
this.registeredModels = [...completionModels, ...embeddingModels];
|
||||
|
||||
this.logger.log(
|
||||
`Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')}`,
|
||||
`Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
|
||||
`and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -110,6 +144,44 @@ export class OllamaAdapter implements IProviderAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
|
||||
*
|
||||
* Defaults to 'nomic-embed-text' when no model is specified.
|
||||
* Intended for use by EmbeddingService (M3-009).
|
||||
*
|
||||
* @param text - The input text to embed.
|
||||
* @param model - Optional embedding model ID (default: 'nomic-embed-text').
|
||||
* @returns A float array representing the embedding vector.
|
||||
*/
|
||||
async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
|
||||
const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
|
||||
if (!ollamaUrl) {
|
||||
throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
|
||||
}
|
||||
|
||||
const embeddingUrl = `${ollamaUrl}/api/embeddings`;
|
||||
|
||||
const res = await fetch(embeddingUrl, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ model, prompt: text }),
|
||||
signal: AbortSignal.timeout(30000),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
|
||||
}
|
||||
|
||||
const json = (await res.json()) as OllamaEmbeddingResponse;
|
||||
|
||||
if (!Array.isArray(json.embedding)) {
|
||||
throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
|
||||
}
|
||||
|
||||
return json.embedding;
|
||||
}
|
||||
|
||||
/**
|
||||
* createCompletion is reserved for future direct-completion use.
|
||||
* The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
|
||||
|
||||
Reference in New Issue
Block a user