Co-authored-by: Jason Woltje <jason@diversecanvas.com> Co-committed-by: Jason Woltje <jason@diversecanvas.com>
198 lines
6.3 KiB
TypeScript
198 lines
6.3 KiB
TypeScript
import { Logger } from '@nestjs/common';
|
|
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
|
|
import type {
|
|
CompletionEvent,
|
|
CompletionParams,
|
|
IProviderAdapter,
|
|
ModelInfo,
|
|
ProviderHealth,
|
|
} from '@mosaic/types';
|
|
|
|
/**
 * Embedding models the Ollama adapter advertises in addition to the
 * completion models configured through the environment.
 *
 * Each entry carries the model ID, its context window, and the length of the
 * embedding vector it produces.
 */
const OLLAMA_EMBEDDING_MODELS: readonly {
  id: string;
  contextWindow: number;
  dimensions: number;
}[] = [
  {
    id: 'nomic-embed-text',
    contextWindow: 8192,
    dimensions: 768,
  },
  {
    id: 'mxbai-embed-large',
    contextWindow: 512,
    dimensions: 1024,
  },
];
|
|
|
|
/**
 * JSON body the adapter reads back from the Ollama embeddings endpoint.
 *
 * `embedding` is optional because the payload is untrusted until checked;
 * consumers must verify that it is an array before using it.
 */
interface OllamaEmbeddingResponse {
  /** The embedding vector, when the response contains one. */
  embedding?: Array<number>;
}
|
|
|
|
/**
 * Ollama provider adapter.
 *
 * Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
 * completions API. Also exposes embedding models and an `embed()` method for
 * vector generation (used by EmbeddingService / M3-009).
 *
 * Configuration is driven by environment variables:
 *   OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance. Surrounding
 *     whitespace and trailing slashes are ignored, an empty value counts as unset,
 *     and a value without a scheme (e.g. `localhost:11434`) is treated as plain HTTP.
 *   OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
 */
export class OllamaAdapter implements IProviderAdapter {
  readonly name = 'ollama';

  private readonly logger = new Logger(OllamaAdapter.name);
  private registeredModels: ModelInfo[] = [];

  constructor(private readonly registry: ModelRegistry) {}

  /**
   * Canonicalise a raw base-URL setting so endpoint paths can be appended safely.
   *
   * @param raw - Value read from the environment; may be undefined or blank.
   * @returns The URL without surrounding whitespace or trailing slashes, prefixed
   *   with `http://` when no scheme is present, or `undefined` when the value is
   *   missing or blank.
   */
  private static normalizeBaseUrl(raw: string | undefined): string | undefined {
    const trimmed = raw?.trim();
    if (!trimmed) {
      return undefined;
    }

    // A bare `host:port` value is not a valid fetch() URL, so default it to HTTP.
    const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
    const withScheme = hasScheme ? trimmed : `http://${trimmed}`;

    // Drop trailing slashes so `${base}/v1` never becomes `...//v1`.
    return withScheme.replace(/\/+$/, '');
  }

  /**
   * Resolve the configured Ollama base URL.
   *
   * OLLAMA_BASE_URL takes precedence; OLLAMA_HOST is used when the former is
   * unset or blank.
   *
   * @returns The normalised base URL, or `undefined` when neither variable is usable.
   */
  private static resolveBaseUrl(): string | undefined {
    return (
      OllamaAdapter.normalizeBaseUrl(process.env['OLLAMA_BASE_URL']) ??
      OllamaAdapter.normalizeBaseUrl(process.env['OLLAMA_HOST'])
    );
  }

  /**
   * Register the `ollama` provider and its completion models with the registry,
   * and record completion plus embedding models for `listModels()`.
   *
   * Does nothing (beyond a debug log) when no base URL is configured.
   */
  async register(): Promise<void> {
    const ollamaUrl = OllamaAdapter.resolveBaseUrl();
    if (!ollamaUrl) {
      this.logger.debug('Skipping Ollama provider registration: OLLAMA_BASE_URL not set');
      return;
    }

    const modelsEnv = process.env['OLLAMA_MODELS'] ?? 'llama3.2,codellama,mistral';
    // De-duplicate so a repeated ID in OLLAMA_MODELS is only registered once.
    const modelIds = [
      ...new Set(
        modelsEnv
          .split(',')
          .map((id: string) => id.trim())
          .filter(Boolean),
      ),
    ];

    this.registry.registerProvider('ollama', {
      baseUrl: `${ollamaUrl}/v1`,
      apiKey: 'ollama',
      // NOTE(review): `as never` bypasses the registry's type for `api` — confirm
      // that 'openai-completions' is a value the registry accepts.
      api: 'openai-completions' as never,
      models: modelIds.map((id) => ({
        id,
        name: id,
        reasoning: false,
        input: ['text'] as ('text' | 'image')[],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 8192,
        maxTokens: 4096,
      })),
    });

    // Chat / completion models
    const completionModels: ModelInfo[] = modelIds.map((id) => ({
      id,
      provider: 'ollama',
      name: id,
      reasoning: false,
      contextWindow: 8192,
      maxTokens: 4096,
      inputTypes: ['text'] as ('text' | 'image')[],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    }));

    // Embedding models (tracked in registeredModels but not in Pi registry,
    // which only handles completion models)
    const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
      id: em.id,
      provider: 'ollama',
      name: em.id,
      reasoning: false,
      contextWindow: em.contextWindow,
      maxTokens: 0,
      inputTypes: ['text'] as ('text' | 'image')[],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    }));

    this.registeredModels = [...completionModels, ...embeddingModels];

    this.logger.log(
      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
        `and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
    );
  }

  /**
   * @returns The models recorded by the last successful `register()` call
   *   (empty until then).
   */
  listModels(): ModelInfo[] {
    return this.registeredModels;
  }

  /**
   * Probe the Ollama instance by issuing `GET {baseUrl}/v1/models` with a
   * five-second timeout.
   *
   * @returns `healthy` on a 2xx response, `degraded` on any other HTTP status,
   *   and `down` when no base URL is configured or the request throws
   *   (including on timeout). Never rejects for request failures.
   */
  async healthCheck(): Promise<ProviderHealth> {
    const ollamaUrl = OllamaAdapter.resolveBaseUrl();
    if (!ollamaUrl) {
      return {
        status: 'down',
        lastChecked: new Date().toISOString(),
        error: 'OLLAMA_BASE_URL not configured',
      };
    }

    const checkUrl = `${ollamaUrl}/v1/models`;
    const start = Date.now();

    try {
      const res = await fetch(checkUrl, {
        method: 'GET',
        headers: { Accept: 'application/json' },
        signal: AbortSignal.timeout(5000),
      });
      const latencyMs = Date.now() - start;

      if (!res.ok) {
        return {
          status: 'degraded',
          latencyMs,
          lastChecked: new Date().toISOString(),
          error: `HTTP ${res.status}`,
        };
      }

      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
    } catch (err) {
      const latencyMs = Date.now() - start;
      const error = err instanceof Error ? err.message : String(err);
      return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
    }
  }

  /**
   * Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
   *
   * Defaults to 'nomic-embed-text' when no model is specified.
   * Intended for use by EmbeddingService (M3-009).
   *
   * @param text - The input text to embed.
   * @param model - Optional embedding model ID (default: 'nomic-embed-text').
   * @returns A float array representing the embedding vector.
   * @throws Error when no base URL is configured, the HTTP status is not 2xx, or
   *   the response body has no `embedding` array. Errors raised by `fetch`
   *   itself (including the 30-second timeout) propagate unchanged.
   */
  async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
    const ollamaUrl = OllamaAdapter.resolveBaseUrl();
    if (!ollamaUrl) {
      throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
    }

    const embeddingUrl = `${ollamaUrl}/api/embeddings`;

    const res = await fetch(embeddingUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt: text }),
      signal: AbortSignal.timeout(30000),
    });

    if (!res.ok) {
      throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
    }

    const json = (await res.json()) as OllamaEmbeddingResponse;

    // The cast above is unchecked, so validate the one field that is consumed.
    if (!Array.isArray(json.embedding)) {
      throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
    }

    return json.embedding;
  }

  /**
   * createCompletion is reserved for future direct-completion use.
   * The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
   *
   * @throws Error always, on first iteration — the method is not implemented.
   */
  async *createCompletion(_params: CompletionParams): AsyncIterable<CompletionEvent> {
    throw new Error(
      'OllamaAdapter.createCompletion is not yet implemented. ' +
        'Use Pi SDK AgentSession for completions.',
    );
    // Satisfy the AsyncGenerator return type — unreachable but required for TypeScript.
    yield undefined as never;
  }
}
|