Compare commits

..

4 Commits

Author SHA1 Message Date
360d7fe96d feat(M3-004,M3-006): add OpenRouter adapter and Ollama embedding support
Some checks failed
ci/woodpecker/pr/ci Pipeline failed
ci/woodpecker/push/ci Pipeline failed
- M3-004: Implement OpenRouterAdapter using openai SDK with custom base URL
  (https://openrouter.ai/api/v1). Fetches model catalog on registration,
  gracefully degrades when OPENROUTER_API_KEY is absent, streams completions
  via OpenAI-compatible API.

- M3-006: Enhance OllamaAdapter with embedding model registration
  (nomic-embed-text, mxbai-embed-large) and an embed(text, model?) method
  that calls Ollama's /api/embeddings endpoint. listModels() now returns
  both completion and embedding models. Prepares for M3-009 EmbeddingService.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 16:37:50 -05:00
08da6b76d1 feat(M3-003): OpenAI provider adapter for Codex gpt-5.4 (#310)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-21 21:35:43 +00:00
5d4efb467c feat(M3-002): implement AnthropicAdapter for Claude Sonnet 4.6, Opus 4.6, and Haiku 4.5 (#309)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-21 21:33:55 +00:00
6c6bcbdb7f feat(M3-007,M3-009): provider health check scheduler and Ollama embedding default (#308)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-21 21:30:15 +00:00
8 changed files with 683 additions and 68 deletions

View File

@@ -12,18 +12,19 @@
"test": "vitest run --passWithNoTests"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.80.0",
"@fastify/helmet": "^13.0.2",
"@mariozechner/pi-ai": "~0.57.1",
"@mariozechner/pi-coding-agent": "~0.57.1",
"@modelcontextprotocol/sdk": "^1.27.1",
"@mosaic/auth": "workspace:^",
"@mosaic/queue": "workspace:^",
"@mosaic/brain": "workspace:^",
"@mosaic/coord": "workspace:^",
"@mosaic/db": "workspace:^",
"@mosaic/discord-plugin": "workspace:^",
"@mosaic/log": "workspace:^",
"@mosaic/memory": "workspace:^",
"@mosaic/queue": "workspace:^",
"@mosaic/telegram-plugin": "workspace:^",
"@mosaic/types": "workspace:^",
"@nestjs/common": "^11.0.0",

View File

@@ -0,0 +1,191 @@
import { Logger } from '@nestjs/common';
import Anthropic from '@anthropic-ai/sdk';
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
import type {
CompletionEvent,
CompletionParams,
IProviderAdapter,
ModelInfo,
ProviderHealth,
} from '@mosaic/types';
/**
* Anthropic provider adapter.
*
* Registers Claude models with the Pi ModelRegistry via the Anthropic SDK.
* Configuration is driven by environment variables:
* ANTHROPIC_API_KEY — Anthropic API key (required)
*/
export class AnthropicAdapter implements IProviderAdapter {
  readonly name = 'anthropic';
  private readonly logger = new Logger(AnthropicAdapter.name);
  // Created lazily in register(); stays null when no API key is configured.
  private client: Anthropic | null = null;
  private registeredModels: ModelInfo[] = [];

  constructor(private readonly registry: ModelRegistry) {}

  /**
   * Register the Claude model catalog with the Pi ModelRegistry.
   *
   * No-op (with a warning) when ANTHROPIC_API_KEY is unset so the gateway can
   * boot without Anthropic credentials. Cost values are zeroed — presumably
   * placeholders until real pricing is wired in; confirm before billing use.
   */
  async register(): Promise<void> {
    const apiKey = process.env['ANTHROPIC_API_KEY'];
    if (!apiKey) {
      this.logger.warn('Skipping Anthropic provider registration: ANTHROPIC_API_KEY not set');
      return;
    }
    this.client = new Anthropic({ apiKey });
    const models: ModelInfo[] = [
      {
        id: 'claude-opus-4-6',
        provider: 'anthropic',
        name: 'Claude Opus 4.6',
        reasoning: true,
        contextWindow: 200000,
        maxTokens: 32000,
        inputTypes: ['text', 'image'],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      },
      {
        id: 'claude-sonnet-4-6',
        provider: 'anthropic',
        name: 'Claude Sonnet 4.6',
        reasoning: true,
        contextWindow: 200000,
        maxTokens: 16000,
        inputTypes: ['text', 'image'],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      },
      {
        id: 'claude-haiku-4-5',
        provider: 'anthropic',
        name: 'Claude Haiku 4.5',
        reasoning: false,
        contextWindow: 200000,
        maxTokens: 8192,
        inputTypes: ['text', 'image'],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      },
    ];
    this.registry.registerProvider('anthropic', {
      apiKey,
      baseUrl: 'https://api.anthropic.com',
      api: 'anthropic' as never,
      // Map the Mosaic ModelInfo shape onto the registry's expected shape.
      models: models.map((m) => ({
        id: m.id,
        name: m.name,
        reasoning: m.reasoning,
        input: m.inputTypes as ('text' | 'image')[],
        cost: m.cost,
        contextWindow: m.contextWindow,
        maxTokens: m.maxTokens,
      })),
    });
    this.registeredModels = models;
    this.logger.log(
      `Anthropic provider registered with models: ${models.map((m) => m.id).join(', ')}`,
    );
  }

  /** Models registered by register(); empty when registration was skipped. */
  listModels(): ModelInfo[] {
    return this.registeredModels;
  }

  /**
   * Probe the Anthropic API with a minimal models.list call.
   *
   * 401/403 in the error text → 'degraded' (endpoint reachable, key invalid);
   * any other failure → 'down'. Missing key → 'down' without a network call.
   */
  async healthCheck(): Promise<ProviderHealth> {
    const apiKey = process.env['ANTHROPIC_API_KEY'];
    if (!apiKey) {
      return {
        status: 'down',
        lastChecked: new Date().toISOString(),
        error: 'ANTHROPIC_API_KEY not configured',
      };
    }
    const start = Date.now();
    try {
      const client = this.client ?? new Anthropic({ apiKey });
      await client.models.list({ limit: 1 });
      const latencyMs = Date.now() - start;
      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
    } catch (err) {
      const latencyMs = Date.now() - start;
      const error = err instanceof Error ? err.message : String(err);
      const status = error.includes('401') || error.includes('403') ? 'degraded' : 'down';
      return { status, latencyMs, lastChecked: new Date().toISOString(), error };
    }
  }

  /**
   * Stream a completion from Anthropic using the messages API.
   * Maps Anthropic streaming events to the CompletionEvent format.
   *
   * Fixes vs. the previous revision:
   * - The tool name is captured from `content_block_start` so `tool_call`
   *   events carry the real name instead of an empty string.
   * - Exactly ONE terminal `done` event is emitted (from finalMessage()).
   *   Previously a `done` was also yielded for every usage-bearing
   *   `message_delta`, so consumers saw duplicate terminal events.
   *
   * Note: Currently reserved for future direct-completion use. The Pi SDK
   * integration routes completions through ModelRegistry / AgentSession.
   */
  async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
    const apiKey = process.env['ANTHROPIC_API_KEY'];
    if (!apiKey) {
      throw new Error('AnthropicAdapter: ANTHROPIC_API_KEY not configured');
    }
    const client = this.client ?? new Anthropic({ apiKey });
    // Anthropic takes the system prompt as a top-level field, not a message.
    const systemMessages = params.messages.filter((m) => m.role === 'system');
    const conversationMessages = params.messages.filter((m) => m.role !== 'system');
    const systemPrompt =
      systemMessages.length > 0 ? systemMessages.map((m) => m.content).join('\n') : undefined;
    const stream = await client.messages.stream({
      model: params.model,
      max_tokens: params.maxTokens ?? 1024,
      ...(systemPrompt !== undefined ? { system: systemPrompt } : {}),
      messages: conversationMessages.map((m) => ({
        role: m.role as 'user' | 'assistant',
        content: m.content,
      })),
      ...(params.temperature !== undefined ? { temperature: params.temperature } : {}),
      ...(params.tools && params.tools.length > 0
        ? {
            tools: params.tools.map((t) => ({
              name: t.name,
              description: t.description,
              input_schema: t.parameters as Anthropic.Tool['input_schema'],
            })),
          }
        : {}),
    });
    // Name of the tool_use block currently being streamed; input_json_delta
    // events do not repeat the name, it arrives once in content_block_start.
    let currentToolName = '';
    for await (const event of stream) {
      if (event.type === 'content_block_start' && event.content_block.type === 'tool_use') {
        currentToolName = event.content_block.name;
      } else if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
        yield { type: 'text_delta', content: event.delta.text };
      } else if (event.type === 'content_block_delta' && event.delta.type === 'input_json_delta') {
        yield { type: 'tool_call', name: currentToolName, arguments: event.delta.partial_json };
      }
    }
    // Single terminal event with authoritative usage from the completed message.
    const finalMessage = await stream.finalMessage();
    yield {
      type: 'done',
      usage: {
        inputTokens: finalMessage.usage.input_tokens,
        outputTokens: finalMessage.usage.output_tokens,
      },
    };
  }
}

View File

@@ -1,2 +1,4 @@
// Provider adapter implementations, re-exported alphabetically for ProviderService.
export { AnthropicAdapter } from './anthropic.adapter.js';
export { OllamaAdapter } from './ollama.adapter.js';
export { OpenAIAdapter } from './openai.adapter.js';
export { OpenRouterAdapter } from './openrouter.adapter.js';

View File

@@ -0,0 +1,201 @@
import { Logger } from '@nestjs/common';
import OpenAI from 'openai';
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
import type {
CompletionEvent,
CompletionParams,
IProviderAdapter,
ModelInfo,
ProviderHealth,
} from '@mosaic/types';
/**
* OpenAI provider adapter.
*
* Registers OpenAI models (including Codex gpt-5.4) with the Pi ModelRegistry.
* Configuration is driven by environment variables:
* OPENAI_API_KEY — OpenAI API key (required; adapter skips registration when absent)
*/
export class OpenAIAdapter implements IProviderAdapter {
  readonly name = 'openai';
  private readonly logger = new Logger(OpenAIAdapter.name);
  private registeredModels: ModelInfo[] = [];
  // Created in register(); null until OPENAI_API_KEY is seen.
  private client: OpenAI | null = null;

  /** Model ID used for Codex gpt-5.4 in the Pi registry. */
  static readonly CODEX_MODEL_ID = 'codex-gpt-5-4';

  constructor(private readonly registry: ModelRegistry) {}

  /**
   * Register the Codex gpt-5.4 model with the Pi ModelRegistry.
   * No-op (debug log) when OPENAI_API_KEY is unset.
   */
  async register(): Promise<void> {
    const apiKey = process.env['OPENAI_API_KEY'];
    if (!apiKey) {
      this.logger.debug('Skipping OpenAI provider registration: OPENAI_API_KEY not set');
      return;
    }
    this.client = new OpenAI({ apiKey });
    const codexModel = {
      id: OpenAIAdapter.CODEX_MODEL_ID,
      name: 'Codex gpt-5.4',
      /** OpenAI-compatible completions API */
      api: 'openai-completions' as never,
      reasoning: false,
      input: ['text', 'image'] as ('text' | 'image')[],
      cost: { input: 0.003, output: 0.012, cacheRead: 0.0015, cacheWrite: 0 },
      contextWindow: 128_000,
      maxTokens: 16_384,
    };
    this.registry.registerProvider('openai', {
      apiKey,
      baseUrl: 'https://api.openai.com/v1',
      models: [codexModel],
    });
    this.registeredModels = [
      {
        id: OpenAIAdapter.CODEX_MODEL_ID,
        provider: 'openai',
        name: 'Codex gpt-5.4',
        reasoning: false,
        contextWindow: 128_000,
        maxTokens: 16_384,
        inputTypes: ['text', 'image'] as ('text' | 'image')[],
        cost: { input: 0.003, output: 0.012, cacheRead: 0.0015, cacheWrite: 0 },
      },
    ];
    this.logger.log(`OpenAI provider registered with model: ${OpenAIAdapter.CODEX_MODEL_ID}`);
  }

  /** Models registered by register(); empty when registration was skipped. */
  listModels(): ModelInfo[] {
    return this.registeredModels;
  }

  /**
   * Verify key validity with a 5-second-bounded GET /v1/models.
   * Non-2xx → 'degraded'; network/timeout failure → 'down'.
   */
  async healthCheck(): Promise<ProviderHealth> {
    const apiKey = process.env['OPENAI_API_KEY'];
    if (!apiKey) {
      return {
        status: 'down',
        lastChecked: new Date().toISOString(),
        error: 'OPENAI_API_KEY not configured',
      };
    }
    const start = Date.now();
    try {
      // Lightweight call — list models to verify key validity
      const res = await fetch('https://api.openai.com/v1/models', {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        signal: AbortSignal.timeout(5000),
      });
      const latencyMs = Date.now() - start;
      if (!res.ok) {
        return {
          status: 'degraded',
          latencyMs,
          lastChecked: new Date().toISOString(),
          error: `HTTP ${res.status}`,
        };
      }
      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
    } catch (err) {
      const latencyMs = Date.now() - start;
      const error = err instanceof Error ? err.message : String(err);
      return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
    }
  }

  /**
   * Stream a completion from OpenAI using the chat completions API.
   *
   * Maps OpenAI streaming chunks to the Mosaic CompletionEvent format.
   *
   * Fixes vs. the previous revision:
   * - Tool-call fragments are accumulated per index. OpenAI streams the
   *   function name only in the FIRST fragment and splits the argument JSON
   *   across later fragments; the old `name && arguments` guard therefore
   *   emitted only the first fragment and dropped the rest. Each complete
   *   tool call is now emitted once, with fully assembled arguments.
   * - No early return on `finish_reason`. With stream_options.include_usage,
   *   the usage arrives in a trailing chunk with an empty `choices` array —
   *   AFTER the finish chunk — so returning early always reported 0 tokens.
   */
  async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
    if (!this.client) {
      throw new Error(
        'OpenAIAdapter: client not initialized. ' +
          'Ensure OPENAI_API_KEY is set and register() was called.',
      );
    }
    const stream = await this.client.chat.completions.create({
      model: params.model,
      messages: params.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      ...(params.temperature !== undefined && { temperature: params.temperature }),
      ...(params.maxTokens !== undefined && { max_tokens: params.maxTokens }),
      ...(params.tools &&
        params.tools.length > 0 && {
          tools: params.tools.map((t) => ({
            type: 'function' as const,
            function: {
              name: t.name,
              description: t.description,
              parameters: t.parameters,
            },
          })),
        }),
      stream: true,
      stream_options: { include_usage: true },
    });
    let inputTokens = 0;
    let outputTokens = 0;
    // Partial tool calls keyed by their stream index, assembled across chunks.
    const pendingToolCalls = new Map<number, { name: string; arguments: string }>();
    for await (const chunk of stream) {
      // Usage arrives on the final chunk when stream_options.include_usage is set.
      if (chunk.usage) {
        inputTokens = chunk.usage.prompt_tokens;
        outputTokens = chunk.usage.completion_tokens;
      }
      const choice = chunk.choices[0];
      // The usage-only trailing chunk has no choices.
      if (!choice) continue;
      const delta = choice.delta;
      // Text content delta
      if (delta.content) {
        yield { type: 'text_delta', content: delta.content };
      }
      // Accumulate tool-call fragments; emitted once assembled, after the loop.
      if (delta.tool_calls) {
        for (const fragment of delta.tool_calls) {
          const entry = pendingToolCalls.get(fragment.index) ?? { name: '', arguments: '' };
          if (fragment.function?.name) {
            entry.name = fragment.function.name;
          }
          if (fragment.function?.arguments) {
            entry.arguments += fragment.function.arguments;
          }
          pendingToolCalls.set(fragment.index, entry);
        }
      }
    }
    // Emit fully assembled tool calls in stream order.
    for (const [, call] of [...pendingToolCalls.entries()].sort((a, b) => a[0] - b[0])) {
      yield { type: 'tool_call', name: call.name, arguments: call.arguments };
    }
    // Single terminal event, after the usage chunk has been observed.
    yield { type: 'done', usage: { inputTokens, outputTokens } };
  }
}

View File

@@ -1,4 +1,4 @@
import { Injectable, Logger, type OnModuleInit } from '@nestjs/common';
import { Injectable, Logger, type OnModuleDestroy, type OnModuleInit } from '@nestjs/common';
import { ModelRegistry, AuthStorage } from '@mariozechner/pi-coding-agent';
import { getModel, type Model, type Api } from '@mariozechner/pi-ai';
import type {
@@ -8,14 +8,22 @@ import type {
ProviderHealth,
ProviderInfo,
} from '@mosaic/types';
import { OllamaAdapter, OpenRouterAdapter } from './adapters/index.js';
import {
AnthropicAdapter,
OllamaAdapter,
OpenAIAdapter,
OpenRouterAdapter,
} from './adapters/index.js';
import type { TestConnectionResultDto } from './provider.dto.js';
/** Default health check interval in seconds */
const DEFAULT_HEALTH_INTERVAL_SECS = 60;
/** DI injection token for the provider adapter array. */
export const PROVIDER_ADAPTERS = Symbol('PROVIDER_ADAPTERS');
@Injectable()
export class ProviderService implements OnModuleInit {
export class ProviderService implements OnModuleInit, OnModuleDestroy {
private readonly logger = new Logger(ProviderService.name);
private registry!: ModelRegistry;
@@ -26,25 +34,124 @@ export class ProviderService implements OnModuleInit {
*/
private adapters: IProviderAdapter[] = [];
/**
* Cached health status per provider, updated by the health check scheduler.
*/
private healthCache: Map<string, ProviderHealth & { modelCount: number }> = new Map();
/** Timer handle for the periodic health check scheduler */
private healthCheckTimer: ReturnType<typeof setInterval> | null = null;
async onModuleInit(): Promise<void> {
const authStorage = AuthStorage.inMemory();
this.registry = new ModelRegistry(authStorage);
// Build the default set of adapters that rely on the registry
this.adapters = [new OllamaAdapter(this.registry), new OpenRouterAdapter()];
this.adapters = [
new OllamaAdapter(this.registry),
new AnthropicAdapter(this.registry),
new OpenAIAdapter(this.registry),
new OpenRouterAdapter(),
];
// Run all adapter registrations first (Ollama, OpenRouter, and any future adapters)
// Run all adapter registrations first (Ollama, Anthropic, and any future adapters)
await this.registerAll();
// Register API-key providers directly (Anthropic, OpenAI, Z.ai, custom)
// These do not yet have dedicated adapter classes (M3-002, M3-003, M3-005).
this.registerAnthropicProvider();
this.registerOpenAIProvider();
// Register API-key providers directly (Z.ai, custom)
// OpenAI now has a dedicated adapter (M3-003).
this.registerZaiProvider();
this.registerCustomProviders();
const available = this.registry.getAvailable();
this.logger.log(`Providers initialized: ${available.length} models available`);
// Kick off the health check scheduler
this.startHealthCheckScheduler();
}
onModuleDestroy(): void {
if (this.healthCheckTimer !== null) {
clearInterval(this.healthCheckTimer);
this.healthCheckTimer = null;
}
}
// ---------------------------------------------------------------------------
// Health check scheduler
// ---------------------------------------------------------------------------
/**
* Start periodic health checks on all adapters.
* Interval is configurable via PROVIDER_HEALTH_INTERVAL env (seconds, default 60).
*/
private startHealthCheckScheduler(): void {
  const parsed = parseInt(process.env['PROVIDER_HEALTH_INTERVAL'] ?? '', 10);
  // Reject NaN, zero, AND negative values: the previous `|| DEFAULT` fallback
  // let a negative interval through, which setInterval clamps to ~1ms and
  // would hammer every provider's health endpoint.
  const intervalSecs = Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_HEALTH_INTERVAL_SECS;
  const intervalMs = intervalSecs * 1000;
  // Run an initial check immediately (non-blocking)
  void this.runScheduledHealthChecks();
  this.healthCheckTimer = setInterval(() => {
    void this.runScheduledHealthChecks();
  }, intervalMs);
  this.logger.log(`Provider health check scheduler started (interval: ${intervalSecs}s)`);
}
/**
 * Run one health check pass over every adapter, updating healthCache.
 * Checks run concurrently so a single slow or unreachable provider does not
 * delay status updates for the others (previously they ran sequentially).
 * Failures are captured per adapter as status 'down'; this method never throws.
 */
private async runScheduledHealthChecks(): Promise<void> {
  await Promise.all(
    this.adapters.map(async (adapter) => {
      // listModels() is synchronous and cheap; capture count up front.
      const modelCount = adapter.listModels().length;
      try {
        const health = await adapter.healthCheck();
        this.healthCache.set(adapter.name, { ...health, modelCount });
        this.logger.debug(
          `Health check [${adapter.name}]: ${health.status} (${health.latencyMs ?? 'n/a'}ms)`,
        );
      } catch (err) {
        this.healthCache.set(adapter.name, {
          status: 'down',
          lastChecked: new Date().toISOString(),
          error: err instanceof Error ? err.message : String(err),
          modelCount,
        });
      }
    }),
  );
}
/**
* Return the cached health status for all adapters.
* Format: array of { name, status, latencyMs, lastChecked, modelCount }
*/
getProvidersHealth(): Array<{
  name: string;
  status: string;
  latencyMs?: number;
  lastChecked: string;
  modelCount: number;
  error?: string;
}> {
  return this.adapters.map((adapter) => {
    const cached = this.healthCache.get(adapter.name);
    // Scheduler has not completed a check for this adapter yet.
    if (!cached) {
      return {
        name: adapter.name,
        status: 'unknown',
        lastChecked: new Date().toISOString(),
        modelCount: adapter.listModels().length,
      };
    }
    const { status, latencyMs, lastChecked, modelCount, error } = cached;
    return { name: adapter.name, status, latencyMs, lastChecked, modelCount, error };
  });
}
// ---------------------------------------------------------------------------
@@ -234,49 +341,9 @@ export class ProviderService implements OnModuleInit {
// ---------------------------------------------------------------------------
// Private helpers — direct registry registration for providers without adapters yet
// (Anthropic, OpenAI, Z.ai will move to adapters in M3-002 through M3-005)
// (Z.ai will move to an adapter in M3-005)
// ---------------------------------------------------------------------------
/**
 * Register Claude models directly with the registry from built-in model
 * definitions (legacy path predating a dedicated adapter class).
 * No-op when ANTHROPIC_API_KEY is unset.
 */
private registerAnthropicProvider(): void {
const apiKey = process.env['ANTHROPIC_API_KEY'];
if (!apiKey) {
this.logger.debug('Skipping Anthropic provider registration: ANTHROPIC_API_KEY not set');
return;
}
// Clone built-in definitions, capping output at 8192 tokens for all three.
const models = ['claude-sonnet-4-6', 'claude-opus-4-6', 'claude-haiku-4-5'].map((id) =>
this.cloneBuiltInModel('anthropic', id, { maxTokens: 8192 }),
);
this.registry.registerProvider('anthropic', {
apiKey,
baseUrl: 'https://api.anthropic.com',
models,
});
this.logger.log('Anthropic provider registered with 3 models');
}
/**
 * Register OpenAI models directly with the registry from built-in model
 * definitions (legacy path predating a dedicated adapter class).
 * No-op when OPENAI_API_KEY is unset.
 */
private registerOpenAIProvider(): void {
const apiKey = process.env['OPENAI_API_KEY'];
if (!apiKey) {
this.logger.debug('Skipping OpenAI provider registration: OPENAI_API_KEY not set');
return;
}
// Clone built-in definitions with their default limits.
const models = ['gpt-4o', 'gpt-4o-mini', 'o3-mini'].map((id) =>
this.cloneBuiltInModel('openai', id),
);
this.registry.registerProvider('openai', {
apiKey,
baseUrl: 'https://api.openai.com/v1',
models,
});
this.logger.log('OpenAI provider registered with 3 models');
}
private registerZaiProvider(): void {
const apiKey = process.env['ZAI_API_KEY'];
if (!apiKey) {

View File

@@ -23,6 +23,11 @@ export class ProvidersController {
return this.providerService.listAvailableModels();
}
/**
 * GET …/health — returns the cached health snapshot for every registered
 * provider adapter, as collected by ProviderService's background scheduler.
 */
@Get('health')
health() {
return { providers: this.providerService.getProvidersHealth() };
}
@Post('test')
testConnection(@Body() body: TestConnectionDto): Promise<TestConnectionResultDto> {
return this.providerService.testConnection(body.providerId, body.baseUrl);

View File

@@ -1,36 +1,122 @@
import { Injectable, Logger } from '@nestjs/common';
import type { EmbeddingProvider } from '@mosaic/memory';
const DEFAULT_MODEL = 'text-embedding-3-small';
const DEFAULT_DIMENSIONS = 1536;
// ---------------------------------------------------------------------------
// Environment-driven configuration
//
// EMBEDDING_PROVIDER — 'ollama' (default) | 'openai'
// EMBEDDING_MODEL — model id, defaults differ per provider
// EMBEDDING_DIMENSIONS — integer, defaults differ per provider
// OLLAMA_BASE_URL — base URL for Ollama (used when provider=ollama)
// EMBEDDING_API_URL — full base URL for OpenAI-compatible API
// OPENAI_API_KEY — required for OpenAI provider
// ---------------------------------------------------------------------------
interface EmbeddingResponse {
const OLLAMA_DEFAULT_MODEL = 'nomic-embed-text';
const OLLAMA_DEFAULT_DIMENSIONS = 768;
const OPENAI_DEFAULT_MODEL = 'text-embedding-3-small';
const OPENAI_DEFAULT_DIMENSIONS = 1536;
/** Known dimension mismatch: warn if pgvector column likely has wrong size */
const PGVECTOR_SCHEMA_DIMENSIONS = 1536;
type EmbeddingBackend = 'ollama' | 'openai';
interface OllamaEmbeddingResponse {
embedding: number[];
}
interface OpenAIEmbeddingResponse {
data: Array<{ embedding: number[]; index: number }>;
model: string;
usage: { prompt_tokens: number; total_tokens: number };
}
/**
* Generates embeddings via the OpenAI-compatible embeddings API.
* Supports OpenAI, Azure OpenAI, and any provider with a compatible endpoint.
* Provider-agnostic embedding service.
*
* Defaults to Ollama's native embedding API using nomic-embed-text (768 dims).
* Falls back to the OpenAI-compatible API when EMBEDDING_PROVIDER=openai or
* when OPENAI_API_KEY is set and EMBEDDING_PROVIDER is not explicitly set to ollama.
*
* Dimension mismatch detection: if the configured dimensions differ from the
* pgvector schema (1536), a warning is logged with re-embedding instructions.
*/
@Injectable()
export class EmbeddingService implements EmbeddingProvider {
private readonly logger = new Logger(EmbeddingService.name);
private readonly apiKey: string | undefined;
private readonly baseUrl: string;
private readonly backend: EmbeddingBackend;
private readonly model: string;
readonly dimensions: number;
readonly dimensions = DEFAULT_DIMENSIONS;
// Ollama-specific
private readonly ollamaBaseUrl: string | undefined;
// OpenAI-compatible
private readonly openaiApiKey: string | undefined;
private readonly openaiBaseUrl: string;
constructor() {
this.apiKey = process.env['OPENAI_API_KEY'];
this.baseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
this.model = process.env['EMBEDDING_MODEL'] ?? DEFAULT_MODEL;
// Determine backend
const providerEnv = process.env['EMBEDDING_PROVIDER'];
const openaiKey = process.env['OPENAI_API_KEY'];
const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
if (providerEnv === 'openai') {
this.backend = 'openai';
} else if (providerEnv === 'ollama') {
this.backend = 'ollama';
} else if (process.env['EMBEDDING_API_URL']) {
// Legacy: explicit API URL configured → use openai-compat path
this.backend = 'openai';
} else if (ollamaUrl) {
// Ollama available and no explicit override → prefer Ollama
this.backend = 'ollama';
} else if (openaiKey) {
// OpenAI key present → use OpenAI
this.backend = 'openai';
} else {
// Nothing configured — default to ollama (will return zeros when unavailable)
this.backend = 'ollama';
}
// Set model and dimension defaults based on backend
if (this.backend === 'ollama') {
this.model = process.env['EMBEDDING_MODEL'] ?? OLLAMA_DEFAULT_MODEL;
this.dimensions =
parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OLLAMA_DEFAULT_DIMENSIONS;
this.ollamaBaseUrl = ollamaUrl;
this.openaiApiKey = undefined;
this.openaiBaseUrl = '';
} else {
this.model = process.env['EMBEDDING_MODEL'] ?? OPENAI_DEFAULT_MODEL;
this.dimensions =
parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OPENAI_DEFAULT_DIMENSIONS;
this.ollamaBaseUrl = undefined;
this.openaiApiKey = openaiKey;
this.openaiBaseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
}
// Warn on dimension mismatch with the current schema
if (this.dimensions !== PGVECTOR_SCHEMA_DIMENSIONS) {
this.logger.warn(
`Embedding dimensions (${this.dimensions}) differ from pgvector schema (${PGVECTOR_SCHEMA_DIMENSIONS}). ` +
`If insights already contain ${PGVECTOR_SCHEMA_DIMENSIONS}-dim vectors, similarity search will fail. ` +
`To fix: truncate the insights table and re-embed, or run a migration to ALTER COLUMN embedding TYPE vector(${this.dimensions}).`,
);
}
this.logger.log(
`EmbeddingService initialized: backend=${this.backend}, model=${this.model}, dimensions=${this.dimensions}`,
);
}
get available(): boolean {
return !!this.apiKey;
if (this.backend === 'ollama') {
return !!this.ollamaBaseUrl;
}
return !!this.openaiApiKey;
}
async embed(text: string): Promise<number[]> {
@@ -39,16 +125,60 @@ export class EmbeddingService implements EmbeddingProvider {
}
async embedBatch(texts: string[]): Promise<number[][]> {
if (!this.apiKey) {
this.logger.warn('No OPENAI_API_KEY configured — returning zero vectors');
if (!this.available) {
const reason =
this.backend === 'ollama'
? 'OLLAMA_BASE_URL not configured'
: 'No OPENAI_API_KEY configured';
this.logger.warn(`${reason} — returning zero vectors`);
return texts.map(() => new Array<number>(this.dimensions).fill(0));
}
const response = await fetch(`${this.baseUrl}/embeddings`, {
if (this.backend === 'ollama') {
return this.embedBatchOllama(texts);
}
return this.embedBatchOpenAI(texts);
}
// ---------------------------------------------------------------------------
// Ollama backend
// ---------------------------------------------------------------------------
/**
 * Embed a batch of texts via Ollama's native /api/embeddings endpoint.
 * The endpoint accepts a single prompt per request, so texts are embedded
 * one at a time, sequentially, preserving input order in the result.
 * Throws on any non-2xx response.
 */
private async embedBatchOllama(texts: string[]): Promise<number[][]> {
  const baseUrl = this.ollamaBaseUrl!;
  const embeddings: number[][] = [];
  for (const prompt of texts) {
    const res = await fetch(`${baseUrl}/api/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: this.model, prompt }),
    });
    if (!res.ok) {
      const detail = await res.text();
      this.logger.error(`Ollama embedding API error: ${res.status} ${detail}`);
      throw new Error(`Ollama embedding API returned ${res.status}`);
    }
    const payload = (await res.json()) as OllamaEmbeddingResponse;
    embeddings.push(payload.embedding);
  }
  return embeddings;
}
// ---------------------------------------------------------------------------
// OpenAI-compatible backend
// ---------------------------------------------------------------------------
private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
const response = await fetch(`${this.openaiBaseUrl}/embeddings`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${this.apiKey}`,
Authorization: `Bearer ${this.openaiApiKey}`,
},
body: JSON.stringify({
model: this.model,
@@ -63,7 +193,7 @@ export class EmbeddingService implements EmbeddingProvider {
throw new Error(`Embedding API returned ${response.status}`);
}
const json = (await response.json()) as EmbeddingResponse;
const json = (await response.json()) as OpenAIEmbeddingResponse;
return json.data.sort((a, b) => a.index - b.index).map((d) => d.embedding);
}
}

18
pnpm-lock.yaml generated
View File

@@ -41,6 +41,9 @@ importers:
apps/gateway:
dependencies:
'@anthropic-ai/sdk':
specifier: ^0.80.0
version: 0.80.0(zod@4.3.6)
'@fastify/helmet':
specifier: ^13.0.2
version: 13.0.2
@@ -585,6 +588,15 @@ packages:
zod:
optional: true
'@anthropic-ai/sdk@0.80.0':
resolution: {integrity: sha512-WeXLn7zNVk3yjeshn+xZHvld6AoFUOR3Sep6pSoHho5YbSi6HwcirqgPA5ccFuW8QTVJAAU7N8uQQC6Wa9TG+g==}
hasBin: true
peerDependencies:
zod: ^3.25.0 || ^4.0.0
peerDependenciesMeta:
zod:
optional: true
'@asamuzakjp/css-color@5.0.1':
resolution: {integrity: sha512-2SZFvqMyvboVV1d15lMf7XiI3m7SDqXUuKaTymJYLN6dSGadqp+fVojqJlVoMlbZnlTmu3S0TLwLTJpvBMO1Aw==}
engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0}
@@ -5952,6 +5964,12 @@ snapshots:
optionalDependencies:
zod: 4.3.6
'@anthropic-ai/sdk@0.80.0(zod@4.3.6)':
dependencies:
json-schema-to-ts: 3.1.1
optionalDependencies:
zod: 4.3.6
'@asamuzakjp/css-color@5.0.1':
dependencies:
'@csstools/css-calc': 3.1.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0)