Compare commits
4 Commits
ad98755014
...
360d7fe96d
| Author | SHA1 | Date | |
|---|---|---|---|
| 360d7fe96d | |||
| 08da6b76d1 | |||
| 5d4efb467c | |||
| 6c6bcbdb7f |
@@ -12,18 +12,19 @@
|
|||||||
"test": "vitest run --passWithNoTests"
|
"test": "vitest run --passWithNoTests"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@anthropic-ai/sdk": "^0.80.0",
|
||||||
"@fastify/helmet": "^13.0.2",
|
"@fastify/helmet": "^13.0.2",
|
||||||
"@mariozechner/pi-ai": "~0.57.1",
|
"@mariozechner/pi-ai": "~0.57.1",
|
||||||
"@mariozechner/pi-coding-agent": "~0.57.1",
|
"@mariozechner/pi-coding-agent": "~0.57.1",
|
||||||
"@modelcontextprotocol/sdk": "^1.27.1",
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
||||||
"@mosaic/auth": "workspace:^",
|
"@mosaic/auth": "workspace:^",
|
||||||
"@mosaic/queue": "workspace:^",
|
|
||||||
"@mosaic/brain": "workspace:^",
|
"@mosaic/brain": "workspace:^",
|
||||||
"@mosaic/coord": "workspace:^",
|
"@mosaic/coord": "workspace:^",
|
||||||
"@mosaic/db": "workspace:^",
|
"@mosaic/db": "workspace:^",
|
||||||
"@mosaic/discord-plugin": "workspace:^",
|
"@mosaic/discord-plugin": "workspace:^",
|
||||||
"@mosaic/log": "workspace:^",
|
"@mosaic/log": "workspace:^",
|
||||||
"@mosaic/memory": "workspace:^",
|
"@mosaic/memory": "workspace:^",
|
||||||
|
"@mosaic/queue": "workspace:^",
|
||||||
"@mosaic/telegram-plugin": "workspace:^",
|
"@mosaic/telegram-plugin": "workspace:^",
|
||||||
"@mosaic/types": "workspace:^",
|
"@mosaic/types": "workspace:^",
|
||||||
"@nestjs/common": "^11.0.0",
|
"@nestjs/common": "^11.0.0",
|
||||||
@@ -46,6 +47,7 @@
|
|||||||
"dotenv": "^17.3.1",
|
"dotenv": "^17.3.1",
|
||||||
"fastify": "^5.0.0",
|
"fastify": "^5.0.0",
|
||||||
"node-cron": "^4.2.1",
|
"node-cron": "^4.2.1",
|
||||||
|
"openai": "^6.32.0",
|
||||||
"reflect-metadata": "^0.2.0",
|
"reflect-metadata": "^0.2.0",
|
||||||
"rxjs": "^7.8.0",
|
"rxjs": "^7.8.0",
|
||||||
"socket.io": "^4.8.0",
|
"socket.io": "^4.8.0",
|
||||||
|
|||||||
191
apps/gateway/src/agent/adapters/anthropic.adapter.ts
Normal file
191
apps/gateway/src/agent/adapters/anthropic.adapter.ts
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
import { Logger } from '@nestjs/common';
|
||||||
|
import Anthropic from '@anthropic-ai/sdk';
|
||||||
|
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
|
||||||
|
import type {
|
||||||
|
CompletionEvent,
|
||||||
|
CompletionParams,
|
||||||
|
IProviderAdapter,
|
||||||
|
ModelInfo,
|
||||||
|
ProviderHealth,
|
||||||
|
} from '@mosaic/types';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Anthropic provider adapter.
|
||||||
|
*
|
||||||
|
* Registers Claude models with the Pi ModelRegistry via the Anthropic SDK.
|
||||||
|
* Configuration is driven by environment variables:
|
||||||
|
* ANTHROPIC_API_KEY — Anthropic API key (required)
|
||||||
|
*/
|
||||||
|
export class AnthropicAdapter implements IProviderAdapter {
|
||||||
|
readonly name = 'anthropic';
|
||||||
|
|
||||||
|
private readonly logger = new Logger(AnthropicAdapter.name);
|
||||||
|
private client: Anthropic | null = null;
|
||||||
|
private registeredModels: ModelInfo[] = [];
|
||||||
|
|
||||||
|
constructor(private readonly registry: ModelRegistry) {}
|
||||||
|
|
||||||
|
async register(): Promise<void> {
|
||||||
|
const apiKey = process.env['ANTHROPIC_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
this.logger.warn('Skipping Anthropic provider registration: ANTHROPIC_API_KEY not set');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.client = new Anthropic({ apiKey });
|
||||||
|
|
||||||
|
const models: ModelInfo[] = [
|
||||||
|
{
|
||||||
|
id: 'claude-opus-4-6',
|
||||||
|
provider: 'anthropic',
|
||||||
|
name: 'Claude Opus 4.6',
|
||||||
|
reasoning: true,
|
||||||
|
contextWindow: 200000,
|
||||||
|
maxTokens: 32000,
|
||||||
|
inputTypes: ['text', 'image'],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'claude-sonnet-4-6',
|
||||||
|
provider: 'anthropic',
|
||||||
|
name: 'Claude Sonnet 4.6',
|
||||||
|
reasoning: true,
|
||||||
|
contextWindow: 200000,
|
||||||
|
maxTokens: 16000,
|
||||||
|
inputTypes: ['text', 'image'],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'claude-haiku-4-5',
|
||||||
|
provider: 'anthropic',
|
||||||
|
name: 'Claude Haiku 4.5',
|
||||||
|
reasoning: false,
|
||||||
|
contextWindow: 200000,
|
||||||
|
maxTokens: 8192,
|
||||||
|
inputTypes: ['text', 'image'],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
this.registry.registerProvider('anthropic', {
|
||||||
|
apiKey,
|
||||||
|
baseUrl: 'https://api.anthropic.com',
|
||||||
|
api: 'anthropic' as never,
|
||||||
|
models: models.map((m) => ({
|
||||||
|
id: m.id,
|
||||||
|
name: m.name,
|
||||||
|
reasoning: m.reasoning,
|
||||||
|
input: m.inputTypes as ('text' | 'image')[],
|
||||||
|
cost: m.cost,
|
||||||
|
contextWindow: m.contextWindow,
|
||||||
|
maxTokens: m.maxTokens,
|
||||||
|
})),
|
||||||
|
});
|
||||||
|
|
||||||
|
this.registeredModels = models;
|
||||||
|
|
||||||
|
this.logger.log(
|
||||||
|
`Anthropic provider registered with models: ${models.map((m) => m.id).join(', ')}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
listModels(): ModelInfo[] {
|
||||||
|
return this.registeredModels;
|
||||||
|
}
|
||||||
|
|
||||||
|
async healthCheck(): Promise<ProviderHealth> {
|
||||||
|
const apiKey = process.env['ANTHROPIC_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
return {
|
||||||
|
status: 'down',
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
error: 'ANTHROPIC_API_KEY not configured',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const start = Date.now();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const client = this.client ?? new Anthropic({ apiKey });
|
||||||
|
await client.models.list({ limit: 1 });
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
|
||||||
|
} catch (err) {
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
const error = err instanceof Error ? err.message : String(err);
|
||||||
|
const status = error.includes('401') || error.includes('403') ? 'degraded' : 'down';
|
||||||
|
return { status, latencyMs, lastChecked: new Date().toISOString(), error };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stream a completion from Anthropic using the messages API.
|
||||||
|
* Maps Anthropic streaming events to the CompletionEvent format.
|
||||||
|
*
|
||||||
|
* Note: Currently reserved for future direct-completion use. The Pi SDK
|
||||||
|
* integration routes completions through ModelRegistry / AgentSession.
|
||||||
|
*/
|
||||||
|
async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
|
||||||
|
const apiKey = process.env['ANTHROPIC_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
throw new Error('AnthropicAdapter: ANTHROPIC_API_KEY not configured');
|
||||||
|
}
|
||||||
|
|
||||||
|
const client = this.client ?? new Anthropic({ apiKey });
|
||||||
|
|
||||||
|
// Separate system messages from user/assistant messages
|
||||||
|
const systemMessages = params.messages.filter((m) => m.role === 'system');
|
||||||
|
const conversationMessages = params.messages.filter((m) => m.role !== 'system');
|
||||||
|
|
||||||
|
const systemPrompt =
|
||||||
|
systemMessages.length > 0 ? systemMessages.map((m) => m.content).join('\n') : undefined;
|
||||||
|
|
||||||
|
const stream = await client.messages.stream({
|
||||||
|
model: params.model,
|
||||||
|
max_tokens: params.maxTokens ?? 1024,
|
||||||
|
...(systemPrompt !== undefined ? { system: systemPrompt } : {}),
|
||||||
|
messages: conversationMessages.map((m) => ({
|
||||||
|
role: m.role as 'user' | 'assistant',
|
||||||
|
content: m.content,
|
||||||
|
})),
|
||||||
|
...(params.temperature !== undefined ? { temperature: params.temperature } : {}),
|
||||||
|
...(params.tools && params.tools.length > 0
|
||||||
|
? {
|
||||||
|
tools: params.tools.map((t) => ({
|
||||||
|
name: t.name,
|
||||||
|
description: t.description,
|
||||||
|
input_schema: t.parameters as Anthropic.Tool['input_schema'],
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
|
});
|
||||||
|
|
||||||
|
for await (const event of stream) {
|
||||||
|
if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
|
||||||
|
yield { type: 'text_delta', content: event.delta.text };
|
||||||
|
} else if (event.type === 'content_block_delta' && event.delta.type === 'input_json_delta') {
|
||||||
|
yield { type: 'tool_call', name: '', arguments: event.delta.partial_json };
|
||||||
|
} else if (event.type === 'message_delta' && event.usage) {
|
||||||
|
yield {
|
||||||
|
type: 'done',
|
||||||
|
usage: {
|
||||||
|
inputTokens:
|
||||||
|
(event as { usage: { input_tokens?: number; output_tokens: number } }).usage
|
||||||
|
.input_tokens ?? 0,
|
||||||
|
outputTokens: event.usage.output_tokens,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit final done event with full usage from the completed message
|
||||||
|
const finalMessage = await stream.finalMessage();
|
||||||
|
yield {
|
||||||
|
type: 'done',
|
||||||
|
usage: {
|
||||||
|
inputTokens: finalMessage.usage.input_tokens,
|
||||||
|
outputTokens: finalMessage.usage.output_tokens,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1 +1,4 @@
|
|||||||
export { OllamaAdapter } from './ollama.adapter.js';
|
export { OllamaAdapter } from './ollama.adapter.js';
|
||||||
|
export { AnthropicAdapter } from './anthropic.adapter.js';
|
||||||
|
export { OpenAIAdapter } from './openai.adapter.js';
|
||||||
|
export { OpenRouterAdapter } from './openrouter.adapter.js';
|
||||||
|
|||||||
@@ -8,11 +8,28 @@ import type {
|
|||||||
ProviderHealth,
|
ProviderHealth,
|
||||||
} from '@mosaic/types';
|
} from '@mosaic/types';
|
||||||
|
|
||||||
|
/** Embedding models that Ollama ships with out of the box */
|
||||||
|
const OLLAMA_EMBEDDING_MODELS: ReadonlyArray<{
|
||||||
|
id: string;
|
||||||
|
contextWindow: number;
|
||||||
|
dimensions: number;
|
||||||
|
}> = [
|
||||||
|
{ id: 'nomic-embed-text', contextWindow: 8192, dimensions: 768 },
|
||||||
|
{ id: 'mxbai-embed-large', contextWindow: 512, dimensions: 1024 },
|
||||||
|
];
|
||||||
|
|
||||||
|
interface OllamaEmbeddingResponse {
|
||||||
|
embedding?: number[];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ollama provider adapter.
|
* Ollama provider adapter.
|
||||||
*
|
*
|
||||||
* Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
|
* Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
|
||||||
* completions API. Configuration is driven by environment variables:
|
* completions API. Also exposes embedding models and an `embed()` method for
|
||||||
|
* vector generation (used by EmbeddingService / M3-009).
|
||||||
|
*
|
||||||
|
* Configuration is driven by environment variables:
|
||||||
* OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
|
* OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance
|
||||||
* OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
|
* OLLAMA_MODELS — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
|
||||||
*/
|
*/
|
||||||
@@ -52,7 +69,8 @@ export class OllamaAdapter implements IProviderAdapter {
|
|||||||
})),
|
})),
|
||||||
});
|
});
|
||||||
|
|
||||||
this.registeredModels = modelIds.map((id) => ({
|
// Chat / completion models
|
||||||
|
const completionModels: ModelInfo[] = modelIds.map((id) => ({
|
||||||
id,
|
id,
|
||||||
provider: 'ollama',
|
provider: 'ollama',
|
||||||
name: id,
|
name: id,
|
||||||
@@ -63,8 +81,24 @@ export class OllamaAdapter implements IProviderAdapter {
|
|||||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Embedding models (tracked in registeredModels but not in Pi registry,
|
||||||
|
// which only handles completion models)
|
||||||
|
const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
|
||||||
|
id: em.id,
|
||||||
|
provider: 'ollama',
|
||||||
|
name: em.id,
|
||||||
|
reasoning: false,
|
||||||
|
contextWindow: em.contextWindow,
|
||||||
|
maxTokens: 0,
|
||||||
|
inputTypes: ['text'] as ('text' | 'image')[],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
}));
|
||||||
|
|
||||||
|
this.registeredModels = [...completionModels, ...embeddingModels];
|
||||||
|
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')}`,
|
`Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
|
||||||
|
`and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,6 +144,44 @@ export class OllamaAdapter implements IProviderAdapter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
|
||||||
|
*
|
||||||
|
* Defaults to 'nomic-embed-text' when no model is specified.
|
||||||
|
* Intended for use by EmbeddingService (M3-009).
|
||||||
|
*
|
||||||
|
* @param text - The input text to embed.
|
||||||
|
* @param model - Optional embedding model ID (default: 'nomic-embed-text').
|
||||||
|
* @returns A float array representing the embedding vector.
|
||||||
|
*/
|
||||||
|
async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
|
||||||
|
const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
|
||||||
|
if (!ollamaUrl) {
|
||||||
|
throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
|
||||||
|
}
|
||||||
|
|
||||||
|
const embeddingUrl = `${ollamaUrl}/api/embeddings`;
|
||||||
|
|
||||||
|
const res = await fetch(embeddingUrl, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ model, prompt: text }),
|
||||||
|
signal: AbortSignal.timeout(30000),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const json = (await res.json()) as OllamaEmbeddingResponse;
|
||||||
|
|
||||||
|
if (!Array.isArray(json.embedding)) {
|
||||||
|
throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
|
||||||
|
}
|
||||||
|
|
||||||
|
return json.embedding;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* createCompletion is reserved for future direct-completion use.
|
* createCompletion is reserved for future direct-completion use.
|
||||||
* The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
|
* The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
|
||||||
|
|||||||
201
apps/gateway/src/agent/adapters/openai.adapter.ts
Normal file
201
apps/gateway/src/agent/adapters/openai.adapter.ts
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
import { Logger } from '@nestjs/common';
|
||||||
|
import OpenAI from 'openai';
|
||||||
|
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
|
||||||
|
import type {
|
||||||
|
CompletionEvent,
|
||||||
|
CompletionParams,
|
||||||
|
IProviderAdapter,
|
||||||
|
ModelInfo,
|
||||||
|
ProviderHealth,
|
||||||
|
} from '@mosaic/types';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenAI provider adapter.
|
||||||
|
*
|
||||||
|
* Registers OpenAI models (including Codex gpt-5.4) with the Pi ModelRegistry.
|
||||||
|
* Configuration is driven by environment variables:
|
||||||
|
* OPENAI_API_KEY — OpenAI API key (required; adapter skips registration when absent)
|
||||||
|
*/
|
||||||
|
export class OpenAIAdapter implements IProviderAdapter {
|
||||||
|
readonly name = 'openai';
|
||||||
|
|
||||||
|
private readonly logger = new Logger(OpenAIAdapter.name);
|
||||||
|
private registeredModels: ModelInfo[] = [];
|
||||||
|
private client: OpenAI | null = null;
|
||||||
|
|
||||||
|
/** Model ID used for Codex gpt-5.4 in the Pi registry. */
|
||||||
|
static readonly CODEX_MODEL_ID = 'codex-gpt-5-4';
|
||||||
|
|
||||||
|
constructor(private readonly registry: ModelRegistry) {}
|
||||||
|
|
||||||
|
async register(): Promise<void> {
|
||||||
|
const apiKey = process.env['OPENAI_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
this.logger.debug('Skipping OpenAI provider registration: OPENAI_API_KEY not set');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.client = new OpenAI({ apiKey });
|
||||||
|
|
||||||
|
const codexModel = {
|
||||||
|
id: OpenAIAdapter.CODEX_MODEL_ID,
|
||||||
|
name: 'Codex gpt-5.4',
|
||||||
|
/** OpenAI-compatible completions API */
|
||||||
|
api: 'openai-completions' as never,
|
||||||
|
reasoning: false,
|
||||||
|
input: ['text', 'image'] as ('text' | 'image')[],
|
||||||
|
cost: { input: 0.003, output: 0.012, cacheRead: 0.0015, cacheWrite: 0 },
|
||||||
|
contextWindow: 128_000,
|
||||||
|
maxTokens: 16_384,
|
||||||
|
};
|
||||||
|
|
||||||
|
this.registry.registerProvider('openai', {
|
||||||
|
apiKey,
|
||||||
|
baseUrl: 'https://api.openai.com/v1',
|
||||||
|
models: [codexModel],
|
||||||
|
});
|
||||||
|
|
||||||
|
this.registeredModels = [
|
||||||
|
{
|
||||||
|
id: OpenAIAdapter.CODEX_MODEL_ID,
|
||||||
|
provider: 'openai',
|
||||||
|
name: 'Codex gpt-5.4',
|
||||||
|
reasoning: false,
|
||||||
|
contextWindow: 128_000,
|
||||||
|
maxTokens: 16_384,
|
||||||
|
inputTypes: ['text', 'image'] as ('text' | 'image')[],
|
||||||
|
cost: { input: 0.003, output: 0.012, cacheRead: 0.0015, cacheWrite: 0 },
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
this.logger.log(`OpenAI provider registered with model: ${OpenAIAdapter.CODEX_MODEL_ID}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
listModels(): ModelInfo[] {
|
||||||
|
return this.registeredModels;
|
||||||
|
}
|
||||||
|
|
||||||
|
async healthCheck(): Promise<ProviderHealth> {
|
||||||
|
const apiKey = process.env['OPENAI_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
return {
|
||||||
|
status: 'down',
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
error: 'OPENAI_API_KEY not configured',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const start = Date.now();
|
||||||
|
try {
|
||||||
|
// Lightweight call — list models to verify key validity
|
||||||
|
const res = await fetch('https://api.openai.com/v1/models', {
|
||||||
|
method: 'GET',
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
signal: AbortSignal.timeout(5000),
|
||||||
|
});
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
return {
|
||||||
|
status: 'degraded',
|
||||||
|
latencyMs,
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
error: `HTTP ${res.status}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
|
||||||
|
} catch (err) {
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
const error = err instanceof Error ? err.message : String(err);
|
||||||
|
return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stream a completion from OpenAI using the chat completions API.
|
||||||
|
*
|
||||||
|
* Maps OpenAI streaming chunks to the Mosaic CompletionEvent format.
|
||||||
|
*/
|
||||||
|
async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
|
||||||
|
if (!this.client) {
|
||||||
|
throw new Error(
|
||||||
|
'OpenAIAdapter: client not initialized. ' +
|
||||||
|
'Ensure OPENAI_API_KEY is set and register() was called.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const stream = await this.client.chat.completions.create({
|
||||||
|
model: params.model,
|
||||||
|
messages: params.messages.map((m) => ({
|
||||||
|
role: m.role,
|
||||||
|
content: m.content,
|
||||||
|
})),
|
||||||
|
...(params.temperature !== undefined && { temperature: params.temperature }),
|
||||||
|
...(params.maxTokens !== undefined && { max_tokens: params.maxTokens }),
|
||||||
|
...(params.tools &&
|
||||||
|
params.tools.length > 0 && {
|
||||||
|
tools: params.tools.map((t) => ({
|
||||||
|
type: 'function' as const,
|
||||||
|
function: {
|
||||||
|
name: t.name,
|
||||||
|
description: t.description,
|
||||||
|
parameters: t.parameters,
|
||||||
|
},
|
||||||
|
})),
|
||||||
|
}),
|
||||||
|
stream: true,
|
||||||
|
stream_options: { include_usage: true },
|
||||||
|
});
|
||||||
|
|
||||||
|
let inputTokens = 0;
|
||||||
|
let outputTokens = 0;
|
||||||
|
|
||||||
|
for await (const chunk of stream) {
|
||||||
|
const choice = chunk.choices[0];
|
||||||
|
|
||||||
|
// Accumulate usage when present (final chunk with stream_options.include_usage)
|
||||||
|
if (chunk.usage) {
|
||||||
|
inputTokens = chunk.usage.prompt_tokens;
|
||||||
|
outputTokens = chunk.usage.completion_tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!choice) continue;
|
||||||
|
|
||||||
|
const delta = choice.delta;
|
||||||
|
|
||||||
|
// Text content delta
|
||||||
|
if (delta.content) {
|
||||||
|
yield { type: 'text_delta', content: delta.content };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tool call delta — emit when arguments are complete
|
||||||
|
if (delta.tool_calls) {
|
||||||
|
for (const toolCallDelta of delta.tool_calls) {
|
||||||
|
if (toolCallDelta.function?.name && toolCallDelta.function.arguments !== undefined) {
|
||||||
|
yield {
|
||||||
|
type: 'tool_call',
|
||||||
|
name: toolCallDelta.function.name,
|
||||||
|
arguments: toolCallDelta.function.arguments,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stream finished
|
||||||
|
if (choice.finish_reason === 'stop' || choice.finish_reason === 'tool_calls') {
|
||||||
|
yield {
|
||||||
|
type: 'done',
|
||||||
|
usage: { inputTokens, outputTokens },
|
||||||
|
};
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback done event when stream ends without explicit finish_reason
|
||||||
|
yield { type: 'done', usage: { inputTokens, outputTokens } };
|
||||||
|
}
|
||||||
|
}
|
||||||
212
apps/gateway/src/agent/adapters/openrouter.adapter.ts
Normal file
212
apps/gateway/src/agent/adapters/openrouter.adapter.ts
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
import { Logger } from '@nestjs/common';
|
||||||
|
import OpenAI from 'openai';
|
||||||
|
import type {
|
||||||
|
CompletionEvent,
|
||||||
|
CompletionParams,
|
||||||
|
IProviderAdapter,
|
||||||
|
ModelInfo,
|
||||||
|
ProviderHealth,
|
||||||
|
} from '@mosaic/types';
|
||||||
|
|
||||||
|
const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';
|
||||||
|
|
||||||
|
interface OpenRouterModel {
|
||||||
|
id: string;
|
||||||
|
name?: string;
|
||||||
|
context_length?: number;
|
||||||
|
top_provider?: {
|
||||||
|
max_completion_tokens?: number;
|
||||||
|
};
|
||||||
|
pricing?: {
|
||||||
|
prompt?: string | number;
|
||||||
|
completion?: string | number;
|
||||||
|
};
|
||||||
|
architecture?: {
|
||||||
|
input_modalities?: string[];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OpenRouterModelsResponse {
|
||||||
|
data?: OpenRouterModel[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenRouter provider adapter.
|
||||||
|
*
|
||||||
|
* Routes completions through OpenRouter's OpenAI-compatible API.
|
||||||
|
* Configuration is driven by the OPENROUTER_API_KEY environment variable.
|
||||||
|
*/
|
||||||
|
export class OpenRouterAdapter implements IProviderAdapter {
|
||||||
|
readonly name = 'openrouter';
|
||||||
|
|
||||||
|
private readonly logger = new Logger(OpenRouterAdapter.name);
|
||||||
|
private client: OpenAI | null = null;
|
||||||
|
private registeredModels: ModelInfo[] = [];
|
||||||
|
|
||||||
|
async register(): Promise<void> {
|
||||||
|
const apiKey = process.env['OPENROUTER_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
this.logger.debug('Skipping OpenRouter provider registration: OPENROUTER_API_KEY not set');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.client = new OpenAI({
|
||||||
|
apiKey,
|
||||||
|
baseURL: OPENROUTER_BASE_URL,
|
||||||
|
defaultHeaders: {
|
||||||
|
'HTTP-Referer': 'https://mosaic.ai',
|
||||||
|
'X-Title': 'Mosaic',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
this.registeredModels = await this.fetchModels(apiKey);
|
||||||
|
this.logger.log(`OpenRouter provider registered with ${this.registeredModels.length} models`);
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.warn(
|
||||||
|
`OpenRouter model discovery failed: ${err instanceof Error ? err.message : String(err)}. Registering with empty model list.`,
|
||||||
|
);
|
||||||
|
this.registeredModels = [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
listModels(): ModelInfo[] {
|
||||||
|
return this.registeredModels;
|
||||||
|
}
|
||||||
|
|
||||||
|
async healthCheck(): Promise<ProviderHealth> {
|
||||||
|
const apiKey = process.env['OPENROUTER_API_KEY'];
|
||||||
|
if (!apiKey) {
|
||||||
|
return {
|
||||||
|
status: 'down',
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
error: 'OPENROUTER_API_KEY not configured',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const start = Date.now();
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${OPENROUTER_BASE_URL}/models`, {
|
||||||
|
method: 'GET',
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
Accept: 'application/json',
|
||||||
|
},
|
||||||
|
signal: AbortSignal.timeout(5000),
|
||||||
|
});
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
return {
|
||||||
|
status: 'degraded',
|
||||||
|
latencyMs,
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
error: `HTTP ${res.status}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
|
||||||
|
} catch (err) {
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
const error = err instanceof Error ? err.message : String(err);
|
||||||
|
return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stream a completion through OpenRouter's OpenAI-compatible API.
|
||||||
|
*/
|
||||||
|
async *createCompletion(params: CompletionParams): AsyncIterable<CompletionEvent> {
|
||||||
|
if (!this.client) {
|
||||||
|
throw new Error('OpenRouterAdapter is not initialized. Ensure OPENROUTER_API_KEY is set.');
|
||||||
|
}
|
||||||
|
|
||||||
|
const stream = await this.client.chat.completions.create({
|
||||||
|
model: params.model,
|
||||||
|
messages: params.messages.map((m) => ({ role: m.role, content: m.content })),
|
||||||
|
temperature: params.temperature,
|
||||||
|
max_tokens: params.maxTokens,
|
||||||
|
stream: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
let inputTokens = 0;
|
||||||
|
let outputTokens = 0;
|
||||||
|
|
||||||
|
for await (const chunk of stream) {
|
||||||
|
const choice = chunk.choices[0];
|
||||||
|
if (!choice) continue;
|
||||||
|
|
||||||
|
const delta = choice.delta;
|
||||||
|
|
||||||
|
if (delta.content) {
|
||||||
|
yield { type: 'text_delta', content: delta.content };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (choice.finish_reason === 'stop') {
|
||||||
|
const usage = (chunk as { usage?: { prompt_tokens?: number; completion_tokens?: number } })
|
||||||
|
.usage;
|
||||||
|
if (usage) {
|
||||||
|
inputTokens = usage.prompt_tokens ?? 0;
|
||||||
|
outputTokens = usage.completion_tokens ?? 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
yield {
|
||||||
|
type: 'done',
|
||||||
|
usage: { inputTokens, outputTokens },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Private helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private async fetchModels(apiKey: string): Promise<ModelInfo[]> {
|
||||||
|
const res = await fetch(`${OPENROUTER_BASE_URL}/models`, {
|
||||||
|
method: 'GET',
|
||||||
|
headers: {
|
||||||
|
Authorization: `Bearer ${apiKey}`,
|
||||||
|
Accept: 'application/json',
|
||||||
|
},
|
||||||
|
signal: AbortSignal.timeout(10000),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
throw new Error(`OpenRouter models endpoint returned HTTP ${res.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const json = (await res.json()) as OpenRouterModelsResponse;
|
||||||
|
const data = json.data ?? [];
|
||||||
|
|
||||||
|
return data.map((model): ModelInfo => {
|
||||||
|
const inputPrice = model.pricing?.prompt
|
||||||
|
? parseFloat(String(model.pricing.prompt)) * 1000
|
||||||
|
: 0;
|
||||||
|
const outputPrice = model.pricing?.completion
|
||||||
|
? parseFloat(String(model.pricing.completion)) * 1000
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
const inputModalities = model.architecture?.input_modalities ?? ['text'];
|
||||||
|
const inputTypes = inputModalities.includes('image')
|
||||||
|
? (['text', 'image'] as const)
|
||||||
|
: (['text'] as const);
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: model.id,
|
||||||
|
provider: 'openrouter',
|
||||||
|
name: model.name ?? model.id,
|
||||||
|
reasoning: false,
|
||||||
|
contextWindow: model.context_length ?? 4096,
|
||||||
|
maxTokens: model.top_provider?.max_completion_tokens ?? 4096,
|
||||||
|
inputTypes: [...inputTypes],
|
||||||
|
cost: {
|
||||||
|
input: inputPrice,
|
||||||
|
output: outputPrice,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Injectable, Logger, type OnModuleInit } from '@nestjs/common';
|
import { Injectable, Logger, type OnModuleDestroy, type OnModuleInit } from '@nestjs/common';
|
||||||
import { ModelRegistry, AuthStorage } from '@mariozechner/pi-coding-agent';
|
import { ModelRegistry, AuthStorage } from '@mariozechner/pi-coding-agent';
|
||||||
import { getModel, type Model, type Api } from '@mariozechner/pi-ai';
|
import { getModel, type Model, type Api } from '@mariozechner/pi-ai';
|
||||||
import type {
|
import type {
|
||||||
@@ -8,14 +8,22 @@ import type {
|
|||||||
ProviderHealth,
|
ProviderHealth,
|
||||||
ProviderInfo,
|
ProviderInfo,
|
||||||
} from '@mosaic/types';
|
} from '@mosaic/types';
|
||||||
import { OllamaAdapter } from './adapters/index.js';
|
import {
|
||||||
|
AnthropicAdapter,
|
||||||
|
OllamaAdapter,
|
||||||
|
OpenAIAdapter,
|
||||||
|
OpenRouterAdapter,
|
||||||
|
} from './adapters/index.js';
|
||||||
import type { TestConnectionResultDto } from './provider.dto.js';
|
import type { TestConnectionResultDto } from './provider.dto.js';
|
||||||
|
|
||||||
|
/** Default health check interval in seconds */
|
||||||
|
const DEFAULT_HEALTH_INTERVAL_SECS = 60;
|
||||||
|
|
||||||
/** DI injection token for the provider adapter array. */
|
/** DI injection token for the provider adapter array. */
|
||||||
export const PROVIDER_ADAPTERS = Symbol('PROVIDER_ADAPTERS');
|
export const PROVIDER_ADAPTERS = Symbol('PROVIDER_ADAPTERS');
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ProviderService implements OnModuleInit {
|
export class ProviderService implements OnModuleInit, OnModuleDestroy {
|
||||||
private readonly logger = new Logger(ProviderService.name);
|
private readonly logger = new Logger(ProviderService.name);
|
||||||
private registry!: ModelRegistry;
|
private registry!: ModelRegistry;
|
||||||
|
|
||||||
@@ -26,25 +34,124 @@ export class ProviderService implements OnModuleInit {
|
|||||||
*/
|
*/
|
||||||
private adapters: IProviderAdapter[] = [];
|
private adapters: IProviderAdapter[] = [];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cached health status per provider, updated by the health check scheduler.
|
||||||
|
*/
|
||||||
|
private healthCache: Map<string, ProviderHealth & { modelCount: number }> = new Map();
|
||||||
|
|
||||||
|
/** Timer handle for the periodic health check scheduler */
|
||||||
|
private healthCheckTimer: ReturnType<typeof setInterval> | null = null;
|
||||||
|
|
||||||
async onModuleInit(): Promise<void> {
|
async onModuleInit(): Promise<void> {
|
||||||
const authStorage = AuthStorage.inMemory();
|
const authStorage = AuthStorage.inMemory();
|
||||||
this.registry = new ModelRegistry(authStorage);
|
this.registry = new ModelRegistry(authStorage);
|
||||||
|
|
||||||
// Build the default set of adapters that rely on the registry
|
// Build the default set of adapters that rely on the registry
|
||||||
this.adapters = [new OllamaAdapter(this.registry)];
|
this.adapters = [
|
||||||
|
new OllamaAdapter(this.registry),
|
||||||
|
new AnthropicAdapter(this.registry),
|
||||||
|
new OpenAIAdapter(this.registry),
|
||||||
|
new OpenRouterAdapter(),
|
||||||
|
];
|
||||||
|
|
||||||
// Run all adapter registrations first (Ollama, and any future adapters)
|
// Run all adapter registrations first (Ollama, Anthropic, and any future adapters)
|
||||||
await this.registerAll();
|
await this.registerAll();
|
||||||
|
|
||||||
// Register API-key providers directly (Anthropic, OpenAI, Z.ai, custom)
|
// Register API-key providers directly (Z.ai, custom)
|
||||||
// These do not yet have dedicated adapter classes (M3-002 through M3-005).
|
// OpenAI now has a dedicated adapter (M3-003).
|
||||||
this.registerAnthropicProvider();
|
|
||||||
this.registerOpenAIProvider();
|
|
||||||
this.registerZaiProvider();
|
this.registerZaiProvider();
|
||||||
this.registerCustomProviders();
|
this.registerCustomProviders();
|
||||||
|
|
||||||
const available = this.registry.getAvailable();
|
const available = this.registry.getAvailable();
|
||||||
this.logger.log(`Providers initialized: ${available.length} models available`);
|
this.logger.log(`Providers initialized: ${available.length} models available`);
|
||||||
|
|
||||||
|
// Kick off the health check scheduler
|
||||||
|
this.startHealthCheckScheduler();
|
||||||
|
}
|
||||||
|
|
||||||
|
onModuleDestroy(): void {
|
||||||
|
if (this.healthCheckTimer !== null) {
|
||||||
|
clearInterval(this.healthCheckTimer);
|
||||||
|
this.healthCheckTimer = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Health check scheduler
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start periodic health checks on all adapters.
|
||||||
|
* Interval is configurable via PROVIDER_HEALTH_INTERVAL env (seconds, default 60).
|
||||||
|
*/
|
||||||
|
private startHealthCheckScheduler(): void {
|
||||||
|
const intervalSecs =
|
||||||
|
parseInt(process.env['PROVIDER_HEALTH_INTERVAL'] ?? '', 10) || DEFAULT_HEALTH_INTERVAL_SECS;
|
||||||
|
const intervalMs = intervalSecs * 1000;
|
||||||
|
|
||||||
|
// Run an initial check immediately (non-blocking)
|
||||||
|
void this.runScheduledHealthChecks();
|
||||||
|
|
||||||
|
this.healthCheckTimer = setInterval(() => {
|
||||||
|
void this.runScheduledHealthChecks();
|
||||||
|
}, intervalMs);
|
||||||
|
|
||||||
|
this.logger.log(`Provider health check scheduler started (interval: ${intervalSecs}s)`);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async runScheduledHealthChecks(): Promise<void> {
|
||||||
|
for (const adapter of this.adapters) {
|
||||||
|
try {
|
||||||
|
const health = await adapter.healthCheck();
|
||||||
|
const modelCount = adapter.listModels().length;
|
||||||
|
this.healthCache.set(adapter.name, { ...health, modelCount });
|
||||||
|
this.logger.debug(
|
||||||
|
`Health check [${adapter.name}]: ${health.status} (${health.latencyMs ?? 'n/a'}ms)`,
|
||||||
|
);
|
||||||
|
} catch (err) {
|
||||||
|
const modelCount = adapter.listModels().length;
|
||||||
|
this.healthCache.set(adapter.name, {
|
||||||
|
status: 'down',
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
error: err instanceof Error ? err.message : String(err),
|
||||||
|
modelCount,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the cached health status for all adapters.
|
||||||
|
* Format: array of { name, status, latencyMs, lastChecked, modelCount }
|
||||||
|
*/
|
||||||
|
getProvidersHealth(): Array<{
|
||||||
|
name: string;
|
||||||
|
status: string;
|
||||||
|
latencyMs?: number;
|
||||||
|
lastChecked: string;
|
||||||
|
modelCount: number;
|
||||||
|
error?: string;
|
||||||
|
}> {
|
||||||
|
return this.adapters.map((adapter) => {
|
||||||
|
const cached = this.healthCache.get(adapter.name);
|
||||||
|
if (cached) {
|
||||||
|
return {
|
||||||
|
name: adapter.name,
|
||||||
|
status: cached.status,
|
||||||
|
latencyMs: cached.latencyMs,
|
||||||
|
lastChecked: cached.lastChecked,
|
||||||
|
modelCount: cached.modelCount,
|
||||||
|
error: cached.error,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// Not yet checked — return a pending placeholder
|
||||||
|
return {
|
||||||
|
name: adapter.name,
|
||||||
|
status: 'unknown',
|
||||||
|
lastChecked: new Date().toISOString(),
|
||||||
|
modelCount: adapter.listModels().length,
|
||||||
|
};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -234,49 +341,9 @@ export class ProviderService implements OnModuleInit {
|
|||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Private helpers — direct registry registration for providers without adapters yet
|
// Private helpers — direct registry registration for providers without adapters yet
|
||||||
// (Anthropic, OpenAI, Z.ai will move to adapters in M3-002 through M3-005)
|
// (Z.ai will move to an adapter in M3-005)
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
private registerAnthropicProvider(): void {
|
|
||||||
const apiKey = process.env['ANTHROPIC_API_KEY'];
|
|
||||||
if (!apiKey) {
|
|
||||||
this.logger.debug('Skipping Anthropic provider registration: ANTHROPIC_API_KEY not set');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const models = ['claude-sonnet-4-6', 'claude-opus-4-6', 'claude-haiku-4-5'].map((id) =>
|
|
||||||
this.cloneBuiltInModel('anthropic', id, { maxTokens: 8192 }),
|
|
||||||
);
|
|
||||||
|
|
||||||
this.registry.registerProvider('anthropic', {
|
|
||||||
apiKey,
|
|
||||||
baseUrl: 'https://api.anthropic.com',
|
|
||||||
models,
|
|
||||||
});
|
|
||||||
|
|
||||||
this.logger.log('Anthropic provider registered with 3 models');
|
|
||||||
}
|
|
||||||
|
|
||||||
private registerOpenAIProvider(): void {
|
|
||||||
const apiKey = process.env['OPENAI_API_KEY'];
|
|
||||||
if (!apiKey) {
|
|
||||||
this.logger.debug('Skipping OpenAI provider registration: OPENAI_API_KEY not set');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const models = ['gpt-4o', 'gpt-4o-mini', 'o3-mini'].map((id) =>
|
|
||||||
this.cloneBuiltInModel('openai', id),
|
|
||||||
);
|
|
||||||
|
|
||||||
this.registry.registerProvider('openai', {
|
|
||||||
apiKey,
|
|
||||||
baseUrl: 'https://api.openai.com/v1',
|
|
||||||
models,
|
|
||||||
});
|
|
||||||
|
|
||||||
this.logger.log('OpenAI provider registered with 3 models');
|
|
||||||
}
|
|
||||||
|
|
||||||
private registerZaiProvider(): void {
|
private registerZaiProvider(): void {
|
||||||
const apiKey = process.env['ZAI_API_KEY'];
|
const apiKey = process.env['ZAI_API_KEY'];
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ export class ProvidersController {
|
|||||||
return this.providerService.listAvailableModels();
|
return this.providerService.listAvailableModels();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Get('health')
|
||||||
|
health() {
|
||||||
|
return { providers: this.providerService.getProvidersHealth() };
|
||||||
|
}
|
||||||
|
|
||||||
@Post('test')
|
@Post('test')
|
||||||
testConnection(@Body() body: TestConnectionDto): Promise<TestConnectionResultDto> {
|
testConnection(@Body() body: TestConnectionDto): Promise<TestConnectionResultDto> {
|
||||||
return this.providerService.testConnection(body.providerId, body.baseUrl);
|
return this.providerService.testConnection(body.providerId, body.baseUrl);
|
||||||
|
|||||||
@@ -1,36 +1,122 @@
|
|||||||
import { Injectable, Logger } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import type { EmbeddingProvider } from '@mosaic/memory';
|
import type { EmbeddingProvider } from '@mosaic/memory';
|
||||||
|
|
||||||
const DEFAULT_MODEL = 'text-embedding-3-small';
|
// ---------------------------------------------------------------------------
|
||||||
const DEFAULT_DIMENSIONS = 1536;
|
// Environment-driven configuration
|
||||||
|
//
|
||||||
|
// EMBEDDING_PROVIDER — 'ollama' (default) | 'openai'
|
||||||
|
// EMBEDDING_MODEL — model id, defaults differ per provider
|
||||||
|
// EMBEDDING_DIMENSIONS — integer, defaults differ per provider
|
||||||
|
// OLLAMA_BASE_URL — base URL for Ollama (used when provider=ollama)
|
||||||
|
// EMBEDDING_API_URL — full base URL for OpenAI-compatible API
|
||||||
|
// OPENAI_API_KEY — required for OpenAI provider
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
interface EmbeddingResponse {
|
const OLLAMA_DEFAULT_MODEL = 'nomic-embed-text';
|
||||||
|
const OLLAMA_DEFAULT_DIMENSIONS = 768;
|
||||||
|
|
||||||
|
const OPENAI_DEFAULT_MODEL = 'text-embedding-3-small';
|
||||||
|
const OPENAI_DEFAULT_DIMENSIONS = 1536;
|
||||||
|
|
||||||
|
/** Known dimension mismatch: warn if pgvector column likely has wrong size */
|
||||||
|
const PGVECTOR_SCHEMA_DIMENSIONS = 1536;
|
||||||
|
|
||||||
|
type EmbeddingBackend = 'ollama' | 'openai';
|
||||||
|
|
||||||
|
interface OllamaEmbeddingResponse {
|
||||||
|
embedding: number[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OpenAIEmbeddingResponse {
|
||||||
data: Array<{ embedding: number[]; index: number }>;
|
data: Array<{ embedding: number[]; index: number }>;
|
||||||
model: string;
|
model: string;
|
||||||
usage: { prompt_tokens: number; total_tokens: number };
|
usage: { prompt_tokens: number; total_tokens: number };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates embeddings via the OpenAI-compatible embeddings API.
|
* Provider-agnostic embedding service.
|
||||||
* Supports OpenAI, Azure OpenAI, and any provider with a compatible endpoint.
|
*
|
||||||
|
* Defaults to Ollama's native embedding API using nomic-embed-text (768 dims).
|
||||||
|
* Falls back to the OpenAI-compatible API when EMBEDDING_PROVIDER=openai or
|
||||||
|
* when OPENAI_API_KEY is set and EMBEDDING_PROVIDER is not explicitly set to ollama.
|
||||||
|
*
|
||||||
|
* Dimension mismatch detection: if the configured dimensions differ from the
|
||||||
|
* pgvector schema (1536), a warning is logged with re-embedding instructions.
|
||||||
*/
|
*/
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class EmbeddingService implements EmbeddingProvider {
|
export class EmbeddingService implements EmbeddingProvider {
|
||||||
private readonly logger = new Logger(EmbeddingService.name);
|
private readonly logger = new Logger(EmbeddingService.name);
|
||||||
private readonly apiKey: string | undefined;
|
private readonly backend: EmbeddingBackend;
|
||||||
private readonly baseUrl: string;
|
|
||||||
private readonly model: string;
|
private readonly model: string;
|
||||||
|
readonly dimensions: number;
|
||||||
|
|
||||||
readonly dimensions = DEFAULT_DIMENSIONS;
|
// Ollama-specific
|
||||||
|
private readonly ollamaBaseUrl: string | undefined;
|
||||||
|
|
||||||
|
// OpenAI-compatible
|
||||||
|
private readonly openaiApiKey: string | undefined;
|
||||||
|
private readonly openaiBaseUrl: string;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.apiKey = process.env['OPENAI_API_KEY'];
|
// Determine backend
|
||||||
this.baseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
|
const providerEnv = process.env['EMBEDDING_PROVIDER'];
|
||||||
this.model = process.env['EMBEDDING_MODEL'] ?? DEFAULT_MODEL;
|
const openaiKey = process.env['OPENAI_API_KEY'];
|
||||||
|
const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
|
||||||
|
|
||||||
|
if (providerEnv === 'openai') {
|
||||||
|
this.backend = 'openai';
|
||||||
|
} else if (providerEnv === 'ollama') {
|
||||||
|
this.backend = 'ollama';
|
||||||
|
} else if (process.env['EMBEDDING_API_URL']) {
|
||||||
|
// Legacy: explicit API URL configured → use openai-compat path
|
||||||
|
this.backend = 'openai';
|
||||||
|
} else if (ollamaUrl) {
|
||||||
|
// Ollama available and no explicit override → prefer Ollama
|
||||||
|
this.backend = 'ollama';
|
||||||
|
} else if (openaiKey) {
|
||||||
|
// OpenAI key present → use OpenAI
|
||||||
|
this.backend = 'openai';
|
||||||
|
} else {
|
||||||
|
// Nothing configured — default to ollama (will return zeros when unavailable)
|
||||||
|
this.backend = 'ollama';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set model and dimension defaults based on backend
|
||||||
|
if (this.backend === 'ollama') {
|
||||||
|
this.model = process.env['EMBEDDING_MODEL'] ?? OLLAMA_DEFAULT_MODEL;
|
||||||
|
this.dimensions =
|
||||||
|
parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OLLAMA_DEFAULT_DIMENSIONS;
|
||||||
|
this.ollamaBaseUrl = ollamaUrl;
|
||||||
|
this.openaiApiKey = undefined;
|
||||||
|
this.openaiBaseUrl = '';
|
||||||
|
} else {
|
||||||
|
this.model = process.env['EMBEDDING_MODEL'] ?? OPENAI_DEFAULT_MODEL;
|
||||||
|
this.dimensions =
|
||||||
|
parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OPENAI_DEFAULT_DIMENSIONS;
|
||||||
|
this.ollamaBaseUrl = undefined;
|
||||||
|
this.openaiApiKey = openaiKey;
|
||||||
|
this.openaiBaseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Warn on dimension mismatch with the current schema
|
||||||
|
if (this.dimensions !== PGVECTOR_SCHEMA_DIMENSIONS) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Embedding dimensions (${this.dimensions}) differ from pgvector schema (${PGVECTOR_SCHEMA_DIMENSIONS}). ` +
|
||||||
|
`If insights already contain ${PGVECTOR_SCHEMA_DIMENSIONS}-dim vectors, similarity search will fail. ` +
|
||||||
|
`To fix: truncate the insights table and re-embed, or run a migration to ALTER COLUMN embedding TYPE vector(${this.dimensions}).`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.logger.log(
|
||||||
|
`EmbeddingService initialized: backend=${this.backend}, model=${this.model}, dimensions=${this.dimensions}`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
get available(): boolean {
|
get available(): boolean {
|
||||||
return !!this.apiKey;
|
if (this.backend === 'ollama') {
|
||||||
|
return !!this.ollamaBaseUrl;
|
||||||
|
}
|
||||||
|
return !!this.openaiApiKey;
|
||||||
}
|
}
|
||||||
|
|
||||||
async embed(text: string): Promise<number[]> {
|
async embed(text: string): Promise<number[]> {
|
||||||
@@ -39,16 +125,60 @@ export class EmbeddingService implements EmbeddingProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||||
if (!this.apiKey) {
|
if (!this.available) {
|
||||||
this.logger.warn('No OPENAI_API_KEY configured — returning zero vectors');
|
const reason =
|
||||||
|
this.backend === 'ollama'
|
||||||
|
? 'OLLAMA_BASE_URL not configured'
|
||||||
|
: 'No OPENAI_API_KEY configured';
|
||||||
|
this.logger.warn(`${reason} — returning zero vectors`);
|
||||||
return texts.map(() => new Array<number>(this.dimensions).fill(0));
|
return texts.map(() => new Array<number>(this.dimensions).fill(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(`${this.baseUrl}/embeddings`, {
|
if (this.backend === 'ollama') {
|
||||||
|
return this.embedBatchOllama(texts);
|
||||||
|
}
|
||||||
|
return this.embedBatchOpenAI(texts);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Ollama backend
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private async embedBatchOllama(texts: string[]): Promise<number[][]> {
|
||||||
|
const baseUrl = this.ollamaBaseUrl!;
|
||||||
|
const results: number[][] = [];
|
||||||
|
|
||||||
|
// Ollama's /api/embeddings endpoint processes one text at a time
|
||||||
|
for (const text of texts) {
|
||||||
|
const response = await fetch(`${baseUrl}/api/embeddings`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ model: this.model, prompt: text }),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const body = await response.text();
|
||||||
|
this.logger.error(`Ollama embedding API error: ${response.status} ${body}`);
|
||||||
|
throw new Error(`Ollama embedding API returned ${response.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const json = (await response.json()) as OllamaEmbeddingResponse;
|
||||||
|
results.push(json.embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// OpenAI-compatible backend
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
|
||||||
|
const response = await fetch(`${this.openaiBaseUrl}/embeddings`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.openaiApiKey}`,
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: this.model,
|
model: this.model,
|
||||||
@@ -63,7 +193,7 @@ export class EmbeddingService implements EmbeddingProvider {
|
|||||||
throw new Error(`Embedding API returned ${response.status}`);
|
throw new Error(`Embedding API returned ${response.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const json = (await response.json()) as EmbeddingResponse;
|
const json = (await response.json()) as OpenAIEmbeddingResponse;
|
||||||
return json.data.sort((a, b) => a.index - b.index).map((d) => d.embedding);
|
return json.data.sort((a, b) => a.index - b.index).map((d) => d.embedding);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
38
pnpm-lock.yaml
generated
38
pnpm-lock.yaml
generated
@@ -41,6 +41,9 @@ importers:
|
|||||||
|
|
||||||
apps/gateway:
|
apps/gateway:
|
||||||
dependencies:
|
dependencies:
|
||||||
|
'@anthropic-ai/sdk':
|
||||||
|
specifier: ^0.80.0
|
||||||
|
version: 0.80.0(zod@4.3.6)
|
||||||
'@fastify/helmet':
|
'@fastify/helmet':
|
||||||
specifier: ^13.0.2
|
specifier: ^13.0.2
|
||||||
version: 13.0.2
|
version: 13.0.2
|
||||||
@@ -143,6 +146,9 @@ importers:
|
|||||||
node-cron:
|
node-cron:
|
||||||
specifier: ^4.2.1
|
specifier: ^4.2.1
|
||||||
version: 4.2.1
|
version: 4.2.1
|
||||||
|
openai:
|
||||||
|
specifier: ^6.32.0
|
||||||
|
version: 6.32.0(ws@8.19.0)(zod@4.3.6)
|
||||||
reflect-metadata:
|
reflect-metadata:
|
||||||
specifier: ^0.2.0
|
specifier: ^0.2.0
|
||||||
version: 0.2.2
|
version: 0.2.2
|
||||||
@@ -582,6 +588,15 @@ packages:
|
|||||||
zod:
|
zod:
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
|
'@anthropic-ai/sdk@0.80.0':
|
||||||
|
resolution: {integrity: sha512-WeXLn7zNVk3yjeshn+xZHvld6AoFUOR3Sep6pSoHho5YbSi6HwcirqgPA5ccFuW8QTVJAAU7N8uQQC6Wa9TG+g==}
|
||||||
|
hasBin: true
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.25.0 || ^4.0.0
|
||||||
|
peerDependenciesMeta:
|
||||||
|
zod:
|
||||||
|
optional: true
|
||||||
|
|
||||||
'@asamuzakjp/css-color@5.0.1':
|
'@asamuzakjp/css-color@5.0.1':
|
||||||
resolution: {integrity: sha512-2SZFvqMyvboVV1d15lMf7XiI3m7SDqXUuKaTymJYLN6dSGadqp+fVojqJlVoMlbZnlTmu3S0TLwLTJpvBMO1Aw==}
|
resolution: {integrity: sha512-2SZFvqMyvboVV1d15lMf7XiI3m7SDqXUuKaTymJYLN6dSGadqp+fVojqJlVoMlbZnlTmu3S0TLwLTJpvBMO1Aw==}
|
||||||
engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0}
|
engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0}
|
||||||
@@ -4879,6 +4894,18 @@ packages:
|
|||||||
zod:
|
zod:
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
|
openai@6.32.0:
|
||||||
|
resolution: {integrity: sha512-j3k+BjydAf8yQlcOI7WUQMQTbbF5GEIMAE2iZYCOzwwB3S2pCheaWYp+XZRNAch4jWVc52PMDGRRjutao3lLCg==}
|
||||||
|
hasBin: true
|
||||||
|
peerDependencies:
|
||||||
|
ws: ^8.18.0
|
||||||
|
zod: ^3.25 || ^4.0
|
||||||
|
peerDependenciesMeta:
|
||||||
|
ws:
|
||||||
|
optional: true
|
||||||
|
zod:
|
||||||
|
optional: true
|
||||||
|
|
||||||
optionator@0.9.4:
|
optionator@0.9.4:
|
||||||
resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==}
|
resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==}
|
||||||
engines: {node: '>= 0.8.0'}
|
engines: {node: '>= 0.8.0'}
|
||||||
@@ -5937,6 +5964,12 @@ snapshots:
|
|||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
zod: 4.3.6
|
zod: 4.3.6
|
||||||
|
|
||||||
|
'@anthropic-ai/sdk@0.80.0(zod@4.3.6)':
|
||||||
|
dependencies:
|
||||||
|
json-schema-to-ts: 3.1.1
|
||||||
|
optionalDependencies:
|
||||||
|
zod: 4.3.6
|
||||||
|
|
||||||
'@asamuzakjp/css-color@5.0.1':
|
'@asamuzakjp/css-color@5.0.1':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@csstools/css-calc': 3.1.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0)
|
'@csstools/css-calc': 3.1.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0)
|
||||||
@@ -10621,6 +10654,11 @@ snapshots:
|
|||||||
ws: 8.19.0
|
ws: 8.19.0
|
||||||
zod: 4.3.6
|
zod: 4.3.6
|
||||||
|
|
||||||
|
openai@6.32.0(ws@8.19.0)(zod@4.3.6):
|
||||||
|
optionalDependencies:
|
||||||
|
ws: 8.19.0
|
||||||
|
zod: 4.3.6
|
||||||
|
|
||||||
optionator@0.9.4:
|
optionator@0.9.4:
|
||||||
dependencies:
|
dependencies:
|
||||||
deep-is: 0.1.4
|
deep-is: 0.1.4
|
||||||
|
|||||||
Reference in New Issue
Block a user