feat(M3-007,M3-009): provider health check scheduler and Ollama embedding default (#308)
Some checks failed
ci/woodpecker/push/ci Pipeline failed

Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
This commit was merged in pull request #308.
This commit is contained in:
2026-03-21 21:30:15 +00:00
committed by jason.woltje
parent cfdd2b679c
commit 6c6bcbdb7f
3 changed files with 263 additions and 67 deletions

View File

@@ -1,36 +1,122 @@
import { Injectable, Logger } from '@nestjs/common';
import type { EmbeddingProvider } from '@mosaic/memory';
const DEFAULT_MODEL = 'text-embedding-3-small';
const DEFAULT_DIMENSIONS = 1536;
// ---------------------------------------------------------------------------
// Environment-driven configuration
//
// EMBEDDING_PROVIDER — 'ollama' (default) | 'openai'
// EMBEDDING_MODEL — model id, defaults differ per provider
// EMBEDDING_DIMENSIONS — integer, defaults differ per provider
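// OLLAMA_BASE_URL — base URL for Ollama (used when provider=ollama); OLLAMA_HOST is read as a fallback
// EMBEDDING_API_URL — full base URL for OpenAI-compatible API; when set and EMBEDDING_PROVIDER is not given, selects the openai backend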
// OPENAI_API_KEY — required for OpenAI provider
// ---------------------------------------------------------------------------
interface EmbeddingResponse {
const OLLAMA_DEFAULT_MODEL = 'nomic-embed-text';
const OLLAMA_DEFAULT_DIMENSIONS = 768;
const OPENAI_DEFAULT_MODEL = 'text-embedding-3-small';
const OPENAI_DEFAULT_DIMENSIONS = 1536;
/** Vector size the current pgvector schema is assumed to use; a warning is logged when the configured dimensions differ. */
const PGVECTOR_SCHEMA_DIMENSIONS = 1536;
type EmbeddingBackend = 'ollama' | 'openai';
interface OllamaEmbeddingResponse {
embedding: number[];
}
interface OpenAIEmbeddingResponse {
data: Array<{ embedding: number[]; index: number }>;
model: string;
usage: { prompt_tokens: number; total_tokens: number };
}
/**
* Generates embeddings via the OpenAI-compatible embeddings API.
* Supports OpenAI, Azure OpenAI, and any provider with a compatible endpoint.
* Provider-agnostic embedding service.
*
* Defaults to Ollama's native embedding API using nomic-embed-text (768 dims).
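* Uses the OpenAI-compatible API instead when EMBEDDING_PROVIDER=openai, or, when
* EMBEDDING_PROVIDER is neither 'openai' nor 'ollama', when EMBEDDING_API_URL is set
* (legacy configuration) or when OPENAI_API_KEY is set and no Ollama URL
* (OLLAMA_BASE_URL / OLLAMA_HOST) is configured.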
*
* Dimension mismatch detection: if the configured dimensions differ from the
* pgvector schema (1536), a warning is logged with re-embedding instructions.
*/
@Injectable()
export class EmbeddingService implements EmbeddingProvider {
private readonly logger = new Logger(EmbeddingService.name);
private readonly apiKey: string | undefined;
private readonly baseUrl: string;
private readonly backend: EmbeddingBackend;
private readonly model: string;
readonly dimensions: number;
readonly dimensions = DEFAULT_DIMENSIONS;
// Ollama-specific
private readonly ollamaBaseUrl: string | undefined;
// OpenAI-compatible
private readonly openaiApiKey: string | undefined;
private readonly openaiBaseUrl: string;
/**
 * Reads the embedding configuration from the environment and sets the backend,
 * model, and dimensions for this instance.
 *
 * Backend selection, first match wins:
 *   1. EMBEDDING_PROVIDER is 'openai' or 'ollama' — that backend.
 *   2. EMBEDDING_API_URL is non-empty — openai (legacy configuration).
 *   3. OLLAMA_BASE_URL (or, when that is unset, OLLAMA_HOST) is non-empty — ollama.
 *   4. OPENAI_API_KEY is non-empty — openai.
 *   5. Otherwise — ollama, with no base URL.
 *
 * Logs a warning when the resulting dimensions differ from
 * PGVECTOR_SCHEMA_DIMENSIONS, then logs the chosen backend, model, and dimensions.
 */
constructor() {
// NOTE(review): the next three assignments target apiKey/baseUrl and use DEFAULT_MODEL,
// none of which the rest of this constructor reads; model is reassigned below.
// They look like pre-change lines left over from the diff — confirm before relying on them.
this.apiKey = process.env['OPENAI_API_KEY'];
this.baseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
this.model = process.env['EMBEDDING_MODEL'] ?? DEFAULT_MODEL;
// Determine backend
const providerEnv = process.env['EMBEDDING_PROVIDER'];
const openaiKey = process.env['OPENAI_API_KEY'];
const ollamaUrl = process.env['OLLAMA_BASE_URL'] ?? process.env['OLLAMA_HOST'];
if (providerEnv === 'openai') {
this.backend = 'openai';
} else if (providerEnv === 'ollama') {
this.backend = 'ollama';
} else if (process.env['EMBEDDING_API_URL']) {
// Legacy: explicit API URL configured → use openai-compat path
this.backend = 'openai';
} else if (ollamaUrl) {
// Ollama available and no explicit override → prefer Ollama
this.backend = 'ollama';
} else if (openaiKey) {
// OpenAI key present → use OpenAI
this.backend = 'openai';
} else {
// Nothing configured — default to ollama (will return zeros when unavailable)
this.backend = 'ollama';
}
// Set model and dimension defaults based on backend
if (this.backend === 'ollama') {
this.model = process.env['EMBEDDING_MODEL'] ?? OLLAMA_DEFAULT_MODEL;
// parseInt yields NaN for a missing or non-numeric value; NaN (and 0) fall through `||` to the default.
this.dimensions =
parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OLLAMA_DEFAULT_DIMENSIONS;
this.ollamaBaseUrl = ollamaUrl;
// The OpenAI-specific fields are cleared when the Ollama backend is selected.
this.openaiApiKey = undefined;
this.openaiBaseUrl = '';
} else {
this.model = process.env['EMBEDDING_MODEL'] ?? OPENAI_DEFAULT_MODEL;
// Same NaN/0 fallback as above, with the OpenAI default.
this.dimensions =
parseInt(process.env['EMBEDDING_DIMENSIONS'] ?? '', 10) || OPENAI_DEFAULT_DIMENSIONS;
this.ollamaBaseUrl = undefined;
this.openaiApiKey = openaiKey;
this.openaiBaseUrl = process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1';
}
// Warn on dimension mismatch with the current schema
if (this.dimensions !== PGVECTOR_SCHEMA_DIMENSIONS) {
this.logger.warn(
`Embedding dimensions (${this.dimensions}) differ from pgvector schema (${PGVECTOR_SCHEMA_DIMENSIONS}). ` +
`If insights already contain ${PGVECTOR_SCHEMA_DIMENSIONS}-dim vectors, similarity search will fail. ` +
`To fix: truncate the insights table and re-embed, or run a migration to ALTER COLUMN embedding TYPE vector(${this.dimensions}).`,
);
}
this.logger.log(
`EmbeddingService initialized: backend=${this.backend}, model=${this.model}, dimensions=${this.dimensions}`,
);
}
get available(): boolean {
return !!this.apiKey;
if (this.backend === 'ollama') {
return !!this.ollamaBaseUrl;
}
return !!this.openaiApiKey;
}
async embed(text: string): Promise<number[]> {
@@ -39,16 +125,60 @@ export class EmbeddingService implements EmbeddingProvider {
}
async embedBatch(texts: string[]): Promise<number[][]> {
if (!this.apiKey) {
this.logger.warn('No OPENAI_API_KEY configured — returning zero vectors');
if (!this.available) {
const reason =
this.backend === 'ollama'
? 'OLLAMA_BASE_URL not configured'
: 'No OPENAI_API_KEY configured';
this.logger.warn(`${reason} — returning zero vectors`);
return texts.map(() => new Array<number>(this.dimensions).fill(0));
}
const response = await fetch(`${this.baseUrl}/embeddings`, {
if (this.backend === 'ollama') {
return this.embedBatchOllama(texts);
}
return this.embedBatchOpenAI(texts);
}
// ---------------------------------------------------------------------------
// Ollama backend
// ---------------------------------------------------------------------------
/**
 * Embeds each text through Ollama's native `/api/embeddings` endpoint.
 *
 * Requests are issued one text at a time, in input order, so the returned
 * array is index-aligned with `texts`.
 *
 * @param texts - Texts to embed.
 * @returns One embedding vector per input text, in input order.
 * @throws Error when no Ollama base URL is configured, when Ollama answers with a
 *   non-OK status, or when a response body does not contain an `embedding` array.
 */
private async embedBatchOllama(texts: string[]): Promise<number[][]> {
  // Guard at runtime instead of asserting non-null: a missing URL would otherwise
  // be interpolated as the literal string "undefined".
  if (!this.ollamaBaseUrl) {
    throw new Error('OLLAMA_BASE_URL not configured');
  }

  // Drop trailing slashes so a base URL such as "http://host:11434/" does not
  // produce a double slash in the request path.
  const endpoint = `${this.ollamaBaseUrl.replace(/\/+$/, '')}/api/embeddings`;
  const results: number[][] = [];

  // Ollama's /api/embeddings endpoint processes one text at a time
  for (const text of texts) {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: this.model, prompt: text }),
    });

    if (!response.ok) {
      const body = await response.text();
      this.logger.error(`Ollama embedding API error: ${response.status} ${body}`);
      throw new Error(`Ollama embedding API returned ${response.status}`);
    }

    // The body is untrusted JSON: verify the shape before handing it to callers,
    // otherwise a malformed reply would put `undefined` into the result array.
    const json = (await response.json()) as Partial<OllamaEmbeddingResponse>;
    if (!Array.isArray(json.embedding)) {
      this.logger.error('Ollama embedding API response did not contain an embedding array');
      throw new Error('Ollama embedding API returned a malformed response');
    }
    results.push(json.embedding);
  }

  return results;
}
// ---------------------------------------------------------------------------
// OpenAI-compatible backend
// ---------------------------------------------------------------------------
private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
const response = await fetch(`${this.openaiBaseUrl}/embeddings`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${this.apiKey}`,
Authorization: `Bearer ${this.openaiApiKey}`,
},
body: JSON.stringify({
model: this.model,
@@ -63,7 +193,7 @@ export class EmbeddingService implements EmbeddingProvider {
throw new Error(`Embedding API returned ${response.status}`);
}
const json = (await response.json()) as EmbeddingResponse;
const json = (await response.json()) as OpenAIEmbeddingResponse;
return json.data.sort((a, b) => a.index - b.index).map((d) => d.embedding);
}
}