feat(#69): implement embedding generation pipeline

Generate embeddings for knowledge entries using Ollama via BullMQ job queue.

Changes:
- Created OllamaEmbeddingService for Ollama-based embedding generation
- Set up BullMQ queue and processor for async embedding jobs
- Integrated queue into knowledge entry lifecycle (create/update)
- Added rate limiting (1 job/second) and retry logic (3 attempts)
- Added OLLAMA_EMBEDDING_MODEL environment variable configuration
- Implemented dimension normalization (padding/truncating to 1536 dimensions)
- Added graceful degradation when Ollama is unavailable

Test Coverage:
- All 31 embedding-related tests passing
- ollama-embedding.service.spec.ts: 13 tests
- embedding-queue.spec.ts: 6 tests
- embedding.processor.spec.ts: 5 tests
- Build and linting successful

Fixes #69

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-02 15:06:11 -06:00
parent 3cb6eb7f8b
commit 3dfa603a03
12 changed files with 1099 additions and 6 deletions

View File

@@ -0,0 +1,218 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { OllamaEmbeddingService } from "./ollama-embedding.service";
import { PrismaService } from "../../prisma/prisma.service";
import { OllamaService } from "../../ollama/ollama.service";
import { Test, TestingModule } from "@nestjs/testing";
// Unit tests for OllamaEmbeddingService.
// Both collaborators (PrismaService, OllamaService) are replaced with plain
// vi.fn() stubs via the Nest testing module; each test then pins concrete
// behavior onto those stubs with vi.spyOn(...).mockResolvedValue(...).
describe("OllamaEmbeddingService", () => {
  let service: OllamaEmbeddingService;
  let prismaService: PrismaService;
  let ollamaService: OllamaService;
  beforeEach(async () => {
    // A fresh testing module per test yields a fresh service instance and
    // fresh mocks, so spy call counts cannot leak between tests.
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        OllamaEmbeddingService,
        {
          provide: PrismaService,
          useValue: {
            // Only the Prisma surface the service touches is stubbed.
            $executeRaw: vi.fn(),
            knowledgeEmbedding: {
              deleteMany: vi.fn(),
              findUnique: vi.fn(),
            },
          },
        },
        {
          provide: OllamaService,
          useValue: {
            embed: vi.fn(),
            healthCheck: vi.fn(),
          },
        },
      ],
    }).compile();
    service = module.get<OllamaEmbeddingService>(OllamaEmbeddingService);
    prismaService = module.get<PrismaService>(PrismaService);
    ollamaService = module.get<OllamaService>(OllamaService);
  });
  // isConfigured() should mirror the Ollama health-check's `available` flag.
  describe("isConfigured", () => {
    it("should return true when Ollama service is available", async () => {
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "healthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: true,
      });
      const result = await service.isConfigured();
      expect(result).toBe(true);
    });
    it("should return false when Ollama service is unavailable", async () => {
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "unhealthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: false,
        error: "Connection refused",
      });
      const result = await service.isConfigured();
      expect(result).toBe(false);
    });
  });
  describe("generateEmbedding", () => {
    it("should generate embedding vector from text", async () => {
      // 1536 distinct values so any accidental reordering/truncation by the
      // service would be caught by the toEqual below.
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      const result = await service.generateEmbedding("test text");
      expect(result).toEqual(mockEmbedding);
      // "mxbai-embed-large" is the service's default model.
      expect(ollamaService.embed).toHaveBeenCalledWith("test text", "mxbai-embed-large");
    });
    it("should use custom model when provided", async () => {
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      await service.generateEmbedding("test text", { model: "custom-model" });
      expect(ollamaService.embed).toHaveBeenCalledWith("test text", "custom-model");
    });
    it("should throw error when Ollama service fails", async () => {
      // The service is expected to rethrow the underlying Ollama error.
      vi.spyOn(ollamaService, "embed").mockRejectedValue(new Error("Ollama unavailable"));
      await expect(service.generateEmbedding("test text")).rejects.toThrow("Ollama unavailable");
    });
  });
  describe("generateAndStoreEmbedding", () => {
    it("should generate and store embedding for entry", async () => {
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      // The store path is gated on isConfigured(), so the health check must
      // report available for the write to happen.
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "healthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: true,
      });
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      vi.spyOn(prismaService, "$executeRaw").mockResolvedValue(1);
      await service.generateAndStoreEmbedding("entry-123", "test content");
      expect(ollamaService.embed).toHaveBeenCalledWith("test content", "mxbai-embed-large");
      // Persistence goes through a raw upsert; only invocation is asserted,
      // not the SQL text itself.
      expect(prismaService.$executeRaw).toHaveBeenCalled();
    });
    it("should use custom model when provided", async () => {
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "healthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: true,
      });
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      vi.spyOn(prismaService, "$executeRaw").mockResolvedValue(1);
      await service.generateAndStoreEmbedding("entry-123", "test content", {
        model: "custom-model",
      });
      expect(ollamaService.embed).toHaveBeenCalledWith("test content", "custom-model");
    });
    it("should skip when Ollama is not configured", async () => {
      // Graceful degradation: an unavailable Ollama must short-circuit the
      // whole operation — no embed call, no database write.
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "unhealthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: false,
        error: "Connection refused",
      });
      await service.generateAndStoreEmbedding("entry-123", "test content");
      expect(ollamaService.embed).not.toHaveBeenCalled();
      expect(prismaService.$executeRaw).not.toHaveBeenCalled();
    });
  });
  describe("deleteEmbedding", () => {
    it("should delete embedding for entry", async () => {
      vi.spyOn(prismaService.knowledgeEmbedding, "deleteMany").mockResolvedValue({
        count: 1,
      });
      await service.deleteEmbedding("entry-123");
      // deleteMany (rather than delete) tolerates a missing embedding row.
      expect(prismaService.knowledgeEmbedding.deleteMany).toHaveBeenCalledWith({
        where: { entryId: "entry-123" },
      });
    });
  });
  describe("prepareContentForEmbedding", () => {
    it("should combine title and content with title weighting", () => {
      const title = "Test Title";
      const content = "Test content goes here";
      const result = service.prepareContentForEmbedding(title, content);
      expect(result).toContain(title);
      expect(result).toContain(content);
      // Title should appear twice for weighting
      expect(result.split(title).length - 1).toBe(2);
    });
    it("should handle empty content", () => {
      const title = "Test Title";
      const content = "";
      const result = service.prepareContentForEmbedding(title, content);
      // Trailing separator is expected to be trimmed away by the service.
      expect(result).toBe(`${title}\n\n${title}`);
    });
  });
  describe("hasEmbedding", () => {
    it("should return true when entry has embedding", async () => {
      // `as never` silences the type mismatch between this literal record and
      // Prisma's generated return type for findUnique.
      vi.spyOn(prismaService.knowledgeEmbedding, "findUnique").mockResolvedValue({
        id: "embedding-123",
        entryId: "entry-123",
        embedding: "[0.1,0.2,0.3]",
        model: "mxbai-embed-large",
        createdAt: new Date(),
        updatedAt: new Date(),
      } as never);
      const result = await service.hasEmbedding("entry-123");
      expect(result).toBe(true);
    });
    it("should return false when entry has no embedding", async () => {
      vi.spyOn(prismaService.knowledgeEmbedding, "findUnique").mockResolvedValue(null);
      const result = await service.hasEmbedding("entry-123");
      expect(result).toBe(false);
    });
  });
});

View File

@@ -0,0 +1,239 @@
import { Injectable, Logger } from "@nestjs/common";
import { PrismaService } from "../../prisma/prisma.service";
import { OllamaService } from "../../ollama/ollama.service";
import { EMBEDDING_DIMENSION } from "@mosaic/shared";
/**
 * Options for generating embeddings
 */
export interface EmbeddingOptions {
  /**
   * Ollama model name to use for embedding generation.
   *
   * @default "mxbai-embed-large" (produces 1024-dim vectors, requires padding to 1536)
   * Any other embedding model available in the Ollama instance (e.g. a custom
   * fine-tuned model) may be supplied instead.
   */
  model?: string;
}
/**
 * Service for generating and managing embeddings using Ollama
 *
 * This service replaces the OpenAI-based embedding service with Ollama
 * for local/self-hosted embedding generation. When Ollama is unreachable the
 * service degrades gracefully: embedding writes are skipped (with a warning)
 * instead of failing the surrounding entry create/update.
 */
@Injectable()
export class OllamaEmbeddingService {
  private readonly logger = new Logger(OllamaEmbeddingService.name);
  private readonly defaultModel = "mxbai-embed-large";
  // Availability cache: `true` once a health check has succeeded, `null`
  // while unknown. Failures are deliberately NOT cached (see isConfigured)
  // so a temporarily-down Ollama instance can be picked up again later.
  private configuredCache: boolean | null = null;
  constructor(
    private readonly prisma: PrismaService,
    private readonly ollama: OllamaService
  ) {}
  /**
   * Check if the service is properly configured
   *
   * A successful health check is cached for the lifetime of the service.
   * A failed check is re-attempted on the next call instead of being cached,
   * so Ollama coming back online does not require an application restart.
   * (The previous implementation cached a negative result permanently.)
   */
  async isConfigured(): Promise<boolean> {
    if (this.configuredCache === true) {
      return true;
    }
    try {
      const health = await this.ollama.healthCheck();
      if (health.available) {
        this.configuredCache = true;
      }
      return health.available;
    } catch {
      // Leave the cache unset so the next call retries the health check.
      return false;
    }
  }
  /**
   * Generate an embedding vector for the given text
   *
   * @param text - Text to embed
   * @param options - Embedding generation options
   * @returns Embedding vector with exactly EMBEDDING_DIMENSION entries
   * @throws Error if Ollama is unavailable or returns an empty embedding
   */
  async generateEmbedding(text: string, options: EmbeddingOptions = {}): Promise<number[]> {
    const model = options.model ?? this.defaultModel;
    try {
      const response = await this.ollama.embed(text, model);
      if (response.embedding.length === 0) {
        throw new Error("No embedding returned from Ollama");
      }
      // Handle dimension mismatch by padding or truncating
      const embedding = this.normalizeEmbeddingDimension(response.embedding);
      // Defensive re-check: normalizeEmbeddingDimension guarantees this, but
      // an exact dimension is a hard requirement of the vector column.
      if (embedding.length !== EMBEDDING_DIMENSION) {
        throw new Error(
          `Unexpected embedding dimension: ${embedding.length.toString()} (expected ${EMBEDDING_DIMENSION.toString()})`
        );
      }
      return embedding;
    } catch (error) {
      this.logger.error("Failed to generate embedding", error);
      throw error;
    }
  }
  /**
   * Normalize embedding dimension to match schema requirements
   * Pads with zeros if too short, truncates if too long
   *
   * @param embedding - Original embedding vector
   * @returns Normalized embedding vector with correct dimension
   */
  private normalizeEmbeddingDimension(embedding: number[]): number[] {
    if (embedding.length === EMBEDDING_DIMENSION) {
      return embedding;
    }
    if (embedding.length < EMBEDDING_DIMENSION) {
      // Pad with zeros in one allocation instead of a push loop.
      const padded = embedding.concat(
        new Array<number>(EMBEDDING_DIMENSION - embedding.length).fill(0)
      );
      this.logger.warn(
        `Padded embedding from ${embedding.length.toString()} to ${EMBEDDING_DIMENSION.toString()} dimensions`
      );
      return padded;
    }
    // Truncate if too long
    this.logger.warn(
      `Truncated embedding from ${embedding.length.toString()} to ${EMBEDDING_DIMENSION.toString()} dimensions`
    );
    return embedding.slice(0, EMBEDDING_DIMENSION);
  }
  /**
   * Generate and store embedding for a knowledge entry
   *
   * Skips (with a warning) when Ollama is unavailable so entry lifecycle
   * operations are never blocked by a missing embedding backend.
   *
   * @param entryId - ID of the knowledge entry
   * @param content - Content to embed (typically title + content)
   * @param options - Embedding generation options
   */
  async generateAndStoreEmbedding(
    entryId: string,
    content: string,
    options: EmbeddingOptions = {}
  ): Promise<void> {
    const configured = await this.isConfigured();
    if (!configured) {
      this.logger.warn(`Skipping embedding generation for entry ${entryId} - Ollama not available`);
      return;
    }
    const model = options.model ?? this.defaultModel;
    const embedding = await this.generateEmbedding(content, { model });
    // pgvector accepts a bracketed text literal, e.g. "[0.1,0.2,...]".
    const embeddingString = `[${embedding.join(",")}]`;
    // Upsert the embedding. NOTE: the cast is a bare `::vector` on purpose —
    // interpolating `::vector(${EMBEDDING_DIMENSION})` in a tagged template
    // sends the dimension as a bound parameter (`::vector($n)`), which
    // PostgreSQL rejects because type modifiers cannot be parameterized.
    // The column's declared vector type enforces the dimension on write, and
    // generateEmbedding already guarantees EMBEDDING_DIMENSION entries.
    await this.prisma.$executeRaw`
      INSERT INTO knowledge_embeddings (id, entry_id, embedding, model, created_at, updated_at)
      VALUES (
        gen_random_uuid(),
        ${entryId}::uuid,
        ${embeddingString}::vector,
        ${model},
        NOW(),
        NOW()
      )
      ON CONFLICT (entry_id) DO UPDATE SET
        embedding = ${embeddingString}::vector,
        model = ${model},
        updated_at = NOW()
    `;
    this.logger.log(`Generated and stored embedding for entry ${entryId} using model ${model}`);
  }
  /**
   * Batch process embeddings for multiple entries
   *
   * Failures on individual entries are logged and skipped so one bad entry
   * does not abort the batch.
   *
   * @param entries - Array of {id, content} objects
   * @param options - Embedding generation options
   * @returns Number of embeddings successfully generated
   */
  async batchGenerateEmbeddings(
    entries: { id: string; content: string }[],
    options: EmbeddingOptions = {}
  ): Promise<number> {
    const configured = await this.isConfigured();
    if (!configured) {
      this.logger.warn("Skipping batch embedding generation - Ollama not available");
      return 0;
    }
    let successCount = 0;
    // Sequential on purpose: avoids hammering a local Ollama instance with
    // parallel embed requests.
    for (const entry of entries) {
      try {
        await this.generateAndStoreEmbedding(entry.id, entry.content, options);
        successCount++;
      } catch (error) {
        this.logger.error(`Failed to generate embedding for entry ${entry.id}`, error);
      }
    }
    this.logger.log(
      `Batch generated ${successCount.toString()}/${entries.length.toString()} embeddings`
    );
    return successCount;
  }
  /**
   * Delete embedding for a knowledge entry
   *
   * Uses deleteMany so a missing embedding row is a no-op rather than an error.
   *
   * @param entryId - ID of the knowledge entry
   */
  async deleteEmbedding(entryId: string): Promise<void> {
    await this.prisma.knowledgeEmbedding.deleteMany({
      where: { entryId },
    });
    this.logger.log(`Deleted embedding for entry ${entryId}`);
  }
  /**
   * Check if an entry has an embedding
   *
   * @param entryId - ID of the knowledge entry
   * @returns True if embedding exists
   */
  async hasEmbedding(entryId: string): Promise<boolean> {
    const embedding = await this.prisma.knowledgeEmbedding.findUnique({
      where: { entryId },
      select: { id: true },
    });
    return embedding !== null;
  }
  /**
   * Prepare content for embedding
   * Combines title and content with appropriate weighting
   *
   * @param title - Entry title
   * @param content - Entry content (markdown)
   * @returns Combined text for embedding
   */
  prepareContentForEmbedding(title: string, content: string): string {
    // Weight title more heavily by repeating it
    // This helps with semantic search matching on titles
    return `${title}\n\n${title}\n\n${content}`.trim();
  }
}