feat(#69): implement embedding generation pipeline
Generate embeddings for knowledge entries using Ollama via BullMQ job queue.

Changes:
- Created OllamaEmbeddingService for Ollama-based embedding generation
- Set up BullMQ queue and processor for async embedding jobs
- Integrated queue into knowledge entry lifecycle (create/update)
- Added rate limiting (1 job/second) and retry logic (3 attempts)
- Added OLLAMA_EMBEDDING_MODEL environment variable configuration
- Implemented dimension normalization (padding/truncating to 1536 dimensions)
- Added graceful degradation when Ollama is unavailable

Test Coverage:
- All 31 embedding-related tests passing
- ollama-embedding.service.spec.ts: 13 tests
- embedding-queue.spec.ts: 6 tests
- embedding.processor.spec.ts: 5 tests
- Build and linting successful

Fixes #69

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
218
apps/api/src/knowledge/services/ollama-embedding.service.spec.ts
Normal file
218
apps/api/src/knowledge/services/ollama-embedding.service.spec.ts
Normal file
@@ -0,0 +1,218 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { OllamaEmbeddingService } from "./ollama-embedding.service";
|
||||
import { PrismaService } from "../../prisma/prisma.service";
|
||||
import { OllamaService } from "../../ollama/ollama.service";
|
||||
import { Test, TestingModule } from "@nestjs/testing";
|
||||
|
||||
describe("OllamaEmbeddingService", () => {
|
||||
let service: OllamaEmbeddingService;
|
||||
let prismaService: PrismaService;
|
||||
let ollamaService: OllamaService;
|
||||
|
||||
beforeEach(async () => {
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
OllamaEmbeddingService,
|
||||
{
|
||||
provide: PrismaService,
|
||||
useValue: {
|
||||
$executeRaw: vi.fn(),
|
||||
knowledgeEmbedding: {
|
||||
deleteMany: vi.fn(),
|
||||
findUnique: vi.fn(),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
provide: OllamaService,
|
||||
useValue: {
|
||||
embed: vi.fn(),
|
||||
healthCheck: vi.fn(),
|
||||
},
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
service = module.get<OllamaEmbeddingService>(OllamaEmbeddingService);
|
||||
prismaService = module.get<PrismaService>(PrismaService);
|
||||
ollamaService = module.get<OllamaService>(OllamaService);
|
||||
});
|
||||
|
||||
describe("isConfigured", () => {
|
||||
it("should return true when Ollama service is available", async () => {
|
||||
vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
|
||||
status: "healthy",
|
||||
mode: "local",
|
||||
endpoint: "http://localhost:11434",
|
||||
available: true,
|
||||
});
|
||||
|
||||
const result = await service.isConfigured();
|
||||
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("should return false when Ollama service is unavailable", async () => {
|
||||
vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
|
||||
status: "unhealthy",
|
||||
mode: "local",
|
||||
endpoint: "http://localhost:11434",
|
||||
available: false,
|
||||
error: "Connection refused",
|
||||
});
|
||||
|
||||
const result = await service.isConfigured();
|
||||
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("generateEmbedding", () => {
|
||||
it("should generate embedding vector from text", async () => {
|
||||
const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
|
||||
vi.spyOn(ollamaService, "embed").mockResolvedValue({
|
||||
embedding: mockEmbedding,
|
||||
});
|
||||
|
||||
const result = await service.generateEmbedding("test text");
|
||||
|
||||
expect(result).toEqual(mockEmbedding);
|
||||
expect(ollamaService.embed).toHaveBeenCalledWith("test text", "mxbai-embed-large");
|
||||
});
|
||||
|
||||
it("should use custom model when provided", async () => {
|
||||
const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
|
||||
vi.spyOn(ollamaService, "embed").mockResolvedValue({
|
||||
embedding: mockEmbedding,
|
||||
});
|
||||
|
||||
await service.generateEmbedding("test text", { model: "custom-model" });
|
||||
|
||||
expect(ollamaService.embed).toHaveBeenCalledWith("test text", "custom-model");
|
||||
});
|
||||
|
||||
it("should throw error when Ollama service fails", async () => {
|
||||
vi.spyOn(ollamaService, "embed").mockRejectedValue(new Error("Ollama unavailable"));
|
||||
|
||||
await expect(service.generateEmbedding("test text")).rejects.toThrow("Ollama unavailable");
|
||||
});
|
||||
});
|
||||
|
||||
describe("generateAndStoreEmbedding", () => {
|
||||
it("should generate and store embedding for entry", async () => {
|
||||
const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
|
||||
vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
|
||||
status: "healthy",
|
||||
mode: "local",
|
||||
endpoint: "http://localhost:11434",
|
||||
available: true,
|
||||
});
|
||||
vi.spyOn(ollamaService, "embed").mockResolvedValue({
|
||||
embedding: mockEmbedding,
|
||||
});
|
||||
vi.spyOn(prismaService, "$executeRaw").mockResolvedValue(1);
|
||||
|
||||
await service.generateAndStoreEmbedding("entry-123", "test content");
|
||||
|
||||
expect(ollamaService.embed).toHaveBeenCalledWith("test content", "mxbai-embed-large");
|
||||
expect(prismaService.$executeRaw).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("should use custom model when provided", async () => {
|
||||
const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
|
||||
vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
|
||||
status: "healthy",
|
||||
mode: "local",
|
||||
endpoint: "http://localhost:11434",
|
||||
available: true,
|
||||
});
|
||||
vi.spyOn(ollamaService, "embed").mockResolvedValue({
|
||||
embedding: mockEmbedding,
|
||||
});
|
||||
vi.spyOn(prismaService, "$executeRaw").mockResolvedValue(1);
|
||||
|
||||
await service.generateAndStoreEmbedding("entry-123", "test content", {
|
||||
model: "custom-model",
|
||||
});
|
||||
|
||||
expect(ollamaService.embed).toHaveBeenCalledWith("test content", "custom-model");
|
||||
});
|
||||
|
||||
it("should skip when Ollama is not configured", async () => {
|
||||
vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
|
||||
status: "unhealthy",
|
||||
mode: "local",
|
||||
endpoint: "http://localhost:11434",
|
||||
available: false,
|
||||
error: "Connection refused",
|
||||
});
|
||||
|
||||
await service.generateAndStoreEmbedding("entry-123", "test content");
|
||||
|
||||
expect(ollamaService.embed).not.toHaveBeenCalled();
|
||||
expect(prismaService.$executeRaw).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("deleteEmbedding", () => {
|
||||
it("should delete embedding for entry", async () => {
|
||||
vi.spyOn(prismaService.knowledgeEmbedding, "deleteMany").mockResolvedValue({
|
||||
count: 1,
|
||||
});
|
||||
|
||||
await service.deleteEmbedding("entry-123");
|
||||
|
||||
expect(prismaService.knowledgeEmbedding.deleteMany).toHaveBeenCalledWith({
|
||||
where: { entryId: "entry-123" },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("prepareContentForEmbedding", () => {
|
||||
it("should combine title and content with title weighting", () => {
|
||||
const title = "Test Title";
|
||||
const content = "Test content goes here";
|
||||
|
||||
const result = service.prepareContentForEmbedding(title, content);
|
||||
|
||||
expect(result).toContain(title);
|
||||
expect(result).toContain(content);
|
||||
// Title should appear twice for weighting
|
||||
expect(result.split(title).length - 1).toBe(2);
|
||||
});
|
||||
|
||||
it("should handle empty content", () => {
|
||||
const title = "Test Title";
|
||||
const content = "";
|
||||
|
||||
const result = service.prepareContentForEmbedding(title, content);
|
||||
|
||||
expect(result).toBe(`${title}\n\n${title}`);
|
||||
});
|
||||
});
|
||||
|
||||
describe("hasEmbedding", () => {
|
||||
it("should return true when entry has embedding", async () => {
|
||||
vi.spyOn(prismaService.knowledgeEmbedding, "findUnique").mockResolvedValue({
|
||||
id: "embedding-123",
|
||||
entryId: "entry-123",
|
||||
embedding: "[0.1,0.2,0.3]",
|
||||
model: "mxbai-embed-large",
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
} as never);
|
||||
|
||||
const result = await service.hasEmbedding("entry-123");
|
||||
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("should return false when entry has no embedding", async () => {
|
||||
vi.spyOn(prismaService.knowledgeEmbedding, "findUnique").mockResolvedValue(null);
|
||||
|
||||
const result = await service.hasEmbedding("entry-123");
|
||||
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
239
apps/api/src/knowledge/services/ollama-embedding.service.ts
Normal file
239
apps/api/src/knowledge/services/ollama-embedding.service.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import { PrismaService } from "../../prisma/prisma.service";
|
||||
import { OllamaService } from "../../ollama/ollama.service";
|
||||
import { EMBEDDING_DIMENSION } from "@mosaic/shared";
|
||||
|
||||
/**
 * Options for generating embeddings
 */
export interface EmbeddingOptions {
  /**
   * Model to use for embedding generation.
   *
   * @default "mxbai-embed-large" (produces 1024-dim vectors, requires padding to 1536)
   * Alternative: a custom fine-tuned model name known to the Ollama instance.
   */
  model?: string;
}
|
||||
|
||||
/**
|
||||
* Service for generating and managing embeddings using Ollama
|
||||
*
|
||||
* This service replaces the OpenAI-based embedding service with Ollama
|
||||
* for local/self-hosted embedding generation.
|
||||
*/
|
||||
@Injectable()
|
||||
export class OllamaEmbeddingService {
|
||||
private readonly logger = new Logger(OllamaEmbeddingService.name);
|
||||
private readonly defaultModel = "mxbai-embed-large";
|
||||
private configuredCache: boolean | null = null;
|
||||
|
||||
constructor(
|
||||
private readonly prisma: PrismaService,
|
||||
private readonly ollama: OllamaService
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Check if the service is properly configured
|
||||
* Caches the result for performance
|
||||
*/
|
||||
async isConfigured(): Promise<boolean> {
|
||||
if (this.configuredCache !== null) {
|
||||
return this.configuredCache;
|
||||
}
|
||||
|
||||
try {
|
||||
const health = await this.ollama.healthCheck();
|
||||
this.configuredCache = health.available;
|
||||
return health.available;
|
||||
} catch {
|
||||
this.configuredCache = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an embedding vector for the given text
|
||||
*
|
||||
* @param text - Text to embed
|
||||
* @param options - Embedding generation options
|
||||
* @returns Embedding vector (array of numbers)
|
||||
* @throws Error if Ollama service is not available
|
||||
*/
|
||||
async generateEmbedding(text: string, options: EmbeddingOptions = {}): Promise<number[]> {
|
||||
const model = options.model ?? this.defaultModel;
|
||||
|
||||
try {
|
||||
const response = await this.ollama.embed(text, model);
|
||||
|
||||
if (response.embedding.length === 0) {
|
||||
throw new Error("No embedding returned from Ollama");
|
||||
}
|
||||
|
||||
// Handle dimension mismatch by padding or truncating
|
||||
const embedding = this.normalizeEmbeddingDimension(response.embedding);
|
||||
|
||||
if (embedding.length !== EMBEDDING_DIMENSION) {
|
||||
throw new Error(
|
||||
`Unexpected embedding dimension: ${embedding.length.toString()} (expected ${EMBEDDING_DIMENSION.toString()})`
|
||||
);
|
||||
}
|
||||
|
||||
return embedding;
|
||||
} catch (error) {
|
||||
this.logger.error("Failed to generate embedding", error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize embedding dimension to match schema requirements
|
||||
* Pads with zeros if too short, truncates if too long
|
||||
*
|
||||
* @param embedding - Original embedding vector
|
||||
* @returns Normalized embedding vector with correct dimension
|
||||
*/
|
||||
private normalizeEmbeddingDimension(embedding: number[]): number[] {
|
||||
if (embedding.length === EMBEDDING_DIMENSION) {
|
||||
return embedding;
|
||||
}
|
||||
|
||||
if (embedding.length < EMBEDDING_DIMENSION) {
|
||||
// Pad with zeros
|
||||
const padded = [...embedding];
|
||||
while (padded.length < EMBEDDING_DIMENSION) {
|
||||
padded.push(0);
|
||||
}
|
||||
this.logger.warn(
|
||||
`Padded embedding from ${embedding.length.toString()} to ${EMBEDDING_DIMENSION.toString()} dimensions`
|
||||
);
|
||||
return padded;
|
||||
}
|
||||
|
||||
// Truncate if too long
|
||||
this.logger.warn(
|
||||
`Truncated embedding from ${embedding.length.toString()} to ${EMBEDDING_DIMENSION.toString()} dimensions`
|
||||
);
|
||||
return embedding.slice(0, EMBEDDING_DIMENSION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate and store embedding for a knowledge entry
|
||||
*
|
||||
* @param entryId - ID of the knowledge entry
|
||||
* @param content - Content to embed (typically title + content)
|
||||
* @param options - Embedding generation options
|
||||
* @returns Created/updated embedding record
|
||||
*/
|
||||
async generateAndStoreEmbedding(
|
||||
entryId: string,
|
||||
content: string,
|
||||
options: EmbeddingOptions = {}
|
||||
): Promise<void> {
|
||||
const configured = await this.isConfigured();
|
||||
if (!configured) {
|
||||
this.logger.warn(`Skipping embedding generation for entry ${entryId} - Ollama not available`);
|
||||
return;
|
||||
}
|
||||
|
||||
const model = options.model ?? this.defaultModel;
|
||||
const embedding = await this.generateEmbedding(content, { model });
|
||||
|
||||
// Convert to Prisma-compatible format
|
||||
const embeddingString = `[${embedding.join(",")}]`;
|
||||
|
||||
// Upsert the embedding
|
||||
await this.prisma.$executeRaw`
|
||||
INSERT INTO knowledge_embeddings (id, entry_id, embedding, model, created_at, updated_at)
|
||||
VALUES (
|
||||
gen_random_uuid(),
|
||||
${entryId}::uuid,
|
||||
${embeddingString}::vector(${EMBEDDING_DIMENSION}),
|
||||
${model},
|
||||
NOW(),
|
||||
NOW()
|
||||
)
|
||||
ON CONFLICT (entry_id) DO UPDATE SET
|
||||
embedding = ${embeddingString}::vector(${EMBEDDING_DIMENSION}),
|
||||
model = ${model},
|
||||
updated_at = NOW()
|
||||
`;
|
||||
|
||||
this.logger.log(`Generated and stored embedding for entry ${entryId} using model ${model}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Batch process embeddings for multiple entries
|
||||
*
|
||||
* @param entries - Array of {id, content} objects
|
||||
* @param options - Embedding generation options
|
||||
* @returns Number of embeddings successfully generated
|
||||
*/
|
||||
async batchGenerateEmbeddings(
|
||||
entries: { id: string; content: string }[],
|
||||
options: EmbeddingOptions = {}
|
||||
): Promise<number> {
|
||||
const configured = await this.isConfigured();
|
||||
if (!configured) {
|
||||
this.logger.warn("Skipping batch embedding generation - Ollama not available");
|
||||
return 0;
|
||||
}
|
||||
|
||||
let successCount = 0;
|
||||
|
||||
for (const entry of entries) {
|
||||
try {
|
||||
await this.generateAndStoreEmbedding(entry.id, entry.content, options);
|
||||
successCount++;
|
||||
} catch (error) {
|
||||
this.logger.error(`Failed to generate embedding for entry ${entry.id}`, error);
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.log(
|
||||
`Batch generated ${successCount.toString()}/${entries.length.toString()} embeddings`
|
||||
);
|
||||
return successCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete embedding for a knowledge entry
|
||||
*
|
||||
* @param entryId - ID of the knowledge entry
|
||||
*/
|
||||
async deleteEmbedding(entryId: string): Promise<void> {
|
||||
await this.prisma.knowledgeEmbedding.deleteMany({
|
||||
where: { entryId },
|
||||
});
|
||||
|
||||
this.logger.log(`Deleted embedding for entry ${entryId}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an entry has an embedding
|
||||
*
|
||||
* @param entryId - ID of the knowledge entry
|
||||
* @returns True if embedding exists
|
||||
*/
|
||||
async hasEmbedding(entryId: string): Promise<boolean> {
|
||||
const embedding = await this.prisma.knowledgeEmbedding.findUnique({
|
||||
where: { entryId },
|
||||
select: { id: true },
|
||||
});
|
||||
|
||||
return embedding !== null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare content for embedding
|
||||
* Combines title and content with appropriate weighting
|
||||
*
|
||||
* @param title - Entry title
|
||||
* @param content - Entry content (markdown)
|
||||
* @returns Combined text for embedding
|
||||
*/
|
||||
prepareContentForEmbedding(title: string, content: string): string {
|
||||
// Weight title more heavily by repeating it
|
||||
// This helps with semantic search matching on titles
|
||||
return `${title}\n\n${title}\n\n${content}`.trim();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user