feat(#69): implement embedding generation pipeline

Generate embeddings for knowledge entries using Ollama via BullMQ job queue.

Changes:
- Created OllamaEmbeddingService for Ollama-based embedding generation
- Set up BullMQ queue and processor for async embedding jobs
- Integrated queue into knowledge entry lifecycle (create/update)
- Added rate limiting (1 job/second) and retry logic (3 attempts)
- Added OLLAMA_EMBEDDING_MODEL environment variable configuration
- Implemented dimension normalization (padding/truncating to 1536 dimensions)
- Added graceful degradation when Ollama is unavailable

Test Coverage:
- All 31 embedding-related tests passing
- ollama-embedding.service.spec.ts: 13 tests
- embedding-queue.spec.ts: 6 tests
- embedding.processor.spec.ts: 5 tests
- Build and linting successful

Fixes #69

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-02 15:06:11 -06:00
parent 3cb6eb7f8b
commit 3dfa603a03
12 changed files with 1099 additions and 6 deletions

View File

@@ -0,0 +1,218 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { OllamaEmbeddingService } from "./ollama-embedding.service";
import { PrismaService } from "../../prisma/prisma.service";
import { OllamaService } from "../../ollama/ollama.service";
import { Test, TestingModule } from "@nestjs/testing";
// Unit tests for OllamaEmbeddingService.
// Both collaborators (PrismaService, OllamaService) are replaced with plain
// vi.fn() stubs via the Nest testing module; each test then pins concrete
// behavior onto those stubs with vi.spyOn(...).mockResolvedValue(...).
describe("OllamaEmbeddingService", () => {
  let service: OllamaEmbeddingService;
  let prismaService: PrismaService;
  let ollamaService: OllamaService;
  beforeEach(async () => {
    // A fresh testing module per test yields a fresh service instance and
    // fresh mocks, so spy call counts cannot leak between tests.
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        OllamaEmbeddingService,
        {
          provide: PrismaService,
          useValue: {
            // Only the Prisma surface the service touches is stubbed.
            $executeRaw: vi.fn(),
            knowledgeEmbedding: {
              deleteMany: vi.fn(),
              findUnique: vi.fn(),
            },
          },
        },
        {
          provide: OllamaService,
          useValue: {
            embed: vi.fn(),
            healthCheck: vi.fn(),
          },
        },
      ],
    }).compile();
    service = module.get<OllamaEmbeddingService>(OllamaEmbeddingService);
    prismaService = module.get<PrismaService>(PrismaService);
    ollamaService = module.get<OllamaService>(OllamaService);
  });
  // isConfigured() should mirror the Ollama health-check's `available` flag.
  describe("isConfigured", () => {
    it("should return true when Ollama service is available", async () => {
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "healthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: true,
      });
      const result = await service.isConfigured();
      expect(result).toBe(true);
    });
    it("should return false when Ollama service is unavailable", async () => {
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "unhealthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: false,
        error: "Connection refused",
      });
      const result = await service.isConfigured();
      expect(result).toBe(false);
    });
  });
  describe("generateEmbedding", () => {
    it("should generate embedding vector from text", async () => {
      // 1536 distinct values so any accidental reordering/truncation by the
      // service would be caught by the toEqual below.
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      const result = await service.generateEmbedding("test text");
      expect(result).toEqual(mockEmbedding);
      // "mxbai-embed-large" is the service's default model.
      expect(ollamaService.embed).toHaveBeenCalledWith("test text", "mxbai-embed-large");
    });
    it("should use custom model when provided", async () => {
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      await service.generateEmbedding("test text", { model: "custom-model" });
      expect(ollamaService.embed).toHaveBeenCalledWith("test text", "custom-model");
    });
    it("should throw error when Ollama service fails", async () => {
      // The service is expected to rethrow the underlying Ollama error.
      vi.spyOn(ollamaService, "embed").mockRejectedValue(new Error("Ollama unavailable"));
      await expect(service.generateEmbedding("test text")).rejects.toThrow("Ollama unavailable");
    });
  });
  describe("generateAndStoreEmbedding", () => {
    it("should generate and store embedding for entry", async () => {
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      // The store path is gated on isConfigured(), so the health check must
      // report available for the write to happen.
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "healthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: true,
      });
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      vi.spyOn(prismaService, "$executeRaw").mockResolvedValue(1);
      await service.generateAndStoreEmbedding("entry-123", "test content");
      expect(ollamaService.embed).toHaveBeenCalledWith("test content", "mxbai-embed-large");
      // Persistence goes through a raw upsert; only invocation is asserted,
      // not the SQL text itself.
      expect(prismaService.$executeRaw).toHaveBeenCalled();
    });
    it("should use custom model when provided", async () => {
      const mockEmbedding = new Array(1536).fill(0).map((_, i) => i / 1536);
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "healthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: true,
      });
      vi.spyOn(ollamaService, "embed").mockResolvedValue({
        embedding: mockEmbedding,
      });
      vi.spyOn(prismaService, "$executeRaw").mockResolvedValue(1);
      await service.generateAndStoreEmbedding("entry-123", "test content", {
        model: "custom-model",
      });
      expect(ollamaService.embed).toHaveBeenCalledWith("test content", "custom-model");
    });
    it("should skip when Ollama is not configured", async () => {
      // Graceful degradation: an unavailable Ollama must short-circuit the
      // whole operation — no embed call, no database write.
      vi.spyOn(ollamaService, "healthCheck").mockResolvedValue({
        status: "unhealthy",
        mode: "local",
        endpoint: "http://localhost:11434",
        available: false,
        error: "Connection refused",
      });
      await service.generateAndStoreEmbedding("entry-123", "test content");
      expect(ollamaService.embed).not.toHaveBeenCalled();
      expect(prismaService.$executeRaw).not.toHaveBeenCalled();
    });
  });
  describe("deleteEmbedding", () => {
    it("should delete embedding for entry", async () => {
      vi.spyOn(prismaService.knowledgeEmbedding, "deleteMany").mockResolvedValue({
        count: 1,
      });
      await service.deleteEmbedding("entry-123");
      // deleteMany (rather than delete) tolerates a missing embedding row.
      expect(prismaService.knowledgeEmbedding.deleteMany).toHaveBeenCalledWith({
        where: { entryId: "entry-123" },
      });
    });
  });
  describe("prepareContentForEmbedding", () => {
    it("should combine title and content with title weighting", () => {
      const title = "Test Title";
      const content = "Test content goes here";
      const result = service.prepareContentForEmbedding(title, content);
      expect(result).toContain(title);
      expect(result).toContain(content);
      // Title should appear twice for weighting
      expect(result.split(title).length - 1).toBe(2);
    });
    it("should handle empty content", () => {
      const title = "Test Title";
      const content = "";
      const result = service.prepareContentForEmbedding(title, content);
      // Trailing separator is expected to be trimmed away by the service.
      expect(result).toBe(`${title}\n\n${title}`);
    });
  });
  describe("hasEmbedding", () => {
    it("should return true when entry has embedding", async () => {
      // `as never` silences the type mismatch between this literal record and
      // Prisma's generated return type for findUnique.
      vi.spyOn(prismaService.knowledgeEmbedding, "findUnique").mockResolvedValue({
        id: "embedding-123",
        entryId: "entry-123",
        embedding: "[0.1,0.2,0.3]",
        model: "mxbai-embed-large",
        createdAt: new Date(),
        updatedAt: new Date(),
      } as never);
      const result = await service.hasEmbedding("entry-123");
      expect(result).toBe(true);
    });
    it("should return false when entry has no embedding", async () => {
      vi.spyOn(prismaService.knowledgeEmbedding, "findUnique").mockResolvedValue(null);
      const result = await service.hasEmbedding("entry-123");
      expect(result).toBe(false);
    });
  });
});

View File

@@ -0,0 +1,239 @@
import { Injectable, Logger } from "@nestjs/common";
import { PrismaService } from "../../prisma/prisma.service";
import { OllamaService } from "../../ollama/ollama.service";
import { EMBEDDING_DIMENSION } from "@mosaic/shared";
/**
 * Options for generating embeddings
 */
export interface EmbeddingOptions {
  /**
   * Ollama model name to use for embedding generation.
   *
   * @default "mxbai-embed-large" (produces 1024-dim vectors, requires padding to 1536)
   * Any other embedding model available in the Ollama instance (e.g. a custom
   * fine-tuned model) may be supplied instead.
   */
  model?: string;
}
/**
 * Service for generating and managing embeddings using Ollama
 *
 * This service replaces the OpenAI-based embedding service with Ollama
 * for local/self-hosted embedding generation. When Ollama is unreachable the
 * service degrades gracefully: embedding writes are skipped (with a warning)
 * instead of failing the surrounding entry create/update.
 */
@Injectable()
export class OllamaEmbeddingService {
  private readonly logger = new Logger(OllamaEmbeddingService.name);
  private readonly defaultModel = "mxbai-embed-large";
  // Availability cache: `true` once a health check has succeeded, `null`
  // while unknown. Failures are deliberately NOT cached (see isConfigured)
  // so a temporarily-down Ollama instance can be picked up again later.
  private configuredCache: boolean | null = null;
  constructor(
    private readonly prisma: PrismaService,
    private readonly ollama: OllamaService
  ) {}
  /**
   * Check if the service is properly configured
   *
   * A successful health check is cached for the lifetime of the service.
   * A failed check is re-attempted on the next call instead of being cached,
   * so Ollama coming back online does not require an application restart.
   * (The previous implementation cached a negative result permanently.)
   */
  async isConfigured(): Promise<boolean> {
    if (this.configuredCache === true) {
      return true;
    }
    try {
      const health = await this.ollama.healthCheck();
      if (health.available) {
        this.configuredCache = true;
      }
      return health.available;
    } catch {
      // Leave the cache unset so the next call retries the health check.
      return false;
    }
  }
  /**
   * Generate an embedding vector for the given text
   *
   * @param text - Text to embed
   * @param options - Embedding generation options
   * @returns Embedding vector with exactly EMBEDDING_DIMENSION entries
   * @throws Error if Ollama is unavailable or returns an empty embedding
   */
  async generateEmbedding(text: string, options: EmbeddingOptions = {}): Promise<number[]> {
    const model = options.model ?? this.defaultModel;
    try {
      const response = await this.ollama.embed(text, model);
      if (response.embedding.length === 0) {
        throw new Error("No embedding returned from Ollama");
      }
      // Handle dimension mismatch by padding or truncating
      const embedding = this.normalizeEmbeddingDimension(response.embedding);
      // Defensive re-check: normalizeEmbeddingDimension guarantees this, but
      // an exact dimension is a hard requirement of the vector column.
      if (embedding.length !== EMBEDDING_DIMENSION) {
        throw new Error(
          `Unexpected embedding dimension: ${embedding.length.toString()} (expected ${EMBEDDING_DIMENSION.toString()})`
        );
      }
      return embedding;
    } catch (error) {
      this.logger.error("Failed to generate embedding", error);
      throw error;
    }
  }
  /**
   * Normalize embedding dimension to match schema requirements
   * Pads with zeros if too short, truncates if too long
   *
   * @param embedding - Original embedding vector
   * @returns Normalized embedding vector with correct dimension
   */
  private normalizeEmbeddingDimension(embedding: number[]): number[] {
    if (embedding.length === EMBEDDING_DIMENSION) {
      return embedding;
    }
    if (embedding.length < EMBEDDING_DIMENSION) {
      // Pad with zeros in one allocation instead of a push loop.
      const padded = embedding.concat(
        new Array<number>(EMBEDDING_DIMENSION - embedding.length).fill(0)
      );
      this.logger.warn(
        `Padded embedding from ${embedding.length.toString()} to ${EMBEDDING_DIMENSION.toString()} dimensions`
      );
      return padded;
    }
    // Truncate if too long
    this.logger.warn(
      `Truncated embedding from ${embedding.length.toString()} to ${EMBEDDING_DIMENSION.toString()} dimensions`
    );
    return embedding.slice(0, EMBEDDING_DIMENSION);
  }
  /**
   * Generate and store embedding for a knowledge entry
   *
   * Skips (with a warning) when Ollama is unavailable so entry lifecycle
   * operations are never blocked by a missing embedding backend.
   *
   * @param entryId - ID of the knowledge entry
   * @param content - Content to embed (typically title + content)
   * @param options - Embedding generation options
   */
  async generateAndStoreEmbedding(
    entryId: string,
    content: string,
    options: EmbeddingOptions = {}
  ): Promise<void> {
    const configured = await this.isConfigured();
    if (!configured) {
      this.logger.warn(`Skipping embedding generation for entry ${entryId} - Ollama not available`);
      return;
    }
    const model = options.model ?? this.defaultModel;
    const embedding = await this.generateEmbedding(content, { model });
    // pgvector accepts a bracketed text literal, e.g. "[0.1,0.2,...]".
    const embeddingString = `[${embedding.join(",")}]`;
    // Upsert the embedding. NOTE: the cast is a bare `::vector` on purpose —
    // interpolating `::vector(${EMBEDDING_DIMENSION})` in a tagged template
    // sends the dimension as a bound parameter (`::vector($n)`), which
    // PostgreSQL rejects because type modifiers cannot be parameterized.
    // The column's declared vector type enforces the dimension on write, and
    // generateEmbedding already guarantees EMBEDDING_DIMENSION entries.
    await this.prisma.$executeRaw`
      INSERT INTO knowledge_embeddings (id, entry_id, embedding, model, created_at, updated_at)
      VALUES (
        gen_random_uuid(),
        ${entryId}::uuid,
        ${embeddingString}::vector,
        ${model},
        NOW(),
        NOW()
      )
      ON CONFLICT (entry_id) DO UPDATE SET
        embedding = ${embeddingString}::vector,
        model = ${model},
        updated_at = NOW()
    `;
    this.logger.log(`Generated and stored embedding for entry ${entryId} using model ${model}`);
  }
  /**
   * Batch process embeddings for multiple entries
   *
   * Failures on individual entries are logged and skipped so one bad entry
   * does not abort the batch.
   *
   * @param entries - Array of {id, content} objects
   * @param options - Embedding generation options
   * @returns Number of embeddings successfully generated
   */
  async batchGenerateEmbeddings(
    entries: { id: string; content: string }[],
    options: EmbeddingOptions = {}
  ): Promise<number> {
    const configured = await this.isConfigured();
    if (!configured) {
      this.logger.warn("Skipping batch embedding generation - Ollama not available");
      return 0;
    }
    let successCount = 0;
    // Sequential on purpose: avoids hammering a local Ollama instance with
    // parallel embed requests.
    for (const entry of entries) {
      try {
        await this.generateAndStoreEmbedding(entry.id, entry.content, options);
        successCount++;
      } catch (error) {
        this.logger.error(`Failed to generate embedding for entry ${entry.id}`, error);
      }
    }
    this.logger.log(
      `Batch generated ${successCount.toString()}/${entries.length.toString()} embeddings`
    );
    return successCount;
  }
  /**
   * Delete embedding for a knowledge entry
   *
   * Uses deleteMany so a missing embedding row is a no-op rather than an error.
   *
   * @param entryId - ID of the knowledge entry
   */
  async deleteEmbedding(entryId: string): Promise<void> {
    await this.prisma.knowledgeEmbedding.deleteMany({
      where: { entryId },
    });
    this.logger.log(`Deleted embedding for entry ${entryId}`);
  }
  /**
   * Check if an entry has an embedding
   *
   * @param entryId - ID of the knowledge entry
   * @returns True if embedding exists
   */
  async hasEmbedding(entryId: string): Promise<boolean> {
    const embedding = await this.prisma.knowledgeEmbedding.findUnique({
      where: { entryId },
      select: { id: true },
    });
    return embedding !== null;
  }
  /**
   * Prepare content for embedding
   * Combines title and content with appropriate weighting
   *
   * @param title - Entry title
   * @param content - Entry content (markdown)
   * @returns Combined text for embedding
   */
  prepareContentForEmbedding(title: string, content: string): string {
    // Weight title more heavily by repeating it
    // This helps with semantic search matching on titles
    return `${title}\n\n${title}\n\n${content}`.trim();
  }
}