Issues resolved:

- #68: pgvector Setup
  * Added pgvector vector index migration for knowledge_embeddings
  * Vector index uses HNSW algorithm with cosine distance
  * Optimized for 1536-dimension OpenAI embeddings
- #69: Embedding Generation Pipeline
  * Created EmbeddingService with OpenAI integration
  * Automatic embedding generation on entry create/update
  * Batch processing endpoint for existing entries
  * Async generation to avoid blocking API responses
  * Content preparation with title weighting
- #70: Semantic Search API
  * POST /api/knowledge/search/semantic - pure vector search
  * POST /api/knowledge/search/hybrid - RRF combined search
  * POST /api/knowledge/embeddings/batch - batch generation
  * Comprehensive test coverage
  * Full documentation in docs/SEMANTIC_SEARCH.md

Technical details:

- Uses OpenAI text-embedding-3-small model (1536 dims)
- HNSW index for O(log n) similarity search
- Reciprocal Rank Fusion for hybrid search
- Graceful degradation when OpenAI not configured
- Async embedding generation for performance

Configuration:

- Added OPENAI_API_KEY to .env.example
- Optional feature - disabled if API key not set
- Falls back to keyword search in hybrid mode
This commit is contained in:
115
apps/api/src/knowledge/services/embedding.service.spec.ts
Normal file
115
apps/api/src/knowledge/services/embedding.service.spec.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||
import { EmbeddingService } from "./embedding.service";
|
||||
import { PrismaService } from "../../prisma/prisma.service";
|
||||
|
||||
describe("EmbeddingService", () => {
|
||||
// Suite-wide fixtures: both are reassigned before every test so that mock call
// history recorded by one test is never visible to the next.
let service: EmbeddingService;
let prismaService: PrismaService;

beforeEach(() => {
  // Only the Prisma members this spec touches are stubbed. The stub is nowhere
  // near a full PrismaService, hence the cast through `unknown`.
  const prismaStub = {
    $executeRaw: vi.fn(),
    knowledgeEmbedding: { deleteMany: vi.fn() },
  };

  prismaService = prismaStub as unknown as PrismaService;
  service = new EmbeddingService(prismaService);
});
|
||||
|
||||
describe("isConfigured", () => {
  /**
   * Runs `assertion` with OPENAI_API_KEY forced to `value` (or removed when
   * `value` is `undefined`), then puts the previous state back.
   *
   * The restore lives in `finally` so a failing expectation cannot leak a
   * modified environment into later tests. It compares against `undefined`
   * rather than truthiness so an originally empty-string value is restored
   * instead of being silently dropped.
   */
  const withApiKey = (value: string | undefined, assertion: () => void): void => {
    const previous = process.env["OPENAI_API_KEY"];

    if (value === undefined) {
      delete process.env["OPENAI_API_KEY"];
    } else {
      process.env["OPENAI_API_KEY"] = value;
    }

    try {
      assertion();
    } finally {
      if (previous === undefined) {
        delete process.env["OPENAI_API_KEY"];
      } else {
        process.env["OPENAI_API_KEY"] = previous;
      }
    }
  };

  it("should return false when OPENAI_API_KEY is not set", () => {
    withApiKey(undefined, () => {
      expect(service.isConfigured()).toBe(false);
    });
  });

  it("should return true when OPENAI_API_KEY is set", () => {
    withApiKey("test-key", () => {
      expect(service.isConfigured()).toBe(true);
    });
  });
});
|
||||
|
||||
describe("prepareContentForEmbedding", () => {
  it("should combine title and content with title weighting", () => {
    const heading = "Test Title";
    const body = "Test content goes here";

    const prepared = service.prepareContentForEmbedding(heading, body);

    expect(prepared).toContain(heading);
    expect(prepared).toContain(body);

    // Splitting on the title yields one more piece than there are occurrences,
    // so two occurrences (the weighting) means three pieces.
    const titleOccurrences = prepared.split(heading).length - 1;
    expect(titleOccurrences).toBe(2);
  });

  it("should handle empty content", () => {
    const heading = "Test Title";

    const prepared = service.prepareContentForEmbedding(heading, "");

    // With no body text, only the doubled title separated by a blank line is expected.
    const doubledTitle = [heading, heading].join("\n\n");
    expect(prepared).toBe(doubledTitle);
  });
});
|
||||
|
||||
describe("generateAndStoreEmbedding", () => {
  it("should skip generation when not configured", async () => {
    // Remember the caller's key so the process environment can be put back
    // exactly as it was, whatever happens inside the test.
    const savedKey = process.env["OPENAI_API_KEY"];
    delete process.env["OPENAI_API_KEY"];

    try {
      await service.generateAndStoreEmbedding("test-id", "test content");

      // No key means nothing should be written through the raw SQL path.
      expect(prismaService.$executeRaw).not.toHaveBeenCalled();
    } finally {
      // `finally` guarantees the restore even if the await rejects or the
      // expectation throws; the `undefined` check preserves an empty-string key.
      if (savedKey === undefined) {
        delete process.env["OPENAI_API_KEY"];
      } else {
        process.env["OPENAI_API_KEY"] = savedKey;
      }
    }
  });
});
|
||||
|
||||
describe("deleteEmbedding", () => {
  it("should delete embedding for entry", async () => {
    const targetEntryId = "test-entry-id";
    // The filter the mocked deleteMany is expected to receive.
    const expectedFilter = { where: { entryId: targetEntryId } };

    await service.deleteEmbedding(targetEntryId);

    const { deleteMany } = prismaService.knowledgeEmbedding;
    expect(deleteMany).toHaveBeenCalledWith(expectedFilter);
  });
});
|
||||
|
||||
describe("batchGenerateEmbeddings", () => {
  it("should return 0 when not configured", async () => {
    // Remember the caller's key so the process environment can be put back
    // exactly as it was, whatever happens inside the test.
    const savedKey = process.env["OPENAI_API_KEY"];
    delete process.env["OPENAI_API_KEY"];

    try {
      const entries = [
        { id: "1", content: "content 1" },
        { id: "2", content: "content 2" },
      ];

      const processedCount = await service.batchGenerateEmbeddings(entries);

      expect(processedCount).toBe(0);
    } finally {
      // `finally` guarantees the restore even if the await rejects or the
      // expectation throws; the `undefined` check preserves an empty-string key.
      if (savedKey === undefined) {
        delete process.env["OPENAI_API_KEY"];
      } else {
        process.env["OPENAI_API_KEY"] = savedKey;
      }
    }
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user