feat: add semantic search with pgvector (closes #68, #69, #70)
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
ci/woodpecker/pr/woodpecker Pipeline failed

Issues resolved:
- #68: pgvector Setup
  * Added pgvector vector index migration for knowledge_embeddings
  * Vector index uses HNSW algorithm with cosine distance
  * Optimized for 1536-dimension OpenAI embeddings

- #69: Embedding Generation Pipeline
  * Created EmbeddingService with OpenAI integration
  * Automatic embedding generation on entry create/update
  * Batch processing endpoint for existing entries
  * Async generation to avoid blocking API responses
  * Content preparation with title weighting

- #70: Semantic Search API
  * POST /api/knowledge/search/semantic - pure vector search
  * POST /api/knowledge/search/hybrid - RRF combined search
  * POST /api/knowledge/embeddings/batch - batch generation
  * Comprehensive test coverage
  * Full documentation in docs/SEMANTIC_SEARCH.md

Technical details:
- Uses OpenAI text-embedding-3-small model (1536 dims)
- HNSW index for approximate nearest-neighbor similarity search (roughly O(log n) per query)
- Reciprocal Rank Fusion for hybrid search
- Graceful degradation when OpenAI not configured
- Async embedding generation for performance

Configuration:
- Added OPENAI_API_KEY to .env.example
- Optional feature - disabled if API key not set
- Falls back to keyword search in hybrid mode
This commit is contained in:
Jason Woltje
2026-01-30 00:24:41 -06:00
parent 22cd68811d
commit 3ec2059470
14 changed files with 1408 additions and 5 deletions

View File

@@ -0,0 +1,115 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { EmbeddingService } from "./embedding.service";
import { PrismaService } from "../../prisma/prisma.service";
/**
 * Unit tests for EmbeddingService.
 *
 * PrismaService is replaced by a minimal stub exposing only the members these
 * tests touch (`$executeRaw` and `knowledgeEmbedding.deleteMany`). Tests that
 * depend on the OPENAI_API_KEY environment variable go through
 * `withOpenAiKey`, which always restores the previous value, even when an
 * assertion throws, so one failing test cannot leak state into the next.
 */
describe("EmbeddingService", () => {
  const OPENAI_KEY_ENV = "OPENAI_API_KEY";

  let service: EmbeddingService;
  let prismaService: PrismaService;

  /**
   * Sets (or removes) OPENAI_API_KEY on process.env for the given value.
   *
   * `delete` is used for `undefined` because assigning `undefined` to a
   * process.env property stores the string "undefined" instead of unsetting it.
   */
  const applyOpenAiKey = (value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[OPENAI_KEY_ENV];
    } else {
      process.env[OPENAI_KEY_ENV] = value;
    }
  };

  /**
   * Runs `run` with OPENAI_API_KEY temporarily set to `value`
   * (`undefined` means "unset"), then restores the prior state.
   *
   * @param value - Key to expose during `run`, or `undefined` to remove it.
   * @param run - Test body; may be synchronous or return a promise.
   * @returns Whatever `run` resolves to.
   */
  const withOpenAiKey = async <T>(
    value: string | undefined,
    run: () => T | Promise<T>,
  ): Promise<T> => {
    const previous = process.env[OPENAI_KEY_ENV];
    applyOpenAiKey(value);
    try {
      return await run();
    } finally {
      // Compare against undefined (not truthiness) so an originally empty
      // string is restored rather than dropped.
      applyOpenAiKey(previous);
    }
  };

  beforeEach(() => {
    prismaService = {
      $executeRaw: vi.fn(),
      knowledgeEmbedding: {
        deleteMany: vi.fn(),
      },
    } as unknown as PrismaService;
    service = new EmbeddingService(prismaService);
  });

  describe("isConfigured", () => {
    it("should return false when OPENAI_API_KEY is not set", async () => {
      await withOpenAiKey(undefined, () => {
        expect(service.isConfigured()).toBe(false);
      });
    });

    it("should return true when OPENAI_API_KEY is set", async () => {
      await withOpenAiKey("test-key", () => {
        expect(service.isConfigured()).toBe(true);
      });
    });
  });

  describe("prepareContentForEmbedding", () => {
    it("should combine title and content with title weighting", () => {
      const title = "Test Title";
      const content = "Test content goes here";

      const result = service.prepareContentForEmbedding(title, content);

      expect(result).toContain(title);
      expect(result).toContain(content);
      // Title should appear twice for weighting
      expect(result.split(title).length - 1).toBe(2);
    });

    it("should handle empty content", () => {
      const title = "Test Title";
      const content = "";

      const result = service.prepareContentForEmbedding(title, content);

      expect(result).toBe(`${title}\n\n${title}`);
    });
  });

  describe("generateAndStoreEmbedding", () => {
    it("should skip generation when not configured", async () => {
      await withOpenAiKey(undefined, async () => {
        await service.generateAndStoreEmbedding("test-id", "test content");

        expect(prismaService.$executeRaw).not.toHaveBeenCalled();
      });
    });
  });

  describe("deleteEmbedding", () => {
    it("should delete embedding for entry", async () => {
      const entryId = "test-entry-id";

      await service.deleteEmbedding(entryId);

      expect(prismaService.knowledgeEmbedding.deleteMany).toHaveBeenCalledWith({
        where: { entryId },
      });
    });
  });

  describe("batchGenerateEmbeddings", () => {
    it("should return 0 when not configured", async () => {
      await withOpenAiKey(undefined, async () => {
        const entries = [
          { id: "1", content: "content 1" },
          { id: "2", content: "content 2" },
        ];

        const result = await service.batchGenerateEmbeddings(entries);

        expect(result).toBe(0);
      });
    });
  });
});