import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { PrismaClient, EntryStatus } from "@prisma/client";
import { SearchService } from "./search.service";
import { EmbeddingService } from "./embedding.service";
import { KnowledgeCacheService } from "./cache.service";
import { PrismaService } from "../../prisma/prisma.service";

/**
 * Integration tests for semantic search functionality
 *
 * These tests require:
 * - A running PostgreSQL database with pgvector extension
 * - OPENAI_API_KEY environment variable set (tests that call the embedding
 *   API are skipped when it is absent)
 *
 * Run with: pnpm test semantic-search.integration.spec.ts
 */
describe("Semantic Search Integration", () => {
  // Evaluated once at collection time; gates every test that needs the API.
  const hasOpenAiKey = Boolean(process.env["OPENAI_API_KEY"]);

  let prisma: PrismaClient;
  let searchService: SearchService;
  let embeddingService: EmbeddingService;
  let cacheService: KnowledgeCacheService;
  let testWorkspaceId: string;
  let testUserId: string;

  beforeAll(async () => {
    // Initialize services against a real PrismaClient.
    prisma = new PrismaClient();
    const prismaService = prisma as unknown as PrismaService;

    // Stub cache: always misses and stores nothing, so every search in this
    // suite goes through the real search path.
    cacheService = {
      getSearch: async () => null,
      setSearch: async () => {},
      isEnabled: () => false,
      getStats: () => ({ hits: 0, misses: 0, hitRate: 0 }),
      resetStats: () => {},
    } as unknown as KnowledgeCacheService;

    embeddingService = new EmbeddingService(prismaService);
    searchService = new SearchService(
      prismaService,
      cacheService,
      embeddingService
    );

    // Create test workspace and user.
    // The owner e-mail is unique per run so that a previous run's owner row
    // (which afterAll does not remove) cannot collide with this one.
    // NOTE(review): presumably the e-mail column is unique; the owner record
    // itself is still left behind because the user model delegate is not
    // visible from this file -- consider deleting it in afterAll.
    const workspace = await prisma.workspace.create({
      data: {
        name: "Test Workspace for Semantic Search",
        owner: {
          create: {
            email: `semantic-test-${Date.now()}@example.com`,
            name: "Test User",
          },
        },
      },
    });

    testWorkspaceId = workspace.id;
    testUserId = workspace.ownerId;
  });

  afterAll(async () => {
    // Cleanup test data. Skipped when workspace creation failed in beforeAll.
    if (testWorkspaceId) {
      await prisma.knowledgeEntry.deleteMany({
        where: { workspaceId: testWorkspaceId },
      });
      await prisma.workspace.delete({
        where: { id: testWorkspaceId },
      });
    }
    await prisma.$disconnect();
  });

  describe("EmbeddingService", () => {
    it("should check if OpenAI is configured", () => {
      const isConfigured = embeddingService.isConfigured();
      // Only the return type is asserted, so this passes with or without
      // OPENAI_API_KEY.
      expect(typeof isConfigured).toBe("boolean");
    });

    it("should prepare content for embedding correctly", () => {
      const title = "Introduction to PostgreSQL";
      const content = "PostgreSQL is a powerful open-source database.";

      const prepared = embeddingService.prepareContentForEmbedding(
        title,
        content
      );

      expect(prepared).toContain(title);
      expect(prepared).toContain(content);

      // Title should appear twice for weighting. Counting via a literal split
      // avoids treating the title as a regular expression pattern.
      const titleCount = prepared.split(title).length - 1;
      expect(titleCount).toBe(2);
    });
  });

  describe("Semantic Search", () => {
    const testEntries = [
      {
        slug: "postgresql-intro",
        title: "Introduction to PostgreSQL",
        content:
          "PostgreSQL is a powerful, open-source relational database system. It supports advanced data types and performance optimization features.",
      },
      {
        slug: "mongodb-basics",
        title: "MongoDB Basics",
        content:
          "MongoDB is a NoSQL document database. It stores data in flexible, JSON-like documents instead of tables and rows.",
      },
      {
        slug: "database-indexing",
        title: "Database Indexing Strategies",
        content:
          "Indexing is crucial for database performance. Both B-tree and hash indexes have their use cases depending on query patterns.",
      },
    ];

    // Seed the entries and their embeddings once for the whole group so that
    // each search test can run on its own instead of relying on an earlier
    // test having created the data.
    beforeAll(async () => {
      if (!hasOpenAiKey) {
        return;
      }

      for (const entry of testEntries) {
        const created = await prisma.knowledgeEntry.create({
          data: {
            workspaceId: testWorkspaceId,
            slug: entry.slug,
            title: entry.title,
            content: entry.content,
            status: EntryStatus.PUBLISHED,
            visibility: "WORKSPACE",
            createdBy: testUserId,
            updatedBy: testUserId,
          },
        });

        // Generate embedding
        const preparedContent = embeddingService.prepareContentForEmbedding(
          entry.title,
          entry.content
        );
        await embeddingService.generateAndStoreEmbedding(
          created.id,
          preparedContent
        );
      }

      // Wait a bit for embeddings to be stored.
      // NOTE(review): the store call above is already awaited, so this delay
      // may be unnecessary -- confirm against EmbeddingService.
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }, 30000); // 30 second timeout for API calls

    it("should skip semantic search if OpenAI not configured", async () => {
      if (embeddingService.isConfigured()) {
        // The configured path is exercised by the tests below.
        return;
      }

      await expect(
        searchService.semanticSearch("database performance", testWorkspaceId)
      ).rejects.toThrow();
    });

    it.skipIf(!hasOpenAiKey)(
      "should generate embeddings and perform semantic search",
      async () => {
        // Perform semantic search
        const results = await searchService.semanticSearch(
          "relational database systems",
          testWorkspaceId
        );

        // Should return results
        expect(results.data.length).toBeGreaterThan(0);

        // PostgreSQL entry should be returned with a positive rank for
        // "relational database"
        const postgresEntry = results.data.find(
          (r) => r.slug === "postgresql-intro"
        );
        expect(postgresEntry).toBeDefined();
        expect(postgresEntry?.rank ?? 0).toBeGreaterThan(0);
      },
      30000 // 30 second timeout for API calls
    );

    it.skipIf(!hasOpenAiKey)(
      "should perform hybrid search combining vector and keyword",
      async () => {
        const results = await searchService.hybridSearch(
          "indexing",
          testWorkspaceId
        );

        // Should return results
        expect(results.data.length).toBeGreaterThan(0);

        // Should find the indexing entry
        const indexingEntry = results.data.find(
          (r) => r.slug === "database-indexing"
        );
        expect(indexingEntry).toBeDefined();
      },
      30000
    );
  });

  describe("Batch Embedding Generation", () => {
    it.skipIf(!hasOpenAiKey)(
      "should batch generate embeddings",
      async () => {
        // Create entries without embeddings
        const entries = await Promise.all(
          Array.from({ length: 3 }, (_, i) =>
            prisma.knowledgeEntry.create({
              data: {
                workspaceId: testWorkspaceId,
                slug: `batch-test-${i}`,
                title: `Batch Test Entry ${i}`,
                content: `This is test content for batch entry ${i}`,
                status: EntryStatus.PUBLISHED,
                visibility: "WORKSPACE",
                createdBy: testUserId,
                updatedBy: testUserId,
              },
            })
          )
        );

        // Batch generate embeddings
        const entriesForEmbedding = entries.map((e) => ({
          id: e.id,
          content: embeddingService.prepareContentForEmbedding(
            e.title,
            e.content
          ),
        }));

        const successCount = await embeddingService.batchGenerateEmbeddings(
          entriesForEmbedding
        );

        expect(successCount).toBe(3);

        // Verify embeddings were created
        const embeddings = await prisma.knowledgeEmbedding.findMany({
          where: {
            entryId: { in: entries.map((e) => e.id) },
          },
        });

        expect(embeddings.length).toBe(3);
      },
      60000 // 60 second timeout for batch operations
    );
  });
});