diff --git a/.env.example b/.env.example index be5d0df..4f13421 100644 --- a/.env.example +++ b/.env.example @@ -101,6 +101,12 @@ OLLAMA_PORT=11434 # Note: Embeddings are padded/truncated to 1536 dimensions to match schema OLLAMA_EMBEDDING_MODEL=mxbai-embed-large +# Semantic Search Configuration +# Similarity threshold for semantic search (0.0 to 1.0, where 1.0 is identical) +# Lower values return more results but may be less relevant +# Default: 0.5 (50% similarity) +SEMANTIC_SEARCH_SIMILARITY_THRESHOLD=0.5 + # ====================== # OpenAI API (For Semantic Search) # ====================== diff --git a/apps/api/src/knowledge/knowledge.service.versions.spec.ts b/apps/api/src/knowledge/knowledge.service.versions.spec.ts index 9371519..62a4b3c 100644 --- a/apps/api/src/knowledge/knowledge.service.versions.spec.ts +++ b/apps/api/src/knowledge/knowledge.service.versions.spec.ts @@ -5,6 +5,8 @@ import { PrismaService } from "../prisma/prisma.service"; import { LinkSyncService } from "./services/link-sync.service"; import { KnowledgeCacheService } from "./services/cache.service"; import { EmbeddingService } from "./services/embedding.service"; +import { OllamaEmbeddingService } from "./services/ollama-embedding.service"; +import { EmbeddingQueueService } from "./queues/embedding-queue.service"; import { NotFoundException } from "@nestjs/common"; describe("KnowledgeService - Version History", () => { @@ -125,6 +127,17 @@ describe("KnowledgeService - Version History", () => { batchGenerateEmbeddings: vi.fn().mockResolvedValue([]), }; + const mockOllamaEmbeddingService = { + isConfigured: vi.fn().mockResolvedValue(false), + generateEmbedding: vi.fn().mockResolvedValue([]), + generateAndStoreEmbedding: vi.fn().mockResolvedValue(undefined), + batchGenerateEmbeddings: vi.fn().mockResolvedValue(0), + }; + + const mockEmbeddingQueueService = { + enqueueEmbeddingGeneration: vi.fn().mockResolvedValue(undefined), + }; + beforeEach(async () => { const module: TestingModule = 
await Test.createTestingModule({ providers: [ @@ -145,6 +158,14 @@ describe("KnowledgeService - Version History", () => { provide: EmbeddingService, useValue: mockEmbeddingService, }, + { + provide: OllamaEmbeddingService, + useValue: mockOllamaEmbeddingService, + }, + { + provide: EmbeddingQueueService, + useValue: mockEmbeddingQueueService, + }, ], }).compile(); @@ -329,7 +350,13 @@ describe("KnowledgeService - Version History", () => { // Mock for findVersion call mockPrismaService.knowledgeEntry.findUnique.mockResolvedValueOnce(entryWithVersions); - const result = await service.restoreVersion(workspaceId, slug, 2, userId, "Custom restore note"); + const result = await service.restoreVersion( + workspaceId, + slug, + 2, + userId, + "Custom restore note" + ); expect(result.title).toBe("Test Entry v2"); expect(result.content).toBe("# Version 2"); diff --git a/apps/api/src/knowledge/search.controller.ts b/apps/api/src/knowledge/search.controller.ts index 74523c4..43fee1c 100644 --- a/apps/api/src/knowledge/search.controller.ts +++ b/apps/api/src/knowledge/search.controller.ts @@ -101,7 +101,7 @@ export class SearchController { /** * POST /api/knowledge/search/semantic * Semantic search using vector similarity - * Requires: Any workspace member, OpenAI API key configured + * Requires: Any workspace member, Ollama configured * * @body query - The search query string (required) * @body status - Filter by entry status (optional) diff --git a/apps/api/src/knowledge/services/search.service.spec.ts b/apps/api/src/knowledge/services/search.service.spec.ts index 1579d1a..49cec30 100644 --- a/apps/api/src/knowledge/services/search.service.spec.ts +++ b/apps/api/src/knowledge/services/search.service.spec.ts @@ -4,7 +4,7 @@ import { EntryStatus } from "@prisma/client"; import { SearchService } from "./search.service"; import { PrismaService } from "../../prisma/prisma.service"; import { KnowledgeCacheService } from "./cache.service"; -import { EmbeddingService } from 
"./embedding.service"; +import { OllamaEmbeddingService } from "./ollama-embedding.service"; describe("SearchService", () => { let service: SearchService; @@ -46,10 +46,11 @@ describe("SearchService", () => { isEnabled: vi.fn().mockReturnValue(false), }; - const mockEmbeddingService = { - isConfigured: vi.fn().mockReturnValue(false), - generateEmbedding: vi.fn().mockResolvedValue(null), - batchGenerateEmbeddings: vi.fn().mockResolvedValue([]), + const mockOllamaEmbeddingService = { + isConfigured: vi.fn().mockResolvedValue(false), + generateEmbedding: vi.fn().mockResolvedValue([]), + generateAndStoreEmbedding: vi.fn().mockResolvedValue(undefined), + batchGenerateEmbeddings: vi.fn().mockResolvedValue(0), }; const module: TestingModule = await Test.createTestingModule({ @@ -64,8 +65,8 @@ describe("SearchService", () => { useValue: mockCacheService, }, { - provide: EmbeddingService, - useValue: mockEmbeddingService, + provide: OllamaEmbeddingService, + useValue: mockOllamaEmbeddingService, }, ], }).compile(); @@ -410,4 +411,206 @@ describe("SearchService", () => { ); }); }); + + describe("semanticSearch", () => { + it("should throw error when OllamaEmbeddingService is not configured", async () => { + const ollamaService = service["ollama"]; + ollamaService.isConfigured = vi.fn().mockResolvedValue(false); + + await expect(service.semanticSearch("test query", mockWorkspaceId)).rejects.toThrow( + "Semantic search requires Ollama to be configured" + ); + }); + + it("should perform semantic search using vector similarity", async () => { + const ollamaService = service["ollama"]; + ollamaService.isConfigured = vi.fn().mockResolvedValue(true); + + // Mock embedding generation + const mockEmbedding = new Array(1536).fill(0.1); + ollamaService.generateEmbedding = vi.fn().mockResolvedValue(mockEmbedding); + + const mockSearchResults = [ + { + id: "entry-1", + workspace_id: mockWorkspaceId, + slug: "semantic-entry", + title: "Semantic Entry", + content: "This is semantically 
similar content", + content_html: "

<p>This is semantically similar content</p>

", + summary: null, + status: EntryStatus.PUBLISHED, + visibility: "WORKSPACE", + created_at: new Date(), + updated_at: new Date(), + created_by: "user-1", + updated_by: "user-1", + rank: 0.85, + headline: null, + }, + ]; + + prismaService.$queryRaw + .mockResolvedValueOnce(mockSearchResults) + .mockResolvedValueOnce([{ count: BigInt(1) }]); + + prismaService.knowledgeEntryTag.findMany.mockResolvedValue([]); + + const result = await service.semanticSearch("semantic query", mockWorkspaceId); + + expect(result.data).toHaveLength(1); + expect(result.data[0].rank).toBe(0.85); + expect(ollamaService.generateEmbedding).toHaveBeenCalledWith("semantic query", {}); + expect(prismaService.$queryRaw).toHaveBeenCalled(); + }); + + it("should apply similarity threshold filter", async () => { + const ollamaService = service["ollama"]; + ollamaService.isConfigured = vi.fn().mockResolvedValue(true); + + const mockEmbedding = new Array(1536).fill(0.1); + ollamaService.generateEmbedding = vi.fn().mockResolvedValue(mockEmbedding); + + // Set environment variable for similarity threshold + process.env.SEMANTIC_SEARCH_SIMILARITY_THRESHOLD = "0.7"; + + const mockSearchResults = [ + { + id: "entry-1", + workspace_id: mockWorkspaceId, + slug: "high-similarity", + title: "High Similarity Entry", + content: "Very similar content", + content_html: "

<p>Very similar content</p>

", + summary: null, + status: EntryStatus.PUBLISHED, + visibility: "WORKSPACE", + created_at: new Date(), + updated_at: new Date(), + created_by: "user-1", + updated_by: "user-1", + rank: 0.9, + headline: null, + }, + ]; + + prismaService.$queryRaw + .mockResolvedValueOnce(mockSearchResults) + .mockResolvedValueOnce([{ count: BigInt(1) }]); + + prismaService.knowledgeEntryTag.findMany.mockResolvedValue([]); + + const result = await service.semanticSearch("query", mockWorkspaceId); + + expect(result.data).toHaveLength(1); + expect(result.data[0].rank).toBeGreaterThanOrEqual(0.7); + + // Clean up + delete process.env.SEMANTIC_SEARCH_SIMILARITY_THRESHOLD; + }); + + it("should handle pagination correctly", async () => { + const ollamaService = service["ollama"]; + ollamaService.isConfigured = vi.fn().mockResolvedValue(true); + + const mockEmbedding = new Array(1536).fill(0.1); + ollamaService.generateEmbedding = vi.fn().mockResolvedValue(mockEmbedding); + + prismaService.$queryRaw + .mockResolvedValueOnce([]) + .mockResolvedValueOnce([{ count: BigInt(25) }]); + + prismaService.knowledgeEntryTag.findMany.mockResolvedValue([]); + + const result = await service.semanticSearch("query", mockWorkspaceId, { + page: 2, + limit: 10, + }); + + expect(result.pagination.page).toBe(2); + expect(result.pagination.limit).toBe(10); + expect(result.pagination.total).toBe(25); + expect(result.pagination.totalPages).toBe(3); + }); + + it("should apply status filter when provided", async () => { + const ollamaService = service["ollama"]; + ollamaService.isConfigured = vi.fn().mockResolvedValue(true); + + const mockEmbedding = new Array(1536).fill(0.1); + ollamaService.generateEmbedding = vi.fn().mockResolvedValue(mockEmbedding); + + prismaService.$queryRaw + .mockResolvedValueOnce([]) + .mockResolvedValueOnce([{ count: BigInt(0) }]); + + prismaService.knowledgeEntryTag.findMany.mockResolvedValue([]); + + await service.semanticSearch("query", mockWorkspaceId, { + status: EntryStatus.DRAFT, 
+ }); + + // Verify the query was called with status filter + expect(prismaService.$queryRaw).toHaveBeenCalled(); + }); + + it("should include similarity scores in results", async () => { + const ollamaService = service["ollama"]; + ollamaService.isConfigured = vi.fn().mockResolvedValue(true); + + const mockEmbedding = new Array(1536).fill(0.1); + ollamaService.generateEmbedding = vi.fn().mockResolvedValue(mockEmbedding); + + const mockSearchResults = [ + { + id: "entry-1", + workspace_id: mockWorkspaceId, + slug: "entry-1", + title: "Entry 1", + content: "Content 1", + content_html: "

<p>Content 1</p>

", + summary: null, + status: EntryStatus.PUBLISHED, + visibility: "WORKSPACE", + created_at: new Date(), + updated_at: new Date(), + created_by: "user-1", + updated_by: "user-1", + rank: 0.95, + headline: null, + }, + { + id: "entry-2", + workspace_id: mockWorkspaceId, + slug: "entry-2", + title: "Entry 2", + content: "Content 2", + content_html: "

<p>Content 2</p>

", + summary: null, + status: EntryStatus.PUBLISHED, + visibility: "WORKSPACE", + created_at: new Date(), + updated_at: new Date(), + created_by: "user-1", + updated_by: "user-1", + rank: 0.75, + headline: null, + }, + ]; + + prismaService.$queryRaw + .mockResolvedValueOnce(mockSearchResults) + .mockResolvedValueOnce([{ count: BigInt(2) }]); + + prismaService.knowledgeEntryTag.findMany.mockResolvedValue([]); + + const result = await service.semanticSearch("query", mockWorkspaceId); + + expect(result.data).toHaveLength(2); + expect(result.data[0].rank).toBe(0.95); + expect(result.data[1].rank).toBe(0.75); + // Verify results are ordered by similarity (descending) + expect(result.data[0].rank).toBeGreaterThan(result.data[1].rank); + }); + }); }); diff --git a/apps/api/src/knowledge/services/search.service.ts b/apps/api/src/knowledge/services/search.service.ts index 0dc4ad8..c3b55da 100644 --- a/apps/api/src/knowledge/services/search.service.ts +++ b/apps/api/src/knowledge/services/search.service.ts @@ -3,7 +3,7 @@ import { EntryStatus, Prisma } from "@prisma/client"; import { PrismaService } from "../../prisma/prisma.service"; import type { KnowledgeEntryWithTags, PaginatedEntries } from "../entities/knowledge-entry.entity"; import { KnowledgeCacheService } from "./cache.service"; -import { EmbeddingService } from "./embedding.service"; +import { OllamaEmbeddingService } from "./ollama-embedding.service"; /** * Search options for full-text search @@ -63,11 +63,18 @@ interface RawSearchResult { */ @Injectable() export class SearchService { + private readonly similarityThreshold: number; + constructor( private readonly prisma: PrismaService, private readonly cache: KnowledgeCacheService, - private readonly embedding: EmbeddingService - ) {} + private readonly ollama: OllamaEmbeddingService + ) { + // Default similarity threshold is 0.5 (range: 0-1, where 1 is identical) + this.similarityThreshold = parseFloat( + process.env.SEMANTIC_SEARCH_SIMILARITY_THRESHOLD ?? 
"0.5" + ); + } /** * Full-text search on title and content using PostgreSQL ts_vector @@ -451,16 +458,17 @@ export class SearchService { workspaceId: string, options: SearchOptions = {} ): Promise { - if (!this.embedding.isConfigured()) { - throw new Error("Semantic search requires OPENAI_API_KEY to be configured"); + const configured = await this.ollama.isConfigured(); + if (!configured) { + throw new Error("Semantic search requires Ollama to be configured"); } const page = options.page ?? 1; const limit = options.limit ?? 20; const offset = (page - 1) * limit; - // Generate embedding for the query - const queryEmbedding = await this.embedding.generateEmbedding(query); + // Generate embedding for the query using Ollama + const queryEmbedding = await this.ollama.generateEmbedding(query, {}); const embeddingString = `[${queryEmbedding.join(",")}]`; // Build status filter @@ -468,9 +476,16 @@ export class SearchService { ? Prisma.sql`AND e.status = ${options.status}::text::"EntryStatus"` : Prisma.sql`AND e.status != 'ARCHIVED'`; + // Calculate the distance threshold from similarity threshold + // Cosine similarity ranges from -1 to 1, but for embeddings it's typically 0 to 1 + // Distance = 1 - similarity, so distance threshold = 1 - similarity threshold + const distanceThreshold = 1 - this.similarityThreshold; + // Vector similarity search using cosine distance + // Lower distance = higher similarity + // Filter out results below similarity threshold const searchResults = await this.prisma.$queryRaw` - SELECT + SELECT e.id, e.workspace_id, e.slug, @@ -490,18 +505,20 @@ export class SearchService { INNER JOIN knowledge_embeddings emb ON e.id = emb.entry_id WHERE e.workspace_id = ${workspaceId}::uuid ${statusFilter} + AND (emb.embedding <=> ${embeddingString}::vector) <= ${distanceThreshold} ORDER BY emb.embedding <=> ${embeddingString}::vector LIMIT ${limit} OFFSET ${offset} `; - // Get total count for pagination + // Get total count for pagination (also filtered by 
similarity threshold) const countResult = await this.prisma.$queryRaw<[{ count: bigint }]>` SELECT COUNT(*) as count FROM knowledge_entries e INNER JOIN knowledge_embeddings emb ON e.id = emb.entry_id WHERE e.workspace_id = ${workspaceId}::uuid ${statusFilter} + AND (emb.embedding <=> ${embeddingString}::vector) <= ${distanceThreshold} `; const total = Number(countResult[0].count); @@ -556,7 +573,8 @@ export class SearchService { workspaceId: string, options: SearchOptions = {} ): Promise { - if (!this.embedding.isConfigured()) { + const configured = await this.ollama.isConfigured(); + if (!configured) { // Fall back to keyword search if embeddings not configured return this.search(query, workspaceId, options); } @@ -581,8 +599,8 @@ export class SearchService { }; } - // Generate embedding for vector search - const queryEmbedding = await this.embedding.generateEmbedding(query); + // Generate embedding for vector search using Ollama + const queryEmbedding = await this.ollama.generateEmbedding(query, {}); const embeddingString = `[${queryEmbedding.join(",")}]`; // Build status filter diff --git a/docs/scratchpads/70-semantic-search-api.md b/docs/scratchpads/70-semantic-search-api.md new file mode 100644 index 0000000..3779b40 --- /dev/null +++ b/docs/scratchpads/70-semantic-search-api.md @@ -0,0 +1,57 @@ +# Issue #70: [KNOW-018] Semantic Search API + +## Objective + +Implement semantic (vector) search endpoint that uses embeddings generated by issue #69 to enable natural language search over knowledge entries. + +## Approach + +1. Review existing embedding schema and pgvector setup +2. Review OllamaEmbeddingService from issue #69 +3. Create DTOs for semantic search request/response +4. Write tests first (TDD) +5. Implement semantic search in SearchService using pgvector cosine similarity +6. Create controller endpoint POST /api/knowledge/search/semantic +7. Add configurable similarity threshold +8. Test with real queries +9. 
Run quality checks and code review + +## Progress + +- [x] Create scratchpad +- [x] Review existing code (embedding schema, OllamaEmbeddingService) +- [x] Add similarity threshold environment variable +- [x] Write tests (TDD - RED) +- [x] Update SearchService to use OllamaEmbeddingService instead of OpenAI (TDD - GREEN) +- [x] Update hybridSearch to use OllamaEmbeddingService +- [x] Update test files to include OllamaEmbeddingService mocks +- [x] All tests passing +- [x] Type check and build successful +- [ ] Run code review +- [ ] Run QA checks +- [ ] Commit changes +- [ ] Close issue + +## Testing + +- Unit tests for SearchService.semanticSearch() +- Controller tests for POST /api/knowledge/search/semantic +- Integration tests with real embeddings +- Target: 85%+ coverage + +## Notes + +- Use pgvector cosine similarity operator (<=>) +- Lower distance = higher similarity +- Results should include similarity scores +- Similarity threshold should be configurable via environment variable +- Reuse OllamaEmbeddingService from issue #69 + +## Findings + +- The semantic search endpoint already exists in search.controller.ts (line 111) +- The SearchService already has semanticSearch() method (line 449) +- BUT: It currently uses OpenAI-based EmbeddingService instead of OllamaEmbeddingService +- Need to update SearchService to inject and use OllamaEmbeddingService +- Need to add configurable similarity threshold +- Controller endpoint already properly configured with guards and permissions