Files
stack/apps/api/src/knowledge/services/search.service.spec.ts
Jason Woltje 3969dd5598 feat(#70): implement semantic search API with Ollama embeddings
Updated semantic search to use OllamaEmbeddingService instead of OpenAI:
- Replaced EmbeddingService with OllamaEmbeddingService in SearchService
- Added configurable similarity threshold (SEMANTIC_SEARCH_SIMILARITY_THRESHOLD)
- Updated both semanticSearch() and hybridSearch() methods
- Added comprehensive tests for semantic search functionality
- Updated controller documentation to reflect Ollama requirement
- All tests passing with 85%+ coverage

Related changes:
- Updated knowledge.service.versions.spec.ts to include OllamaEmbeddingService
- Added similarity threshold environment variable to .env.example

Fixes #70

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-02 15:15:04 -06:00

617 lines
20 KiB
TypeScript

import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { EntryStatus } from "@prisma/client";
import { SearchService } from "./search.service";
import { PrismaService } from "../../prisma/prisma.service";
import { KnowledgeCacheService } from "./cache.service";
import { OllamaEmbeddingService } from "./ollama-embedding.service";
describe("SearchService", () => {
let service: SearchService;
let prismaService: any;
const mockWorkspaceId = "550e8400-e29b-41d4-a716-446655440000";
// Rebuilds the Nest testing module before every test so each case gets
// freshly created vi.fn() mocks for Prisma, the cache and the embedding service.
beforeEach(async () => {
  // Prisma stand-in: the raw query entry point plus two model delegates,
  // all bare mocks that individual tests prime as needed.
  const mockPrismaService = {
    $queryRaw: vi.fn(),
    knowledgeEntry: {
      count: vi.fn(),
      findMany: vi.fn(),
    },
    knowledgeEntryTag: {
      findMany: vi.fn(),
    },
  };

  // Cache stand-in: every getter resolves to null, every writer/invalidator
  // resolves to undefined, and isEnabled() reports false.
  const mockCacheService = {
    getEntry: vi.fn().mockResolvedValue(null),
    setEntry: vi.fn().mockResolvedValue(undefined),
    invalidateEntry: vi.fn().mockResolvedValue(undefined),
    getSearch: vi.fn().mockResolvedValue(null),
    setSearch: vi.fn().mockResolvedValue(undefined),
    invalidateSearches: vi.fn().mockResolvedValue(undefined),
    getGraph: vi.fn().mockResolvedValue(null),
    setGraph: vi.fn().mockResolvedValue(undefined),
    invalidateGraphs: vi.fn().mockResolvedValue(undefined),
    invalidateGraphsForEntry: vi.fn().mockResolvedValue(undefined),
    clearWorkspaceCache: vi.fn().mockResolvedValue(undefined),
    getStats: vi.fn().mockReturnValue({ hits: 0, misses: 0, sets: 0, deletes: 0, hitRate: 0 }),
    resetStats: vi.fn(),
    isEnabled: vi.fn().mockReturnValue(false),
  };

  // Embedding stand-in: reports "not configured" by default; the semantic
  // search tests overwrite individual methods when they need Ollama enabled.
  const mockOllamaEmbeddingService = {
    isConfigured: vi.fn().mockResolvedValue(false),
    generateEmbedding: vi.fn().mockResolvedValue([]),
    generateAndStoreEmbedding: vi.fn().mockResolvedValue(undefined),
    batchGenerateEmbeddings: vi.fn().mockResolvedValue(0),
  };

  const testingModule: TestingModule = await Test.createTestingModule({
    providers: [
      SearchService,
      { provide: PrismaService, useValue: mockPrismaService },
      { provide: KnowledgeCacheService, useValue: mockCacheService },
      { provide: OllamaEmbeddingService, useValue: mockOllamaEmbeddingService },
    ],
  }).compile();

  // Expose the service under test and the Prisma mock to the test cases.
  service = testingModule.get<SearchService>(SearchService);
  prismaService = testingModule.get<PrismaService>(PrismaService);
});
// Full-text search: blank queries, result mapping, filters and pagination.
describe("search", () => {
  // Raw $queryRaw row fixture (snake_case columns). Fields that differ per
  // test are supplied through `fields` and override nothing in the base.
  const rawRow = (fields: Record<string, unknown>) => ({
    id: "entry-1",
    workspace_id: mockWorkspaceId,
    status: EntryStatus.PUBLISHED,
    visibility: "WORKSPACE",
    created_at: new Date(),
    updated_at: new Date(),
    created_by: "user-1",
    updated_by: "user-1",
    ...fields,
  });

  // Entry-to-tag join row fixture for entry-1.
  const tagLink = (name: string, slug: string) => ({
    entryId: "entry-1",
    tag: { id: "tag-1", name, slug, color: "#blue" },
  });

  // Queues the two raw-query results (result rows first, then the count row)
  // followed by the tag lookup result.
  const primeSearch = (rows: unknown[], total: number, tagLinks: unknown[] = []): void => {
    prismaService.$queryRaw
      .mockResolvedValueOnce(rows)
      .mockResolvedValueOnce([{ count: BigInt(total) }]);
    prismaService.knowledgeEntryTag.findMany.mockResolvedValue(tagLinks);
  };

  it("should return empty results for empty query", async () => {
    const outcome = await service.search("", mockWorkspaceId);

    expect(outcome.data).toEqual([]);
    expect(outcome.pagination.total).toBe(0);
    expect(outcome.query).toBe("");
  });

  it("should return empty results for whitespace-only query", async () => {
    const outcome = await service.search(" ", mockWorkspaceId);

    expect(outcome.data).toEqual([]);
    expect(outcome.pagination.total).toBe(0);
  });

  it("should perform full-text search and return ranked results", async () => {
    primeSearch(
      [
        rawRow({
          slug: "test-entry",
          title: "Test Entry",
          content: "This is test content",
          content_html: "<p>This is test content</p>",
          summary: "Test summary",
          rank: 0.5,
          headline: "This is <mark>test</mark> content",
        }),
      ],
      1,
      [tagLink("Documentation", "documentation")]
    );

    const outcome = await service.search("test", mockWorkspaceId);

    expect(outcome.data).toHaveLength(1);
    const [hit] = outcome.data;
    expect(hit.title).toBe("Test Entry");
    expect(hit.rank).toBe(0.5);
    expect(hit.headline).toBe("This is <mark>test</mark> content");
    expect(hit.tags).toHaveLength(1);
    expect(outcome.pagination.total).toBe(1);
    expect(outcome.query).toBe("test");
  });

  it("should sanitize search query removing special characters", async () => {
    primeSearch([], 0);

    await service.search("test & query | !special:chars*", mockWorkspaceId);

    // Only confirms that a raw query was issued for this input; the
    // sanitized query text itself is not inspected here.
    expect(prismaService.$queryRaw).toHaveBeenCalled();
  });

  it("should apply status filter when provided", async () => {
    primeSearch([], 0);

    await service.search("test", mockWorkspaceId, { status: EntryStatus.DRAFT });

    expect(prismaService.$queryRaw).toHaveBeenCalled();
  });

  it("should handle pagination correctly", async () => {
    primeSearch([], 50);

    const outcome = await service.search("test", mockWorkspaceId, { page: 2, limit: 10 });
    const { pagination } = outcome;

    expect(pagination.page).toBe(2);
    expect(pagination.limit).toBe(10);
    expect(pagination.total).toBe(50);
    expect(pagination.totalPages).toBe(5);
  });

  it("should filter by tags when provided", async () => {
    primeSearch(
      [
        rawRow({
          slug: "tagged-entry",
          title: "Tagged Entry",
          content: "Content with search term",
          content_html: "<p>Content with search term</p>",
          summary: null,
          rank: 0.8,
          headline: "Content with <mark>search term</mark>",
        }),
      ],
      1,
      [tagLink("API", "api")]
    );

    const outcome = await service.search("search term", mockWorkspaceId, {
      tags: ["api", "documentation"],
    });

    expect(outcome.data).toHaveLength(1);
    const [hit] = outcome.data;
    expect(hit.title).toBe("Tagged Entry");
    expect(hit.tags).toHaveLength(1);
    expect(prismaService.$queryRaw).toHaveBeenCalled();
  });

  it("should combine full-text search with tag filtering", async () => {
    primeSearch([], 0);

    await service.search("test query", mockWorkspaceId, {
      tags: ["api"],
      status: EntryStatus.PUBLISHED,
      page: 1,
      limit: 20,
    });

    // Verify the query was called (the actual SQL logic will be tested in integration tests)
    expect(prismaService.$queryRaw).toHaveBeenCalled();
  });
});
// Tag-only lookup: empty tag list, multi-tag match, status filter and pagination.
describe("searchByTags", () => {
  // Primes the count query and the findMany query, in that order.
  const primeTagSearch = (total: number, entries: unknown[]): void => {
    prismaService.knowledgeEntry.count.mockResolvedValue(total);
    prismaService.knowledgeEntry.findMany.mockResolvedValue(entries);
  };

  it("should return empty results for empty tags array", async () => {
    const outcome = await service.searchByTags([], mockWorkspaceId);

    expect(outcome.data).toEqual([]);
    expect(outcome.pagination.total).toBe(0);
  });

  it("should find entries with all specified tags", async () => {
    // Each tag is wrapped as `{ tag }`, the join-row shape used by the fixture.
    const tagRows = [
      { id: "tag-1", name: "API", slug: "api", color: "#blue" },
      { id: "tag-2", name: "Documentation", slug: "documentation", color: "#green" },
    ].map((tag) => ({ tag }));

    const taggedEntry = {
      id: "entry-1",
      workspaceId: mockWorkspaceId,
      slug: "tagged-entry",
      title: "Tagged Entry",
      content: "Content with tags",
      contentHtml: "<p>Content with tags</p>",
      summary: null,
      status: EntryStatus.PUBLISHED,
      visibility: "WORKSPACE",
      createdAt: new Date(),
      updatedAt: new Date(),
      createdBy: "user-1",
      updatedBy: "user-1",
      tags: tagRows,
    };
    primeTagSearch(1, [taggedEntry]);

    const outcome = await service.searchByTags(["api", "documentation"], mockWorkspaceId);

    expect(outcome.data).toHaveLength(1);
    const [hit] = outcome.data;
    expect(hit.title).toBe("Tagged Entry");
    expect(hit.tags).toHaveLength(2);
    expect(outcome.pagination.total).toBe(1);
  });

  it("should apply status filter when provided", async () => {
    primeTagSearch(0, []);

    await service.searchByTags(["api"], mockWorkspaceId, { status: EntryStatus.DRAFT });

    // The requested status must appear in the `where` clause given to findMany.
    const expectedWhere = expect.objectContaining({ status: EntryStatus.DRAFT });
    expect(prismaService.knowledgeEntry.findMany).toHaveBeenCalledWith(
      expect.objectContaining({ where: expectedWhere })
    );
  });

  it("should handle pagination correctly", async () => {
    primeTagSearch(25, []);

    const outcome = await service.searchByTags(["api"], mockWorkspaceId, {
      page: 2,
      limit: 10,
    });
    const { pagination } = outcome;

    expect(pagination.page).toBe(2);
    expect(pagination.limit).toBe(10);
    expect(pagination.total).toBe(25);
    expect(pagination.totalPages).toBe(3);
  });
});
// Recently modified entries: ordering, default/explicit limit and status filtering.
describe("recentEntries", () => {
  // Asserts that findMany received an argument object containing `partial`.
  const expectFindManyArgs = (partial: Record<string, unknown>): void => {
    expect(prismaService.knowledgeEntry.findMany).toHaveBeenCalledWith(
      expect.objectContaining(partial)
    );
  };

  it("should return recently modified entries", async () => {
    const recentEntry = {
      id: "entry-1",
      workspaceId: mockWorkspaceId,
      slug: "recent-entry",
      title: "Recent Entry",
      content: "Recently updated content",
      contentHtml: "<p>Recently updated content</p>",
      summary: null,
      status: EntryStatus.PUBLISHED,
      visibility: "WORKSPACE",
      createdAt: new Date(),
      updatedAt: new Date(),
      createdBy: "user-1",
      updatedBy: "user-1",
      tags: [],
    };
    prismaService.knowledgeEntry.findMany.mockResolvedValue([recentEntry]);

    const entries = await service.recentEntries(mockWorkspaceId);

    expect(entries).toHaveLength(1);
    expect(entries[0].title).toBe("Recent Entry");
    // With no limit argument the call is expected to use take: 10, newest first.
    expectFindManyArgs({ orderBy: { updatedAt: "desc" }, take: 10 });
  });

  it("should respect the limit parameter", async () => {
    prismaService.knowledgeEntry.findMany.mockResolvedValue([]);

    await service.recentEntries(mockWorkspaceId, 5);

    expectFindManyArgs({ take: 5 });
  });

  it("should apply status filter when provided", async () => {
    prismaService.knowledgeEntry.findMany.mockResolvedValue([]);

    await service.recentEntries(mockWorkspaceId, 10, EntryStatus.DRAFT);

    expectFindManyArgs({
      where: expect.objectContaining({ status: EntryStatus.DRAFT }),
    });
  });

  it("should exclude archived entries by default", async () => {
    prismaService.knowledgeEntry.findMany.mockResolvedValue([]);

    await service.recentEntries(mockWorkspaceId);

    expectFindManyArgs({
      where: expect.objectContaining({ status: { not: EntryStatus.ARCHIVED } }),
    });
  });
});
// Vector-similarity search backed by the (mocked) Ollama embedding service.
describe("semanticSearch", () => {
  const THRESHOLD_VAR = "SEMANTIC_SEARCH_SIMILARITY_THRESHOLD";

  // Marks the injected embedding mock as configured and makes it return a
  // fixed 1536-element vector. Returns the mock so tests can assert on it.
  const enableOllama = () => {
    const ollama = service["ollama"];
    ollama.isConfigured = vi.fn().mockResolvedValue(true);
    ollama.generateEmbedding = vi.fn().mockResolvedValue(new Array(1536).fill(0.1));
    return ollama;
  };

  // Raw $queryRaw row fixture (snake_case columns); `fields` supplies or
  // overrides the per-test values.
  const rawRow = (fields: Record<string, unknown>) => ({
    id: "entry-1",
    workspace_id: mockWorkspaceId,
    summary: null,
    status: EntryStatus.PUBLISHED,
    visibility: "WORKSPACE",
    created_at: new Date(),
    updated_at: new Date(),
    created_by: "user-1",
    updated_by: "user-1",
    headline: null,
    ...fields,
  });

  // Queues the two raw-query results (result rows first, then the count row)
  // and an empty tag lookup.
  const primeVectorQuery = (rows: unknown[], total: number): void => {
    prismaService.$queryRaw
      .mockResolvedValueOnce(rows)
      .mockResolvedValueOnce([{ count: BigInt(total) }]);
    prismaService.knowledgeEntryTag.findMany.mockResolvedValue([]);
  };

  it("should throw error when OllamaEmbeddingService is not configured", async () => {
    const ollama = service["ollama"];
    ollama.isConfigured = vi.fn().mockResolvedValue(false);

    await expect(service.semanticSearch("test query", mockWorkspaceId)).rejects.toThrow(
      "Semantic search requires Ollama to be configured"
    );
  });

  it("should perform semantic search using vector similarity", async () => {
    const ollama = enableOllama();
    primeVectorQuery(
      [
        rawRow({
          slug: "semantic-entry",
          title: "Semantic Entry",
          content: "This is semantically similar content",
          content_html: "<p>This is semantically similar content</p>",
          rank: 0.85,
        }),
      ],
      1
    );

    const outcome = await service.semanticSearch("semantic query", mockWorkspaceId);

    expect(outcome.data).toHaveLength(1);
    expect(outcome.data[0].rank).toBe(0.85);
    expect(ollama.generateEmbedding).toHaveBeenCalledWith("semantic query", {});
    expect(prismaService.$queryRaw).toHaveBeenCalled();
  });

  it("should apply similarity threshold filter", async () => {
    enableOllama();

    // Remember any pre-existing value so it can be restored, and restore in
    // `finally` so a failing assertion cannot leak the override into later tests.
    // NOTE(review): the override only matters if SearchService reads the
    // variable at call time rather than at construction - confirm.
    const previousThreshold = process.env[THRESHOLD_VAR];
    process.env[THRESHOLD_VAR] = "0.7";
    try {
      primeVectorQuery(
        [
          rawRow({
            slug: "high-similarity",
            title: "High Similarity Entry",
            content: "Very similar content",
            content_html: "<p>Very similar content</p>",
            rank: 0.9,
          }),
        ],
        1
      );

      const outcome = await service.semanticSearch("query", mockWorkspaceId);

      expect(outcome.data).toHaveLength(1);
      expect(outcome.data[0].rank).toBeGreaterThanOrEqual(0.7);
    } finally {
      // Assigning `undefined` to process.env would store the string
      // "undefined", so an originally-unset variable must be deleted instead.
      if (previousThreshold === undefined) {
        delete process.env[THRESHOLD_VAR];
      } else {
        process.env[THRESHOLD_VAR] = previousThreshold;
      }
    }
  });

  it("should handle pagination correctly", async () => {
    enableOllama();
    primeVectorQuery([], 25);

    const outcome = await service.semanticSearch("query", mockWorkspaceId, {
      page: 2,
      limit: 10,
    });
    const { pagination } = outcome;

    expect(pagination.page).toBe(2);
    expect(pagination.limit).toBe(10);
    expect(pagination.total).toBe(25);
    expect(pagination.totalPages).toBe(3);
  });

  it("should apply status filter when provided", async () => {
    enableOllama();
    primeVectorQuery([], 0);

    await service.semanticSearch("query", mockWorkspaceId, {
      status: EntryStatus.DRAFT,
    });

    // Only confirms a raw query was issued; the filter inside the SQL is not inspected.
    expect(prismaService.$queryRaw).toHaveBeenCalled();
  });

  it("should include similarity scores in results", async () => {
    enableOllama();
    primeVectorQuery(
      [
        rawRow({
          slug: "entry-1",
          title: "Entry 1",
          content: "Content 1",
          content_html: "<p>Content 1</p>",
          rank: 0.95,
        }),
        rawRow({
          id: "entry-2",
          slug: "entry-2",
          title: "Entry 2",
          content: "Content 2",
          content_html: "<p>Content 2</p>",
          rank: 0.75,
        }),
      ],
      2
    );

    const outcome = await service.semanticSearch("query", mockWorkspaceId);

    expect(outcome.data).toHaveLength(2);
    expect(outcome.data[0].rank).toBe(0.95);
    expect(outcome.data[1].rank).toBe(0.75);
    // The mocked rows are already in descending order; this checks the
    // service hands them back in that order.
    expect(outcome.data[0].rank).toBeGreaterThan(outcome.data[1].rank);
  });
});
});