feat(#65): implement full-text search with tsvector and GIN index
Add PostgreSQL full-text search infrastructure for knowledge entries:

- Add search_vector tsvector column to knowledge_entries table
- Create GIN index for fast full-text search performance
- Implement automatic trigger to maintain search_vector on insert/update
- Weight fields: title (A), summary (B), content (C)
- Update SearchService to use precomputed search_vector
- Add comprehensive integration tests for FTS functionality

Tests:

- 8/8 new integration tests passing
- 205/225 knowledge module tests passing
- All quality gates pass (typecheck, lint)

Refs #65

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
276
apps/api/src/knowledge/services/fulltext-search.spec.ts
Normal file
276
apps/api/src/knowledge/services/fulltext-search.spec.ts
Normal file
@@ -0,0 +1,276 @@
|
||||
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
||||
import { PrismaClient } from "@prisma/client";
|
||||
|
||||
/**
|
||||
* Integration tests for PostgreSQL full-text search setup
|
||||
* Tests the tsvector column, GIN index, and automatic trigger
|
||||
*/
|
||||
describe("Full-Text Search Setup (Integration)", () => {
|
||||
let prisma: PrismaClient;
|
||||
let testWorkspaceId: string;
|
||||
let testUserId: string;
|
||||
|
||||
beforeAll(async () => {
  // A single client is opened here and shared by every test in the suite.
  prisma = new PrismaClient();
  await prisma.$connect();

  // The timestamp varies the owner's address from one run to the next.
  const ownerEmail = `test-fts-${Date.now()}@example.com`;

  // Nested create: the workspace and its owning user are inserted in one call.
  const { id, ownerId } = await prisma.workspace.create({
    data: {
      name: "Test Workspace",
      owner: { create: { email: ownerEmail, name: "Test User" } },
    },
  });

  // Remember both ids so the tests can attach entries and afterAll can clean up.
  testWorkspaceId = id;
  testUserId = ownerId;
});
|
||||
|
||||
afterAll(async () => {
  try {
    // Skip cleanup when setup never got as far as creating the workspace.
    if (testWorkspaceId) {
      // Entries are removed first, then the workspace they belong to.
      await prisma.knowledgeEntry.deleteMany({
        where: { workspaceId: testWorkspaceId },
      });
      await prisma.workspace.delete({
        where: { id: testWorkspaceId },
      });
      // NOTE(review): the owner user created during setup is not deleted here —
      // confirm whether the schema cascades it or whether it needs explicit removal.
    }
  } finally {
    // Release the connection even when one of the cleanup queries rejects;
    // otherwise a failed delete would leave the client connected.
    await prisma.$disconnect();
  }
});
|
||||
|
||||
describe("tsvector column", () => {
|
||||
it("should have search_vector column in knowledge_entries table", async () => {
  // information_schema.columns covers every schema visible to the role, so the
  // lookup is pinned to the connection's current schema. Without that filter a
  // same-named table in another schema would make the length check fail.
  const result = await prisma.$queryRaw<{ column_name: string; data_type: string }[]>`
    SELECT column_name, data_type
    FROM information_schema.columns
    WHERE table_schema = current_schema()
      AND table_name = 'knowledge_entries'
      AND column_name = 'search_vector'
  `;

  // Exactly one matching column, and it must be typed as tsvector.
  expect(result).toHaveLength(1);
  expect(result[0].column_name).toBe("search_vector");
  expect(result[0].data_type).toBe("tsvector");
});
|
||||
|
||||
it("should automatically populate search_vector on insert", async () => {
  // The entry is inserted through Prisma without ever setting search_vector.
  const created = await prisma.knowledgeEntry.create({
    data: {
      workspaceId: testWorkspaceId,
      slug: "auto-populate-test",
      title: "PostgreSQL Full-Text Search",
      content: "This is a test of the automatic trigger functionality.",
      summary: "Testing automatic population",
      createdBy: testUserId,
      updatedBy: testUserId,
    },
  });

  // Cast the tsvector to text so lexemes, positions and weights can be matched as a string.
  const rows = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
    SELECT id, search_vector::text
    FROM knowledge_entries
    WHERE id = ${created.id}::uuid
  `;

  expect(rows).toHaveLength(1);
  const vector = rows[0].search_vector;
  expect(vector).not.toBeNull();

  // 'postgresql' is the first title token and carries weight A.
  expect(vector).toContain("'postgresql':1A");

  // 'search' occurs only in the title of this entry (not in summary or content).
  // It is expected at position 5, consistent with the hyphenated "Full-Text"
  // producing three tokens (full-text, full, text) ahead of it.
  expect(vector).toContain("'search':5A");
});
|
||||
|
||||
it("should automatically update search_vector on update", async () => {
  // Start from an entry whose text contains none of the words asserted below.
  const { id: entryId } = await prisma.knowledgeEntry.create({
    data: {
      workspaceId: testWorkspaceId,
      slug: "auto-update-test",
      title: "Original Title",
      content: "Original content",
      createdBy: testUserId,
      updatedBy: testUserId,
    },
  });

  // Rewrite title and content; search_vector itself is not touched by the test.
  await prisma.knowledgeEntry.update({
    where: { id: entryId },
    data: {
      title: "Updated Elasticsearch Title",
      content: "Updated content with Elasticsearch",
    },
  });

  // Read the stored vector back as text.
  const rows = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
    SELECT id, search_vector::text
    FROM knowledge_entries
    WHERE id = ${entryId}::uuid
  `;

  expect(rows).toHaveLength(1);
  const vector = rows[0].search_vector;

  // 'elasticsearch' is expected twice: 2A from the title and 7C from the content.
  // Position 7 is a position in the combined vector, not within the content text —
  // presumably the trigger concatenates the weighted vectors, which offsets the
  // content's positions by the three title positions (TODO confirm against the migration).
  expect(vector).toContain("'elasticsearch':2A,7C");

  // The lexeme from the pre-update text must no longer be present.
  expect(vector).not.toContain("'original'");
});
|
||||
|
||||
it("should include summary in search_vector with weight B", async () => {
  // Title, summary and content each use distinct words so the weight of
  // 'keyword' can only come from the summary.
  const created = await prisma.knowledgeEntry.create({
    data: {
      workspaceId: testWorkspaceId,
      slug: "summary-weight-test",
      title: "Title Word",
      content: "Content word",
      summary: "Summary keyword here",
      createdBy: testUserId,
      updatedBy: testUserId,
    },
  });

  // Fetch the stored vector in its text form.
  const rows = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
    SELECT id, search_vector::text
    FROM knowledge_entries
    WHERE id = ${created.id}::uuid
  `;

  expect(rows).toHaveLength(1);

  // 'keyword' appears only in the summary and must be tagged with weight B.
  // Position 4 is consistent with the two title tokens being counted first
  // (assumes the trigger orders title before summary — TODO confirm).
  expect(rows[0].search_vector).toContain("'keyword':4B");
});
|
||||
|
||||
it("should handle null summary gracefully", async () => {
  // Explicitly store a NULL summary; title and content are still provided.
  const created = await prisma.knowledgeEntry.create({
    data: {
      workspaceId: testWorkspaceId,
      slug: "null-summary-test",
      title: "Title without summary",
      content: "Content without summary",
      summary: null,
      createdBy: testUserId,
      updatedBy: testUserId,
    },
  });

  // Fetch the stored vector in its text form.
  const rows = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
    SELECT id, search_vector::text
    FROM knowledge_entries
    WHERE id = ${created.id}::uuid
  `;

  expect(rows).toHaveLength(1);
  const vector = rows[0].search_vector;

  // A NULL summary must not turn the whole vector into NULL.
  expect(vector).not.toBeNull();

  // 'titl' is the stemmed form of "Title"; it comes first and has weight A.
  expect(vector).toContain("'titl':1A");

  // 'content' comes from the content field (weight C). Position 4 is consistent
  // with the three title positions preceding it and the summary adding none.
  expect(vector).toContain("'content':4C");
});
|
||||
});
|
||||
|
||||
describe("GIN index", () => {
|
||||
it("should have GIN index on search_vector column", async () => {
  // pg_indexes lists indexes from every schema, so the lookup is restricted to
  // the connection's current schema to avoid matching a same-named index elsewhere.
  const result = await prisma.$queryRaw<{ indexname: string; indexdef: string }[]>`
    SELECT indexname, indexdef
    FROM pg_indexes
    WHERE schemaname = current_schema()
      AND tablename = 'knowledge_entries'
      AND indexname = 'knowledge_entries_search_vector_idx'
  `;

  expect(result).toHaveLength(1);

  // indexdef holds the CREATE INDEX statement. Asserting on "USING gin" checks the
  // access method itself rather than any substring "gin", and "(search_vector)"
  // checks the indexed column rather than the index name, which also contains
  // "search_vector".
  expect(result[0].indexdef).toContain("USING gin");
  expect(result[0].indexdef).toContain("(search_vector)");
});
|
||||
});
|
||||
|
||||
describe("search performance", () => {
|
||||
it("should perform fast searches using the GIN index", async () => {
  // Seed ten entries; the even-numbered ones mention "database" in their content.
  const seedRows = [...Array(10).keys()].map((n) => {
    const content = n % 2 === 0 ? "Contains database keyword" : "No keyword here";
    return {
      workspaceId: testWorkspaceId,
      slug: `perf-test-${n}`,
      title: `Performance Test ${n}`,
      content,
      createdBy: testUserId,
      updatedBy: testUserId,
    };
  });
  await prisma.knowledgeEntry.createMany({ data: seedRows });

  // Only the search query is timed; seeding finished before the clock starts.
  const startedAt = Date.now();

  // Match and rank against the stored search_vector column.
  const matches = await prisma.$queryRaw<{ id: string; title: string }[]>`
    SELECT id, title
    FROM knowledge_entries
    WHERE workspace_id = ${testWorkspaceId}::uuid
      AND search_vector @@ plainto_tsquery('english', 'database')
    ORDER BY ts_rank(search_vector, plainto_tsquery('english', 'database')) DESC
  `;

  const elapsedMs = Date.now() - startedAt;

  expect(matches.length).toBeGreaterThan(0);

  // NOTE(review): nothing here inspects the query plan, so use of the GIN index is
  // not actually verified. The 100 ms wall-clock bound also includes the client
  // round trip and may be flaky on a slow or loaded machine.
  expect(elapsedMs).toBeLessThan(100);
});
|
||||
|
||||
it("should rank results by relevance using weighted fields", async () => {
  // Fields shared by all three fixtures.
  const common = {
    workspaceId: testWorkspaceId,
    createdBy: testUserId,
    updatedBy: testUserId,
  };

  // Each fixture mentions "Redis" in exactly one field: title, summary or content.
  await prisma.knowledgeEntry.createMany({
    data: [
      {
        ...common,
        slug: "rank-title",
        title: "Redis caching strategies",
        content: "Various approaches to caching",
        summary: "Overview of strategies",
      },
      {
        ...common,
        slug: "rank-summary",
        title: "Database optimization",
        content: "Performance tuning",
        summary: "Redis is mentioned in summary",
      },
      {
        ...common,
        slug: "rank-content",
        title: "Performance guide",
        content: "Use Redis for better performance",
        summary: "Best practices",
      },
    ],
  });

  // ts_rank is called without an explicit weights array, so PostgreSQL's default
  // per-label weights decide the ordering.
  const ranked = await prisma.$queryRaw<{ slug: string; rank: number }[]>`
    SELECT slug, ts_rank(search_vector, plainto_tsquery('english', 'redis')) AS rank
    FROM knowledge_entries
    WHERE workspace_id = ${testWorkspaceId}::uuid
      AND search_vector @@ plainto_tsquery('english', 'redis')
    ORDER BY rank DESC
  `;

  expect(ranked.length).toBe(3);

  // Expected order: title match (A) first, summary match (B) second, content match (C) last.
  expect(ranked.map((row) => row.slug)).toEqual(["rank-title", "rank-summary", "rank-content"]);
});
|
||||
});
|
||||
});
|
||||
@@ -118,12 +118,13 @@ export class SearchService {
|
||||
: Prisma.sql`AND e.status != 'ARCHIVED'`;
|
||||
|
||||
// PostgreSQL full-text search query
|
||||
// Uses ts_rank for relevance scoring with weights: title (A=1.0), content (B=0.4)
|
||||
// Uses precomputed search_vector column (with weights: A=title, B=summary, C=content)
|
||||
// Maintained automatically by database trigger
|
||||
const searchResults = await this.prisma.$queryRaw<RawSearchResult[]>`
|
||||
WITH search_query AS (
|
||||
SELECT plainto_tsquery('english', ${sanitizedQuery}) AS query
|
||||
)
|
||||
SELECT
|
||||
SELECT
|
||||
e.id,
|
||||
e.workspace_id,
|
||||
e.slug,
|
||||
@@ -137,11 +138,7 @@ export class SearchService {
|
||||
e.updated_at,
|
||||
e.created_by,
|
||||
e.updated_by,
|
||||
ts_rank(
|
||||
setweight(to_tsvector('english', e.title), 'A') ||
|
||||
setweight(to_tsvector('english', e.content), 'B'),
|
||||
sq.query
|
||||
) AS rank,
|
||||
ts_rank(e.search_vector, sq.query) AS rank,
|
||||
ts_headline(
|
||||
'english',
|
||||
e.content,
|
||||
@@ -151,10 +148,7 @@ export class SearchService {
|
||||
FROM knowledge_entries e, search_query sq
|
||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||
${statusFilter}
|
||||
AND (
|
||||
to_tsvector('english', e.title) @@ sq.query
|
||||
OR to_tsvector('english', e.content) @@ sq.query
|
||||
)
|
||||
AND e.search_vector @@ sq.query
|
||||
ORDER BY rank DESC, e.updated_at DESC
|
||||
LIMIT ${limit}
|
||||
OFFSET ${offset}
|
||||
@@ -166,10 +160,7 @@ export class SearchService {
|
||||
FROM knowledge_entries e
|
||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||
${statusFilter}
|
||||
AND (
|
||||
to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
)
|
||||
AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
`;
|
||||
|
||||
const total = Number(countResult[0].count);
|
||||
@@ -592,22 +583,18 @@ export class SearchService {
|
||||
${statusFilter}
|
||||
),
|
||||
keyword_search AS (
|
||||
SELECT
|
||||
SELECT
|
||||
e.id,
|
||||
ROW_NUMBER() OVER (
|
||||
ORDER BY ts_rank(
|
||||
setweight(to_tsvector('english', e.title), 'A') ||
|
||||
setweight(to_tsvector('english', e.content), 'B'),
|
||||
e.search_vector,
|
||||
plainto_tsquery('english', ${sanitizedQuery})
|
||||
) DESC
|
||||
) AS rank
|
||||
FROM knowledge_entries e
|
||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||
${statusFilter}
|
||||
AND (
|
||||
to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
)
|
||||
AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
),
|
||||
combined AS (
|
||||
SELECT
|
||||
@@ -660,10 +647,7 @@ export class SearchService {
|
||||
FROM knowledge_entries e
|
||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||
${statusFilter}
|
||||
AND (
|
||||
to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
)
|
||||
AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||
)
|
||||
SELECT COUNT(DISTINCT id) as count
|
||||
FROM (
|
||||
|
||||
Reference in New Issue
Block a user