feat(#65): implement full-text search with tsvector and GIN index
Add PostgreSQL full-text search infrastructure for knowledge entries: - Add search_vector tsvector column to knowledge_entries table - Create GIN index for fast full-text search performance - Implement automatic trigger to maintain search_vector on insert/update - Weight fields: title (A), summary (B), content (C) - Update SearchService to use precomputed search_vector - Add comprehensive integration tests for FTS functionality Tests: - 8/8 new integration tests passing - 205/225 knowledge module tests passing - All quality gates pass (typecheck, lint) Refs #65 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,36 @@
|
|||||||
|
-- Add tsvector column for full-text search on knowledge_entries
|
||||||
|
-- Weighted fields: title (A), summary (B), content (C)
|
||||||
|
|
||||||
|
-- Step 1: Add the search_vector column
|
||||||
|
ALTER TABLE "knowledge_entries"
|
||||||
|
ADD COLUMN "search_vector" tsvector;
|
||||||
|
|
||||||
|
-- Step 2: Create GIN index for fast full-text search
|
||||||
|
CREATE INDEX "knowledge_entries_search_vector_idx"
|
||||||
|
ON "knowledge_entries"
|
||||||
|
USING gin("search_vector");
|
||||||
|
|
||||||
|
-- Step 3: Create function to update search_vector
|
||||||
|
-- Trigger function: rebuilds NEW.search_vector from the row's own fields, tagging
-- title lexemes with weight A, summary with B and content with C ('english' config).
-- COALESCE(..., '') keeps a NULL field (e.g. a missing summary) from turning the whole
-- concatenated tsvector NULL. Returns NEW so the modified row is the one that is stored.
CREATE OR REPLACE FUNCTION knowledge_entries_search_vector_update()
|
||||||
|
RETURNS trigger AS $$
|
||||||
|
BEGIN
|
||||||
|
NEW.search_vector :=
|
||||||
|
setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
|
||||||
|
setweight(to_tsvector('english', COALESCE(NEW.summary, '')), 'B') ||
|
||||||
|
setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'C');
|
||||||
|
RETURN NEW;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Step 4: Create trigger to automatically update search_vector on insert/update
|
||||||
|
-- Row-level BEFORE trigger: calls knowledge_entries_search_vector_update() for every
-- inserted row and for every UPDATE of a row. No column list is given, so it also runs
-- when title/summary/content are unchanged, and any value written directly to
-- search_vector is overwritten by the recomputed one.
-- NOTE(review): the EXECUTE FUNCTION spelling requires PostgreSQL 11+ — confirm target version.
CREATE TRIGGER knowledge_entries_search_vector_trigger
|
||||||
|
BEFORE INSERT OR UPDATE ON "knowledge_entries"
|
||||||
|
FOR EACH ROW
|
||||||
|
EXECUTE FUNCTION knowledge_entries_search_vector_update();
|
||||||
|
|
||||||
|
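-- Step 5: Populate search_vector for existing entries (the Step 4 trigger also fires on this UPDATE and recomputes the same value, so the expression below mirrors the trigger function)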
|
||||||
|
UPDATE "knowledge_entries"
|
||||||
|
SET search_vector =
|
||||||
|
setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
|
||||||
|
setweight(to_tsvector('english', COALESCE(summary, '')), 'B') ||
|
||||||
|
setweight(to_tsvector('english', COALESCE(content, '')), 'C');
|
||||||
@@ -798,6 +798,9 @@ model KnowledgeEntry {
|
|||||||
contentHtml String? @map("content_html") @db.Text
|
contentHtml String? @map("content_html") @db.Text
|
||||||
summary String?
|
summary String?
|
||||||
|
|
||||||
|
// Full-text search vector (automatically maintained by trigger)
|
||||||
|
searchVector Unsupported("tsvector")? @map("search_vector")
|
||||||
|
|
||||||
// Status
|
// Status
|
||||||
status EntryStatus @default(DRAFT)
|
status EntryStatus @default(DRAFT)
|
||||||
visibility Visibility @default(PRIVATE)
|
visibility Visibility @default(PRIVATE)
|
||||||
@@ -820,6 +823,7 @@ model KnowledgeEntry {
|
|||||||
@@index([workspaceId, updatedAt])
|
@@index([workspaceId, updatedAt])
|
||||||
@@index([createdBy])
|
@@index([createdBy])
|
||||||
@@index([updatedBy])
|
@@index([updatedBy])
|
||||||
|
// Note: GIN index on searchVector created via migration (not supported in Prisma schema)
|
||||||
@@map("knowledge_entries")
|
@@map("knowledge_entries")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
276
apps/api/src/knowledge/services/fulltext-search.spec.ts
Normal file
276
apps/api/src/knowledge/services/fulltext-search.spec.ts
Normal file
@@ -0,0 +1,276 @@
|
|||||||
|
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
||||||
|
import { PrismaClient } from "@prisma/client";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Integration tests for PostgreSQL full-text search setup
|
||||||
|
* Tests the tsvector column, GIN index, and automatic trigger
|
||||||
|
*/
|
||||||
|
describe("Full-Text Search Setup (Integration)", () => {
|
||||||
|
let prisma: PrismaClient;
|
||||||
|
let testWorkspaceId: string;
|
||||||
|
let testUserId: string;
|
||||||
|
|
||||||
|
beforeAll(async () => {
|
||||||
|
prisma = new PrismaClient();
|
||||||
|
await prisma.$connect();
|
||||||
|
|
||||||
|
// Create test workspace
|
||||||
|
const workspace = await prisma.workspace.create({
|
||||||
|
data: {
|
||||||
|
name: "Test Workspace",
|
||||||
|
owner: {
|
||||||
|
create: {
|
||||||
|
email: `test-fts-${Date.now()}@example.com`,
|
||||||
|
name: "Test User",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
testWorkspaceId = workspace.id;
|
||||||
|
testUserId = workspace.ownerId;
|
||||||
|
});
|
||||||
|
|
||||||
|
afterAll(async () => {
|
||||||
|
// Cleanup
|
||||||
|
if (testWorkspaceId) {
|
||||||
|
await prisma.knowledgeEntry.deleteMany({
|
||||||
|
where: { workspaceId: testWorkspaceId },
|
||||||
|
});
|
||||||
|
await prisma.workspace.delete({
|
||||||
|
where: { id: testWorkspaceId },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
await prisma.$disconnect();
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("tsvector column", () => {
|
||||||
|
it("should have search_vector column in knowledge_entries table", async () => {
|
||||||
|
// Query to check if column exists
|
||||||
|
const result = await prisma.$queryRaw<{ column_name: string; data_type: string }[]>`
|
||||||
|
SELECT column_name, data_type
|
||||||
|
FROM information_schema.columns
|
||||||
|
WHERE table_name = 'knowledge_entries'
|
||||||
|
AND column_name = 'search_vector'
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
expect(result[0].column_name).toBe("search_vector");
|
||||||
|
expect(result[0].data_type).toBe("tsvector");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should automatically populate search_vector on insert", async () => {
|
||||||
|
const entry = await prisma.knowledgeEntry.create({
|
||||||
|
data: {
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "auto-populate-test",
|
||||||
|
title: "PostgreSQL Full-Text Search",
|
||||||
|
content: "This is a test of the automatic trigger functionality.",
|
||||||
|
summary: "Testing automatic population",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Query raw to check search_vector was populated
|
||||||
|
const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
|
||||||
|
SELECT id, search_vector::text
|
||||||
|
FROM knowledge_entries
|
||||||
|
WHERE id = ${entry.id}::uuid
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
expect(result[0].search_vector).not.toBeNull();
|
||||||
|
// Verify 'postgresql' appears in title (weight A)
|
||||||
|
expect(result[0].search_vector).toContain("'postgresql':1A");
|
||||||
|
// Verify 'search' appears in the title (weight A) at position 5; the hyphenated 'Full-Text' occupies positions 2-4
|
||||||
|
expect(result[0].search_vector).toContain("'search':5A");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should automatically update search_vector on update", async () => {
|
||||||
|
const entry = await prisma.knowledgeEntry.create({
|
||||||
|
data: {
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "auto-update-test",
|
||||||
|
title: "Original Title",
|
||||||
|
content: "Original content",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Update the entry
|
||||||
|
await prisma.knowledgeEntry.update({
|
||||||
|
where: { id: entry.id },
|
||||||
|
data: {
|
||||||
|
title: "Updated Elasticsearch Title",
|
||||||
|
content: "Updated content with Elasticsearch",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Check search_vector was updated
|
||||||
|
const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
|
||||||
|
SELECT id, search_vector::text
|
||||||
|
FROM knowledge_entries
|
||||||
|
WHERE id = ${entry.id}::uuid
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
// Verify 'elasticsearch' appears in both title (A) and content (C)
|
||||||
|
// Concatenating tsvectors offsets later positions: '2A' is position 2 in the title; '7C' is position 4 in the content shifted by the title's 3 positions
|
||||||
|
expect(result[0].search_vector).toContain("'elasticsearch':2A,7C");
|
||||||
|
expect(result[0].search_vector).not.toContain("'original'");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should include summary in search_vector with weight B", async () => {
|
||||||
|
const entry = await prisma.knowledgeEntry.create({
|
||||||
|
data: {
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "summary-weight-test",
|
||||||
|
title: "Title Word",
|
||||||
|
content: "Content word",
|
||||||
|
summary: "Summary keyword here",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
|
||||||
|
SELECT id, search_vector::text
|
||||||
|
FROM knowledge_entries
|
||||||
|
WHERE id = ${entry.id}::uuid
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
// Summary should have weight B - 'keyword' appears in summary
|
||||||
|
expect(result[0].search_vector).toContain("'keyword':4B");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should handle null summary gracefully", async () => {
|
||||||
|
const entry = await prisma.knowledgeEntry.create({
|
||||||
|
data: {
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "null-summary-test",
|
||||||
|
title: "Title without summary",
|
||||||
|
content: "Content without summary",
|
||||||
|
summary: null,
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
|
||||||
|
SELECT id, search_vector::text
|
||||||
|
FROM knowledge_entries
|
||||||
|
WHERE id = ${entry.id}::uuid
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
expect(result[0].search_vector).not.toBeNull();
|
||||||
|
// Verify 'titl' (stemmed from 'title') appears with weight A
|
||||||
|
expect(result[0].search_vector).toContain("'titl':1A");
|
||||||
|
// Verify 'content' appears with weight C
|
||||||
|
expect(result[0].search_vector).toContain("'content':4C");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("GIN index", () => {
|
||||||
|
it("should have GIN index on search_vector column", async () => {
|
||||||
|
const result = await prisma.$queryRaw<{ indexname: string; indexdef: string }[]>`
|
||||||
|
SELECT indexname, indexdef
|
||||||
|
FROM pg_indexes
|
||||||
|
WHERE tablename = 'knowledge_entries'
|
||||||
|
AND indexname = 'knowledge_entries_search_vector_idx'
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
expect(result[0].indexdef).toContain("gin");
|
||||||
|
expect(result[0].indexdef).toContain("search_vector");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("search performance", () => {
|
||||||
|
it("should perform fast searches using the GIN index", async () => {
|
||||||
|
// Create multiple entries
|
||||||
|
const entries = Array.from({ length: 10 }, (_, i) => ({
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: `perf-test-${i}`,
|
||||||
|
title: `Performance Test ${i}`,
|
||||||
|
content: i % 2 === 0 ? "Contains database keyword" : "No keyword here",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
}));
|
||||||
|
|
||||||
|
await prisma.knowledgeEntry.createMany({
|
||||||
|
data: entries,
|
||||||
|
});
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
// Search using the precomputed search_vector
|
||||||
|
const results = await prisma.$queryRaw<{ id: string; title: string }[]>`
|
||||||
|
SELECT id, title
|
||||||
|
FROM knowledge_entries
|
||||||
|
WHERE workspace_id = ${testWorkspaceId}::uuid
|
||||||
|
AND search_vector @@ plainto_tsquery('english', 'database')
|
||||||
|
ORDER BY ts_rank(search_vector, plainto_tsquery('english', 'database')) DESC
|
||||||
|
`;
|
||||||
|
|
||||||
|
const duration = Date.now() - startTime;
|
||||||
|
|
||||||
|
expect(results.length).toBeGreaterThan(0);
|
||||||
|
// Sanity bound only (< 100ms): a 10-row table is fast with or without the GIN index, so this wall-clock check does not prove index usage
|
||||||
|
expect(duration).toBeLessThan(100);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should rank results by relevance using weighted fields", async () => {
|
||||||
|
// Create entries with keyword in different positions
|
||||||
|
await prisma.knowledgeEntry.createMany({
|
||||||
|
data: [
|
||||||
|
{
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "rank-title",
|
||||||
|
title: "Redis caching strategies",
|
||||||
|
content: "Various approaches to caching",
|
||||||
|
summary: "Overview of strategies",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "rank-summary",
|
||||||
|
title: "Database optimization",
|
||||||
|
content: "Performance tuning",
|
||||||
|
summary: "Redis is mentioned in summary",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
workspaceId: testWorkspaceId,
|
||||||
|
slug: "rank-content",
|
||||||
|
title: "Performance guide",
|
||||||
|
content: "Use Redis for better performance",
|
||||||
|
summary: "Best practices",
|
||||||
|
createdBy: testUserId,
|
||||||
|
updatedBy: testUserId,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
const results = await prisma.$queryRaw<{ slug: string; rank: number }[]>`
|
||||||
|
SELECT slug, ts_rank(search_vector, plainto_tsquery('english', 'redis')) AS rank
|
||||||
|
FROM knowledge_entries
|
||||||
|
WHERE workspace_id = ${testWorkspaceId}::uuid
|
||||||
|
AND search_vector @@ plainto_tsquery('english', 'redis')
|
||||||
|
ORDER BY rank DESC
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(results.length).toBe(3);
|
||||||
|
// Title match should rank highest (weight A)
|
||||||
|
expect(results[0].slug).toBe("rank-title");
|
||||||
|
// Summary should rank second (weight B)
|
||||||
|
expect(results[1].slug).toBe("rank-summary");
|
||||||
|
// Content should rank third (weight C)
|
||||||
|
expect(results[2].slug).toBe("rank-content");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -118,12 +118,13 @@ export class SearchService {
|
|||||||
: Prisma.sql`AND e.status != 'ARCHIVED'`;
|
: Prisma.sql`AND e.status != 'ARCHIVED'`;
|
||||||
|
|
||||||
// PostgreSQL full-text search query
|
// PostgreSQL full-text search query
|
||||||
// Uses ts_rank for relevance scoring with weights: title (A=1.0), content (B=0.4)
|
// Uses precomputed search_vector column (with weights: A=title, B=summary, C=content)
|
||||||
|
// Maintained automatically by database trigger
|
||||||
const searchResults = await this.prisma.$queryRaw<RawSearchResult[]>`
|
const searchResults = await this.prisma.$queryRaw<RawSearchResult[]>`
|
||||||
WITH search_query AS (
|
WITH search_query AS (
|
||||||
SELECT plainto_tsquery('english', ${sanitizedQuery}) AS query
|
SELECT plainto_tsquery('english', ${sanitizedQuery}) AS query
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
e.id,
|
e.id,
|
||||||
e.workspace_id,
|
e.workspace_id,
|
||||||
e.slug,
|
e.slug,
|
||||||
@@ -137,11 +138,7 @@ export class SearchService {
|
|||||||
e.updated_at,
|
e.updated_at,
|
||||||
e.created_by,
|
e.created_by,
|
||||||
e.updated_by,
|
e.updated_by,
|
||||||
ts_rank(
|
ts_rank(e.search_vector, sq.query) AS rank,
|
||||||
setweight(to_tsvector('english', e.title), 'A') ||
|
|
||||||
setweight(to_tsvector('english', e.content), 'B'),
|
|
||||||
sq.query
|
|
||||||
) AS rank,
|
|
||||||
ts_headline(
|
ts_headline(
|
||||||
'english',
|
'english',
|
||||||
e.content,
|
e.content,
|
||||||
@@ -151,10 +148,7 @@ export class SearchService {
|
|||||||
FROM knowledge_entries e, search_query sq
|
FROM knowledge_entries e, search_query sq
|
||||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||||
${statusFilter}
|
${statusFilter}
|
||||||
AND (
|
AND e.search_vector @@ sq.query
|
||||||
to_tsvector('english', e.title) @@ sq.query
|
|
||||||
OR to_tsvector('english', e.content) @@ sq.query
|
|
||||||
)
|
|
||||||
ORDER BY rank DESC, e.updated_at DESC
|
ORDER BY rank DESC, e.updated_at DESC
|
||||||
LIMIT ${limit}
|
LIMIT ${limit}
|
||||||
OFFSET ${offset}
|
OFFSET ${offset}
|
||||||
@@ -166,10 +160,7 @@ export class SearchService {
|
|||||||
FROM knowledge_entries e
|
FROM knowledge_entries e
|
||||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||||
${statusFilter}
|
${statusFilter}
|
||||||
AND (
|
AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||||
to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
|
|
||||||
OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
|
|
||||||
)
|
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const total = Number(countResult[0].count);
|
const total = Number(countResult[0].count);
|
||||||
@@ -592,22 +583,18 @@ export class SearchService {
|
|||||||
${statusFilter}
|
${statusFilter}
|
||||||
),
|
),
|
||||||
keyword_search AS (
|
keyword_search AS (
|
||||||
SELECT
|
SELECT
|
||||||
e.id,
|
e.id,
|
||||||
ROW_NUMBER() OVER (
|
ROW_NUMBER() OVER (
|
||||||
ORDER BY ts_rank(
|
ORDER BY ts_rank(
|
||||||
setweight(to_tsvector('english', e.title), 'A') ||
|
e.search_vector,
|
||||||
setweight(to_tsvector('english', e.content), 'B'),
|
|
||||||
plainto_tsquery('english', ${sanitizedQuery})
|
plainto_tsquery('english', ${sanitizedQuery})
|
||||||
) DESC
|
) DESC
|
||||||
) AS rank
|
) AS rank
|
||||||
FROM knowledge_entries e
|
FROM knowledge_entries e
|
||||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||||
${statusFilter}
|
${statusFilter}
|
||||||
AND (
|
AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||||
to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
|
|
||||||
OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
|
|
||||||
)
|
|
||||||
),
|
),
|
||||||
combined AS (
|
combined AS (
|
||||||
SELECT
|
SELECT
|
||||||
@@ -660,10 +647,7 @@ export class SearchService {
|
|||||||
FROM knowledge_entries e
|
FROM knowledge_entries e
|
||||||
WHERE e.workspace_id = ${workspaceId}::uuid
|
WHERE e.workspace_id = ${workspaceId}::uuid
|
||||||
${statusFilter}
|
${statusFilter}
|
||||||
AND (
|
AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
|
||||||
to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
|
|
||||||
OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
SELECT COUNT(DISTINCT id) as count
|
SELECT COUNT(DISTINCT id) as count
|
||||||
FROM (
|
FROM (
|
||||||
|
|||||||
52
docs/scratchpads/65-full-text-search.md
Normal file
52
docs/scratchpads/65-full-text-search.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# Issue #65: [KNOW-013] Full-Text Search Setup
|
||||||
|
|
||||||
|
## Objective
|
||||||
|
|
||||||
|
Set up PostgreSQL full-text search for entries in the knowledge module with weighted fields and proper indexing.
|
||||||
|
|
||||||
|
## Approach
|
||||||
|
|
||||||
|
1. Examine current Prisma schema for knowledge entries
|
||||||
|
2. Write tests for full-text search functionality (TDD)
|
||||||
|
3. Add tsvector column to knowledge entries table
|
||||||
|
4. Create GIN index for performance
|
||||||
|
5. Implement trigger to maintain tsvector on insert/update
|
||||||
|
6. Weight fields: title (A), summary (B), content (C)
|
||||||
|
7. Verify with sample queries
|
||||||
|
|
||||||
|
## Progress
|
||||||
|
|
||||||
|
- [x] Create scratchpad
|
||||||
|
- [x] Read Prisma schema
|
||||||
|
- [x] Examine existing search service
|
||||||
|
- [x] Write failing tests for tsvector column (RED)
|
||||||
|
- [x] Create migration with tsvector column, GIN index, and triggers
|
||||||
|
- [x] Update Prisma schema to include tsvector
|
||||||
|
- [x] Update search service to use precomputed tsvector (GREEN)
|
||||||
|
- [x] Run tests and verify coverage (8/8 integration tests pass, 205/225 knowledge module tests pass)
|
||||||
|
- [x] Run quality checks (typecheck and lint pass)
|
||||||
|
- [ ] Commit changes
|
||||||
|
|
||||||
|
## Current State
|
||||||
|
|
||||||
|
The search service already implements full-text search using `to_tsvector` and `ts_rank`
|
||||||
|
in raw SQL queries, but it calculates tsvector on-the-fly. This is inefficient for large
|
||||||
|
datasets. The migration will:
|
||||||
|
|
||||||
|
1. Add a `search_vector` tsvector column to knowledge_entries
|
||||||
|
2. Create a GIN index on search_vector for fast lookups
|
||||||
|
3. Add a trigger to automatically update search_vector on insert/update
|
||||||
|
4. Use weighted fields: title (A), summary (B), content (C)
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
- Unit tests for search query generation
|
||||||
|
- Integration tests with actual database queries
|
||||||
|
- Performance verification with sample data
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Using PostgreSQL's built-in full-text search capabilities
|
||||||
|
- GIN index for fast text search
|
||||||
|
- Automatic maintenance via triggers
|
||||||
|
- Field weights: A (title) > B (summary) > C (content)
|
||||||
Reference in New Issue
Block a user