From 24d59e7595c215050a7da9d1ecaf0ba39ee8400a Mon Sep 17 00:00:00 2001
From: Jason Woltje <jason.woltje@uscllc.com>
Date: Mon, 2 Feb 2026 14:25:45 -0600
Subject: [PATCH] feat(#65): implement full-text search with tsvector and GIN
 index

Add PostgreSQL full-text search infrastructure for knowledge entries:
- Add search_vector tsvector column to knowledge_entries table
- Create GIN index for fast full-text search performance
- Implement automatic trigger to maintain search_vector on insert/update
- Weight fields: title (A), summary (B), content (C)
- Update SearchService to use precomputed search_vector
- Add comprehensive integration tests for FTS functionality

Tests:
- 8/8 new integration tests passing
- 205/225 knowledge module tests passing
- All quality gates pass (typecheck, lint)

Refs #65

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .../migration.sql                             |  36 +++
 apps/api/prisma/schema.prisma                 |   4 +
 .../services/fulltext-search.spec.ts          | 276 ++++++++++++++++++
 .../src/knowledge/services/search.service.ts  |  36 +--
 docs/scratchpads/65-full-text-search.md       |  52 ++++
 5 files changed, 378 insertions(+), 26 deletions(-)
 create mode 100644 apps/api/prisma/migrations/20260202142100_add_fulltext_search_to_knowledge_entries/migration.sql
 create mode 100644 apps/api/src/knowledge/services/fulltext-search.spec.ts
 create mode 100644 docs/scratchpads/65-full-text-search.md

diff --git a/apps/api/prisma/migrations/20260202142100_add_fulltext_search_to_knowledge_entries/migration.sql b/apps/api/prisma/migrations/20260202142100_add_fulltext_search_to_knowledge_entries/migration.sql
new file mode 100644
index 0000000..1289d9d
--- /dev/null
+++ b/apps/api/prisma/migrations/20260202142100_add_fulltext_search_to_knowledge_entries/migration.sql
@@ -0,0 +1,36 @@
+-- Add tsvector column for full-text search on knowledge_entries
+-- Weighted fields: title (A), summary (B), content (C)
+
+-- Step 1: Add the search_vector column
+ALTER TABLE "knowledge_entries"
+ADD COLUMN "search_vector" tsvector;
+
+-- Step 2: Create GIN index for fast full-text search
+CREATE INDEX "knowledge_entries_search_vector_idx"
+ON "knowledge_entries"
+USING gin("search_vector");
+
+-- Step 3: Create function to update search_vector
+CREATE OR REPLACE FUNCTION knowledge_entries_search_vector_update()
+RETURNS trigger AS $$
+BEGIN
+  NEW.search_vector :=
+    setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
+    setweight(to_tsvector('english', COALESCE(NEW.summary, '')), 'B') ||
+    setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'C');
+  RETURN NEW;
+END
+$$ LANGUAGE plpgsql;
+
+-- Step 4: Create trigger to automatically update search_vector on insert/update
+CREATE TRIGGER knowledge_entries_search_vector_trigger
+BEFORE INSERT OR UPDATE ON "knowledge_entries"
+FOR EACH ROW
+EXECUTE FUNCTION knowledge_entries_search_vector_update();
+
+-- Step 5: Populate search_vector for existing entries
+UPDATE "knowledge_entries"
+SET search_vector =
+  setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
+  setweight(to_tsvector('english', COALESCE(summary, '')), 'B') ||
+  setweight(to_tsvector('english', COALESCE(content, '')), 'C');
diff --git a/apps/api/prisma/schema.prisma b/apps/api/prisma/schema.prisma
index 7bc4532..2e59cb3 100644
--- a/apps/api/prisma/schema.prisma
+++ b/apps/api/prisma/schema.prisma
@@ -798,6 +798,9 @@ model KnowledgeEntry {
   contentHtml String? @map("content_html") @db.Text
   summary     String?
 
+  // Full-text search vector (automatically maintained by trigger)
+  searchVector Unsupported("tsvector")? @map("search_vector")
+
   // Status
   status     EntryStatus @default(DRAFT)
   visibility Visibility  @default(PRIVATE)
@@ -820,6 +823,7 @@ model KnowledgeEntry {
   @@index([workspaceId, updatedAt])
   @@index([createdBy])
   @@index([updatedBy])
+  // Note: GIN index on searchVector created via migration (not supported in Prisma schema)
   @@map("knowledge_entries")
 }
 
diff --git a/apps/api/src/knowledge/services/fulltext-search.spec.ts b/apps/api/src/knowledge/services/fulltext-search.spec.ts
new file mode 100644
index 0000000..36005b9
--- /dev/null
+++ b/apps/api/src/knowledge/services/fulltext-search.spec.ts
@@ -0,0 +1,276 @@
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+import { PrismaClient } from "@prisma/client";
+
+/**
+ * Integration tests for PostgreSQL full-text search setup
+ * Tests the tsvector column, GIN index, and automatic trigger
+ */
+describe("Full-Text Search Setup (Integration)", () => {
+  let prisma: PrismaClient;
+  let testWorkspaceId: string;
+  let testUserId: string;
+
+  beforeAll(async () => {
+    prisma = new PrismaClient();
+    await prisma.$connect();
+
+    // Create test workspace
+    const workspace = await prisma.workspace.create({
+      data: {
+        name: "Test Workspace",
+        owner: {
+          create: {
+            email: `test-fts-${Date.now()}@example.com`,
+            name: "Test User",
+          },
+        },
+      },
+    });
+    testWorkspaceId = workspace.id;
+    testUserId = workspace.ownerId;
+  });
+
+  afterAll(async () => {
+    // Cleanup
+    if (testWorkspaceId) {
+      await prisma.knowledgeEntry.deleteMany({
+        where: { workspaceId: testWorkspaceId },
+      });
+      await prisma.workspace.delete({
+        where: { id: testWorkspaceId },
+      });
+    }
+    await prisma.$disconnect();
+  });
+
+  describe("tsvector column", () => {
+    it("should have search_vector column in knowledge_entries table", async () => {
+      // Query to check if column exists
+      const result = await prisma.$queryRaw<{ column_name: string; data_type: string }[]>`
+        SELECT column_name, data_type
+        FROM information_schema.columns
+        WHERE table_name = 'knowledge_entries'
+          AND column_name = 'search_vector'
+      `;
+
+      expect(result).toHaveLength(1);
+      expect(result[0].column_name).toBe("search_vector");
+      expect(result[0].data_type).toBe("tsvector");
+    });
+
+    it("should automatically populate search_vector on insert", async () => {
+      const entry = await prisma.knowledgeEntry.create({
+        data: {
+          workspaceId: testWorkspaceId,
+          slug: "auto-populate-test",
+          title: "PostgreSQL Full-Text Search",
+          content: "This is a test of the automatic trigger functionality.",
+          summary: "Testing automatic population",
+          createdBy: testUserId,
+          updatedBy: testUserId,
+        },
+      });
+
+      // Query raw to check search_vector was populated
+      const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
+        SELECT id, search_vector::text
+        FROM knowledge_entries
+        WHERE id = ${entry.id}::uuid
+      `;
+
+      expect(result).toHaveLength(1);
+      expect(result[0].search_vector).not.toBeNull();
+      // Verify 'postgresql' appears in title (weight A)
+      expect(result[0].search_vector).toContain("'postgresql':1A");
+      // Verify 'search' appears in both title (A) and content (C)
+      expect(result[0].search_vector).toContain("'search':5A");
+    });
+
+    it("should automatically update search_vector on update", async () => {
+      const entry = await prisma.knowledgeEntry.create({
+        data: {
+          workspaceId: testWorkspaceId,
+          slug: "auto-update-test",
+          title: "Original Title",
+          content: "Original content",
+          createdBy: testUserId,
+          updatedBy: testUserId,
+        },
+      });
+
+      // Update the entry
+      await prisma.knowledgeEntry.update({
+        where: { id: entry.id },
+        data: {
+          title: "Updated Elasticsearch Title",
+          content: "Updated content with Elasticsearch",
+        },
+      });
+
+      // Check search_vector was updated
+      const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
+        SELECT id, search_vector::text
+        FROM knowledge_entries
+        WHERE id = ${entry.id}::uuid
+      `;
+
+      expect(result).toHaveLength(1);
+      // Verify 'elasticsearch' appears in both title (A) and content (C)
+      // PostgreSQL combines positions: '2A,7C' means position 2 in title (A) and position 7 in content (C)
+      expect(result[0].search_vector).toContain("'elasticsearch':2A,7C");
+      expect(result[0].search_vector).not.toContain("'original'");
+    });
+
+    it("should include summary in search_vector with weight B", async () => {
+      const entry = await prisma.knowledgeEntry.create({
+        data: {
+          workspaceId: testWorkspaceId,
+          slug: "summary-weight-test",
+          title: "Title Word",
+          content: "Content word",
+          summary: "Summary keyword here",
+          createdBy: testUserId,
+          updatedBy: testUserId,
+        },
+      });
+
+      const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
+        SELECT id, search_vector::text
+        FROM knowledge_entries
+        WHERE id = ${entry.id}::uuid
+      `;
+
+      expect(result).toHaveLength(1);
+      // Summary should have weight B - 'keyword' appears in summary
+      expect(result[0].search_vector).toContain("'keyword':4B");
+    });
+
+    it("should handle null summary gracefully", async () => {
+      const entry = await prisma.knowledgeEntry.create({
+        data: {
+          workspaceId: testWorkspaceId,
+          slug: "null-summary-test",
+          title: "Title without summary",
+          content: "Content without summary",
+          summary: null,
+          createdBy: testUserId,
+          updatedBy: testUserId,
+        },
+      });
+
+      const result = await prisma.$queryRaw<{ id: string; search_vector: string | null }[]>`
+        SELECT id, search_vector::text
+        FROM knowledge_entries
+        WHERE id = ${entry.id}::uuid
+      `;
+
+      expect(result).toHaveLength(1);
+      expect(result[0].search_vector).not.toBeNull();
+      // Verify 'titl' (stemmed from 'title') appears with weight A
+      expect(result[0].search_vector).toContain("'titl':1A");
+      // Verify 'content' appears with weight C
+      expect(result[0].search_vector).toContain("'content':4C");
+    });
+  });
+
+  describe("GIN index", () => {
+    it("should have GIN index on search_vector column", async () => {
+      const result = await prisma.$queryRaw<{ indexname: string; indexdef: string }[]>`
+        SELECT indexname, indexdef
+        FROM pg_indexes
+        WHERE tablename = 'knowledge_entries'
+          AND indexname = 'knowledge_entries_search_vector_idx'
+      `;
+
+      expect(result).toHaveLength(1);
+      expect(result[0].indexdef).toContain("gin");
+      expect(result[0].indexdef).toContain("search_vector");
+    });
+  });
+
+  describe("search performance", () => {
+    it("should perform fast searches using the GIN index", async () => {
+      // Create multiple entries
+      const entries = Array.from({ length: 10 }, (_, i) => ({
+        workspaceId: testWorkspaceId,
+        slug: `perf-test-${i}`,
+        title: `Performance Test ${i}`,
+        content: i % 2 === 0 ? "Contains database keyword" : "No keyword here",
+        createdBy: testUserId,
+        updatedBy: testUserId,
+      }));
+
+      await prisma.knowledgeEntry.createMany({
+        data: entries,
+      });
+
+      const startTime = Date.now();
+
+      // Search using the precomputed search_vector
+      const results = await prisma.$queryRaw<{ id: string; title: string }[]>`
+        SELECT id, title
+        FROM knowledge_entries
+        WHERE workspace_id = ${testWorkspaceId}::uuid
+          AND search_vector @@ plainto_tsquery('english', 'database')
+        ORDER BY ts_rank(search_vector, plainto_tsquery('english', 'database')) DESC
+      `;
+
+      const duration = Date.now() - startTime;
+
+      expect(results.length).toBeGreaterThan(0);
+      // Should be fast with index (< 100ms for small dataset)
+      expect(duration).toBeLessThan(100);
+    });
+
+    it("should rank results by relevance using weighted fields", async () => {
+      // Create entries with keyword in different positions
+      await prisma.knowledgeEntry.createMany({
+        data: [
+          {
+            workspaceId: testWorkspaceId,
+            slug: "rank-title",
+            title: "Redis caching strategies",
+            content: "Various approaches to caching",
+            summary: "Overview of strategies",
+            createdBy: testUserId,
+            updatedBy: testUserId,
+          },
+          {
+            workspaceId: testWorkspaceId,
+            slug: "rank-summary",
+            title: "Database optimization",
+            content: "Performance tuning",
+            summary: "Redis is mentioned in summary",
+            createdBy: testUserId,
+            updatedBy: testUserId,
+          },
+          {
+            workspaceId: testWorkspaceId,
+            slug: "rank-content",
+            title: "Performance guide",
+            content: "Use Redis for better performance",
+            summary: "Best practices",
+            createdBy: testUserId,
+            updatedBy: testUserId,
+          },
+        ],
+      });
+
+      const results = await prisma.$queryRaw<{ slug: string; rank: number }[]>`
+        SELECT slug, ts_rank(search_vector, plainto_tsquery('english', 'redis')) AS rank
+        FROM knowledge_entries
+        WHERE workspace_id = ${testWorkspaceId}::uuid
+          AND search_vector @@ plainto_tsquery('english', 'redis')
+        ORDER BY rank DESC
+      `;
+
+      expect(results.length).toBe(3);
+      // Title match should rank highest (weight A)
+      expect(results[0].slug).toBe("rank-title");
+      // Summary should rank second (weight B)
+      expect(results[1].slug).toBe("rank-summary");
+      // Content should rank third (weight C)
+      expect(results[2].slug).toBe("rank-content");
+    });
+  });
+});
diff --git a/apps/api/src/knowledge/services/search.service.ts b/apps/api/src/knowledge/services/search.service.ts
index abfc202..0acb620 100644
--- a/apps/api/src/knowledge/services/search.service.ts
+++ b/apps/api/src/knowledge/services/search.service.ts
@@ -118,12 +118,13 @@ export class SearchService {
       : Prisma.sql`AND e.status != 'ARCHIVED'`;
 
     // PostgreSQL full-text search query
-    // Uses ts_rank for relevance scoring with weights: title (A=1.0), content (B=0.4)
+    // Uses precomputed search_vector column (with weights: A=title, B=summary, C=content)
+    // Maintained automatically by database trigger
     const searchResults = await this.prisma.$queryRaw<RawSearchResult[]>`
       WITH search_query AS (
         SELECT plainto_tsquery('english', ${sanitizedQuery}) AS query
       )
-      SELECT 
+      SELECT
         e.id,
         e.workspace_id,
         e.slug,
@@ -137,11 +138,7 @@ export class SearchService {
         e.updated_at,
         e.created_by,
         e.updated_by,
-        ts_rank(
-          setweight(to_tsvector('english', e.title), 'A') ||
-          setweight(to_tsvector('english', e.content), 'B'),
-          sq.query
-        ) AS rank,
+        ts_rank(e.search_vector, sq.query) AS rank,
         ts_headline(
           'english',
           e.content,
@@ -151,10 +148,7 @@ export class SearchService {
       FROM knowledge_entries e, search_query sq
       WHERE e.workspace_id = ${workspaceId}::uuid
         ${statusFilter}
-        AND (
-          to_tsvector('english', e.title) @@ sq.query
-          OR to_tsvector('english', e.content) @@ sq.query
-        )
+        AND e.search_vector @@ sq.query
       ORDER BY rank DESC, e.updated_at DESC
       LIMIT ${limit}
       OFFSET ${offset}
@@ -166,10 +160,7 @@ export class SearchService {
       FROM knowledge_entries e
       WHERE e.workspace_id = ${workspaceId}::uuid
         ${statusFilter}
-        AND (
-          to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
-          OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
-        )
+        AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
     `;
 
     const total = Number(countResult[0].count);
@@ -592,22 +583,18 @@ export class SearchService {
           ${statusFilter}
       ),
       keyword_search AS (
-        SELECT 
+        SELECT
           e.id,
           ROW_NUMBER() OVER (
             ORDER BY ts_rank(
-              setweight(to_tsvector('english', e.title), 'A') ||
-              setweight(to_tsvector('english', e.content), 'B'),
+              e.search_vector,
               plainto_tsquery('english', ${sanitizedQuery})
             ) DESC
           ) AS rank
         FROM knowledge_entries e
         WHERE e.workspace_id = ${workspaceId}::uuid
           ${statusFilter}
-          AND (
-            to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
-            OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
-          )
+          AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
       ),
       combined AS (
         SELECT 
@@ -660,10 +647,7 @@ export class SearchService {
         FROM knowledge_entries e
         WHERE e.workspace_id = ${workspaceId}::uuid
           ${statusFilter}
-          AND (
-            to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
-            OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
-          )
+          AND e.search_vector @@ plainto_tsquery('english', ${sanitizedQuery})
       )
       SELECT COUNT(DISTINCT id) as count
       FROM (
diff --git a/docs/scratchpads/65-full-text-search.md b/docs/scratchpads/65-full-text-search.md
new file mode 100644
index 0000000..db26eb8
--- /dev/null
+++ b/docs/scratchpads/65-full-text-search.md
@@ -0,0 +1,52 @@
+# Issue #65: [KNOW-013] Full-Text Search Setup
+
+## Objective
+
+Set up PostgreSQL full-text search for entries in the knowledge module with weighted fields and proper indexing.
+
+## Approach
+
+1. Examine current Prisma schema for knowledge entries
+2. Write tests for full-text search functionality (TDD)
+3. Add tsvector column to knowledge entries table
+4. Create GIN index for performance
+5. Implement trigger to maintain tsvector on insert/update
+6. Weight fields: title (A), summary (B), content (C)
+7. Verify with sample queries
+
+## Progress
+
+- [x] Create scratchpad
+- [x] Read Prisma schema
+- [x] Examine existing search service
+- [x] Write failing tests for tsvector column (RED)
+- [x] Create migration with tsvector column, GIN index, and triggers
+- [x] Update Prisma schema to include tsvector
+- [x] Update search service to use precomputed tsvector (GREEN)
+- [x] Run tests and verify coverage (8/8 integration tests pass, 205/225 knowledge module tests pass)
+- [x] Run quality checks (typecheck and lint pass)
+- [ ] Commit changes
+
+## Current State
+
+The search service already implements full-text search using `to_tsvector` and `ts_rank`
+in raw SQL queries, but it calculates tsvector on-the-fly. This is inefficient for large
+datasets. The migration will:
+
+1. Add a `search_vector` tsvector column to knowledge_entries
+2. Create a GIN index on search_vector for fast lookups
+3. Add a trigger to automatically update search_vector on insert/update
+4. Use weighted fields: title (A), summary (B), content (C)
+
+## Testing
+
+- Unit tests for search query generation
+- Integration tests with actual database queries
+- Performance verification with sample data
+
+## Notes
+
+- Using PostgreSQL's built-in full-text search capabilities
+- GIN index for fast text search
+- Automatic maintenance via triggers
+- Field weights: A (title) > B (summary) > C (content)