Files
stack/apps/api/src/knowledge/search.controller.ts
Jason Woltje 3ec2059470
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
ci/woodpecker/pr/woodpecker Pipeline failed
feat: add semantic search with pgvector (closes #68, #69, #70)
Issues resolved:
- #68: pgvector Setup
  * Added pgvector vector index migration for knowledge_embeddings
  * Vector index uses HNSW algorithm with cosine distance
  * Optimized for 1536-dimension OpenAI embeddings

- #69: Embedding Generation Pipeline
  * Created EmbeddingService with OpenAI integration
  * Automatic embedding generation on entry create/update
  * Batch processing endpoint for existing entries
  * Async generation to avoid blocking API responses
  * Content preparation with title weighting

- #70: Semantic Search API
  * POST /api/knowledge/search/semantic - pure vector search
  * POST /api/knowledge/search/hybrid - RRF combined search
  * POST /api/knowledge/embeddings/batch - batch generation
  * Comprehensive test coverage
  * Full documentation in docs/SEMANTIC_SEARCH.md

Technical details:
- Uses OpenAI text-embedding-3-small model (1536 dims)
- HNSW index for O(log n) similarity search
- Reciprocal Rank Fusion for hybrid search
- Graceful degradation when OpenAI not configured
- Async embedding generation for performance

Configuration:
- Added OPENAI_API_KEY to .env.example
- Optional feature - disabled if API key not set
- Falls back to keyword search in hybrid mode
2026-01-30 15:19:13 -06:00

153 lines
4.7 KiB
TypeScript

import {
  BadRequestException,
  Body,
  Controller,
  Get,
  Post,
  Query,
  UseGuards,
} from "@nestjs/common";
import { SearchService, PaginatedSearchResults } from "./services/search.service";
import { SearchQueryDto, TagSearchDto, RecentEntriesDto } from "./dto";
import { AuthGuard } from "../auth/guards/auth.guard";
import { WorkspaceGuard, PermissionGuard } from "../common/guards";
import { Workspace, Permission, RequirePermission } from "../common/decorators";
import { EntryStatus } from "@prisma/client";
import type {
  PaginatedEntries,
  KnowledgeEntryWithTags,
} from "./entities/knowledge-entry.entity";
/**
 * Payload returned by the "recent entries" endpoint: the entries themselves
 * plus how many of them are included in this response.
 */
interface RecentEntriesResponse {
  /** Entries returned for the request. */
  data: Array<KnowledgeEntryWithTags>;
  /** Number of items in `data`. */
  count: number;
}
/**
 * Controller for the knowledge search endpoints mounted at `knowledge/search`.
 *
 * Every route runs behind AuthGuard, WorkspaceGuard and PermissionGuard and
 * requires `Permission.WORKSPACE_ANY`. The controller itself only unpacks
 * request input and delegates to SearchService.
 *
 * The two POST routes (`semantic`, `hybrid`) receive their body as an inline
 * object type rather than a DTO class, so their input is checked by hand in
 * this controller (see `parseVectorSearchRequest`).
 */
@Controller("knowledge/search")
@UseGuards(AuthGuard, WorkspaceGuard, PermissionGuard)
export class SearchController {
  /** Largest `limit` accepted by the semantic/hybrid routes (documented max). */
  private static readonly MAX_VECTOR_SEARCH_LIMIT = 100;

  constructor(private readonly searchService: SearchService) {}

  /**
   * GET /api/knowledge/search
   * Full-text search across knowledge entries.
   * Requires: Any workspace member
   *
   * @query q - The search query string (required)
   * @query status - Filter by entry status (optional)
   * @query page - Page number (documented default: 1)
   * @query limit - Results per page (documented default: 20, max: 100)
   *
   * Defaults and bounds are not applied in this method; the DTO fields are
   * forwarded to SearchService.search unchanged.
   */
  @Get()
  @RequirePermission(Permission.WORKSPACE_ANY)
  async search(
    @Workspace() workspaceId: string,
    @Query() query: SearchQueryDto
  ): Promise<PaginatedSearchResults> {
    return this.searchService.search(query.q, workspaceId, {
      status: query.status,
      page: query.page,
      limit: query.limit,
    });
  }

  /**
   * GET /api/knowledge/search/by-tags
   * Search entries by tags (documented as: entries must have ALL given tags).
   * Requires: Any workspace member
   *
   * @query tags - Tag slugs to match (required)
   * @query status - Filter by entry status (optional)
   * @query page - Page number (documented default: 1)
   * @query limit - Results per page (documented default: 20, max: 100)
   *
   * The DTO fields are forwarded to SearchService.searchByTags unchanged.
   */
  @Get("by-tags")
  @RequirePermission(Permission.WORKSPACE_ANY)
  async searchByTags(
    @Workspace() workspaceId: string,
    @Query() query: TagSearchDto
  ): Promise<PaginatedEntries> {
    return this.searchService.searchByTags(query.tags, workspaceId, {
      status: query.status,
      page: query.page,
      limit: query.limit,
    });
  }

  /**
   * GET /api/knowledge/search/recent
   * Get recently modified entries.
   * Requires: Any workspace member
   *
   * @query limit - Maximum number of entries (default: 10; documented max: 50)
   * @query status - Filter by entry status (optional)
   * @returns The entries plus their count.
   */
  @Get("recent")
  @RequirePermission(Permission.WORKSPACE_ANY)
  async recentEntries(
    @Workspace() workspaceId: string,
    @Query() query: RecentEntriesDto
  ): Promise<RecentEntriesResponse> {
    const entries = await this.searchService.recentEntries(
      workspaceId,
      // `||` rather than `??`: a limit of 0 also falls back to the default of 10.
      query.limit || 10,
      query.status
    );
    return {
      data: entries,
      count: entries.length,
    };
  }

  /**
   * POST /api/knowledge/search/semantic
   * Semantic search using vector similarity.
   * Requires: Any workspace member, OpenAI API key configured
   *
   * @body query - The search query string (required, must not be blank)
   * @body status - Filter by entry status (optional)
   * @query page - Page number (positive integer, optional)
   * @query limit - Results per page (positive integer, max: 100, optional)
   * @throws BadRequestException when the body or pagination input is malformed.
   */
  @Post("semantic")
  @RequirePermission(Permission.WORKSPACE_ANY)
  async semanticSearch(
    @Workspace() workspaceId: string,
    @Body() body: { query: string; status?: EntryStatus },
    @Query("page") page?: number,
    @Query("limit") limit?: number
  ): Promise<PaginatedSearchResults> {
    const request = SearchController.parseVectorSearchRequest(body, page, limit);
    return this.searchService.semanticSearch(request.query, workspaceId, request.options);
  }

  /**
   * POST /api/knowledge/search/hybrid
   * Hybrid search combining vector similarity and full-text search.
   * Requires: Any workspace member
   *
   * @body query - The search query string (required, must not be blank)
   * @body status - Filter by entry status (optional)
   * @query page - Page number (positive integer, optional)
   * @query limit - Results per page (positive integer, max: 100, optional)
   * @throws BadRequestException when the body or pagination input is malformed.
   */
  @Post("hybrid")
  @RequirePermission(Permission.WORKSPACE_ANY)
  async hybridSearch(
    @Workspace() workspaceId: string,
    @Body() body: { query: string; status?: EntryStatus },
    @Query("page") page?: number,
    @Query("limit") limit?: number
  ): Promise<PaginatedSearchResults> {
    const request = SearchController.parseVectorSearchRequest(body, page, limit);
    return this.searchService.hybridSearch(request.query, workspaceId, request.options);
  }

  /**
   * Validates the raw input shared by the semantic and hybrid routes.
   *
   * All three arguments are treated as `unknown` because the declared
   * parameter types are compile-time only: the body is an inline object type
   * and the query parameters may still be raw strings at runtime.
   *
   * @returns The query text exactly as sent, plus the options object to hand
   *          to SearchService. Absent `page`/`limit`/`status` stay `undefined`
   *          so the service keeps deciding the defaults.
   * @throws BadRequestException on a missing/non-object body, a blank or
   *         non-string `query`, an unknown `status`, or invalid pagination.
   */
  private static parseVectorSearchRequest(
    body: unknown,
    page: unknown,
    limit: unknown
  ): {
    query: string;
    options: {
      status: EntryStatus | undefined;
      page: number | undefined;
      limit: number | undefined;
    };
  } {
    if (typeof body !== "object" || body === null) {
      throw new BadRequestException("Request body must be a JSON object");
    }

    const { query, status } = body as { query?: unknown; status?: unknown };
    if (typeof query !== "string" || query.trim().length === 0) {
      throw new BadRequestException("query must be a non-empty string");
    }

    return {
      query,
      options: {
        status: SearchController.parseStatus(status),
        page: SearchController.parsePositiveInt(page, "page"),
        limit: SearchController.parsePositiveInt(
          limit,
          "limit",
          SearchController.MAX_VECTOR_SEARCH_LIMIT
        ),
      },
    };
  }

  /**
   * Returns `raw` as an EntryStatus, or `undefined` when it was not supplied.
   *
   * @throws BadRequestException when `raw` is not one of the EntryStatus values.
   */
  private static parseStatus(raw: unknown): EntryStatus | undefined {
    if (raw === undefined || raw === null) {
      return undefined;
    }
    const allowed: readonly string[] = Object.values(EntryStatus);
    if (typeof raw !== "string" || !allowed.includes(raw)) {
      throw new BadRequestException(`status must be one of: ${allowed.join(", ")}`);
    }
    // Safe: membership in the EntryStatus values was checked just above.
    return raw as EntryStatus;
  }

  /**
   * Coerces a pagination parameter to a positive integer.
   *
   * Accepts a number or a string of decimal digits. `undefined`, `null` and
   * a numeric `NaN` are treated as "not supplied" and yield `undefined`; NaN
   * is included because upstream numeric coercion of a missing parameter can
   * produce it (NOTE(review): depends on the app's global pipe setup).
   *
   * @param raw - Value received for the parameter.
   * @param name - Parameter name used in the error message.
   * @param max - Optional inclusive upper bound.
   * @throws BadRequestException when the value is present but not an integer
   *         in the range 1..max.
   */
  private static parsePositiveInt(raw: unknown, name: string, max?: number): number | undefined {
    if (raw === undefined || raw === null) {
      return undefined;
    }
    if (typeof raw === "number" && Number.isNaN(raw)) {
      return undefined;
    }

    let value = Number.NaN;
    if (typeof raw === "number") {
      value = raw;
    } else if (typeof raw === "string" && /^\d+$/.test(raw.trim())) {
      value = Number(raw.trim());
    }

    if (!Number.isSafeInteger(value) || value < 1) {
      throw new BadRequestException(`${name} must be a positive integer`);
    }
    if (max !== undefined && value > max) {
      throw new BadRequestException(`${name} must not be greater than ${max}`);
    }
    return value;
  }
}