feat(knowledge): add search service

This commit is contained in:
Jason Woltje
2026-01-29 20:35:07 -06:00
parent c6a65869c6
commit c26b7d4e64
7 changed files with 1109 additions and 3 deletions

View File

@@ -0,0 +1,415 @@
import { Injectable } from "@nestjs/common";
import { EntryStatus, Prisma } from "@prisma/client";
import { PrismaService } from "../../prisma/prisma.service";
import type {
KnowledgeEntryWithTags,
PaginatedEntries,
} from "../entities/knowledge-entry.entity";
/**
 * Optional filter and paging controls accepted by the search methods.
 *
 * Every field may be omitted (or passed explicitly as `undefined`).
 */
export interface SearchOptions {
  /**
   * Restrict results to entries with exactly this status.
   * When omitted, the search methods exclude ARCHIVED entries instead.
   */
  status?: EntryStatus | undefined;

  /** Page number to return, counted from 1. The search methods default to 1. */
  page?: number | undefined;

  /** Number of results per page. The search methods default to 20. */
  limit?: number | undefined;
}
/**
 * A knowledge entry (with its tags) returned by full-text search, extended
 * with the relevance information computed by the database.
 */
export interface SearchResult extends KnowledgeEntryWithTags {
  /** Relevance score produced by `ts_rank`; results are ordered by it, highest first. */
  rank: number;

  /**
   * Excerpt of the entry content produced by `ts_headline`, with matched terms
   * wrapped in `<mark>`…`</mark>`. `undefined` when the database returned no
   * headline.
   */
  headline?: string | undefined;
}
/**
 * One page of full-text search results together with paging metadata.
 */
export interface PaginatedSearchResults {
  /** The query string exactly as the caller supplied it (not the sanitized form). */
  query: string;

  /** The results on the requested page, ordered by relevance. */
  data: SearchResult[];

  /** Paging metadata describing where `data` sits in the full result set. */
  pagination: {
    /** Page number that was returned. */
    page: number;
    /** Page size that was applied. */
    limit: number;
    /** Total number of entries matching the query across all pages. */
    total: number;
    /** `total` divided by `limit`, rounded up. */
    totalPages: number;
  };
}
/**
 * Shape of one row returned by the raw full-text search SQL query.
 *
 * Property names are the snake_case column names / aliases selected by that
 * query; the service converts each row to the camelCase `SearchResult` shape.
 */
interface RawSearchResult {
  // Identity
  id: string;
  workspace_id: string;
  slug: string;

  // Content
  title: string;
  content: string;
  content_html: string | null;
  summary: string | null;

  // Lifecycle
  status: EntryStatus;
  visibility: string;

  // Audit columns
  created_at: Date;
  updated_at: Date;
  created_by: string;
  updated_by: string;

  // Computed by the query: `ts_rank(...) AS rank`, `ts_headline(...) AS headline`
  rank: number;
  headline: string | null;
}
/**
 * Service for searching knowledge entries.
 *
 * Offers three read-only queries, each scoped to a single workspace:
 * - `search`: ranked PostgreSQL full-text search over title and content,
 * - `searchByTags`: entries carrying ALL of the given tag slugs,
 * - `recentEntries`: the most recently updated entries.
 *
 * Unless a status is requested explicitly, ARCHIVED entries are excluded.
 */
@Injectable()
export class SearchService {
  /** Page size used when the caller supplies no usable `limit`. */
  private static readonly DEFAULT_PAGE_SIZE = 20;

  constructor(private readonly prisma: PrismaService) {}

  /**
   * Full-text search on title and content using PostgreSQL `tsvector` matching.
   *
   * Results are ordered by `ts_rank` (highest first), then by most recently
   * updated. An empty query (after sanitizing) returns an empty page without
   * touching the database.
   *
   * @param query - The search query string
   * @param workspaceId - The workspace to search within
   * @param options - Search options (status filter, pagination)
   * @returns Paginated search results ranked by relevance; `query` in the
   *   result is the caller's original string
   */
  async search(
    query: string,
    workspaceId: string,
    options: SearchOptions = {}
  ): Promise<PaginatedSearchResults> {
    const { page, limit, offset } = this.normalizePagination(options);

    const sanitizedQuery = this.sanitizeSearchQuery(query);
    if (!sanitizedQuery) {
      return {
        data: [],
        pagination: { page, limit, total: 0, totalPages: 0 },
        query,
      };
    }

    // Explicit status wins; otherwise hide archived entries.
    const statusFilter = options.status
      ? Prisma.sql`AND e.status = ${options.status}::text::"EntryStatus"`
      : Prisma.sql`AND e.status != 'ARCHIVED'`;

    // The page query and the count query are independent, so run them
    // concurrently instead of back to back.
    const [searchResults, countResult] = await Promise.all([
      // Uses ts_rank for relevance scoring with weights: title (A=1.0), content (B=0.4).
      // NOTE(review): PostgreSQL does not guarantee ts_headline output is safe
      // for direct inclusion in web pages; confirm consumers sanitize
      // `headline` before rendering it as HTML.
      this.prisma.$queryRaw<RawSearchResult[]>`
        WITH search_query AS (
          SELECT plainto_tsquery('english', ${sanitizedQuery}) AS query
        )
        SELECT
          e.id,
          e.workspace_id,
          e.slug,
          e.title,
          e.content,
          e.content_html,
          e.summary,
          e.status,
          e.visibility,
          e.created_at,
          e.updated_at,
          e.created_by,
          e.updated_by,
          ts_rank(
            setweight(to_tsvector('english', e.title), 'A') ||
            setweight(to_tsvector('english', e.content), 'B'),
            sq.query
          ) AS rank,
          ts_headline(
            'english',
            e.content,
            sq.query,
            'MaxWords=50, MinWords=25, StartSel=<mark>, StopSel=</mark>'
          ) AS headline
        FROM knowledge_entries e, search_query sq
        WHERE e.workspace_id = ${workspaceId}::uuid
          ${statusFilter}
          AND (
            to_tsvector('english', e.title) @@ sq.query
            OR to_tsvector('english', e.content) @@ sq.query
          )
        ORDER BY rank DESC, e.updated_at DESC
        LIMIT ${limit}
        OFFSET ${offset}
      `,
      // Total number of matches (same predicate, no LIMIT/OFFSET) for pagination.
      this.prisma.$queryRaw<[{ count: bigint }]>`
        SELECT COUNT(*) as count
        FROM knowledge_entries e
        WHERE e.workspace_id = ${workspaceId}::uuid
          ${statusFilter}
          AND (
            to_tsvector('english', e.title) @@ plainto_tsquery('english', ${sanitizedQuery})
            OR to_tsvector('english', e.content) @@ plainto_tsquery('english', ${sanitizedQuery})
          )
      `,
    ]);

    // COUNT(*) arrives as a bigint; convert for the JSON-friendly response.
    const total = Number(countResult[0].count);

    // The raw query does not join tags, so load them for this page in one go.
    const tagsMap = await this.fetchTagsForEntries(
      searchResults.map((row) => row.id)
    );

    // Convert snake_case rows to the camelCase response shape.
    const data: SearchResult[] = searchResults.map((row) => ({
      id: row.id,
      workspaceId: row.workspace_id,
      slug: row.slug,
      title: row.title,
      content: row.content,
      contentHtml: row.content_html,
      summary: row.summary,
      status: row.status,
      visibility: row.visibility as "PRIVATE" | "WORKSPACE" | "PUBLIC",
      createdAt: row.created_at,
      updatedAt: row.updated_at,
      createdBy: row.created_by,
      updatedBy: row.updated_by,
      rank: row.rank,
      headline: row.headline ?? undefined,
      tags: tagsMap.get(row.id) ?? [],
    }));

    return {
      data,
      pagination: {
        page,
        limit,
        total,
        totalPages: Math.ceil(total / limit),
      },
      query,
    };
  }

  /**
   * Search entries by tags (entries must have ALL specified tags).
   *
   * Results are ordered by most recently updated. An empty or missing tag
   * list returns an empty page without touching the database.
   *
   * @param tags - Array of tag slugs to filter by
   * @param workspaceId - The workspace to search within
   * @param options - Search options (status filter, pagination)
   * @returns Paginated entries that have all specified tags
   */
  async searchByTags(
    tags: string[],
    workspaceId: string,
    options: SearchOptions = {}
  ): Promise<PaginatedEntries> {
    const { page, limit, offset: skip } = this.normalizePagination(options);

    if (!tags || tags.length === 0) {
      return {
        data: [],
        pagination: { page, limit, total: 0, totalPages: 0 },
      };
    }

    // Repeated slugs would only add redundant, identical AND clauses.
    const uniqueTags = Array.from(new Set(tags));

    // One `some` condition per slug, AND-ed together => entry must carry every tag.
    const where: Prisma.KnowledgeEntryWhereInput = {
      workspaceId,
      status: options.status || { not: EntryStatus.ARCHIVED },
      AND: uniqueTags.map((tagSlug) => ({
        tags: {
          some: {
            tag: {
              slug: tagSlug,
            },
          },
        },
      })),
    };

    // Count and page fetch are independent; run them concurrently.
    const [total, entries] = await Promise.all([
      this.prisma.knowledgeEntry.count({ where }),
      this.prisma.knowledgeEntry.findMany({
        where,
        include: {
          tags: {
            include: {
              tag: true,
            },
          },
        },
        orderBy: {
          updatedAt: "desc",
        },
        skip,
        take: limit,
      }),
    ]);

    return {
      data: entries.map((entry) => this.toEntryWithTags(entry)),
      pagination: {
        page,
        limit,
        total,
        totalPages: Math.ceil(total / limit),
      },
    };
  }

  /**
   * Get recently modified entries, newest first.
   *
   * @param workspaceId - The workspace to query
   * @param limit - Maximum number of entries to return (default: 10)
   * @param status - Optional status filter; when omitted, ARCHIVED entries are excluded
   * @returns Array of recently modified entries
   */
  async recentEntries(
    workspaceId: string,
    limit: number = 10,
    status?: EntryStatus
  ): Promise<KnowledgeEntryWithTags[]> {
    const where: Prisma.KnowledgeEntryWhereInput = {
      workspaceId,
      status: status || { not: EntryStatus.ARCHIVED },
    };

    const entries = await this.prisma.knowledgeEntry.findMany({
      where,
      include: {
        tags: {
          include: {
            tag: true,
          },
        },
      },
      orderBy: {
        updatedAt: "desc",
      },
      take: limit,
    });

    return entries.map((entry) => this.toEntryWithTags(entry));
  }

  /**
   * Resolve `page` / `limit` from the options into safe values.
   *
   * Each value is coerced to a number and floored; anything that is not a
   * finite integer >= 1 (missing, 0, negative, NaN, Infinity) falls back to
   * the default (page 1, limit 20). This keeps negative or fractional values
   * from reaching SQL `LIMIT` / `OFFSET` or Prisma `skip` / `take`.
   *
   * @returns The resolved page, limit, and the row offset `(page - 1) * limit`
   */
  private normalizePagination(options: SearchOptions): {
    page: number;
    limit: number;
    offset: number;
  } {
    const toPositiveInt = (value: number | undefined, fallback: number): number => {
      const n = Math.floor(Number(value));
      return Number.isFinite(n) && n >= 1 ? n : fallback;
    };

    const page = toPositiveInt(options.page, 1);
    const limit = toPositiveInt(options.limit, SearchService.DEFAULT_PAGE_SIZE);
    return { page, limit, offset: (page - 1) * limit };
  }

  /**
   * Convert a Prisma knowledge entry (loaded with `tags -> tag`) into the
   * `KnowledgeEntryWithTags` response shape, flattening the join rows so that
   * `tags` is a plain list of tag objects.
   */
  private toEntryWithTags(
    entry: Prisma.KnowledgeEntryGetPayload<{
      include: { tags: { include: { tag: true } } };
    }>
  ): KnowledgeEntryWithTags {
    return {
      id: entry.id,
      workspaceId: entry.workspaceId,
      slug: entry.slug,
      title: entry.title,
      content: entry.content,
      contentHtml: entry.contentHtml,
      summary: entry.summary,
      status: entry.status,
      visibility: entry.visibility,
      createdAt: entry.createdAt,
      updatedAt: entry.updatedAt,
      createdBy: entry.createdBy,
      updatedBy: entry.updatedBy,
      tags: entry.tags.map((et) => ({
        id: et.tag.id,
        name: et.tag.name,
        slug: et.tag.slug,
        color: et.tag.color,
      })),
    };
  }

  /**
   * Clean up a user-supplied search string before it is handed to
   * `plainto_tsquery`.
   *
   * Replaces full-text operator characters (`& | ! : * ( )`) with spaces,
   * then collapses runs of whitespace and trims. The query text is always
   * passed to the database as a bound parameter, so this is normalization,
   * not the SQL-injection defence.
   *
   * @returns The cleaned query, or `""` when nothing searchable remains
   */
  private sanitizeSearchQuery(query: string): string {
    if (!query || typeof query !== "string") {
      return "";
    }

    // Strip operators first so the whitespace pass below also collapses the
    // gaps they leave behind ("a & b" -> "a b", not "a   b").
    return query
      .replace(/[&|!:*()]/g, " ")
      .replace(/\s+/g, " ")
      .trim();
  }

  /**
   * Fetch tags for a list of entry IDs with a single query.
   *
   * @param entryIds - IDs of the entries whose tags are needed
   * @returns Map from entry ID to its tags; entries without tags have no key
   */
  private async fetchTagsForEntries(
    entryIds: string[]
  ): Promise<
    Map<
      string,
      Array<{ id: string; name: string; slug: string; color: string | null }>
    >
  > {
    const tagsMap = new Map<
      string,
      Array<{ id: string; name: string; slug: string; color: string | null }>
    >();

    // Nothing to look up; skip the database round trip.
    if (entryIds.length === 0) {
      return tagsMap;
    }

    const entryTags = await this.prisma.knowledgeEntryTag.findMany({
      where: {
        entryId: { in: entryIds },
      },
      include: {
        tag: true,
      },
    });

    // Group the join rows by entry.
    for (const et of entryTags) {
      const tag = {
        id: et.tag.id,
        name: et.tag.name,
        slug: et.tag.slug,
        color: et.tag.color,
      };
      const existing = tagsMap.get(et.entryId);
      if (existing) {
        existing.push(tag);
      } else {
        tagsMap.set(et.entryId, [tag]);
      }
    }

    return tagsMap;
  }
}