feat(knowledge): add markdown rendering (KNOW-004)

- Install marked, marked-highlight, marked-gfm-heading-id, sanitize-html
- Create markdown utility with GFM support (tables, task lists, strikethrough)
- Add code syntax highlighting with highlight.js
- Implement XSS sanitization for security
- Update knowledge service to use markdown renderer
- Add comprehensive test suite (34 tests, all passing)
- Generate IDs for headers for deep linking
- Cache rendered HTML in database for performance
This commit is contained in:
Jason Woltje
2026-01-29 16:57:57 -06:00
parent 4881d0698f
commit 287a0e2556
6 changed files with 839 additions and 12 deletions

View File

@@ -4,7 +4,6 @@ import {
ConflictException,
} from "@nestjs/common";
import { EntryStatus } from "@prisma/client";
import { marked } from "marked";
import slugify from "slugify";
import { PrismaService } from "../prisma/prisma.service";
import type { CreateEntryDto, UpdateEntryDto, EntryQueryDto } from "./dto";
@@ -12,20 +11,15 @@ import type {
KnowledgeEntryWithTags,
PaginatedEntries,
} from "./entities/knowledge-entry.entity";
import { renderMarkdown } from "./utils/markdown";
/**
* Service for managing knowledge entries
*/
@Injectable()
export class KnowledgeService {
constructor(private readonly prisma: PrismaService) {
// Configure marked for security and consistency
marked.setOptions({
gfm: true, // GitHub Flavored Markdown
breaks: false,
pedantic: false,
});
}
constructor(private readonly prisma: PrismaService) {}
/**
* Get all entries for a workspace (paginated and filterable)
@@ -175,8 +169,8 @@ export class KnowledgeService {
const baseSlug = this.generateSlug(createDto.title);
const slug = await this.ensureUniqueSlug(workspaceId, baseSlug);
// Render markdown to HTML
const contentHtml = await marked.parse(createDto.content);
// Render markdown to HTML with sanitization
const contentHtml = await renderMarkdown(createDto.content);
// Use transaction to ensure atomicity
const result = await this.prisma.$transaction(async (tx) => {
@@ -299,7 +293,7 @@ export class KnowledgeService {
// Render markdown if content is updated
let contentHtml = existing.contentHtml;
if (updateDto.content) {
contentHtml = await marked.parse(updateDto.content);
contentHtml = await renderMarkdown(updateDto.content);
}
// Build update data object conditionally

View File

@@ -0,0 +1,121 @@
# Knowledge Module Utilities
## Markdown Rendering
### Overview
The `markdown.ts` utility provides secure markdown rendering with GFM (GitHub Flavored Markdown) support, syntax highlighting, and XSS protection.
### Features
- **GFM Support**: Tables, task lists, strikethrough, autolinks
- **Syntax Highlighting**: Fenced code blocks are highlighted with highlight.js using the language named on the fence; unknown or missing languages fall back to plain text
- **XSS Protection**: HTML sanitization using sanitize-html
- **Header IDs**: Automatic ID generation for headers (for linking)
- **Security**: Blocks dangerous HTML (scripts, iframes, event handlers)
### Usage
```typescript
import { renderMarkdown, markdownToPlainText } from './utils/markdown';
// Render markdown to HTML (async)
const html = await renderMarkdown('# Hello **World**');
// Result: <h1 id="hello-world">Hello <strong>World</strong></h1>
// Extract plain text (for search indexing)
const plainText = await markdownToPlainText('# Hello **World**');
// Result: "Hello World"
```
### Supported Markdown Features
#### Basic Formatting
- **Bold**: `**text**` or `__text__`
- *Italic*: `*text*` or `_text_`
- ~~Strikethrough~~: `~~text~~`
- `Inline code`: `` `code` ``
#### Headers
```markdown
# H1
## H2
### H3
```
#### Lists
```markdown
- Unordered list
- Nested item
1. Ordered list
2. Another item
```
#### Task Lists
```markdown
- [ ] Unchecked task
- [x] Completed task
```
#### Tables
```markdown
| Header 1 | Header 2 |
|----------|----------|
| Cell 1 | Cell 2 |
```
#### Code Blocks
````markdown
```typescript
const greeting: string = "Hello";
console.log(greeting);
```
````
#### Links and Images
```markdown
[Link text](https://example.com)
![Alt text](https://example.com/image.png)
```
#### Blockquotes
```markdown
> This is a quote
> Multi-line quote
```
### Security
The renderer implements multiple layers of security:
1. **HTML Sanitization**: Only allows safe HTML tags and attributes
2. **URL Validation**: Only the `http`, `https`, and `mailto` schemes are allowed (plus `data:` URIs for images); `javascript:` and all other schemes are stripped
3. **External Links**: Automatically adds `target="_blank"` and `rel="noopener noreferrer"`
4. **Task Lists**: Checkboxes are disabled to prevent interaction
5. **No Event Handlers**: Blocks `onclick`, `onload`, etc.
6. **No Dangerous Tags**: Blocks `<script>`, `<iframe>`, `<object>`, `<embed>`
### Testing
Comprehensive test suite covers:
- Basic markdown rendering
- GFM features (tables, task lists, strikethrough)
- Code syntax highlighting
- Security (XSS prevention)
- Edge cases (unicode, long content, nested structures)
Run tests:
```bash
pnpm test --filter=@mosaic/api -- markdown.spec.ts
```
### Integration
The markdown renderer is integrated into the Knowledge Entry service:
1. **On Create**: Renders `content` to `contentHtml`
2. **On Update**: Re-renders if content changes
3. **Caching**: HTML is stored in database for performance
See `knowledge.service.ts` for implementation details.

View File

@@ -0,0 +1,351 @@
import { describe, it, expect } from "vitest";
import {
renderMarkdown,
renderMarkdownSync,
markdownToPlainText,
} from "./markdown";
describe("Markdown Rendering", () => {
describe("renderMarkdown", () => {
  // Happy path: a heading and inline bold must both survive rendering.
  it("should render basic markdown to HTML", async () => {
    const markdown = "# Hello World\n\nThis is **bold** text.";
    const html = await renderMarkdown(markdown);
    expect(html).toContain("<h1");
    expect(html).toContain("Hello World");
    expect(html).toContain("<strong>bold</strong>");
  });

  it("should handle empty input", async () => {
    const html = await renderMarkdown("");
    expect(html).toBe("");
  });

  // The casts deliberately bypass the `string` parameter type to exercise the
  // runtime guard against non-string input.
  it("should handle null/undefined input", async () => {
    expect(await renderMarkdown(null as any)).toBe("");
    expect(await renderMarkdown(undefined as any)).toBe("");
  });

  it("should sanitize potentially dangerous HTML", async () => {
    const markdown = '<script>alert("XSS")</script>\n\n**Safe** content';
    const html = await renderMarkdown(markdown);
    expect(html).not.toContain("<script>");
    expect(html).not.toContain("alert");
    expect(html).toContain("<strong>Safe</strong>");
  });

  // A markdown link whose destination uses the javascript: scheme must not
  // keep that destination in the rendered output.
  it("should strip javascript: URLs from links", async () => {
    const markdown = '[Click me](javascript:alert("XSS"))';
    const html = await renderMarkdown(markdown);
    expect(html).not.toContain("javascript:");
    expect(html).not.toContain("onclick");
  });

  // Raw inline HTML carrying an event-handler attribute: the handler must be
  // dropped while the harmless href is kept.
  it("should sanitize onclick handlers", async () => {
    const markdown =
      '<a href="https://example.com" onclick="alert(1)">Click me</a>';
    const html = await renderMarkdown(markdown);
    expect(html).not.toContain("onclick");
    expect(html).not.toContain("alert");
    expect(html).toContain('href="https://example.com"');
  });
});
describe("GFM Features", () => {
  // Pipe tables should come out as a full table with header and body sections.
  it("should render tables", async () => {
    const source = `
| Header 1 | Header 2 |
|----------|----------|
| Cell 1 | Cell 2 |
| Cell 3 | Cell 4 |
`.trim();
    const output = await renderMarkdown(source);
    const expectedFragments = [
      "<table>",
      "<thead>",
      "<tbody>",
      "<th>Header 1</th>",
      "<td>Cell 1</td>",
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }
  });

  // Double tildes map to a <del> element.
  it("should render strikethrough text", async () => {
    const output = await renderMarkdown("This is ~~deleted~~ text");
    expect(output).toContain("<del>deleted</del>");
  });

  // Task list items become checkboxes that the reader cannot toggle.
  it("should render task lists", async () => {
    const source = `
- [ ] Unchecked task
- [x] Checked task
`.trim();
    const output = await renderMarkdown(source);
    expect(output).toContain('<input');
    expect(output).toContain('type="checkbox"');
    // Should be disabled for safety
    expect(output).toContain('disabled="disabled"');
  });

  // Bare URLs in running text are turned into anchors.
  it("should render autolinks", async () => {
    const output = await renderMarkdown(
      "Visit https://example.com for more info"
    );
    expect(output).toContain('<a href="https://example.com"');
  });
});
describe("Code Highlighting", () => {
  // Backtick spans render as a bare <code> element.
  it("should render inline code", async () => {
    const output = await renderMarkdown("Use the `console.log()` function");
    expect(output).toContain("<code>");
    expect(output).toContain("console.log()");
  });

  // A fenced block tagged with a language carries a language-* class.
  it("should render code blocks with language", async () => {
    const fenced = `
\`\`\`typescript
const greeting: string = "Hello";
console.log(greeting);
\`\`\`
`.trim();
    const output = await renderMarkdown(fenced);
    for (const fragment of ["<pre>", "<code", "language-typescript"]) {
      expect(output).toContain(fragment);
    }
    // Should have syntax highlighting
    expect(output.length).toBeGreaterThan(fenced.length);
  });

  // A fence without a language tag still renders as a code block.
  it("should render code blocks without language", async () => {
    const fenced = `
\`\`\`
plain text code
\`\`\`
`.trim();
    const output = await renderMarkdown(fenced);
    for (const fragment of ["<pre>", "<code", "plain text code"]) {
      expect(output).toContain(fragment);
    }
  });
});
describe("Links and Images", () => {
  // Absolute http(s) links must open in a new tab without opener access.
  it("should render links with target=_blank for external URLs", async () => {
    const output = await renderMarkdown(
      "[External Link](https://example.com)"
    );
    const expectedAttributes = [
      'href="https://example.com"',
      'target="_blank"',
      'rel="noopener noreferrer"',
    ];
    for (const attribute of expectedAttributes) {
      expect(output).toContain(attribute);
    }
  });

  // Image syntax keeps both the source URL and the alt text.
  it("should render images", async () => {
    const output = await renderMarkdown(
      "![Alt text](https://example.com/image.png)"
    );
    expect(output).toContain('<img');
    expect(output).toContain('src="https://example.com/image.png"');
    expect(output).toContain('alt="Alt text"');
  });

  // data: URIs are accepted for image sources.
  it("should allow data URIs for images", async () => {
    const source = "![Image](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==)";
    const output = await renderMarkdown(source);
    expect(output).toContain('<img');
    expect(output).toContain('src="data:image/png;base64');
  });
});
describe("Headers and IDs", () => {
  // Headings receive an id attribute so they can be linked to directly.
  it("should generate IDs for headers", async () => {
    const output = await renderMarkdown("# My Header Title");
    expect(output).toContain('<h1');
    expect(output).toContain('id="');
  });

  // Every ATX heading depth from one to six hashes maps to its own element.
  it("should render all header levels", async () => {
    const source = `
# H1
## H2
### H3
#### H4
##### H5
###### H6
`.trim();
    const output = await renderMarkdown(source);
    const openingTags = ["<h1", "<h2", "<h3", "<h4", "<h5", "<h6"];
    for (const tag of openingTags) {
      expect(output).toContain(tag);
    }
  });
});
describe("Lists", () => {
  // Dash-prefixed lines become <ul>/<li> markup.
  it("should render unordered lists", async () => {
    const source = `
- Item 1
- Item 2
- Nested item
`.trim();
    const output = await renderMarkdown(source);
    for (const fragment of ["<ul>", "<li>Item 1</li>", "Nested item"]) {
      expect(output).toContain(fragment);
    }
  });

  // Numbered lines become <ol>/<li> markup.
  it("should render ordered lists", async () => {
    const source = `
1. First
2. Second
3. Third
`.trim();
    const output = await renderMarkdown(source);
    for (const fragment of ["<ol>", "<li>First</li>", "<li>Second</li>"]) {
      expect(output).toContain(fragment);
    }
  });
});
describe("Quotes and Formatting", () => {
  // Lines starting with ">" are wrapped in a blockquote.
  it("should render blockquotes", async () => {
    const output = await renderMarkdown(
      "> This is a quote\n> Multi-line quote"
    );
    expect(output).toContain("<blockquote>");
    expect(output).toContain("This is a quote");
  });

  // Single and double asterisks map to <em> and <strong>.
  it("should render emphasis and strong", async () => {
    const output = await renderMarkdown("*italic* **bold** ***bold italic***");
    expect(output).toContain("<em>italic</em>");
    expect(output).toContain("<strong>bold</strong>");
  });

  // A line of three dashes between paragraphs yields a horizontal rule.
  it("should render horizontal rules", async () => {
    const output = await renderMarkdown("Content\n\n---\n\nMore content");
    expect(output).toContain("<hr");
  });
});
describe("renderMarkdownSync", () => {
  // The synchronous entry point renders the same basic constructs.
  it("should render markdown synchronously", () => {
    const output = renderMarkdownSync("# Sync Test\n\n**Bold** text");
    for (const fragment of ["<h1", "Sync Test", "<strong>Bold</strong>"]) {
      expect(output).toContain(fragment);
    }
  });

  // Empty input short-circuits to an empty string.
  it("should handle empty input synchronously", () => {
    expect(renderMarkdownSync("")).toBe("");
  });

  // Script tags and their contents are removed on the sync path as well.
  it("should sanitize XSS synchronously", () => {
    const output = renderMarkdownSync('<script>alert("XSS")</script>');
    expect(output).not.toContain("<script>");
    expect(output).not.toContain("alert");
  });
});
describe("markdownToPlainText", () => {
  // The visible words are kept while the formatting markup is removed.
  it("should extract plain text from markdown", async () => {
    const text = await markdownToPlainText(
      "# Header\n\n**Bold** and *italic* text"
    );
    for (const word of ["Header", "Bold", "italic"]) {
      expect(text).toContain(word);
    }
    for (const tag of ["<h1", "<strong>", "<em>"]) {
      expect(text).not.toContain(tag);
    }
  });

  // Anchors and images leave no tags behind; the link label survives.
  it("should strip all HTML tags", async () => {
    const text = await markdownToPlainText(
      '[Link](https://example.com)\n\n![Image](image.png)'
    );
    expect(text).not.toContain("<a");
    expect(text).not.toContain("<img");
    expect(text).toContain("Link");
  });
});
describe("Security Tests", () => {
  // Embedded frames must never reach the output.
  it("should block iframe injection", async () => {
    const output = await renderMarkdown(
      '<iframe src="https://evil.com"></iframe>'
    );
    expect(output).not.toContain("<iframe");
  });

  // Plugin containers are removed.
  it("should block object/embed tags", async () => {
    const output = await renderMarkdown('<object data="malware.swf"></object>');
    expect(output).not.toContain("<object");
  });

  // A link title that looks like an event handler must not appear in the output.
  it("should block event handlers in markdown links", async () => {
    const output = await renderMarkdown('[Click](# "onclick=alert(1)")');
    expect(output).not.toContain("onclick");
  });

  // Neither the SVG wrapper nor the script nested inside it may survive.
  it("should sanitize SVG with script", async () => {
    const output = await renderMarkdown(
      '<svg><script>alert("XSS")</script></svg>'
    );
    expect(output).not.toContain("<svg");
    expect(output).not.toContain("<script>");
  });
});
describe("Edge Cases", () => {
  // A heading followed by a 20,000-character paragraph still renders.
  it("should handle very long content", async () => {
    const source = "# Test\n\n" + "A ".repeat(10000);
    const output = await renderMarkdown(source);
    expect(output.length).toBeGreaterThan(0);
    expect(output).toContain("<h1");
  });

  // CJK text, accented letters and emoji pass through intact.
  it("should handle unicode characters", async () => {
    const output = await renderMarkdown(
      "# 你好 World 🌍\n\n**Émojis**: 😀 ✨ 🚀"
    );
    for (const glyphs of ["你好", "🌍", "😀"]) {
      expect(output).toContain(glyphs);
    }
  });

  // Emphasis nested inside strong text produces both elements.
  it("should handle nested markdown correctly", async () => {
    const output = await renderMarkdown("**Bold with _italic_ inside**");
    expect(output).toContain("<strong>");
    expect(output).toContain("<em>");
  });
});
});

View File

@@ -0,0 +1,222 @@
import { marked } from "marked";
import { gfmHeadingId } from "marked-gfm-heading-id";
import { markedHighlight } from "marked-highlight";
import hljs from "highlight.js";
import sanitizeHtml from "sanitize-html";
/**
 * Register the extensions and options this module relies on with the shared
 * `marked` instance: heading IDs, highlight.js code highlighting, and GFM
 * parsing.
 */
function configureMarked(): void {
  // Highlighter for fenced code blocks. A language tag highlight.js does not
  // recognise is rendered as plain text.
  const highlightExtension = markedHighlight({
    langPrefix: "hljs language-",
    highlight(code: string, lang: string): string {
      let language = "plaintext";
      if (hljs.getLanguage(lang)) {
        language = lang;
      }
      const highlighted = hljs.highlight(code, { language });
      return highlighted.value;
    },
  });

  // Heading IDs first, then highlighting, then the core parser options.
  marked.use(gfmHeadingId());
  marked.use(highlightExtension);
  marked.use({
    gfm: true, // GitHub Flavored Markdown
    breaks: false, // Single newlines are not converted to <br>
    pedantic: false,
  });
}

// Apply the configuration once, when this module is first loaded.
configureMarked();
/**
 * Matches hrefs that point at another origin: absolute http(s) URLs in any
 * letter case, plus protocol-relative URLs such as "//host/path".
 */
const EXTERNAL_HREF_PATTERN = /^(?:https?:)?\/\//i;

/**
 * Sanitization options for HTML output.
 *
 * Only the tags, attributes, classes and URL schemes listed here survive;
 * everything else (scripts, iframes, event-handler attributes, ...) is
 * dropped by sanitize-html.
 */
const SANITIZE_OPTIONS: sanitizeHtml.IOptions = {
  allowedTags: [
    // Text formatting
    "p",
    "br",
    "strong",
    "em",
    "u",
    "s",
    "del",
    "mark",
    "small",
    "sub",
    "sup",
    // Headers
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    // Lists
    "ul",
    "ol",
    "li",
    // Links and media
    "a",
    "img",
    // Code
    "code",
    "pre",
    "kbd",
    "samp",
    "var",
    // Tables
    "table",
    "thead",
    "tbody",
    "tfoot",
    "tr",
    "th",
    "td",
    // Quotes and citations
    "blockquote",
    "cite",
    "q",
    // Structure
    "div",
    "span",
    "hr",
    // Task lists (input checkboxes)
    "input",
  ],
  allowedAttributes: {
    a: ["href", "target", "rel"], // title is intentionally not allowed on links
    img: ["src", "alt", "title", "width", "height"],
    code: ["class"], // For syntax highlighting
    pre: ["class"],
    h1: ["id"],
    h2: ["id"],
    h3: ["id"],
    h4: ["id"],
    h5: ["id"],
    h6: ["id"],
    td: ["align"],
    th: ["align"],
    input: ["type", "checked", "disabled"], // For task lists
  },
  allowedSchemes: ["http", "https", "mailto"],
  allowedSchemesByTag: {
    img: ["http", "https", "data"],
  },
  allowedClasses: {
    code: ["hljs", "language-*"],
    pre: ["hljs"],
    // highlight.js wraps each token in <span class="hljs-...">. Without this
    // entry those classes are stripped and the highlighting is lost.
    span: ["hljs-*"],
  },
  allowedIframeHostnames: [], // No iframes allowed
  transformTags: {
    // Force target="_blank" and rel="noopener noreferrer" on external links.
    a: (tagName: string, attribs: sanitizeHtml.Attributes) => {
      const href = attribs.href;
      const isExternal =
        typeof href === "string" && EXTERNAL_HREF_PATTERN.test(href);
      if (!isExternal) {
        return { tagName, attribs };
      }
      return {
        tagName,
        attribs: {
          ...attribs,
          target: "_blank",
          rel: "noopener noreferrer",
        },
      };
    },
    // <input> is allowed only so task lists can render. Coerce every input
    // into a disabled checkbox so raw HTML cannot smuggle in live text or
    // password fields.
    input: (tagName: string, attribs: sanitizeHtml.Attributes) => ({
      tagName,
      attribs: {
        ...attribs,
        type: "checkbox",
        disabled: "disabled",
      },
    }),
  },
};
/**
 * Convert raw markdown into sanitized HTML.
 *
 * The text is parsed with `marked` and the resulting HTML is filtered through
 * `sanitize-html` using `SANITIZE_OPTIONS`.
 *
 * @param markdown - Raw markdown content
 * @returns Sanitized HTML, or an empty string when the input is empty or not a string
 * @throws Error with a generic message if parsing or sanitizing fails
 */
export async function renderMarkdown(markdown: string): Promise<string> {
  const isRenderable = typeof markdown === "string" && markdown.length > 0;
  if (!isRenderable) {
    return "";
  }

  try {
    const parsed = await marked.parse(markdown);
    return sanitizeHtml(parsed, SANITIZE_OPTIONS);
  } catch (error) {
    // The underlying error is logged here; callers only see a generic message.
    console.error("Markdown rendering error:", error);
    throw new Error("Failed to render markdown content");
  }
}
/**
 * Render markdown to sanitized HTML synchronously (for simple use cases).
 * Prefer the async `renderMarkdown` when possible.
 *
 * @param markdown - Raw markdown content
 * @returns Sanitized HTML, or an empty string when the input is empty or not a string
 * @throws Error with a generic message if rendering fails, including when
 *   `marked.parse` does not return a string
 */
export function renderMarkdownSync(markdown: string): string {
  if (!markdown || typeof markdown !== "string") {
    return "";
  }
  try {
    // Parse markdown to HTML (sync version)
    const rawHtml = marked.parse(markdown);
    // marked.parse can return a Promise when marked is configured for async
    // parsing. Narrow at runtime instead of asserting `as string`, so a
    // Promise is never handed to the sanitizer.
    if (typeof rawHtml !== "string") {
      throw new TypeError(
        "marked.parse did not return a string; synchronous rendering is unavailable"
      );
    }
    // Sanitize HTML to prevent XSS
    return sanitizeHtml(rawHtml, SANITIZE_OPTIONS);
  } catch (error) {
    // The underlying error is logged here; callers only see a generic message.
    console.error("Markdown rendering error:", error);
    throw new Error("Failed to render markdown content");
  }
}
/**
 * Produce a tag-free text version of markdown content, e.g. for summaries or
 * search indexes.
 *
 * The markdown is first rendered with `renderMarkdown`, then passed through
 * `sanitize-html` again with no tags and no attributes allowed.
 *
 * NOTE(review): HTML entities (such as `&amp;`) presumably remain encoded in
 * the result — confirm before treating the output as raw text.
 *
 * @param markdown - Raw markdown content
 * @returns Text content with all HTML tags removed
 */
export async function markdownToPlainText(markdown: string): Promise<string> {
  const renderedHtml = await renderMarkdown(markdown);
  const stripAllMarkup = {
    allowedTags: [],
    allowedAttributes: {},
  };
  return sanitizeHtml(renderedHtml, stripAllMarkup);
}