fix(#337): Sanitize HTML before wiki-link processing in WikiLinkRenderer

- Apply DOMPurify to entire HTML input before parseWikiLinks()
- Prevents stored XSS via knowledge entry content (SEC-WEB-2)
- Allow safe formatting tags (p, strong, em, etc.) but strip scripts, iframes, event handlers
- Update tests to reflect new sanitization behavior

Refs #337

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-05 15:25:57 -06:00
parent 000145af96
commit aa14b580b3
2 changed files with 287 additions and 42 deletions

View File

@@ -28,7 +28,58 @@ export function WikiLinkRenderer({
className = "",
}: WikiLinkRendererProps): React.ReactElement {
const processedHtml = React.useMemo(() => {
return parseWikiLinks(html);
// SEC-WEB-2 FIX: Sanitize ENTIRE HTML input BEFORE processing wiki-links
// This prevents stored XSS via knowledge entry content
const sanitizedHtml = DOMPurify.sanitize(html, {
// Allow common formatting tags that are safe
ALLOWED_TAGS: [
"p",
"br",
"strong",
"b",
"em",
"i",
"u",
"s",
"strike",
"del",
"ins",
"mark",
"small",
"sub",
"sup",
"code",
"pre",
"blockquote",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"ul",
"ol",
"li",
"dl",
"dt",
"dd",
"table",
"thead",
"tbody",
"tfoot",
"tr",
"th",
"td",
"hr",
"span",
"div",
],
// Allow safe attributes only
ALLOWED_ATTR: ["class", "id", "title", "lang", "dir"],
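// Disallow data-* attributes. URI-bearing attributes (href, src, ...) are
// already excluded because they are not listed in ALLOWED_ATTR above.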
ALLOW_DATA_ATTR: false,
});
return parseWikiLinks(sanitizedHtml);
}, [html]);
return (

View File

@@ -69,19 +69,19 @@ describe("WikiLinkRenderer", (): void => {
});
it("escapes HTML in link text to prevent XSS", (): void => {
// SEC-WEB-2: DOMPurify now sanitizes entire HTML BEFORE wiki-link processing
// Script tags are stripped (including their content), which leaves a malformed wiki-link: [[entry|]]
const html = "<p>[[entry|<script>alert('xss')</script>]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
// After sanitization: <p>[[entry|]]</p> - malformed wiki-link (empty display text with |)
// The wiki-link regex doesn't match [[entry|]] because |([^\]]+) requires 1+ chars
// So no wiki-link is created - the XSS is prevented by stripping dangerous content
// Script tags should be removed by DOMPurify (including content)
const linkHtml = link?.innerHTML ?? "";
expect(linkHtml).not.toContain("<script>");
expect(linkHtml).not.toContain("alert");
expect(linkHtml).not.toContain("xss");
// Content is completely removed for dangerous tags
expect(linkHtml.trim()).toBe("");
// No script tags in output
expect(container.innerHTML).not.toContain("<script>");
expect(container.innerHTML).not.toContain("alert");
expect(container.innerHTML).not.toContain("xss");
});
it("preserves other HTML structure while converting wiki-links", (): void => {
@@ -219,33 +219,26 @@ describe("WikiLinkRenderer", (): void => {
});
it("escapes event handlers in display text", (): void => {
// SEC-WEB-2: DOMPurify now sanitizes entire HTML BEFORE wiki-link processing
// After sanitization: <p>[[valid-link|]]</p> - malformed wiki-link (empty display text)
const html = "<p>[[valid-link|<img src=x onerror=alert(1)>]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
// DOMPurify removes all HTML tags completely
const linkHtml = link?.innerHTML ?? "";
expect(linkHtml).not.toContain("onerror");
expect(linkHtml).not.toContain("alert(1)");
expect(linkHtml).not.toContain("<img");
// Content should be empty after stripping HTML
expect(linkHtml.trim()).toBe("");
// XSS payload is stripped - that's the main security goal
expect(container.innerHTML).not.toContain("onerror");
expect(container.innerHTML).not.toContain("alert(1)");
expect(container.innerHTML).not.toContain("<img");
});
it("escapes iframe injection in display text", (): void => {
// SEC-WEB-2: DOMPurify now sanitizes entire HTML BEFORE wiki-link processing
// After sanitization: <p>[[valid-link|]]</p> - malformed wiki-link (empty display text)
const html = "<p>[[valid-link|<iframe src=evil.com>]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
// DOMPurify removes all HTML tags completely
const linkHtml = link?.innerHTML ?? "";
expect(linkHtml).not.toContain("<iframe");
expect(linkHtml).not.toContain("iframe");
expect(linkHtml.trim()).toBe("");
// XSS payload is stripped - that's the main security goal
expect(container.innerHTML).not.toContain("<iframe");
expect(container.innerHTML).not.toContain("evil.com");
});
it("blocks script tags in slug", (): void => {
@@ -293,32 +286,233 @@ describe("WikiLinkRenderer", (): void => {
});
it("escapes SVG with embedded scripts in display text", (): void => {
// SEC-WEB-2: DOMPurify now sanitizes entire HTML BEFORE wiki-link processing
// After sanitization: <p>[[valid-link|]]</p> - malformed wiki-link (empty display text)
const html = "<p>[[valid-link|<svg><script>alert(1)</script></svg>]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
// DOMPurify removes all HTML completely
const linkHtml = link?.innerHTML ?? "";
expect(linkHtml).not.toContain("<svg>");
expect(linkHtml).not.toContain("<script>");
expect(linkHtml).not.toContain("alert");
expect(linkHtml.trim()).toBe("");
// XSS payload is stripped - that's the main security goal
expect(container.innerHTML).not.toContain("<svg>");
expect(container.innerHTML).not.toContain("<script>");
expect(container.innerHTML).not.toContain("alert");
});
it("blocks object/embed tags in display text", (): void => {
// SEC-WEB-2: DOMPurify now sanitizes entire HTML BEFORE wiki-link processing
// After sanitization: <p>[[valid-link|]]</p> - malformed wiki-link (empty display text)
const html = "<p>[[valid-link|<object data=evil.com></object>]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
// XSS payload is stripped - that's the main security goal
expect(container.innerHTML).not.toContain("<object");
expect(container.innerHTML).not.toContain("evil.com");
});
});
describe("SEC-WEB-2: Stored XSS via surrounding HTML content", (): void => {
it("sanitizes script tags in surrounding HTML before wiki-link processing", (): void => {
const html = "<p>Safe text</p><script>alert('xss')</script><p>[[my-link]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
// Script tag should be removed
expect(container.innerHTML).not.toContain("<script>");
expect(container.innerHTML).not.toContain("alert('xss')");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
expect(link).toHaveAttribute("href", "/knowledge/my-link");
// DOMPurify removes all HTML completely
const linkHtml = link?.innerHTML ?? "";
expect(linkHtml).not.toContain("<object");
expect(linkHtml).not.toContain("object");
expect(linkHtml.trim()).toBe("");
// Safe content preserved
expect(container.textContent).toContain("Safe text");
});
it("sanitizes img tags with onerror handlers in surrounding HTML", (): void => {
const html = '<p>Content with [[link]]</p><img src="x" onerror="alert(1)">';
const { container } = render(<WikiLinkRenderer html={html} />);
// Image tag should be removed (not in allowed tags)
expect(container.innerHTML).not.toContain("<img");
expect(container.innerHTML).not.toContain("onerror");
expect(container.innerHTML).not.toContain("alert(1)");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes iframe injection in surrounding HTML", (): void => {
const html = '<iframe src="https://evil.com"></iframe><p>[[safe-link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// Iframe should be removed
expect(container.innerHTML).not.toContain("<iframe");
expect(container.innerHTML).not.toContain("evil.com");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes SVG with embedded scripts in surrounding HTML", (): void => {
const html = '<svg onload="alert(1)"><script>evil()</script></svg><p>[[my-entry]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// SVG and script should be removed
expect(container.innerHTML).not.toContain("<svg");
expect(container.innerHTML).not.toContain("<script>");
expect(container.innerHTML).not.toContain("onload");
expect(container.innerHTML).not.toContain("evil()");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes event handlers on allowed tags in surrounding HTML", (): void => {
const html = '<div onclick="alert(1)">Click me</div><p>[[link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// onclick should be removed but div preserved
expect(container.innerHTML).not.toContain("onclick");
expect(container.innerHTML).not.toContain("alert(1)");
expect(container.textContent).toContain("Click me");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes anchor tags with javascript: protocol in surrounding HTML", (): void => {
const html = '<a href="javascript:alert(1)">Evil link</a><p>[[safe-link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
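// Anchor tags are not in the allowed list, so the javascript: link must not survive sanitization.
// (Wiki-link anchors are generated by parseWikiLinks after sanitization, so they are unaffected.)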
expect(container.innerHTML).not.toContain("javascript:");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes form injection in surrounding HTML", (): void => {
const html = '<form action="https://evil.com"><input type="text"></form><p>[[link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// Form elements should be removed
expect(container.innerHTML).not.toContain("<form");
expect(container.innerHTML).not.toContain("<input");
expect(container.innerHTML).not.toContain("evil.com");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes object/embed tags in surrounding HTML", (): void => {
const html = '<object data="https://evil.com/flash.swf"></object><p>[[link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// Object should be removed
expect(container.innerHTML).not.toContain("<object");
expect(container.innerHTML).not.toContain("evil.com");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes style tags with malicious CSS in surrounding HTML", (): void => {
const html = '<style>body { background: url("javascript:alert(1)") }</style><p>[[link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// Style tag should be removed
expect(container.innerHTML).not.toContain("<style");
expect(container.innerHTML).not.toContain("javascript:");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("preserves safe formatting tags while removing dangerous ones", (): void => {
const html =
"<p><strong>Bold</strong> and <em>italic</em></p><script>evil()</script><p>[[my-link|My Link]]</p>";
const { container } = render(<WikiLinkRenderer html={html} />);
// Safe tags preserved
expect(container.querySelector("strong")).toBeInTheDocument();
expect(container.querySelector("em")).toBeInTheDocument();
expect(container.textContent).toContain("Bold");
expect(container.textContent).toContain("italic");
// Script removed
expect(container.innerHTML).not.toContain("<script>");
// Wiki-link works
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
expect(link).toHaveTextContent("My Link");
});
it("sanitizes base64-encoded data URIs in img tags", (): void => {
const html =
'<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg=="><p>[[link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// Image with data URI should be removed
expect(container.innerHTML).not.toContain("<img");
expect(container.innerHTML).not.toContain("data:");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("sanitizes meta refresh in surrounding HTML", (): void => {
const html = '<meta http-equiv="refresh" content="0;url=https://evil.com"><p>[[link]]</p>';
const { container } = render(<WikiLinkRenderer html={html} />);
// Meta tag should be removed
expect(container.innerHTML).not.toContain("<meta");
expect(container.innerHTML).not.toContain("evil.com");
// Wiki-link should still work
const link = container.querySelector('a[data-wiki-link="true"]');
expect(link).toBeInTheDocument();
});
it("handles complex mixed content with multiple attack vectors", (): void => {
const html = `
<p>Normal paragraph with [[good-link|Good Link]]</p>
<script>stealCookies()</script>
<img src="x" onerror="alert(1)">
<iframe src="evil.com"></iframe>
<p>Another paragraph</p>
<svg onload="evil()"></svg>
<p>Final text with [[another-link]]</p>
`;
const { container } = render(<WikiLinkRenderer html={html} />);
// All dangerous content removed
expect(container.innerHTML).not.toContain("<script>");
expect(container.innerHTML).not.toContain("<img");
expect(container.innerHTML).not.toContain("<iframe");
expect(container.innerHTML).not.toContain("<svg");
expect(container.innerHTML).not.toContain("stealCookies");
expect(container.innerHTML).not.toContain("onerror");
expect(container.innerHTML).not.toContain("onload");
// Safe content preserved
expect(container.textContent).toContain("Normal paragraph");
expect(container.textContent).toContain("Another paragraph");
expect(container.textContent).toContain("Final text");
// Both wiki-links work
const links = container.querySelectorAll('a[data-wiki-link="true"]');
expect(links.length).toBe(2);
expect(links[0]).toHaveTextContent("Good Link");
expect(links[1]).toHaveAttribute("href", "/knowledge/another-link");
});
});
});