From aa14b580b3701304d0631a9e5a3a36599d61caa5 Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Thu, 5 Feb 2026 15:25:57 -0600 Subject: [PATCH] fix(#337): Sanitize HTML before wiki-link processing in WikiLinkRenderer - Apply DOMPurify to entire HTML input before parseWikiLinks() - Prevents stored XSS via knowledge entry content (SEC-WEB-2) - Allow safe formatting tags (p, strong, em, etc.) but strip scripts, iframes, event handlers - Update tests to reflect new sanitization behavior Refs #337 Co-Authored-By: Claude Opus 4.5 --- .../components/knowledge/WikiLinkRenderer.tsx | 53 +++- .../__tests__/WikiLinkRenderer.test.tsx | 276 +++++++++++++++--- 2 files changed, 287 insertions(+), 42 deletions(-) diff --git a/apps/web/src/components/knowledge/WikiLinkRenderer.tsx b/apps/web/src/components/knowledge/WikiLinkRenderer.tsx index ffa3511..e0027c5 100644 --- a/apps/web/src/components/knowledge/WikiLinkRenderer.tsx +++ b/apps/web/src/components/knowledge/WikiLinkRenderer.tsx @@ -28,7 +28,58 @@ export function WikiLinkRenderer({ className = "", }: WikiLinkRendererProps): React.ReactElement { const processedHtml = React.useMemo(() => { - return parseWikiLinks(html); + // SEC-WEB-2 FIX: Sanitize ENTIRE HTML input BEFORE processing wiki-links + // This prevents stored XSS via knowledge entry content + const sanitizedHtml = DOMPurify.sanitize(html, { + // Allow common formatting tags that are safe + ALLOWED_TAGS: [ + "p", + "br", + "strong", + "b", + "em", + "i", + "u", + "s", + "strike", + "del", + "ins", + "mark", + "small", + "sub", + "sup", + "code", + "pre", + "blockquote", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "ul", + "ol", + "li", + "dl", + "dt", + "dd", + "table", + "thead", + "tbody", + "tfoot", + "tr", + "th", + "td", + "hr", + "span", + "div", + ], + // Allow safe attributes only + ALLOWED_ATTR: ["class", "id", "title", "lang", "dir"], + // Disallow data-* attributes (URI-bearing attributes such as href/src are already excluded by ALLOWED_ATTR) + ALLOW_DATA_ATTR: false, + }); + return parseWikiLinks(sanitizedHtml); }, 
[html]); return ( diff --git a/apps/web/src/components/knowledge/__tests__/WikiLinkRenderer.test.tsx b/apps/web/src/components/knowledge/__tests__/WikiLinkRenderer.test.tsx index 03ffb47..c34ee0c 100644 --- a/apps/web/src/components/knowledge/__tests__/WikiLinkRenderer.test.tsx +++ b/apps/web/src/components/knowledge/__tests__/WikiLinkRenderer.test.tsx @@ -69,19 +69,19 @@ describe("WikiLinkRenderer", (): void => { }); it("escapes HTML in link text to prevent XSS", (): void => { + // SEC-WEB-2: DOMPurify now sanitizes entire HTML BEFORE wiki-link processing + // Script tags are stripped, which may break wiki-link patterns like [[entry|]] const html = "

[[entry|]]

"; const { container } = render(); - const link = container.querySelector('a[data-wiki-link="true"]'); - expect(link).toBeInTheDocument(); + // After sanitization:

[[entry|]]

- malformed wiki-link (empty display text with |) + // The wiki-link regex doesn't match [[entry|]] because |([^\]]+) requires 1+ chars + // So no wiki-link is created - the XSS is prevented by stripping dangerous content - // Script tags should be removed by DOMPurify (including content) - const linkHtml = link?.innerHTML ?? ""; - expect(linkHtml).not.toContain("]]

"; const { container } = render(); - const link = container.querySelector('a[data-wiki-link="true"]'); - expect(link).toBeInTheDocument(); - - // DOMPurify removes all HTML completely - const linkHtml = link?.innerHTML ?? ""; - expect(linkHtml).not.toContain(""); - expect(linkHtml).not.toContain("

[[valid-link|]]

- malformed wiki-link (empty display text) const html = "

[[valid-link|]]

"; const { container } = render(); + // XSS payload is stripped - that's the main security goal + expect(container.innerHTML).not.toContain(" { + it("sanitizes script tags in surrounding HTML before wiki-link processing", (): void => { + const html = "

Safe text

[[my-link]]

"; + const { container } = render(); + + // Script tag should be removed + expect(container.innerHTML).not.toContain("

[[my-entry]]

'; + const { container } = render(); + + // SVG and script should be removed + expect(container.innerHTML).not.toContain(""); + expect(container.innerHTML).not.toContain("onload"); + expect(container.innerHTML).not.toContain("evil()"); + + // Wiki-link should still work + const link = container.querySelector('a[data-wiki-link="true"]'); + expect(link).toBeInTheDocument(); + }); + + it("sanitizes event handlers on allowed tags in surrounding HTML", (): void => { + const html = '
Click me

[[link]]

'; + const { container } = render(); + + // onclick should be removed but div preserved + expect(container.innerHTML).not.toContain("onclick"); + expect(container.innerHTML).not.toContain("alert(1)"); + expect(container.textContent).toContain("Click me"); + + // Wiki-link should still work + const link = container.querySelector('a[data-wiki-link="true"]'); + expect(link).toBeInTheDocument(); + }); + + it("sanitizes anchor tags with javascript: protocol in surrounding HTML", (): void => { + const html = 'Evil link

[[safe-link]]

'; + const { container } = render(); + + // Anchor tags not in allowed list should be removed + expect(container.innerHTML).not.toContain("javascript:"); + + // Wiki-link should still work + const link = container.querySelector('a[data-wiki-link="true"]'); + expect(link).toBeInTheDocument(); + }); + + it("sanitizes form injection in surrounding HTML", (): void => { + const html = '

[[link]]

'; + const { container } = render(); + + // Form elements should be removed + expect(container.innerHTML).not.toContain(" { + const html = '

[[link]]

'; + const { container } = render(); + + // Object should be removed + expect(container.innerHTML).not.toContain(" { + const html = '

[[link]]

'; + const { container } = render(); + + // Style tag should be removed + expect(container.innerHTML).not.toContain(" { + const html = + "

Bold and italic

[[my-link|My Link]]

"; + const { container } = render(); + + // Safe tags preserved + expect(container.querySelector("strong")).toBeInTheDocument(); + expect(container.querySelector("em")).toBeInTheDocument(); + expect(container.textContent).toContain("Bold"); + expect(container.textContent).toContain("italic"); + + // Script removed + expect(container.innerHTML).not.toContain(" + + +

Another paragraph

+ +

Final text with [[another-link]]

+ `; + const { container } = render(); + + // All dangerous content removed + expect(container.innerHTML).not.toContain("