diff --git a/apps/api/src/knowledge/utils/markdown.spec.ts b/apps/api/src/knowledge/utils/markdown.spec.ts
index 32d13a0..cfc2025 100644
--- a/apps/api/src/knowledge/utils/markdown.spec.ts
+++ b/apps/api/src/knowledge/utils/markdown.spec.ts
@@ -146,13 +146,12 @@ plain text code
       expect(html).toContain('alt="Alt text"');
     });
-    it("should allow data URIs for images", async () => {
+    it("should block data URIs for images", async () => {
       const markdown =
         "![Image](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==)";
       const html = await renderMarkdown(markdown);
-      expect(html).toContain("");
     });
+
+    it("should block data: URI scheme in image src", async () => {
+      const markdown = "![XSS](data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=)";
+      const html = await renderMarkdown(markdown);
+
+      expect(html).not.toContain("data:");
+      expect(html).not.toContain("text/html");
+    });
+
+    it("should block data: URI scheme in links", async () => {
+      const markdown = "[Click me](data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=)";
+      const html = await renderMarkdown(markdown);
+
+      expect(html).not.toContain("data:");
+      expect(html).not.toContain("text/html");
+    });
+
+    it("should block data: URI with mixed case in images", async () => {
+      const markdown =
+        "![XSS](Data:image/svg+xml;base64,PHN2Zz48c2NyaXB0PmFsZXJ0KCdYU1MnKTwvc2NyaXB0Pjwvc3ZnPg==)";
+      const html = await renderMarkdown(markdown);
+
+      expect(html).not.toContain("data:");
+      expect(html).not.toContain("Data:");
+    });
+
+    it("should block data: URI with leading whitespace", async () => {
+      const markdown = "![XSS]( data:image/png;base64,abc123)";
+      const html = await renderMarkdown(markdown);
+
+      expect(html).not.toContain("data:");
+    });
+
+    it("should block data: URI in sync renderer", () => {
+      const markdown = "![XSS](data:image/png;base64,abc123)";
+      const html = renderMarkdownSync(markdown);
+
+      expect(html).not.toContain("data:");
+    });
   });
   describe("Edge Cases", () => {
diff --git a/apps/api/src/knowledge/utils/markdown.ts b/apps/api/src/knowledge/utils/markdown.ts
index 55203c4..09e5cdb 100644
--- a/apps/api/src/knowledge/utils/markdown.ts
+++ b/apps/api/src/knowledge/utils/markdown.ts
@@ -107,7 +107,7 @@ const SANITIZE_OPTIONS: sanitizeHtml.IOptions = {
   },
   allowedSchemes: ["http", "https", "mailto"],
   allowedSchemesByTag: {
-    img: ["http", "https", "data"],
+    img: ["http", "https"],
   },
   allowedClasses: {
     code: ["hljs", "language-*"],
@@ -115,9 +115,18 @@ const SANITIZE_OPTIONS: sanitizeHtml.IOptions = {
   },
   allowedIframeHostnames: [], // No iframes allowed
   // Enforce target="_blank" and rel="noopener noreferrer" for external links
+  // Block data: URIs in links and images to prevent XSS/CSRF attacks
   transformTags: {
     a: (tagName: string, attribs: sanitizeHtml.Attributes) => {
       const href = attribs.href;
+      // Strip data: URI scheme from links
+      if (href?.trim().toLowerCase().startsWith("data:")) {
+        const { href: _removed, ...safeAttribs } = attribs;
+        return {
+          tagName,
+          attribs: safeAttribs,
+        };
+      }
       if (href && (href.startsWith("http://") || href.startsWith("https://"))) {
         return {
           tagName,
@@ -133,6 +142,21 @@ const SANITIZE_OPTIONS: sanitizeHtml.IOptions = {
         attribs,
       };
     },
+    // Strip data: URI scheme from images to prevent XSS/CSRF
+    img: (tagName: string, attribs: sanitizeHtml.Attributes) => {
+      const src = attribs.src;
+      if (src?.trim().toLowerCase().startsWith("data:")) {
+        const { src: _removed, ...safeAttribs } = attribs;
+        return {
+          tagName,
+          attribs: safeAttribs,
+        };
+      }
+      return {
+        tagName,
+        attribs,
+      };
+    },
     // Disable task list checkboxes (make them read-only)
     input: (tagName: string, attribs: sanitizeHtml.Attributes) => {
       if (attribs.type === "checkbox") {