-
Notifications
You must be signed in to change notification settings - Fork 18
fix: sanitize raw HTML in MDXISH & MDX renderers #1526
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
eaglethrost
wants to merge
10
commits into
next
Choose a base branch
from
dimas/rm-17024-stored-xss-in-hub-docs-renderer-via-mathml
base: next
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 6 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
15b76ac
feat: sanitize mdxish & mdx
eaglethrost c2d6449
fix: github test
eaglethrost d771030
fix: tests, remove style
eaglethrost f3957e3
test: enhance
eaglethrost 053eb40
chore: improvements
eaglethrost 31035c3
fix: tests & code structure comments
eaglethrost df6d12e
chore: coderabbit comments
eaglethrost 1c9ef15
Merge branch 'next' into dimas/rm-17024-stored-xss-in-hub-docs-render…
eaglethrost 26e7f0f
fix: remove embed
eaglethrost 1b2b2c9
fix: move remark plugins
eaglethrost File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| import { render } from '@testing-library/react'; | ||
| import React from 'react'; | ||
|
|
||
| import { execute } from '../helpers'; | ||
|
|
||
| // The `md` format sanitizes via rehype-sanitize's allow-list (covered in run.test.tsx). | ||
| // Default MDX keeps raw HTML as JSX nodes that the allow-list never sees, so these tests | ||
| // assert that the deny-list stripper removes the known script-execution vectors on that path. | ||
| describe('MDX (compile) sanitization', () => { | ||
|   // Renders one document that mixes a heading with four raw-HTML vectors (script tag, | ||
|   // javascript: link, onerror handler, javascript: iframe) and inspects the resulting DOM. | ||
|   it('strips script-execution vectors in default MDX format', () => { | ||
|     const md = [ | ||
|       '# Docs', | ||
|       '', | ||
|       '<script>window.__xss = 1</script>', | ||
|       '', | ||
|       '<a href="javascript:alert(1)">link</a>', | ||
|       '', | ||
|       '<img src="x" onerror="window.__xss = 1" />', | ||
|       '', | ||
|       '<iframe src="javascript:alert(1)"></iframe>', | ||
|     ].join('\n'); | ||
|
|
||
|     const Component = execute(md, {}, {}); // no format => MDX | ||
|     const { container } = render(<Component />); | ||
|
|
||
|     expect(container.querySelector('script')).not.toBeInTheDocument(); | ||
|     expect(container.querySelector('iframe')).not.toBeInTheDocument(); | ||
|
|
||
|     // The link text still renders, but no anchor carries a script-executing href. | ||
|     const dangerousScheme = /^\s*(?:javascript|vbscript|data):/i; | ||
|     const hrefs = [...container.querySelectorAll('a')].map(a => a.getAttribute('href')); | ||
|     expect(hrefs.some(href => href !== null && dangerousScheme.test(href))).toBe(false); | ||
|     expect(container.textContent).toContain('link'); | ||
|
|
||
|     // The image must still render. This is asserted explicitly so that a missing <img> | ||
|     // fails with a clear message instead of surfacing as `undefined` not being `null`. | ||
|     const image = container.querySelector('img'); | ||
|     expect(image).toBeInTheDocument(); | ||
|     // ...and its onerror handler has to be gone. | ||
|     expect(image?.getAttribute('onerror')).toBeNull(); | ||
|   }); | ||
|
|
||
|   // A <script> nested in MathML must not reach the DOM, while the heading beside it must. | ||
|   it('strips the MathML namespace-confusion payload in default MDX format', () => { | ||
|     const payload = '<math><mtext><script>window.__xss = 1</script></mtext></math>'; | ||
|     const Rendered = execute(`# Docs\n\n${payload}`, {}, {}); | ||
|     const { container: dom } = render(<Rendered />); | ||
|
|
||
|     const script = dom.querySelector('script'); | ||
|     const math = dom.querySelector('math'); | ||
|     const heading = dom.querySelector('h1'); | ||
|
|
||
|     expect(script).not.toBeInTheDocument(); | ||
|     expect(math).not.toBeInTheDocument(); | ||
|     expect(heading).toBeInTheDocument(); | ||
|   }); | ||
| }); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| import type { RMDXModule } from '../../../types'; | ||
|
|
||
| import { visit } from 'unist-util-visit'; | ||
|
|
||
| import { mdxish } from '../../../lib'; | ||
| import { findAllElementsByTagName, findElementByTagName } from '../../helpers'; | ||
|
|
||
| /** | ||
|  * Walks every `element` node of `tree` and gathers the distinct names found in its | ||
|  * `properties` object (a node without `properties` contributes nothing). | ||
|  * | ||
|  * @param tree - Tree returned by `mdxish`. | ||
|  * @returns Each property name once, in first-seen order. | ||
|  */ | ||
| function allPropertyKeys(tree: ReturnType<typeof mdxish>): string[] { | ||
|   const seen = new Set<string>(); | ||
|   visit(tree, 'element', element => { | ||
|     for (const name of Object.keys(element.properties ?? {})) { | ||
|       seen.add(name); | ||
|     } | ||
|   }); | ||
|   return Array.from(seen); | ||
| } | ||
|
|
||
| describe('mdxish raw HTML sanitization', () => { | ||
|   describe('script execution vectors', () => { | ||
|     it('strips the MathML namespace-confusion payload from the report', () => { | ||
|       const source = '# Docs\n\n<math><mtext><script>window.__xssfired=1</script></mtext></math>\n'; | ||
|       const output = mdxish(source); | ||
|
|
||
|       // Neither the MathML wrappers nor the nested script may remain. | ||
|       ['script', 'math', 'mtext'].forEach(tagName => { | ||
|         expect(findElementByTagName(output, tagName)).toBeNull(); | ||
|       }); | ||
|       // The heading written next to the payload is still there. | ||
|       expect(findElementByTagName(output, 'h1')).not.toBeNull(); | ||
|     }); | ||
|
|
||
|     it('strips scripts containing String.fromCharCode payload', () => { | ||
|       const script = | ||
|         '<script>fetch(String.fromCharCode(47,97,112,105)).then(function(r){return r.text()})</script>'; | ||
|       const output = mdxish(`# Docs\n\n<math><mtext>${script}</mtext></math>\n`); | ||
|
|
||
|       expect(findElementByTagName(output, 'script')).toBeNull(); | ||
|       // The script body must not appear anywhere in the serialized tree either. | ||
|       expect(JSON.stringify(output)).not.toContain('fromCharCode'); | ||
|     }); | ||
|
|
||
|     it('strips a bare top-level <script>', () => { | ||
|       const output = mdxish('<script>alert(1)</script>'); | ||
|       const script = findElementByTagName(output, 'script'); | ||
|
|
||
|       expect(script).toBeNull(); | ||
|     }); | ||
|
|
||
|     it('strips SVG foreign content carrying a script', () => { | ||
|       const source = '<svg><foreignObject><script>alert(1)</script></foreignObject></svg>'; | ||
|       const output = mdxish(source); | ||
|
|
||
|       expect(findElementByTagName(output, 'svg')).toBeNull(); | ||
|       expect(findElementByTagName(output, 'script')).toBeNull(); | ||
|     }); | ||
|
|
||
|     it('strips embedders (iframe/object)', () => { | ||
|       // Two separate blocks, joined by a blank line. | ||
|       const source = ['<iframe src="javascript:alert(1)"></iframe>', '<object data="x"></object>'].join('\n\n'); | ||
|       const output = mdxish(source); | ||
|
|
||
|       expect(findElementByTagName(output, 'iframe')).toBeNull(); | ||
|       expect(findElementByTagName(output, 'object')).toBeNull(); | ||
|     }); | ||
|   }); | ||
|
|
||
|   // Attribute-level vectors: the element itself must survive while the dangerous | ||
|   // attribute (event handler or script-executing URL) is dropped. | ||
|   describe('attribute vectors', () => { | ||
|     it('removes event-handler attributes but keeps the element', () => { | ||
|       const tree = mdxish('<img src="x.png" onerror="alert(1)" alt="ok">'); | ||
|
|
||
|       const img = findElementByTagName(tree, 'img'); | ||
|       expect(img).not.toBeNull(); | ||
|       // Reject every `on*` key rather than only the camel-cased `onError`, so the check | ||
|       // does not depend on how the parser happens to case the property name. | ||
|       expect(allPropertyKeys(tree).filter(key => /^on/i.test(key))).toStrictEqual([]); | ||
|       expect(img?.properties?.src).toBe('x.png'); | ||
|     }); | ||
|
|
||
|     it('removes javascript: hrefs but keeps the anchor text', () => { | ||
|       const tree = mdxish('<a href="javascript:alert(1)">click me</a>'); | ||
|
|
||
|       const anchor = findElementByTagName(tree, 'a'); | ||
|       expect(anchor).not.toBeNull(); | ||
|       expect(anchor?.properties?.href).toBeUndefined(); | ||
|       expect(JSON.stringify(tree)).toContain('click me'); | ||
|     }); | ||
|
|
||
|     // The href contains a literal tab inside the scheme name. | ||
|     it('strips javascript: URLs obfuscated with whitespace/control characters', () => { | ||
|       const tree = mdxish('<a href="java\tscript:alert(1)">x</a>'); | ||
|
|
||
|       const anchor = findElementByTagName(tree, 'a'); | ||
|       // Without this, a missing anchor would make the href assertion pass vacuously. | ||
|       expect(anchor).not.toBeNull(); | ||
|       expect(anchor?.properties?.href).toBeUndefined(); | ||
|     }); | ||
|   }); | ||
|
|
||
|   // Guards against over-sanitizing: ordinary markup and harmless URLs pass through. | ||
|   describe('safe content is preserved', () => { | ||
|     it('keeps benign formatting, links, and images', () => { | ||
|       const source = [ | ||
|         '<div class="note"><strong>Bold</strong> and <a href="https://example.com">link</a></div>', | ||
|         '<img src="https://example.com/a.png" alt="ok">', | ||
|       ].join('\n\n'); | ||
|       const output = mdxish(source); | ||
|
|
||
|       const link = findElementByTagName(output, 'a'); | ||
|       const image = findElementByTagName(output, 'img'); | ||
|
|
||
|       expect(findElementByTagName(output, 'strong')).not.toBeNull(); | ||
|       expect(link?.properties?.href).toBe('https://example.com'); | ||
|       expect(image?.properties?.src).toBe('https://example.com/a.png'); | ||
|     }); | ||
|
|
||
|     it('keeps relative and mailto links', () => { | ||
|       const output = mdxish('<a href="/docs/start">a</a> <a href="mailto:x@y.com">b</a>'); | ||
|       const anchors = findAllElementsByTagName(output, 'a'); | ||
|
|
||
|       // Both hrefs come back unchanged and in document order. | ||
|       expect(anchors.map(anchor => anchor.properties?.href)).toStrictEqual(['/docs/start', 'mailto:x@y.com']); | ||
|     }); | ||
|   }); | ||
|
|
||
| describe('custom components', () => { | ||
|     // Stub registry handed to `mdxish` through the `components` option. The module body | ||
|     // is never inspected by these tests, hence the empty object cast. | ||
|     const testComponents: Record<string, RMDXModule> = { | ||
|       TestComponent: {} as RMDXModule, | ||
|     }; | ||
|
|
||
|     it('preserves event-handler-named props on PascalCase components', () => { | ||
|       const source = '<TestComponent onClick="fn" href="javascript:alert(1)" />'; | ||
|       const output = mdxish(source, { components: testComponents }); | ||
|       const props = findElementByTagName(output, 'TestComponent')?.properties; | ||
|
|
||
|       // On a custom component these are plain props, so both values come through as written. | ||
|       expect(props?.onClick).toBe('fn'); | ||
|       // eslint-disable-next-line no-script-url | ||
|       expect(props?.href).toBe('javascript:alert(1)'); | ||
|     }); | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
|
|
||
|     it('still sanitizes raw HTML nested inside a component', () => { | ||
|       const source = '<TestComponent>\n\n<img src="x" onerror="alert(1)">\n\n</TestComponent>'; | ||
|       const output = mdxish(source, { components: testComponents }); | ||
|
|
||
|       // The handler is dropped even though the <img> sits inside a custom component... | ||
|       const propertyNames = allPropertyKeys(output); | ||
|       expect(propertyNames).not.toContain('onError'); | ||
|       // ...while the <img> element itself is kept. | ||
|       expect(findElementByTagName(output, 'img')).not.toBeNull(); | ||
|     }); | ||
| }); | ||
|
|
||
|   // Raw HTML placed inside other constructs (table, callout, JSX table) is sanitized too. | ||
|   describe('integration with other nodes', () => { | ||
|     it('sanitizes raw HTML embedded inside a table cell', () => { | ||
|       const rows = ['| A | B |', '| --- | --- |', '| <img src=x onerror=alert(1)> | ok |']; | ||
|       const output = mdxish(rows.join('\n')); | ||
|
|
||
|       expect(allPropertyKeys(output)).not.toContain('onError'); | ||
|       expect(findElementByTagName(output, 'script')).toBeNull(); | ||
|     }); | ||
|
|
||
|     it('sanitizes raw HTML nested inside a callout', () => { | ||
|       const lines = ['> 📘 Title', '>', '> <script>alert(1)</script> body text']; | ||
|       const output = mdxish(lines.join('\n')); | ||
|
|
||
|       expect(findElementByTagName(output, 'script')).toBeNull(); | ||
|       // The text that followed the script on the same line is still present. | ||
|       expect(JSON.stringify(output)).toContain('body text'); | ||
|     }); | ||
|
|
||
|     it('sanitizes raw HTML nested inside a JSX table cell', () => { | ||
|       const source = ` | ||
| <Table> | ||
|   <tbody> | ||
|     <tr> | ||
|       <td> | ||
|         <script>alert(1)</script> | ||
|       </td> | ||
|     </tr> | ||
|   </tbody> | ||
| </Table> | ||
| `; | ||
|       const output = mdxish(source); | ||
|
|
||
|       expect(findElementByTagName(output, 'script')).toBeNull(); | ||
|       // A `table` element is still produced for the <Table> block. | ||
|       expect(findElementByTagName(output, 'table')).not.toBeNull(); | ||
|     }); | ||
|   }); | ||
| }); | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| /* eslint-disable no-script-url -- the `javascript:`/`vbscript:` URLs are intentional XSS fixtures */ | ||
| import type { Element, Root } from 'hast'; | ||
| import type { MdxJsxFlowElementHast } from 'mdast-util-mdx-jsx'; | ||
|
|
||
| import { stripDangerousHtml } from '../../../processor/plugin/dangerous-html'; | ||
|
|
||
| /** Builds a `root` node whose children are the given nodes, in order. */ | ||
| function root(...children: Root['children']): Root { | ||
|   return { type: 'root', children }; | ||
| } | ||
|
|
||
| /** | ||
|  * Builds an `element` node for the given tag name. | ||
|  * | ||
|  * @param tagName - Tag name stored on the node as-is. | ||
|  * @param properties - Properties object; a fresh `{}` when omitted. | ||
|  * @param children - Child nodes; a fresh `[]` when omitted. | ||
|  */ | ||
| function el(tagName: string, properties: Element['properties'] = {}, children: Element['children'] = []): Element { | ||
|   return { type: 'element', tagName, properties, children }; | ||
| } | ||
|
|
||
| /** | ||
|  * Builds an `mdxJsxFlowElement` node with no children. | ||
|  * | ||
|  * @param name - Element name stored on the node (`null` is accepted). | ||
|  * @param attributes - Attribute list; a fresh `[]` when omitted. | ||
|  */ | ||
| function jsx(name: string | null, attributes: MdxJsxFlowElementHast['attributes'] = []): MdxJsxFlowElementHast { | ||
|   return { type: 'mdxJsxFlowElement', name, attributes, children: [] }; | ||
| } | ||
|
|
||
| describe('stripDangerousHtml', () => { | ||
|   describe('dangerous tag removal', () => { | ||
|     // Tag names that must be removed together with everything nested inside them. | ||
|     const deniedTags = [ | ||
|       'script', | ||
|       'noscript', | ||
|       'template', | ||
|       'iframe', | ||
|       'frame', | ||
|       'frameset', | ||
|       'object', | ||
|       'applet', | ||
|       'embed', | ||
|       'base', | ||
|       'link', | ||
|       'meta', | ||
|       'svg', | ||
|       'math', | ||
|     ]; | ||
|
|
||
|     it.each(deniedTags)('removes <%s> and its subtree', tagName => { | ||
|       // The denied element sits between two harmless siblings and wraps a <span>. | ||
|       const denied = el(tagName, {}, [el('span')]); | ||
|       const tree = root(el('p'), denied, el('div')); | ||
|
|
||
|       stripDangerousHtml(tree); | ||
|
|
||
|       const remaining = tree.children.map(child => (child.type === 'element' ? child.tagName : child.type)); | ||
|       expect(remaining).toStrictEqual(['p', 'div']); | ||
|     }); | ||
|
|
||
|     // Names starting with a lowercase letter are host elements, whereas an uppercase first | ||
|     // letter marks a custom component, so a mixed-case name like `iFrame` must still match. | ||
|     it('matches lowercase-leading dangerous tags case-insensitively', () => { | ||
|       const tree = root(el('iFrame')); | ||
|       stripDangerousHtml(tree); | ||
|
|
||
|       expect(tree.children).toHaveLength(0); | ||
|     }); | ||
|
|
||
|     it('removes consecutive dangerous siblings', () => { | ||
|       const tree = root(el('script'), el('iframe'), el('p')); | ||
|       stripDangerousHtml(tree); | ||
|
|
||
|       // Only the trailing <p> is left. | ||
|       expect(tree.children).toHaveLength(1); | ||
|       const [survivor] = tree.children; | ||
|       expect((survivor as Element).tagName).toBe('p'); | ||
|     }); | ||
|   }); | ||
|
|
||
|   // `stripDangerousHtml` edits nodes in place, so each test keeps a reference to the node | ||
|   // it built and inspects that node's `properties` afterwards. | ||
|   describe('host element attribute cleaning', () => { | ||
|     it('drops event-handler attributes', () => { | ||
|       const image = el('img', { src: 'x', onError: 'steal()', onClick: 'go()' }); | ||
|       const tree = root(image); | ||
|
|
||
|       stripDangerousHtml(tree); | ||
|
|
||
|       expect(image.properties).toStrictEqual({ src: 'x' }); | ||
|     }); | ||
|
|
||
|     it('drops javascript: and vbscript: URLs on url-valued attributes', () => { | ||
|       const links = ['javascript:alert(1)', 'vbscript:msgbox(1)'].map(href => el('a', { href })); | ||
|
|
||
|       stripDangerousHtml(root(...links)); | ||
|
|
||
|       links.forEach(link => { | ||
|         expect(link.properties).toStrictEqual({}); | ||
|       }); | ||
|     }); | ||
|
|
||
|     it('keeps safe URLs', () => { | ||
|       // `javascript:` appears in the path here, not as the URL scheme. | ||
|       const url = 'https://example.com/javascript:not-a-scheme'; | ||
|       const link = el('a', { href: url }); | ||
|
|
||
|       stripDangerousHtml(root(link)); | ||
|
|
||
|       expect(link.properties?.href).toBe(url); | ||
|     }); | ||
|
|
||
|     it('drops dangerous data: URLs but keeps benign ones', () => { | ||
|       const htmlLink = el('a', { href: 'data:text/html,<script>alert(1)</script>' }); | ||
|       const pngImage = el('img', { src: 'data:image/png;base64,iVBOR' }); | ||
|
|
||
|       stripDangerousHtml(root(htmlLink, pngImage)); | ||
|
|
||
|       expect(htmlLink.properties).toStrictEqual({}); | ||
|       expect(pngImage.properties?.src).toBe('data:image/png;base64,iVBOR'); | ||
|     }); | ||
|
|
||
|     it('ignores control characters when resolving the scheme', () => { | ||
|       // The scheme name is split by a tab character. | ||
|       const link = el('a', { href: 'java\tscript:alert(1)' }); | ||
|
|
||
|       stripDangerousHtml(root(link)); | ||
|
|
||
|       expect(link.properties).toStrictEqual({}); | ||
|     }); | ||
|
|
||
|     it('keeps a normal srcset (treated as a single URL, no javascript: scheme)', () => { | ||
|       const candidates = 'a.png 1x, b.png 2x'; | ||
|       const image = el('img', { srcSet: candidates }); | ||
|
|
||
|       stripDangerousHtml(root(image)); | ||
|
|
||
|       expect(image.properties?.srcSet).toBe(candidates); | ||
|     }); | ||
|
|
||
|     it('normalizes attribute names so xlink:href / formaction are checked', () => { | ||
|       const link = el('a', { xLinkHref: 'javascript:alert(1)', formAction: 'javascript:alert(1)' }); | ||
|
|
||
|       stripDangerousHtml(root(link)); | ||
|
|
||
|       expect(link.properties).toStrictEqual({}); | ||
|     }); | ||
|   }); | ||
|
|
||
|   // Same checks as for host elements, but on `mdxJsxFlowElement` nodes, whose attributes | ||
|   // are a list of attribute nodes instead of a `properties` object. | ||
|   describe('MDX JSX nodes', () => { | ||
|     it('drops event-handler and javascript: attributes on host JSX elements', () => { | ||
|       const anchor = jsx('a', [ | ||
|         { type: 'mdxJsxAttribute', name: 'onClick', value: 'go()' }, | ||
|         { type: 'mdxJsxAttribute', name: 'href', value: 'javascript:alert(1)' }, | ||
|         { type: 'mdxJsxAttribute', name: 'id', value: 'keep' }, | ||
|       ]); | ||
|       const tree = root(anchor); | ||
|
|
||
|       stripDangerousHtml(tree); | ||
|
|
||
|       // Only the harmless `id` attribute is left. | ||
|       expect(anchor.attributes).toStrictEqual([{ type: 'mdxJsxAttribute', name: 'id', value: 'keep' }]); | ||
|     }); | ||
|
|
||
|     it('keeps spread expression attributes untouched', () => { | ||
|       const spread = { type: 'mdxJsxExpressionAttribute', value: '...{ onClick: handler }' } as const; | ||
|       const host = jsx('div', [spread]); | ||
|
|
||
|       stripDangerousHtml(root(host)); | ||
|
|
||
|       expect(host.attributes).toStrictEqual([spread]); | ||
|     }); | ||
|
|
||
|     it('preserves PascalCase custom components and their props, but descends to clean children', () => { | ||
|       const nestedImage = el('img', { onError: 'steal()' }); | ||
|       const callout = jsx('Callout', [{ type: 'mdxJsxAttribute', name: 'onClick', value: 'props-not-a-handler' }]); | ||
|       callout.children = [nestedImage]; | ||
|
|
||
|       stripDangerousHtml(root(callout)); | ||
|
|
||
|       // The component keeps its single prop... | ||
|       expect(callout.attributes).toHaveLength(1); | ||
|       // ...while the handler on the raw <img> nested inside it is removed. | ||
|       expect(nestedImage.properties).toStrictEqual({}); | ||
|     }); | ||
|   }); | ||
| }); |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is now resolved