Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions __tests__/components/HTMLBlock.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ describe('HTML Block', () => {
expect(view.indexOf('<h1>')).toBeGreaterThanOrEqual(0);
});

// TODO: Skipped because the mdxish engine fails this test: it wraps the <pre> in a <p> tag.
// Rendering looks correct, so skip this for now until we decide whether we want to fix this or not.
it.skip.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now resolved

it.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {
const md = '<HTMLBlock safeMode={true}>{`<button onload="alert(\'gotcha!\')"/>`}</HTMLBlock>';
const Component = renderContent(md);
expect(renderToStaticMarkup(<Component />)).toBe(
Expand Down
54 changes: 54 additions & 0 deletions __tests__/lib/compile-sanitize.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { render } from '@testing-library/react';
import React from 'react';

import { execute } from '../helpers';

// The `md` format sanitizes via rehype-sanitize's allow-list (covered in run.test.tsx).
// Default MDX keeps raw HTML as JSX nodes that the allow-list never sees, so these assert
// the deny-list stripper removes the known script-execution vectors on that path.
describe('MDX (compile) sanitization', () => {
  it('strips script-execution vectors in default MDX format', () => {
    // One document carrying several independent vectors: a script element, three
    // dangerous URL schemes on anchors, an inline event handler, and an iframe.
    const md = [
      '# Docs',
      '',
      '<script>window.__xss = 1</script>',
      '',
      '<a href="javascript:alert(1)">link</a>',
      '',
      '<a href="vbscript:msgbox(1)">link</a>',
      '',
      '<a href="data:text/html,<script>alert(1)</script>">link</a>',
      '',
      '<img src="x" onerror="window.__xss = 1" />',
      '',
      '<iframe src="javascript:alert(1)"></iframe>',
    ].join('\n');

    const Component = execute(md, {}, {}); // no format => MDX
    const { container } = render(<Component />);

    expect(container.querySelector('script')).not.toBeInTheDocument();
    expect(container.querySelector('iframe')).not.toBeInTheDocument();

    // The link text still renders, but no anchor carries a script-executing href.
    const dangerousScheme = /^\s*(?:javascript|vbscript|data):/i;
    const hrefs = [...container.querySelectorAll('a')].map(a => a.getAttribute('href'));
    expect(hrefs.some(href => href !== null && dangerousScheme.test(href))).toBe(false);
    expect(container.textContent).toContain('link');

    // Image still renders, but the onerror handler is gone.
    // Presence is asserted explicitly: with a missing image the optional chain below
    // yields `undefined`, which only fails `toBeNull()` with a misleading message.
    const image = container.querySelector('img');
    expect(image).toBeInTheDocument();
    expect(image?.getAttribute('onerror')).toBeNull();
  });

  it('strips the MathML namespace-confusion payload in default MDX format', () => {
    const md = '# Docs\n\n<math><mtext><script>window.__xss = 1</script></mtext></math>';

    const Component = execute(md, {}, {});
    const { container } = render(<Component />);

    // Both the script and its <math> wrapper are removed; the heading is kept.
    expect(container.querySelector('script')).not.toBeInTheDocument();
    expect(container.querySelector('math')).not.toBeInTheDocument();
    expect(container.querySelector('h1')).toBeInTheDocument();
  });
});
172 changes: 172 additions & 0 deletions __tests__/lib/mdxish/sanitize-raw-html.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import type { RMDXModule } from '../../../types';

import { visit } from 'unist-util-visit';

import { mdxish } from '../../../lib';
import { findAllElementsByTagName, findElementByTagName } from '../../helpers';

/** Returns the distinct property names found on the `element` nodes of the tree, in first-seen order. */
function allPropertyKeys(tree: ReturnType<typeof mdxish>): string[] {
  const seen = new Set<string>();
  visit(tree, 'element', element => {
    // Elements without a `properties` bag contribute nothing.
    for (const name of Object.keys(element.properties ?? {})) {
      seen.add(name);
    }
  });
  return Array.from(seen);
}

describe('mdxish raw HTML sanitization', () => {
describe('script execution vectors', () => {
  it('strips the MathML namespace-confusion payload from the report', () => {
    const source = '# Docs\n\n<math><mtext><script>window.__xssfired=1</script></mtext></math>\n';
    const hast = mdxish(source);

    const script = findElementByTagName(hast, 'script');
    const math = findElementByTagName(hast, 'math');
    const mtext = findElementByTagName(hast, 'mtext');
    const heading = findElementByTagName(hast, 'h1');

    // Neither the script nor either MathML wrapper may remain in the tree.
    expect(script).toBeNull();
    expect(math).toBeNull();
    expect(mtext).toBeNull();
    // The heading that precedes the payload is still emitted.
    expect(heading).not.toBeNull();
  });

  it('strips scripts containing String.fromCharCode payload', () => {
    const payload =
      '<math><mtext><script>fetch(String.fromCharCode(47,97,112,105)).then(function(r){return r.text()})</script></mtext></math>';
    const hast = mdxish(['# Docs', '', payload, ''].join('\n'));

    expect(findElementByTagName(hast, 'script')).toBeNull();
    // The script body must not survive anywhere in the serialized tree either.
    const serialized = JSON.stringify(hast);
    expect(serialized).not.toContain('fromCharCode');
  });

  it('strips a bare top-level <script>', () => {
    const hast = mdxish('<script>alert(1)</script>');
    const script = findElementByTagName(hast, 'script');

    expect(script).toBeNull();
  });

  it('strips SVG foreign content carrying a script', () => {
    const source = '<svg><foreignObject><script>alert(1)</script></foreignObject></svg>';
    const hast = mdxish(source);

    const svg = findElementByTagName(hast, 'svg');
    const script = findElementByTagName(hast, 'script');

    expect(svg).toBeNull();
    expect(script).toBeNull();
  });

  it('strips embedders (iframe/object)', () => {
    const source = '<iframe src="javascript:alert(1)"></iframe>\n\n<object data="x"></object>';
    const hast = mdxish(source);

    const iframe = findElementByTagName(hast, 'iframe');
    const object = findElementByTagName(hast, 'object');

    expect(iframe).toBeNull();
    expect(object).toBeNull();
  });
});

describe('attribute vectors', () => {
  it('removes event-handler attributes but keeps the element', () => {
    const tree = mdxish('<img src="x.png" onerror="alert(1)" alt="ok">');

    const img = findElementByTagName(tree, 'img');
    expect(img).not.toBeNull();
    // Compare case-insensitively so the check cannot pass vacuously if the handler
    // survives under a differently-cased key (e.g. `onerror` instead of `onError`).
    const survivingHandlers = allPropertyKeys(tree).filter(key => key.toLowerCase() === 'onerror');
    expect(survivingHandlers).toStrictEqual([]);
    // The harmless attribute on the same element is untouched.
    expect(img?.properties?.src).toBe('x.png');
  });

  it('removes javascript: hrefs but keeps the anchor text', () => {
    const tree = mdxish('<a href="javascript:alert(1)">click me</a>');

    const anchor = findElementByTagName(tree, 'a');
    // The anchor element itself stays; only its href is dropped.
    expect(anchor).not.toBeNull();
    expect(anchor?.properties?.href).toBeUndefined();
    expect(JSON.stringify(tree)).toContain('click me');
  });

  it('ignores whitespace/control-char obfuscated javascript: URLs', () => {
    // The scheme is split by a literal tab character (`\t` inside the JS string);
    // the test expects the href to be removed just like an unobfuscated `javascript:` URL.
    const tree = mdxish('<a href="java\tscript:alert(1)">x</a>');

    expect(findElementByTagName(tree, 'a')?.properties?.href).toBeUndefined();
  });
});

describe('safe content is preserved', () => {
  it('keeps benign formatting, links, and images', () => {
    const markup =
      '<div class="note"><strong>Bold</strong> and <a href="https://example.com">link</a></div>\n\n<img src="https://example.com/a.png" alt="ok">';
    const hast = mdxish(markup);

    const bold = findElementByTagName(hast, 'strong');
    const link = findElementByTagName(hast, 'a');
    const picture = findElementByTagName(hast, 'img');

    // Formatting element, https link target and https image source all come through unchanged.
    expect(bold).not.toBeNull();
    expect(link?.properties?.href).toBe('https://example.com');
    expect(picture?.properties?.src).toBe('https://example.com/a.png');
  });

  it('keeps relative and mailto links', () => {
    const hast = mdxish('<a href="/docs/start">a</a> <a href="mailto:x@y.com">b</a>');

    // Both anchors keep their href, in document order.
    const anchors = findAllElementsByTagName(hast, 'a');
    const targets = anchors.map(anchor => anchor.properties?.href);
    expect(targets).toStrictEqual(['/docs/start', 'mailto:x@y.com']);
  });
});

describe('custom components', () => {
  // Handed to `mdxish` through the `components` option; the module itself is an empty stub.
  const stubComponents: Record<string, RMDXModule> = { TestComponent: {} as RMDXModule };

  it('keeps event-handler-named props but strips dangerous URL props on PascalCase components', () => {
    const source = '<TestComponent onClick="fn" href="javascript:alert(1)" />';
    const hast = mdxish(source, { components: stubComponents });

    const props = findElementByTagName(hast, 'TestComponent')?.properties;
    // `on*` props are React props on a component, not DOM handlers, so they survive...
    expect(props?.onClick).toBe('fn');
    // ...but a `javascript:` URL prop is stripped: a component may forward it to a host element.
    expect(props?.href).toBeUndefined();
  });

  it('keeps safe URL props on PascalCase components', () => {
    const source = '<TestComponent href="https://example.com" />';
    const hast = mdxish(source, { components: stubComponents });

    const props = findElementByTagName(hast, 'TestComponent')?.properties;
    expect(props?.href).toBe('https://example.com');
  });

  it('still sanitizes raw HTML nested inside a component', () => {
    const source = '<TestComponent>\n\n<img src="x" onerror="alert(1)">\n\n</TestComponent>';
    const hast = mdxish(source, { components: stubComponents });

    // The nested image loses its handler but is not removed.
    const propertyNames = allPropertyKeys(hast);
    expect(propertyNames).not.toContain('onError');
    expect(findElementByTagName(hast, 'img')).not.toBeNull();
  });
});

describe('integration with other nodes', () => {
  it('sanitizes raw HTML embedded inside a table cell', () => {
    const source = '| A | B |\n| --- | --- |\n| <img src=x onerror=alert(1)> | ok |';
    const hast = mdxish(source);

    const propertyNames = allPropertyKeys(hast);
    expect(propertyNames).not.toContain('onError');
    expect(findElementByTagName(hast, 'script')).toBeNull();
  });

  it('sanitizes raw HTML nested inside a callout', () => {
    const source = '> 📘 Title\n>\n> <script>alert(1)</script> body text';
    const hast = mdxish(source);

    // The script element is dropped while the text after it is kept.
    expect(findElementByTagName(hast, 'script')).toBeNull();
    const serialized = JSON.stringify(hast);
    expect(serialized).toContain('body text');
  });

  it('sanitizes raw HTML nested inside a JSX table cell', () => {
    // Built line by line so the input's whitespace is explicit: each tag sits on its
    // own unindented line, with one leading and one trailing newline.
    const source = [
      '',
      '<Table>',
      '<tbody>',
      '<tr>',
      '<td>',
      '<script>alert(1)</script>',
      '</td>',
      '</tr>',
      '</tbody>',
      '</Table>',
      '',
    ].join('\n');
    const hast = mdxish(source);

    expect(findElementByTagName(hast, 'script')).toBeNull();
    expect(findElementByTagName(hast, 'table')).not.toBeNull();
  });
});
});
Loading