Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions __tests__/components/HTMLBlock.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ describe('HTML Block', () => {
expect(view.indexOf('<h1>')).toBeGreaterThanOrEqual(0);
});

// TODO: Skipped because the mdxish engine fails this test: it wraps the <pre> in a <p> tag.
// Rendering looks correct, so skip this for now until we decide whether we want to fix this or not.
it.skip.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now resolved

it.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {
const md = '<HTMLBlock safeMode={true}>{`<button onload="alert(\'gotcha!\')"/>`}</HTMLBlock>';
const Component = renderContent(md);
expect(renderToStaticMarkup(<Component />)).toBe(
Expand Down
50 changes: 50 additions & 0 deletions __tests__/lib/compile-sanitize.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { render } from '@testing-library/react';
import React from 'react';

import { execute } from '../helpers';

// The `md` format sanitizes via rehype-sanitize's allow-list (covered in run.test.tsx).
// The default MDX format keeps raw HTML as JSX nodes that the allow-list never sees, so these
// tests assert that the deny-list stripper removes the known script-execution vectors on that path.
/**
 * End-to-end checks for the default (no `format`) path of `execute`: the markdown
 * is compiled, rendered with Testing Library, and the resulting DOM is inspected
 * for known script-execution vectors.
 */
describe('MDX (compile) sanitization', () => {
  it('strips script-execution vectors in default MDX format', () => {
    // One document carrying four vectors: a <script>, a javascript: href,
    // an inline onerror handler, and an iframe with a javascript: src.
    const md = [
      '# Docs',
      '',
      '<script>window.__xss = 1</script>',
      '',
      '<a href="javascript:alert(1)">link</a>',
      '',
      '<img src="x" onerror="window.__xss = 1" />',
      '',
      '<iframe src="javascript:alert(1)"></iframe>',
    ].join('\n');

    const Component = execute(md, {}, {}); // no format => MDX
    const { container } = render(<Component />);

    expect(container.querySelector('script')).not.toBeInTheDocument();
    expect(container.querySelector('iframe')).not.toBeInTheDocument();

    // The link text still renders, but no anchor carries a script-executing href.
    const dangerousScheme = /^\s*(?:javascript|vbscript|data):/i;
    const hrefs = [...container.querySelectorAll('a')].map(a => a.getAttribute('href'));
    expect(hrefs.some(href => href !== null && dangerousScheme.test(href))).toBe(false);
    expect(container.textContent).toContain('link');

    // Image still renders, but the onerror handler is gone.
    const image = container.querySelector('img');
    // Assert presence explicitly: without this, a missing <img> would only surface
    // as a confusing "expected undefined to be null" failure on the next line.
    expect(image).not.toBeNull();
    expect(image?.getAttribute('onerror')).toBeNull();
  });

  it('strips the MathML namespace-confusion payload in default MDX format', () => {
    const md = '# Docs\n\n<math><mtext><script>window.__xss = 1</script></mtext></math>';

    const Component = execute(md, {}, {});
    const { container } = render(<Component />);

    // The whole <math> subtree is expected to be gone, while the heading survives.
    expect(container.querySelector('script')).not.toBeInTheDocument();
    expect(container.querySelector('math')).not.toBeInTheDocument();
    expect(container.querySelector('h1')).toBeInTheDocument();
  });
});
162 changes: 162 additions & 0 deletions __tests__/lib/mdxish/sanitize-raw-html.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import type { RMDXModule } from '../../../types';

import { visit } from 'unist-util-visit';

import { mdxish } from '../../../lib';
import { findAllElementsByTagName, findElementByTagName } from '../../helpers';

/**
 * Gathers the distinct property names used by `element` nodes in a tree.
 *
 * @param tree - Tree returned by `mdxish`.
 * @returns Every property key found on at least one element, without duplicates,
 *   in the order the traversal first encounters them.
 */
function allPropertyKeys(tree: ReturnType<typeof mdxish>): string[] {
  const seen = new Set<string>();
  visit(tree, 'element', element => {
    // Elements without a `properties` object contribute nothing.
    for (const name of Object.keys(element.properties ?? {})) {
      seen.add(name);
    }
  });
  return Array.from(seen);
}

describe('mdxish raw HTML sanitization', () => {
// Each case feeds raw HTML that could execute script and checks that the
// offending elements are absent from the tree `mdxish` returns.
describe('script execution vectors', () => {
  it('strips the MathML namespace-confusion payload from the report', () => {
    const source = '# Docs\n\n<math><mtext><script>window.__xssfired=1</script></mtext></math>\n';
    const tree = mdxish(source);

    ['script', 'math', 'mtext'].forEach(tagName => {
      expect(findElementByTagName(tree, tagName)).toBeNull();
    });
    // The heading and surrounding structure survive.
    expect(findElementByTagName(tree, 'h1')).not.toBeNull();
  });

  it('strips scripts containing String.fromCharCode payload', () => {
    const payload =
      '<math><mtext><script>fetch(String.fromCharCode(47,97,112,105)).then(function(r){return r.text()})</script></mtext></math>';
    const tree = mdxish(`# Docs\n\n${payload}\n`);
    const serialized = JSON.stringify(tree);

    expect(findElementByTagName(tree, 'script')).toBeNull();
    // No trace of the script body may remain anywhere in the serialized tree.
    expect(serialized).not.toContain('fromCharCode');
  });

  it('strips a bare top-level <script>', () => {
    const script = findElementByTagName(mdxish('<script>alert(1)</script>'), 'script');

    expect(script).toBeNull();
  });

  it('strips SVG foreign content carrying a script', () => {
    const source = '<svg><foreignObject><script>alert(1)</script></foreignObject></svg>';
    const tree = mdxish(source);

    expect(findElementByTagName(tree, 'svg')).toBeNull();
    expect(findElementByTagName(tree, 'script')).toBeNull();
  });

  it('strips embedders (iframe/object)', () => {
    const source = '<iframe src="javascript:alert(1)"></iframe>\n\n<object data="x"></object>';
    const tree = mdxish(source);

    expect(findElementByTagName(tree, 'iframe')).toBeNull();
    expect(findElementByTagName(tree, 'object')).toBeNull();
  });
});

// Attribute-level sanitization: the element itself must survive while the
// dangerous attribute (event handler or script-scheme URL) is dropped.
describe('attribute vectors', () => {
  it('removes event-handler attributes but keeps the element', () => {
    const tree = mdxish('<img src="x.png" onerror="alert(1)" alt="ok">');

    const img = findElementByTagName(tree, 'img');
    expect(img).not.toBeNull();
    expect(allPropertyKeys(tree)).not.toContain('onError');
    expect(img?.properties?.src).toBe('x.png');
  });

  it('removes javascript: hrefs but keeps the anchor text', () => {
    const tree = mdxish('<a href="javascript:alert(1)">click me</a>');

    const anchor = findElementByTagName(tree, 'a');
    expect(anchor).not.toBeNull();
    expect(anchor?.properties?.href).toBeUndefined();
    expect(JSON.stringify(tree)).toContain('click me');
  });

  it('strips whitespace/control-char obfuscated javascript: URLs', () => {
    // `\t` puts a literal tab in the middle of the scheme name.
    const tree = mdxish('<a href="java\tscript:alert(1)">x</a>');

    const anchor = findElementByTagName(tree, 'a');
    // Guard against a vacuous pass: if no anchor were found, the optional chain
    // below would yield `undefined` and the href assertion would prove nothing.
    expect(anchor).not.toBeNull();
    expect(anchor?.properties?.href).toBeUndefined();
  });
});

// Counterpart to the stripping tests: benign markup must come through unchanged.
describe('safe content is preserved', () => {
  it('keeps benign formatting, links, and images', () => {
    const markup =
      '<div class="note"><strong>Bold</strong> and <a href="https://example.com">link</a></div>\n\n<img src="https://example.com/a.png" alt="ok">';
    const tree = mdxish(markup);

    const link = findElementByTagName(tree, 'a');
    const image = findElementByTagName(tree, 'img');

    expect(findElementByTagName(tree, 'strong')).not.toBeNull();
    expect(link?.properties?.href).toBe('https://example.com');
    expect(image?.properties?.src).toBe('https://example.com/a.png');
  });

  it('keeps relative and mailto links', () => {
    const tree = mdxish('<a href="/docs/start">a</a> <a href="mailto:x@y.com">b</a>');

    // Both anchors are expected, in document order, with their hrefs intact.
    const anchors = findAllElementsByTagName(tree, 'a');
    expect(anchors.map(anchor => anchor.properties?.href)).toStrictEqual(['/docs/start', 'mailto:x@y.com']);
  });
});

describe('custom components', () => {
// Component registry handed to `mdxish` via the `components` option. The module
// is an empty stub cast to `RMDXModule`; the tests here only inspect the tree.
const testComponents: Record<string, RMDXModule> = {
  TestComponent: {} as RMDXModule,
};

it('preserves event-handler-named props on PascalCase components', () => {
  const source = '<TestComponent onClick="fn" href="javascript:alert(1)" />';
  const tree = mdxish(source, { components: testComponents });

  // Both props are expected to pass through untouched on the registered component,
  // even though the same names/values would be stripped from a host element.
  const props = findElementByTagName(tree, 'TestComponent')?.properties;
  expect(props?.onClick).toBe('fn');
  // eslint-disable-next-line no-script-url
  expect(props?.href).toBe('javascript:alert(1)');
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

it('still sanitizes raw HTML nested inside a component', () => {
  const source = '<TestComponent>\n\n<img src="x" onerror="alert(1)">\n\n</TestComponent>';
  const tree = mdxish(source, { components: testComponents });

  // The nested <img> stays in the tree, but no element anywhere keeps an onError property.
  const propertyNames = allPropertyKeys(tree);
  expect(propertyNames).not.toContain('onError');
  expect(findElementByTagName(tree, 'img')).not.toBeNull();
});
});

// Checks that sanitization also applies to raw HTML placed inside other
// constructs: a pipe-table cell, a blockquote callout, and a JSX <Table> cell.
describe('integration with other nodes', () => {
it('sanitizes raw HTML embedded inside a table cell', () => {
// Pipe-table fixture whose first body cell is a raw <img> with an unquoted onerror handler.
const tree = mdxish('| A | B |\n| --- | --- |\n| <img src=x onerror=alert(1)> | ok |');

// No element anywhere in the tree may carry an `onError` property, and no <script> may appear.
expect(allPropertyKeys(tree)).not.toContain('onError');
expect(findElementByTagName(tree, 'script')).toBeNull();
});

it('sanitizes raw HTML nested inside a callout', () => {
// Blockquote that opens with an emoji title line, followed by an inline <script> and plain text.
const tree = mdxish('> 📘 Title\n>\n> <script>alert(1)</script> body text');

// The script element is gone while the trailing text is still present in the serialized tree.
expect(findElementByTagName(tree, 'script')).toBeNull();
expect(JSON.stringify(tree)).toContain('body text');
});

it('sanitizes raw HTML nested inside a JSX table cell', () => {
// NOTE(review): the template literal's line layout is part of the fixture string —
// presumably significant to the parser, so avoid reformatting it; confirm before changing.
const tree = mdxish(`
<Table>
<tbody>
<tr>
<td>
<script>alert(1)</script>
</td>
</tr>
</tbody>
</Table>
`);

// The script is removed; the lookup uses lowercase `table` although the fixture writes `<Table>`.
expect(findElementByTagName(tree, 'script')).toBeNull();
expect(findElementByTagName(tree, 'table')).not.toBeNull();
});
});
});
156 changes: 156 additions & 0 deletions __tests__/processor/plugin/dangerous-html.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/* eslint-disable no-script-url -- the `javascript:`/`vbscript:` URLs are intentional XSS fixtures */
import type { Element, Root } from 'hast';
import type { MdxJsxFlowElementHast } from 'mdast-util-mdx-jsx';

import { stripDangerousHtml } from '../../../processor/plugin/dangerous-html';

/**
 * Builds a hast root node around the given children.
 *
 * @param children - Nodes to place directly under the root, in order.
 * @returns A `{ type: 'root', children }` node holding the passed nodes.
 */
function root(...children: Root['children']): Root {
  return { type: 'root', children };
}

/**
 * Builds a hast element node for use as a test fixture.
 *
 * @param tagName - Tag name stored on the node as given (no case folding here).
 * @param properties - Property bag for the element; a fresh empty object when omitted.
 * @param children - Child nodes; a fresh empty array when omitted.
 * @returns An `element` node referencing the passed `properties` and `children`.
 */
function el(tagName: string, properties: Element['properties'] = {}, children: Element['children'] = []): Element {
  return { type: 'element', tagName, properties, children };
}

/**
 * Builds an `mdxJsxFlowElement` node for use as a test fixture.
 *
 * @param name - Element or component name, or `null`.
 * @param attributes - JSX attributes for the node; a fresh empty array when omitted.
 * @returns A flow-element node with the given name and attributes and an empty `children` array.
 */
function jsx(name: string | null, attributes: MdxJsxFlowElementHast['attributes'] = []): MdxJsxFlowElementHast {
  return { type: 'mdxJsxFlowElement', name, attributes, children: [] };
}

describe('stripDangerousHtml', () => {
// Whole-element removal: a denied tag disappears together with its subtree,
// while its siblings are left in place.
describe('dangerous tag removal', () => {
  const deniedTags = [
    'script',
    'noscript',
    'template',
    'iframe',
    'frame',
    'frameset',
    'object',
    'applet',
    'embed',
    'base',
    'link',
    'meta',
    'svg',
    'math',
  ];

  it.each(deniedTags)('removes <%s> and its subtree', tagName => {
    // The denied element sits between two harmless siblings and has a child of its own.
    const tree = root(el('p'), el(tagName, {}, [el('span')]), el('div'));

    stripDangerousHtml(tree);

    const survivors = tree.children.map(child => {
      if (child.type === 'element') return child.tagName;
      return child.type;
    });
    expect(survivors).toStrictEqual(['p', 'div']);
  });

  // Names starting with a lowercase letter are host elements (names starting with an
  // uppercase letter are custom components), so the deny-set lookup lowercases the
  // name and therefore also catches e.g. `iFrame`.
  it('matches lowercase-leading dangerous tags case-insensitively', () => {
    const tree = root(el('iFrame'));

    stripDangerousHtml(tree);

    expect(tree.children).toHaveLength(0);
  });

  it('removes consecutive dangerous siblings', () => {
    const tree = root(el('script'), el('iframe'), el('p'));

    stripDangerousHtml(tree);

    // Two adjacent removals must not cause the node after them to be skipped or dropped.
    expect(tree.children).toHaveLength(1);
    const [survivor] = tree.children;
    expect((survivor as Element).tagName).toBe('p');
  });
});

// Attribute-level checks on plain hast elements: `stripDangerousHtml` mutates the
// nodes in place, so each test inspects the fixture node after the call.
describe('host element attribute cleaning', () => {
  it('drops event-handler attributes', () => {
    const image = el('img', { src: 'x', onError: 'steal()', onClick: 'go()' });

    stripDangerousHtml(root(image));

    expect(image.properties).toStrictEqual({ src: 'x' });
  });

  it('drops javascript: and vbscript: URLs on url-valued attributes', () => {
    const anchors = ['javascript:alert(1)', 'vbscript:msgbox(1)'].map(href => el('a', { href }));

    stripDangerousHtml(root(...anchors));

    anchors.forEach(anchor => {
      expect(anchor.properties).toStrictEqual({});
    });
  });

  it('keeps safe URLs', () => {
    // `javascript:` appears only in the path, not as the URL scheme.
    const href = 'https://example.com/javascript:not-a-scheme';
    const anchor = el('a', { href });

    stripDangerousHtml(root(anchor));

    expect(anchor.properties?.href).toBe(href);
  });

  it('drops dangerous data: URLs but keeps benign ones', () => {
    const pngDataUrl = 'data:image/png;base64,iVBOR';
    const htmlLink = el('a', { href: 'data:text/html,<script>alert(1)</script>' });
    const inlineImage = el('img', { src: pngDataUrl });

    stripDangerousHtml(root(htmlLink, inlineImage));

    expect(htmlLink.properties).toStrictEqual({});
    expect(inlineImage.properties?.src).toBe(pngDataUrl);
  });

  it('ignores control characters when resolving the scheme', () => {
    // `\t` is a literal tab splitting the scheme name.
    const anchor = el('a', { href: 'java\tscript:alert(1)' });

    stripDangerousHtml(root(anchor));

    expect(anchor.properties).toStrictEqual({});
  });

  it('keeps a normal srcset (treated as a single URL, no javascript: scheme)', () => {
    const srcSet = 'a.png 1x, b.png 2x';
    const image = el('img', { srcSet });

    stripDangerousHtml(root(image));

    expect(image.properties?.srcSet).toBe(srcSet);
  });

  it('normalizes attribute names so xlink:href / formaction are checked', () => {
    // Camel-cased property names must still be recognised as URL-valued attributes.
    const anchor = el('a', { xLinkHref: 'javascript:alert(1)', formAction: 'javascript:alert(1)' });

    stripDangerousHtml(root(anchor));

    expect(anchor.properties).toStrictEqual({});
  });
});

// Same rules applied to `mdxJsxFlowElement` nodes, whose attributes live in an
// array of attribute objects rather than a `properties` map.
describe('MDX JSX nodes', () => {
  it('drops event-handler and javascript: attributes on host JSX elements', () => {
    const anchor = jsx('a', [
      { type: 'mdxJsxAttribute', name: 'onClick', value: 'go()' },
      { type: 'mdxJsxAttribute', name: 'href', value: 'javascript:alert(1)' },
      { type: 'mdxJsxAttribute', name: 'id', value: 'keep' },
    ]);

    stripDangerousHtml(root(anchor));

    // Only the harmless `id` attribute is left.
    expect(anchor.attributes).toStrictEqual([{ type: 'mdxJsxAttribute', name: 'id', value: 'keep' }]);
  });

  it('keeps spread expression attributes untouched', () => {
    const spread = { type: 'mdxJsxExpressionAttribute', value: '...{ onClick: handler }' } as const;
    const host = jsx('div', [spread]);

    stripDangerousHtml(root(host));

    expect(host.attributes).toStrictEqual([spread]);
  });

  it('preserves PascalCase custom components and their props, but descends to clean children', () => {
    const nestedImage = el('img', { onError: 'steal()' });
    const callout = jsx('Callout', [{ type: 'mdxJsxAttribute', name: 'onClick', value: 'props-not-a-handler' }]);
    callout.children = [nestedImage];

    stripDangerousHtml(root(callout));

    // Component prop survives...
    expect(callout.attributes).toHaveLength(1);
    // ...but the nested raw <img> handler is stripped.
    expect(nestedImage.properties).toStrictEqual({});
  });
});
});
Loading