Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions __tests__/components/HTMLBlock.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ describe('HTML Block', () => {
expect(view.indexOf('<h1>')).toBeGreaterThanOrEqual(0);
});

// TODO: Skipped about the mdxish engine fails this test since it wraps the <pre> in a <p> tag
// Rendering looks correct, so skip this for now until we decide if we want to fix this or not
it.skip.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now resolved

it.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {
const md = '<HTMLBlock safeMode={true}>{`<button onload="alert(\'gotcha!\')"/>`}</HTMLBlock>';
const Component = renderContent(md);
expect(renderToStaticMarkup(<Component />)).toBe(
Expand Down
52 changes: 52 additions & 0 deletions __tests__/lib/compile-sanitize.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { render } from '@testing-library/react';
import React from 'react';

import { execute } from '../helpers';

/**
* The `md` format sanitizes via rehype-sanitize (covered in run.test.tsx). Default
* MDX format keeps raw HTML as JSX nodes that schema never sees, so these assert the
* shared dangerous-HTML stripper closes that path too.
*/
describe('MDX (compile) sanitization', () => {
it('strips script-execution vectors in default MDX format', () => {
const md = [
'# Docs',
'',
'<script>window.__xss = 1</script>',
'',
'<a href="javascript:alert(1)">link</a>',
'',
'<img src="x" onerror="window.__xss = 1" />',
'',
'<iframe src="javascript:alert(1)"></iframe>',
].join('\n');

const Component = execute(md, {}, {}); // no format => MDX
const { container } = render(<Component />);

expect(container.querySelector('script')).not.toBeInTheDocument();
expect(container.querySelector('iframe')).not.toBeInTheDocument();

// The link text still renders, but no anchor carries a script-executing href.
const dangerousScheme = /^\s*(?:javascript|vbscript|data):/i;
const hrefs = [...container.querySelectorAll('a')].map(a => a.getAttribute('href'));
expect(hrefs.some(href => href !== null && dangerousScheme.test(href))).toBe(false);
expect(container.textContent).toContain('link');

// Image still renders, but the onerror handler is gone.
const image = container.querySelector('img');
expect(image?.getAttribute('onerror')).toBeNull();
});

it('strips the MathML namespace-confusion payload in default MDX format', () => {
const md = '# Docs\n\n<math><mtext><script>window.__xss = 1</script></mtext></math>';

const Component = execute(md, {}, {});
const { container } = render(<Component />);

expect(container.querySelector('script')).not.toBeInTheDocument();
expect(container.querySelector('math')).not.toBeInTheDocument();
expect(container.querySelector('h1')).toBeInTheDocument();
});
});
145 changes: 145 additions & 0 deletions __tests__/lib/mdxish/sanitize-raw-html.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import type { RMDXModule } from '../../../types';

import { visit } from 'unist-util-visit';

import { mdxish } from '../../../lib';
import { findAllElementsByTagName, findElementByTagName } from '../../helpers';

/** Collects every property key present on any element in the tree. */
function allPropertyKeys(tree: ReturnType<typeof mdxish>): string[] {
const keys = new Set<string>();
visit(tree, 'element', node => {
Object.keys(node.properties ?? {}).forEach(key => keys.add(key));
});
return [...keys];
}

describe('mdxish raw HTML sanitization', () => {
describe('script execution vectors', () => {
it('strips the MathML namespace-confusion payload from the report', () => {
const tree = mdxish('# Docs\n\n<math><mtext><script>window.__xssfired=1</script></mtext></math>\n');

expect(findElementByTagName(tree, 'script')).toBeNull();
expect(findElementByTagName(tree, 'math')).toBeNull();
expect(findElementByTagName(tree, 'mtext')).toBeNull();
// The heading and surrounding structure survive.
expect(findElementByTagName(tree, 'h1')).not.toBeNull();
});

it('strips the exact String.fromCharCode exfil payload from the report', () => {
const payload =
'<math><mtext><script>fetch(String.fromCharCode(47,97,112,105)).then(function(r){return r.text()})</script></mtext></math>';
const tree = mdxish(`# Docs\n\n${payload}\n`);

expect(findElementByTagName(tree, 'script')).toBeNull();
expect(JSON.stringify(tree)).not.toContain('fromCharCode');
});

it('strips a bare top-level <script>', () => {
const tree = mdxish('<script>alert(1)</script>');

expect(findElementByTagName(tree, 'script')).toBeNull();
});

it('strips SVG foreign content carrying a script', () => {
const tree = mdxish('<svg><foreignObject><script>alert(1)</script></foreignObject></svg>');

expect(findElementByTagName(tree, 'svg')).toBeNull();
expect(findElementByTagName(tree, 'script')).toBeNull();
});

it('strips dangerous embedders (iframe/object)', () => {
const tree = mdxish('<iframe src="javascript:alert(1)"></iframe>\n\n<object data="x"></object>');

expect(findElementByTagName(tree, 'iframe')).toBeNull();
expect(findElementByTagName(tree, 'object')).toBeNull();
});
});

describe('attribute vectors', () => {
it('removes event-handler attributes but keeps the element', () => {
const tree = mdxish('<img src="x.png" onerror="alert(1)" alt="ok">');

const img = findElementByTagName(tree, 'img');
expect(img).not.toBeNull();
expect(allPropertyKeys(tree)).not.toContain('onError');
expect(img?.properties?.src).toBe('x.png');
});

it('removes javascript: hrefs but keeps the anchor text', () => {
const tree = mdxish('<a href="javascript:alert(1)">click me</a>');

const anchor = findElementByTagName(tree, 'a');
expect(anchor).not.toBeNull();
expect(anchor?.properties?.href).toBeUndefined();
expect(JSON.stringify(tree)).toContain('click me');
});

it('ignores whitespace/control-char obfuscated javascript: URLs', () => {
const tree = mdxish('<a href="java\tscript:alert(1)">x</a>');

expect(findElementByTagName(tree, 'a')?.properties?.href).toBeUndefined();
});
});

describe('safe content is preserved', () => {
it('keeps benign formatting, links, and images', () => {
const tree = mdxish(
'<div class="note"><strong>Bold</strong> and <a href="https://example.com">link</a></div>\n\n<img src="https://example.com/a.png" alt="ok">',
);

expect(findElementByTagName(tree, 'strong')).not.toBeNull();
expect(findElementByTagName(tree, 'a')?.properties?.href).toBe('https://example.com');
expect(findElementByTagName(tree, 'img')?.properties?.src).toBe('https://example.com/a.png');
});

it('keeps relative and mailto links', () => {
const tree = mdxish('<a href="/docs/start">a</a> <a href="mailto:x@y.com">b</a>');

const hrefs = findAllElementsByTagName(tree, 'a').map(node => node.properties?.href);
expect(hrefs).toStrictEqual(['/docs/start', 'mailto:x@y.com']);
});
});

describe('custom components', () => {
const testComponents: Record<string, RMDXModule> = {
TestComponent: {} as RMDXModule
}

it('preserves event-handler-named props on PascalCase components', () => {
const tree = mdxish('<TestComponent onClick="fn" href="javascript:alert(1)" />', {
components: testComponents,
});

const component = findElementByTagName(tree, 'TestComponent');
expect(component?.properties?.onClick).toBe('fn');
// eslint-disable-next-line no-script-url
expect(component?.properties?.href).toBe('javascript:alert(1)');
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

it('still sanitizes raw HTML nested inside a component', () => {
const tree = mdxish('<TestComponent>\n\n<img src="x" onerror="alert(1)">\n\n</TestComponent>', {
components: testComponents,
});

expect(allPropertyKeys(tree)).not.toContain('onError');
expect(findElementByTagName(tree, 'img')).not.toBeNull();
});
});

describe('integration with other nodes', () => {
it('sanitizes raw HTML embedded inside a table cell', () => {
const tree = mdxish('| A | B |\n| --- | --- |\n| <img src=x onerror=alert(1)> | ok |');

expect(allPropertyKeys(tree)).not.toContain('onError');
expect(findElementByTagName(tree, 'script')).toBeNull();
});

it('sanitizes raw HTML nested inside a callout', () => {
const tree = mdxish('> 📘 Title\n>\n> <script>alert(1)</script> body text');

expect(findElementByTagName(tree, 'script')).toBeNull();
expect(JSON.stringify(tree)).toContain('body text');
});
});
});
5 changes: 5 additions & 0 deletions lib/compile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import remarkGfm from 'remark-gfm';

import MdxSyntaxError from '../errors/mdx-syntax-error';
import { rehypeToc } from '../processor/plugin/toc';
import rehypeStripDangerousHtml from '../processor/sanitize/rehype-strip-dangerous-html';
import {
defaultTransforms,
tailwindTransformer,
Expand Down Expand Up @@ -79,6 +80,10 @@ const compile = (
},
]);
rehypePlugins.push([rehypeSanitize, sanitizeSchema]);
} else {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the main MDX path has never sanitise these scripts. They don't get executed during compilation, but later during execution.

// MDX (non-`md`) content keeps raw HTML as JSX nodes that the schema above never
// sees, so strip script-execution vectors regardless of format.
rehypePlugins.push(rehypeStripDangerousHtml);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

try {
Expand Down
2 changes: 2 additions & 0 deletions lib/mdxish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { mdxishCompilers } from '../processor/compile';
import { rehypeFlattenTableCellParagraphs } from '../processor/plugin/flatten-table-cell-paragraphs';
import { rehypeMdxishComponents } from '../processor/plugin/mdxish-components';
import { mdxComponentHandlers } from '../processor/plugin/mdxish-handlers';
import rehypeStripDangerousHtml from '../processor/sanitize/rehype-strip-dangerous-html';
import calloutTransformer from '../processor/transform/callouts';
import codeTabsTransformer from '../processor/transform/code-tabs';
import embedTransformer from '../processor/transform/embeds';
Expand Down Expand Up @@ -286,6 +287,7 @@ export function mdxish(mdContent: string, opts: MdxishOpts = {}): Root {
.use(restoreBooleanProperties)
.use(safeMode ? undefined : resolveDeferredAttributeExpressionProps) // Evaluate deferred attribute expressions on mdx-jsx nodes (now past rehypeRaw's clone)
.use(normalizeMdxJsxNodes) // Rewrite `mdx-jsx` back to standard `element` nodes for downstream plugins
.use(rehypeStripDangerousHtml) // Strip script/foreign-content/event-handler XSS vectors from raw HTML
.use(rehypeFlattenTableCellParagraphs) // Remove <p> wrappers inside table cells to prevent margin issues
.use(mdxishMermaidTransformer) // Add mermaid-render className to pre wrappers
.use(generateSlugForHeadings)
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
},
{
"path": "dist/main.node.js",
"maxSize": "950KB"
"maxSize": "955KB"
}
]
},
Expand Down
Loading