readmeio · eaglethrost · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026 · Jun 29, 2026
diff --git a/__tests__/components/HTMLBlock.test.tsx b/__tests__/components/HTMLBlock.test.tsx
@@ -55,9 +55,7 @@ describe('HTML Block', () => {
     expect(view.indexOf('<h1>')).toBeGreaterThanOrEqual(0);
   });
 
-  // TODO: Skipped about the mdxish engine fails this test since it wraps the <pre> in a <p> tag
-  // Rendering looks correct, so skip this for now until we decide if we want to fix this or not
-  it.skip.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {
+  it.each(renderingEngines)('%s: renders the html in a `<pre>` tag if safeMode={true}', (_label, renderContent) => {
     const md = '<HTMLBlock safeMode={true}>{`<button onload="alert(\'gotcha!\')"/>`}</HTMLBlock>';
     const Component = renderContent(md);
     expect(renderToStaticMarkup(<Component />)).toBe(

diff --git a/__tests__/lib/compile-sanitize.test.tsx b/__tests__/lib/compile-sanitize.test.tsx
@@ -0,0 +1,50 @@
+import { render } from '@testing-library/react';
+import React from 'react';
+
+import { execute } from '../helpers';
+
+// The `md` format sanitizes via rehype-sanitize's allow-list (covered in run.test.tsx).
+// Default MDX keeps raw HTML as JSX nodes that the allow-list never sees, so these tests
+// assert that the deny-list stripper removes the known script-execution vectors on that path.
+describe('MDX (compile) sanitization', () => {
+  it('strips script-execution vectors in default MDX format', () => {
+    const md = [
+      '# Docs',
+      '',
+      '<script>window.__xss = 1</script>',
+      '',
+      '<a href="javascript:alert(1)">link</a>',
+      '',
+      '<img src="x" onerror="window.__xss = 1" />',
+      '',
+      '<iframe src="javascript:alert(1)"></iframe>',
+    ].join('\n');
+
+    const Component = execute(md, {}, {}); // no format option passed => default MDX path
+    const { container } = render(<Component />);
+
+    expect(container.querySelector('script')).not.toBeInTheDocument();
+    expect(container.querySelector('iframe')).not.toBeInTheDocument();
+
+    // The link text must still render, but no anchor may carry a script-executing href.
+    const dangerousScheme = /^\s*(?:javascript|vbscript|data):/i;
+    const hrefs = [...container.querySelectorAll('a')].map(a => a.getAttribute('href'));
+    expect(hrefs.some(href => href !== null && dangerousScheme.test(href))).toBe(false);
+    expect(container.textContent).toContain('link');
+
+    // The image must still render (a missing <img> yields undefined, which fails toBeNull), minus onerror.
+    const image = container.querySelector('img');
+    expect(image?.getAttribute('onerror')).toBeNull();
+  });
+
+  it('strips the MathML namespace-confusion payload in default MDX format', () => {
+    const md = '# Docs\n\n<math><mtext><script>window.__xss = 1</script></mtext></math>';
+
+    const Component = execute(md, {}, {});
+    const { container } = render(<Component />);
+
+    expect(container.querySelector('script')).not.toBeInTheDocument();
+    expect(container.querySelector('math')).not.toBeInTheDocument();
+    expect(container.querySelector('h1')).toBeInTheDocument();
+  });
+});
diff --git a/__tests__/lib/mdxish/sanitize-raw-html.test.ts b/__tests__/lib/mdxish/sanitize-raw-html.test.ts
@@ -0,0 +1,162 @@
+import type { RMDXModule } from '../../../types';
+
+import { visit } from 'unist-util-visit';
+
+import { mdxish } from '../../../lib';
+import { findAllElementsByTagName, findElementByTagName } from '../../helpers';
+
+/** Returns the distinct property names found across all elements of the tree, in first-seen order. */
+function allPropertyKeys(tree: ReturnType<typeof mdxish>): string[] {
+  const names: string[] = [];
+  visit(tree, 'element', element => {
+    names.push(...Object.keys(element.properties ?? {}));
+  });
+  return Array.from(new Set(names));
+}
+
+describe('mdxish raw HTML sanitization', () => {
+  describe('script execution vectors', () => {
+    it('strips the MathML namespace-confusion payload from the report', () => {
+      const tree = mdxish('# Docs\n\n<math><mtext><script>window.__xssfired=1</script></mtext></math>\n');
+
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+      expect(findElementByTagName(tree, 'math')).toBeNull();
+      expect(findElementByTagName(tree, 'mtext')).toBeNull();
+      // The heading and surrounding structure survive.
+      expect(findElementByTagName(tree, 'h1')).not.toBeNull();
+    });
+
+    it('strips scripts containing String.fromCharCode payload', () => {
+      const payload =
+        '<math><mtext><script>fetch(String.fromCharCode(47,97,112,105)).then(function(r){return r.text()})</script></mtext></math>';
+      const tree = mdxish(`# Docs\n\n${payload}\n`);
+
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+      expect(JSON.stringify(tree)).not.toContain('fromCharCode');
+    });
+
+    it('strips a bare top-level <script>', () => {
+      const tree = mdxish('<script>alert(1)</script>');
+
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+    });
+
+    it('strips SVG foreign content carrying a script', () => {
+      const tree = mdxish('<svg><foreignObject><script>alert(1)</script></foreignObject></svg>');
+
+      expect(findElementByTagName(tree, 'svg')).toBeNull();
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+    });
+
+    it('strips embedders (iframe/object)', () => {
+      const tree = mdxish('<iframe src="javascript:alert(1)"></iframe>\n\n<object data="x"></object>');
+
+      expect(findElementByTagName(tree, 'iframe')).toBeNull();
+      expect(findElementByTagName(tree, 'object')).toBeNull();
+    });
+  });
+
+  describe('attribute vectors', () => {
+    it('removes event-handler attributes but keeps the element', () => {
+      const tree = mdxish('<img src="x.png" onerror="alert(1)" alt="ok">');
+
+      const img = findElementByTagName(tree, 'img');
+      expect(img).not.toBeNull();
+      expect(allPropertyKeys(tree)).not.toContain('onError');
+      expect(img?.properties?.src).toBe('x.png');
+    });
+
+    it('removes javascript: hrefs but keeps the anchor text', () => {
+      const tree = mdxish('<a href="javascript:alert(1)">click me</a>');
+
+      const anchor = findElementByTagName(tree, 'a');
+      expect(anchor).not.toBeNull();
+      expect(anchor?.properties?.href).toBeUndefined();
+      expect(JSON.stringify(tree)).toContain('click me');
+    });
+
+    it('strips whitespace/control-char obfuscated javascript: URLs', () => {
+      const tree = mdxish('<a href="java\tscript:alert(1)">x</a>');
+
+      expect(findElementByTagName(tree, 'a')?.properties?.href).toBeUndefined();
+    });
+  });
+
+  describe('safe content is preserved', () => {
+    it('keeps benign formatting, links, and images', () => {
+      const tree = mdxish(
+        '<div class="note"><strong>Bold</strong> and <a href="https://example.com">link</a></div>\n\n<img src="https://example.com/a.png" alt="ok">',
+      );
+
+      expect(findElementByTagName(tree, 'strong')).not.toBeNull();
+      expect(findElementByTagName(tree, 'a')?.properties?.href).toBe('https://example.com');
+      expect(findElementByTagName(tree, 'img')?.properties?.src).toBe('https://example.com/a.png');
+    });
+
+    it('keeps relative and mailto links', () => {
+      const tree = mdxish('<a href="/docs/start">a</a> <a href="mailto:x@y.com">b</a>');
+
+      const hrefs = findAllElementsByTagName(tree, 'a').map(node => node.properties?.href);
+      expect(hrefs).toStrictEqual(['/docs/start', 'mailto:x@y.com']);
+    });
+  });
+
+  describe('custom components', () => {
+    // Stub registry: an empty module stands in for the real component. Presumably mdxish
+    // only needs the `TestComponent` name to be registered for these assertions.
+    const testComponents: Record<string, RMDXModule> = { TestComponent: {} as RMDXModule };
+
+    it('preserves event-handler-named props on PascalCase components', () => {
+      const tree = mdxish('<TestComponent onClick="fn" href="javascript:alert(1)" />', {
+        components: testComponents,
+      });
+
+      const component = findElementByTagName(tree, 'TestComponent');
+      expect(component?.properties?.onClick).toBe('fn');
+      // eslint-disable-next-line no-script-url
+      expect(component?.properties?.href).toBe('javascript:alert(1)');
+    });
+
+    it('still sanitizes raw HTML nested inside a component', () => {
+      const tree = mdxish('<TestComponent>\n\n<img src="x" onerror="alert(1)">\n\n</TestComponent>', {
+        components: testComponents,
+      });
+
+      expect(allPropertyKeys(tree)).not.toContain('onError');
+      expect(findElementByTagName(tree, 'img')).not.toBeNull();
+    });
+  });
+
+  describe('integration with other nodes', () => {
+    it('sanitizes raw HTML embedded inside a table cell', () => {
+      const tree = mdxish('| A | B |\n| --- | --- |\n| <img src=x onerror=alert(1)> | ok |');
+
+      expect(allPropertyKeys(tree)).not.toContain('onError');
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+    });
+
+    it('sanitizes raw HTML nested inside a callout', () => {
+      const tree = mdxish('> 📘 Title\n>\n> <script>alert(1)</script> body text');
+
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+      expect(JSON.stringify(tree)).toContain('body text');
+    });
+
+    it('sanitizes raw HTML nested inside a JSX table cell', () => {
+      const tree = mdxish(`
+<Table>
+  <tbody>
+    <tr>
+      <td>
+        <script>alert(1)</script>
+      </td>
+    </tr>
+  </tbody>
+</Table>
+        `);
+
+      expect(findElementByTagName(tree, 'script')).toBeNull();
+      expect(findElementByTagName(tree, 'table')).not.toBeNull();
+    });
+  });
+});
diff --git a/__tests__/processor/plugin/dangerous-html.test.ts b/__tests__/processor/plugin/dangerous-html.test.ts
@@ -0,0 +1,156 @@
+/* eslint-disable no-script-url -- the `javascript:`/`vbscript:` URLs are intentional XSS fixtures */
+import type { Element, Root } from 'hast';
+import type { MdxJsxFlowElementHast } from 'mdast-util-mdx-jsx';
+
+import { stripDangerousHtml } from '../../../processor/plugin/dangerous-html';
+
+// Minimal hast / MDX-JSX fixture builders shared by the specs in this file.
+/** Wraps the given nodes in a hast root. */
+const root = (...children: Root['children']): Root => ({ type: 'root', children });
+
+/** Builds a hast `element` node; `properties` and `children` default to empty. */
+const el = (tagName: string, properties: Element['properties'] = {}, children: Element['children'] = []): Element => {
+  const node: Element = { type: 'element', tagName, properties, children };
+  return node;
+};
+
+/** Builds a childless MDX JSX flow element; `attributes` defaults to an empty list. */
+const jsx = (name: string | null, attributes: MdxJsxFlowElementHast['attributes'] = []): MdxJsxFlowElementHast => {
+  const node: MdxJsxFlowElementHast = { type: 'mdxJsxFlowElement', name, attributes, children: [] };
+  return node;
+};
+
+describe('stripDangerousHtml', () => {
+  describe('dangerous tag removal', () => {
+    it.each([
+      'script',
+      'noscript',
+      'template',
+      'iframe',
+      'frame',
+      'frameset',
+      'object',
+      'applet',
+      'embed',
+      'base',
+      'link',
+      'meta',
+      'svg',
+      'math',
+    ])('removes <%s> and its subtree', tagName => {
+      const tree = root(el('p'), el(tagName, {}, [el('span')]), el('div'));
+
+      stripDangerousHtml(tree);
+
+      const tags = tree.children.map(child => (child.type === 'element' ? child.tagName : child.type));
+      expect(tags).toStrictEqual(['p', 'div']);
+    });
+
+    // A lowercase first letter marks a host element (an uppercase one marks a custom
+    // component), so a mixed-case host tag such as `iFrame` must still be removed.
+    it('matches lowercase-leading dangerous tags case-insensitively', () => {
+      const tree = root(el('iFrame'));
+
+      stripDangerousHtml(tree);
+
+      expect(tree.children).toHaveLength(0);
+    });
+
+    it('removes consecutive dangerous siblings', () => {
+      const tree = root(el('script'), el('iframe'), el('p'));
+
+      stripDangerousHtml(tree);
+
+      expect(tree.children).toHaveLength(1);
+      expect((tree.children[0] as Element).tagName).toBe('p');
+    });
+  });
+
+  describe('host element attribute cleaning', () => {
+    it('drops event-handler attributes', () => {
+      const node = el('img', { src: 'x', onError: 'steal()', onClick: 'go()' });
+      stripDangerousHtml(root(node));
+
+      expect(node.properties).toStrictEqual({ src: 'x' });
+    });
+
+    it('drops javascript: and vbscript: URLs on url-valued attributes', () => {
+      const node = el('a', { href: 'javascript:alert(1)' });
+      const node2 = el('a', { href: 'vbscript:msgbox(1)' });
+      stripDangerousHtml(root(node, node2));
+
+      expect(node.properties).toStrictEqual({});
+      expect(node2.properties).toStrictEqual({});
+    });
+
+    it('keeps safe URLs', () => {
+      const node = el('a', { href: 'https://example.com/javascript:not-a-scheme' });
+      stripDangerousHtml(root(node));
+
+      expect(node.properties?.href).toBe('https://example.com/javascript:not-a-scheme');
+    });
+
+    it('drops dangerous data: URLs but keeps benign ones', () => {
+      const danger = el('a', { href: 'data:text/html,<script>alert(1)</script>' });
+      const safe = el('img', { src: 'data:image/png;base64,iVBOR' });
+      stripDangerousHtml(root(danger, safe));
+
+      expect(danger.properties).toStrictEqual({});
+      expect(safe.properties?.src).toBe('data:image/png;base64,iVBOR');
+    });
+
+    it('ignores control characters when resolving the scheme', () => {
+      const node = el('a', { href: 'java\tscript:alert(1)' });
+      stripDangerousHtml(root(node));
+
+      expect(node.properties).toStrictEqual({});
+    });
+
+    it('keeps a normal srcset (treated as a single URL, no javascript: scheme)', () => {
+      const node = el('img', { srcSet: 'a.png 1x, b.png 2x' });
+      stripDangerousHtml(root(node));
+
+      expect(node.properties?.srcSet).toBe('a.png 1x, b.png 2x');
+    });
+
+    it('normalizes attribute names so xlink:href / formaction are checked', () => {
+      const node = el('a', { xLinkHref: 'javascript:alert(1)', formAction: 'javascript:alert(1)' });
+      stripDangerousHtml(root(node));
+
+      expect(node.properties).toStrictEqual({});
+    });
+  });
+
+  describe('MDX JSX nodes', () => {
+    it('drops event-handler and javascript: attributes on host JSX elements', () => {
+      const node = jsx('a', [
+        { type: 'mdxJsxAttribute', name: 'onClick', value: 'go()' },
+        { type: 'mdxJsxAttribute', name: 'href', value: 'javascript:alert(1)' },
+        { type: 'mdxJsxAttribute', name: 'id', value: 'keep' },
+      ]);
+      stripDangerousHtml(root(node));
+
+      expect(node.attributes).toStrictEqual([{ type: 'mdxJsxAttribute', name: 'id', value: 'keep' }]);
+    });
+
+    it('keeps spread expression attributes untouched', () => {
+      const spread = { type: 'mdxJsxExpressionAttribute', value: '...{ onClick: handler }' } as const;
+      const node = jsx('div', [spread]);
+      stripDangerousHtml(root(node));
+
+      expect(node.attributes).toStrictEqual([spread]);
+    });
+
+    it('preserves PascalCase custom components and their props, but descends to clean children', () => {
+      const child = el('img', { onError: 'steal()' });
+      const component = jsx('Callout', [{ type: 'mdxJsxAttribute', name: 'onClick', value: 'props-not-a-handler' }]);
+      component.children = [child];
+      stripDangerousHtml(root(component));
+
+      // The component keeps its own prop, even though that prop is named like a handler...
+      expect(component.attributes).toHaveLength(1);
+      // ...while the `onError` handler on the nested raw <img> is stripped.
+      expect(child.properties).toStrictEqual({});
+    });
+  });
+});