Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions __tests__/lib/stripComments.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,51 @@ end"`);
expect(output).toContain('A name field');
});

it('does not break an HTML block when an inner comment occupies a whole line', async () => {
const input = `<table>
<tbody>
<tr><td>Row 1</td></tr>
<!-- <tr><td>Row 2</td></tr> -->
<tr><td>Row 3</td></tr>
</tbody>
</table>`;

const output = await stripComments(input, { mdxish: true });
expect(output).not.toContain('<!--');
Comment thread
maximilianfalco marked this conversation as resolved.
Outdated
expect(output).not.toMatch(/\n[ \t]+\n/);
expect(output).toContain('<tr><td>Row 1</td></tr>');
expect(output).toContain('<tr><td>Row 3</td></tr>');
});

it('removes only the comment line and preserves authorial blank lines around it', async () => {
const input = `<table>
<tbody>
<tr><td>row 1</td></tr>
<!-- <tr><td>commented out</td></tr> -->

<tr><td>row 2</td></tr>


<!-- <tr><td>another commented out</td></tr> -->
<tr><td>row 3</td></tr>
</tbody>
</table>`;

const expected = `<table>
<tbody>
<tr><td>row 1</td></tr>

<tr><td>row 2</td></tr>


<tr><td>row 3</td></tr>
</tbody>
</table>`;

const output = await stripComments(input, { mdxish: true });
expect(output).toBe(expected);
});

it('strips comments inside jsx tables in mdxish mode', async () => {
const input = `<!-- top comment -->

Expand Down
53 changes: 53 additions & 0 deletions __tests__/parsers/tables.test.ts
Comment thread
maximilianfalco marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { removePosition } from 'unist-util-remove-position';

import { mdast } from '../../lib';
import { mdxish } from '../../lib/mdxish';
import { collapseBlankLines } from '../../processor/transform/mdxish/tables/mdxish-tables';
import { collectNodes, findAllElementsByTagName, parseMdxishWithSource } from '../helpers';

describe('table parser', () => {
Expand Down Expand Up @@ -1170,4 +1171,56 @@ None of the following content will get rendered!`;
expect(html).toContain('<code>another_name_here</code>');
});
});

describe('lowercase <table> fallback path', () => {
it('keeps a block-with-whitespace-only-line intact when the mdx-aware parser throws on a cell expression', () => {
const doc = ['<table>', ' <tr>', ' <td>{not valid jsx}</td>', ' ', ' </tr>', '</table>'].join('\n');

const hast = mdxish(doc);
const html = toHtml(hast);

expect(html).not.toContain('<pre>');
expect(html).toContain('{not valid jsx}');
});

it('keeps a block-with-empty-line intact when the mdx-aware parser throws on a cell expression', () => {
const doc = ['<table>', ' <tr>', ' <td>{not valid jsx}</td>', '', ' </tr>', '</table>'].join('\n');

const hast = mdxish(doc);
const html = toHtml(hast);

expect(html).not.toContain('<pre>');
expect(html).toContain('{not valid jsx}');
});

it('collapseBlankLines: removes exactly one blank line per run, leaves every other line byte-identical', () => {
const input = [
'<table>',
' <tr>',
' <td>row 1</td>',
' ',
' <td>row 2</td>',
'',
' <td>row 3</td>',
' </tr>',
'</table>',
].join('\n');

const output = collapseBlankLines(input);
const inputLines = input.split('\n');
const nonBlankInputLines = inputLines.filter(line => !/^[ \t]*$/.test(line));

expect(output.split('\n')).toStrictEqual(nonBlankInputLines);
});

it('collapseBlankLines: only consumes one blank line per run (consecutive blanks are mostly preserved)', () => {
expect(collapseBlankLines('a\n\n\nb')).toBe('a\n\nb');
expect(collapseBlankLines('a\n \n \nb')).toBe('a\n \nb');
});

it('collapseBlankLines: does not strip trailing whitespace on non-empty lines', () => {
const input = 'a \nb';
expect(collapseBlankLines(input)).toBe(input);
});
});
});
58 changes: 41 additions & 17 deletions example/Doc.tsx
Comment thread
maximilianfalco marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ const variables = {
],
};

interface StripState {
error: string | null;
stripped: string | null;
}

type PipelineKey = 'legacy' | 'mdxish' | 'rmdx';

const EMPTY_STRIP_STATE: StripState = { error: null, stripped: null };
const EMPTY_STRIP_STATE_MAP: Record<PipelineKey, StripState> = {
legacy: EMPTY_STRIP_STATE,
mdxish: EMPTY_STRIP_STATE,
rmdx: EMPTY_STRIP_STATE,
};

const Doc = () => {
const { fixture } = useParams();
const [searchParams] = useSearchParams();
Expand All @@ -61,7 +75,6 @@ const Doc = () => {
const mdxish = searchParams.has('mdxish');
const showRmdx = searchParams.has('rmdx');

type PipelineKey = 'legacy' | 'mdxish' | 'rmdx';
const selectedPipelines: PipelineKey[] = [];
if (showRmdx) selectedPipelines.push('rmdx');
if (legacy) selectedPipelines.push('legacy');
Expand All @@ -88,8 +101,8 @@ const Doc = () => {
const [rmdxHast, setRmdxHast] = useState<object | null>(null);
const [mdxishMdast, setMdxishMdast] = useState<object | null>(null);
const [mdxishHast, setMdxishHast] = useState<object | null>(null);
const [strippedMarkdown, setStrippedMarkdown] = useState<string | null>(null);
const [stripError, setStripError] = useState<string | null>(null);
const [stripByPipeline, setStripByPipeline] = useState<Record<PipelineKey, StripState>>(EMPTY_STRIP_STATE_MAP);
const hasAnyStripped = Object.values(stripByPipeline).some(s => s.stripped !== null);
const [view, setView] = useState<'hast' | 'markdown' | 'mdast' | 'rendered'>('rendered');
const showToc = fixture === 'tableOfContentsTests';

Expand All @@ -101,24 +114,21 @@ const Doc = () => {
useEffect(() => {
const sanitize = async (mode: PipelineKey) => {
if (!stripComments) {
setStrippedMarkdown(null);
setStripError(null);
setStripByPipeline(prev => ({ ...prev, [mode]: EMPTY_STRIP_STATE }));
return doc;
}
try {
const sanitized = await mdx.stripComments(doc, {
mdx: mode === 'rmdx',
mdxish: mode === 'mdxish',
});
setStrippedMarkdown(sanitized);
setStripError(null);
setStripByPipeline(prev => ({ ...prev, [mode]: { stripped: sanitized, error: null } }));
return sanitized;
} catch (e) {
// eslint-disable-next-line no-console
console.error(e);
const message = e instanceof Error ? e.message : String(e);
setStripError(message);
setStrippedMarkdown(null);
setStripByPipeline(prev => ({ ...prev, [mode]: { stripped: null, error: message } }));
return null;
}
};
Expand Down Expand Up @@ -261,7 +271,7 @@ const Doc = () => {
<div className="rdmd-demo--display">
<section id="hub-content">
{!ci && <h2 className="rdmd-demo--markdown-header">{name}</h2>}
{(strippedMarkdown !== null || showAst) && (
{(hasAnyStripped || showAst) && (
<div className="rdmd-demo--view-toggle">
<button
className={view === 'rendered' ? 'active' : ''}
Expand All @@ -270,7 +280,7 @@ const Doc = () => {
>
Rendered
</button>
{strippedMarkdown !== null && (
{hasAnyStripped && (
<button
className={view === 'markdown' ? 'active' : ''}
onClick={() => setView('markdown')}
Expand Down Expand Up @@ -299,13 +309,27 @@ const Doc = () => {
)}
</div>
)}
{stripError && (
<div className="rdmd-demo--strip-error">
<strong>stripComments error:</strong> {stripError}
{view === 'markdown' && hasAnyStripped ? (
<div className="rdmd-demo--panels">
{activePipelines.map(p => {
const { error, stripped } = stripByPipeline[p];
return (
<div key={p} className="rdmd-demo--panel">
<div className="rdmd-demo--panel-label">{pipelineLabels[p]}</div>
{error && (
<div className="rdmd-demo--strip-error">
<strong>stripComments error:</strong> {error}
</div>
)}
{stripped !== null ? (
<pre className="rdmd-demo--stripped-output">{stripped}</pre>
) : (
!error && <div className="rdmd-demo--empty">No stripped output for this pipeline</div>
)}
</div>
);
})}
</div>
)}
{view === 'markdown' && strippedMarkdown !== null ? (
<pre className="rdmd-demo--stripped-output">{strippedMarkdown}</pre>
) : (view === 'mdast' || view === 'hast') && showAst ? (
<div className="rdmd-demo--panels">
{activePipelines.map(p => {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
},
{
"path": "dist/main.node.js",
"maxSize": "947KB"
"maxSize": "950KB"
}
]
},
Expand Down
14 changes: 12 additions & 2 deletions processor/transform/mdxish/tables/mdxish-tables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ const buildTableNodeProcessor = (withMdx: boolean) =>
const tableNodeProcessor = buildTableNodeProcessor(true);
const fallbackTableNodeProcessor = buildTableNodeProcessor(false);

const BLANK_LINE_REGEX = /(\r?\n)[ \t]*\r?\n(?![ \t]*\r?\n)/g;

/**
* Collapses one blank line per match so it doesn't terminate the CommonMark
* type-6 block. Non-greedy: runs of multiple blank lines lose just one.
*/
export const collapseBlankLines = (value: string): string =>

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do any blank lines between html elements in a <table> cause component splitting? Or only in some cases?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from my understanding yes, iirc the micromark html block states that an empty line causes it to terminate, which is the cause of the table split

value.replace(BLANK_LINE_REGEX, '$1');

/**
* Parse the HTML node that contains the full table substring
* into the table parts (headers, rows, cells).
Expand Down Expand Up @@ -355,8 +364,9 @@ const mdxishTables = (): Transform => tree => {
} else if (node.value.startsWith('<table')) {
// If the parsing still fails, give an opportunity to the fallback parser
// without remarkMdx to process lowercase tables as it's likely to not
// have needed MDX parsing anyway
const fallback = parseTableNode(fallbackTableNodeProcessor, node);
// have needed MDX parsing anyway.
const sanitizedValue = collapseBlankLines(node.value);
const fallback = parseTableNode(fallbackTableNodeProcessor, { ...node, value: sanitizedValue });
if (!fallback || fallback.children.length <= 1) return;
parent.children.splice(index, 1, ...(fallback.children as typeof parent.children));
}
Expand Down
8 changes: 7 additions & 1 deletion processor/transform/stripComments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import { visit, SKIP } from 'unist-util-visit';

const HTML_COMMENT_REGEX = /<!--[\s\S]*?-->/g;
// Indented whole-line comment plus trailing newline; removing the whole line
// avoids leaving a whitespace-only line that terminates the surrounding block.
const WHOLE_LINE_HTML_COMMENT_REGEX = /^[ \t]+<!--[\s\S]*?-->[ \t]*(?:\r?\n|$)/gm;
Comment thread
maximilianfalco marked this conversation as resolved.
export const MDX_COMMENT_REGEX = /\/\*(?:(?!\*\/)[\s\S])*\*\//g;

/**
Expand All @@ -14,7 +17,10 @@
if (parent && typeof index === 'number') {
// Remove HTML comments
if (node.type === 'html' && HTML_COMMENT_REGEX.test(node.value)) {
const newValue = node.value.replace(HTML_COMMENT_REGEX, '').trim();
const newValue = node.value
.replace(WHOLE_LINE_HTML_COMMENT_REGEX, '')
.replace(HTML_COMMENT_REGEX, '')

Check failure

Code scanning / CodeQL

Incomplete multi-character sanitization High

This string may still contain
<!--
, which may cause an HTML element injection vulnerability.
Comment on lines +20 to +22

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

im thinking of ignoring this tbh, this seems very unlikely and very much unnecessary.

CodeQL's concern is that replace(HTML_COMMENT_REGEX, '') is a single pass. A crafted input can leave behind <!-- after one pass:

  input:   <<!---->!-->
                   ↑ regex matches <!---->  (positions 1-7)
  output:  <!-->   ← starts with <!--

The regex matched (the inner complete comment), removed it, and the surrounding bits collapsed into <!-->. The leftover string still contains <!-- as a substring. A downstream HTML parser could read that as the start of a new comment.

I feel like this is fine since stripComments is gated by mdxSanitizeComments most of the time anyway in the monorepo, which exists to hide internal notes from end users on pages the author themselves wrote. The author already controls the source. If they wanted to leak content to end users they would probably just write that and dont need these extra steps

As far as Im seeing this, there's no untrusted-input boundary being crossed so I think this is fine and it saves us from these complexities.... wdyt @eaglethrost @kevinports

.trim();
if (newValue) {
node.value = newValue;
} else {
Expand Down