Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions __tests__/transformers/mdxish-tables.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,181 @@ describe('mdxish tables transformation', () => {
});
});

describe('given a leading list marker inside a JSX cell', () => {
  // Every case below pushes its document through `mdxish` and then `toHtml`,
  // and asserts on the resulting HTML string.
  const renderToHtml = (source: string) => toHtml(mdxish(source));

  // --- Escaped vs. unescaped list markers -------------------------------

  it('renders an unescaped leading dash as a bullet list (default markdown behaviour)', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>- foo</td></tr></tbody>
</table>`);

    expect(rendered).toContain('<ul>');
    expect(rendered).toContain('<li>foo</li>');
  });

  it('renders an escaped leading dash as literal text (raw HTML <table>)', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>\\-</th><th>\\-</th></tr></thead>
<tbody><tr><td>\\- fqefeq</td><td>\\- hello</td></tr></tbody>
</table>`);

    expect(rendered).not.toContain('<ul>');
    expect(rendered).not.toContain('<li>');
    expect(rendered).toContain('- fqefeq');
    expect(rendered).toContain('- hello');
  });

  it('renders an escaped leading dash as literal text (<Table> component)', () => {
    const rendered = renderToHtml(`<Table>
<thead>
<tr><th>\\-</th><th>\\-</th></tr>
</thead>
<tbody>
<tr><td>\\- fqefeq</td><td>\\- hello</td></tr>
</tbody>
</Table>`);

    expect(rendered).not.toContain('<ul>');
    expect(rendered).not.toContain('<li>');
    expect(rendered).toContain('- fqefeq');
    expect(rendered).toContain('- hello');
  });

  it('renders an escaped lone marker (no following content) as literal text', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>\\-</th></tr></thead>
<tbody><tr><td>body</td></tr></tbody>
</table>`);

    expect(rendered).not.toContain('<ul>');
    expect(rendered).toContain('-');
  });

  // --- Markdown and HTML that must keep working inside a cell -----------

  it('still parses emphasis (asterisk hugging text) inside a JSX cell', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>*italic*</td></tr></tbody>
</table>`);

    expect(rendered).toContain('<em>italic</em>');
  });

  it('preserves a real <ul> inside a JSX cell', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td><ul><li>real item</li></ul></td></tr></tbody>
</table>`);

    expect(rendered).toContain('<ul>');
    expect(rendered).toContain('<li>real item</li>');
  });

  // --- Escaped vs. unescaped heading markers ----------------------------

  it('renders an unescaped leading hash as a heading (default markdown behaviour)', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td># heading</td></tr></tbody>
</table>`);

    expect(rendered).toMatch(/<h1[^>]*>heading<\/h1>/);
  });

  it('renders an escaped leading hash as literal text', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>\\# heading</td></tr></tbody>
</table>`);

    expect(rendered).not.toMatch(/<h1[^>]*>/);
    expect(rendered).toContain('# heading');
  });

  it('renders an escaped leading hash with multiple hashes as literal text', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>\\## subheading</td></tr></tbody>
</table>`);

    expect(rendered).not.toMatch(/<h2[^>]*>/);
    expect(rendered).toContain('## subheading');
  });

  // --- Lone markers (nothing after them) --------------------------------

  // The header cell is expected to contain exactly the marker that was written.
  it.each(['-', '*', '+', '#', '##', '###'])(
    'flattens a standalone "%s" cell to literal text instead of an empty list/heading',
    marker => {
      const rendered = renderToHtml(`<table>
<thead><tr><th>${marker}</th></tr></thead>
<tbody><tr><td>body</td></tr></tbody>
</table>`);

      expect(rendered).not.toContain('<ul>');
      expect(rendered).not.toMatch(/<h[1-6][^>]*><\/h[1-6]>/);
      expect(rendered).toContain(`<th>${marker}</th>`);
    },
  );

  // NOTE(review): same document and assertions as the first test in this
  // block; presumably kept as the contrast case for the flattening test above.
  it('keeps a list when the cell has actual content after the marker', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>- foo</td></tr></tbody>
</table>`);

    expect(rendered).toContain('<ul>');
    expect(rendered).toContain('<li>foo</li>');
  });

  // NOTE(review): same document and assertion as the "unescaped leading hash"
  // test earlier in this block; presumably kept as a contrast case as well.
  it('keeps a heading when the cell has actual content after the hash', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td># heading</td></tr></tbody>
</table>`);

    expect(rendered).toMatch(/<h1[^>]*>heading<\/h1>/);
  });

  it('flattens a standalone `>` blockquote marker to literal text', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>></th></tr></thead>
<tbody><tr><td>body</td></tr></tbody>
</table>`);

    expect(rendered).not.toMatch(/<blockquote/);
    // The serialiser may emit `>` raw or as either entity form.
    expect(rendered).toMatch(/<th>(>|&gt;|&#x3E;)<\/th>/);
  });

  it('flattens a standalone `---` thematic break to literal text', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>---</td></tr></tbody>
</table>`);

    expect(rendered).not.toContain('<hr');
    expect(rendered).toContain('---');
  });

  it('flattens a standalone `***` thematic break to literal text', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>***</td></tr></tbody>
</table>`);

    expect(rendered).not.toContain('<hr');
    expect(rendered).toContain('***');
  });

  it('keeps a blockquote when the cell has actual content after the marker', () => {
    const rendered = renderToHtml(`<table>
<thead><tr><th>head</th></tr></thead>
<tbody><tr><td>> quote</td></tr></tbody>
</table>`);

    expect(rendered).toContain('<blockquote');
    expect(rendered).toContain('quote');
  });
});

describe('given malformed JSX inside <table>', () => {
// Stray duplicated </td></tr> makes mdxjs reject the captured value;
// the non-MDX fallback should still split the html node so blank-line
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
},
{
"path": "dist/main.node.js",
"maxSize": "947KB"
"maxSize": "950KB"
}
]
},
Expand Down
73 changes: 64 additions & 9 deletions processor/transform/mdxish/tables/mdxish-tables.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { Html, Node, Parents, Root, Table, TableCell, TableRow } from 'mdast';
import type { Html, List, ListItem, Node, Parent, Parents, Root, Table, TableCell, TableRow, Text } from 'mdast';
import type { Transform } from 'mdast-util-from-markdown';
import type { MdxJsxFlowElement, MdxJsxTextElement } from 'mdast-util-mdx';

Expand Down Expand Up @@ -38,6 +38,48 @@ const tableTypes = {
td: 'tableCell',
};

// Opening `<td …>` / `<th …>` tag anchored at the very start of a cell's raw
// source (case-insensitive, attributes optional).
const CELL_OPEN_TAG_RE = /^<(td|th)(?:\s[^>]*)?>/i;
// Optional whitespace, a backslash, then either a list marker (`-`, `*`, `+`)
// that is followed by a space, tab, `<`, newline or end of input, or a `#`.
const LEADING_ESCAPED_MARKER_RE = /^\s*\\(?:[-*+](?=[ \t]|<|$|\n)|#)/;

/**
 * Reports whether the raw source of a cell begins, right after its opening
 * `<td>`/`<th>` tag, with a backslash-escaped marker (`\-`, `\*`, `\+`, `\#`).
 * Callers use this to decide whether the `\` has to be put back before the
 * cell text is re-parsed.
 *
 * @param cellSrc - Raw source of one cell, starting at its opening tag.
 * @returns `true` only when the opening tag is found and the content after it
 * starts with an escaped marker.
 */
const cellSourceHasEscapedMarker = (cellSrc: string): boolean => {
  const openTag = CELL_OPEN_TAG_RE.exec(cellSrc);
  if (openTag === null) return false;

  const inner = cellSrc.slice(openTag[0].length);
  return LEADING_ESCAPED_MARKER_RE.test(inner);
};

/**
 * Decides whether a re-parsed cell consists of exactly one "phantom" block,
 * i.e. an empty structure left behind by a lone marker:
 * - a list with a single item that has no children,
 * - a heading or blockquote with no children,
 * - a thematic break.
 *
 * @param children - Top-level nodes produced by re-parsing the cell text.
 * @returns `true` when the only node is one of the empty blocks listed above.
 */
const isLonePhantomBlock = (children: Node[]): boolean => {
  if (children.length !== 1) return false;

  const only = children[0];
  // A missing `children` array counts as zero children.
  const childCount = (candidate: Node): number => (candidate as Parent).children?.length ?? 0;

  if (only.type === 'thematicBreak') return true;
  if (only.type === 'heading' || only.type === 'blockquote') return childCount(only) === 0;
  if (only.type === 'list') {
    const { children: items } = only as List;
    return items.length === 1 && childCount(items[0] as ListItem) === 0;
  }
  return false;
};

/**
 * Slice a cell's raw source out of the table source string.
 *
 * The cell's start/end offsets are shifted down by `baseOffset` and the result
 * is used as the index range into `tableSource`.
 *
 * @param tableSource - Raw source text the offsets refer to (after shifting).
 * @param cellPosition - Position of the cell; both `start.offset` and
 * `end.offset` must be present.
 * @param baseOffset - Amount subtracted from both offsets before slicing.
 * @returns The cell's substring, or `undefined` when the source is missing or
 * empty, an offset is missing, or a shifted index falls before the start of
 * `tableSource`.
 */
const sliceCellSource = (
  tableSource: string | undefined,
  cellPosition: Node['position'] | undefined,
  baseOffset: number,
): string | undefined => {
  const startOffset = cellPosition?.start?.offset;
  const endOffset = cellPosition?.end?.offset;
  if (!tableSource || startOffset == null || endOffset == null) return undefined;

  const from = startOffset - baseOffset;
  const to = endOffset - baseOffset;
  // `String.prototype.slice` counts negative indices from the END of the
  // string, so a mismatched base offset would silently return an unrelated
  // substring. Treat that as "source unavailable" instead.
  if (from < 0 || to < 0) return undefined;

  return tableSource.slice(from, to);
};

// `mdxjs` + `mdxFromMarkdown` is what `remarkMdx` registers internally; we
// register them manually so we control ordering against our other tokenizers.
// The fallback omits these so blank-line-separated markdown inside cells still
Expand Down Expand Up @@ -142,6 +184,7 @@ const processTableNode = (
index: number,
parent: Parents,
documentPosition?: Node['position'],
tableSource?: string,
): void => {
if (node.name !== 'Table' && node.name !== 'table') return;

Expand All @@ -150,24 +193,36 @@ const processTableNode = (
const align = Array.isArray(alignAttr) ? alignAttr : null;

let tableHasFlowContent = false;
const tableBaseOffset = position?.start?.offset ?? 0;

// Re-parse text-only cells through markdown and detect flow content
visit(node as Node, isTableCell, (cell: MdxJsxTableCell) => {
if (!isTextOnly(cell.children as unknown[])) return;

const textContent = extractTextFromChildren(cell.children as unknown[]);
if (!textContent.trim()) return;
const originalText = extractTextFromChildren(cell.children as unknown[]);
if (!originalText.trim()) return;

// Restore the `\` so `\- foo` re-parses as text.
const cellSrc = sliceCellSource(tableSource, cell.position, tableBaseOffset);
const textContent = cellSrc && cellSourceHasEscapedMarker(cellSrc) ? `\\${originalText}` : originalText;

// Since now we are using remarkMdx, which can fail and error, we need to
// gate this behind a try/catch to ensure that malformed syntaxes do not
// crash the page
try {
const parsed = tableNodeProcessor.runSync(tableNodeProcessor.parse(textContent)) as Root;
if (parsed.children.length > 0) {
cell.children = parsed.children as MdxJsxTableCell['children'];
if (hasFlowContent(parsed.children as Node[])) {
tableHasFlowContent = true;
}
if (parsed.children.length === 0) return;

// Lone marker → empty block; render as literal char.
if (isLonePhantomBlock(parsed.children as Node[])) {
const textNode: Text = { type: 'text', value: originalText.trim() };
cell.children = [textNode] as unknown as MdxJsxTableCell['children'];
return;
}

cell.children = parsed.children as MdxJsxTableCell['children'];
if (hasFlowContent(parsed.children as Node[])) {
tableHasFlowContent = true;
}
} catch {
// If parsing fails, keep original children
Expand Down Expand Up @@ -349,7 +404,7 @@ const mdxishTables = (): Transform => tree => {
// to build on the markdown / JSX table
visit(parsed as Node, isMDXElement, (tableNode: MdxJsxFlowElement | MdxJsxTextElement) => {
if (tableNode.name !== 'Table' && tableNode.name !== 'table') return undefined;
processTableNode(tableNode, index, parent as Parents, node.position);
processTableNode(tableNode, index, parent as Parents, node.position, node.value);
return EXIT;
});
} else if (node.value.startsWith('<table')) {
Expand Down