Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions __tests__/compilers/gfm.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,5 +272,75 @@ describe('GFM footnotes', () => {
| undefined;
expect(footnoteDef).toBeDefined();
});

it('processes footnote reference inside a GFM pipe-table cell', () => {
  // Pipe table whose first body cell carries a footnote reference,
  // followed by the matching footnote definition.
  const lines = [
    '| Term | Definition |',
    '| ------------ | ------------- |',
    '| Example[^1] | See footnote |',
    '',
    '[^1]: This footnote should render.',
  ];
  const tree = mdxish(lines.join('\n'));

  /**
   * Depth-first, document-order search for the first element whose tag name
   * equals `tagName`. Only element nodes are inspected and descended into.
   */
  const firstElement = (nodes: typeof tree.children, tagName: string): Element | undefined => {
    for (const candidate of nodes) {
      if (candidate.type !== 'element') continue;
      if (candidate.tagName === tagName) return candidate;
      const nested = firstElement(candidate.children, tagName);
      if (nested) return nested;
    }
    return undefined;
  };

  // The reference is expected to surface as a <sup> somewhere in the tree.
  const supElement = firstElement(tree.children, 'sup');
  expect(supElement).toBeDefined();

  // The footnotes section is expected as a direct child of the root.
  const isSection = (child: (typeof tree.children)[number]): boolean =>
    child.type === 'element' && child.tagName === 'section';
  const sectionElement = tree.children.find(isSection) as Element | undefined;
  expect(sectionElement).toBeDefined();
});

it('processes footnote reference inside a JSX <Table> cell', () => {
  // JSX <Table> whose first body cell carries a footnote reference,
  // followed by the matching footnote definition outside the table.
  const lines = [
    '<Table>',
    ' <thead>',
    ' <tr><th>Term</th><th>Definition</th></tr>',
    ' </thead>',
    ' <tbody>',
    ' <tr>',
    ' <td>Example[^1]</td>',
    ' <td>See footnote</td>',
    ' </tr>',
    ' </tbody>',
    '</Table>',
    '',
    '[^1]: This footnote should render.',
  ];
  const tree = mdxish(lines.join('\n'));

  /**
   * Depth-first, document-order search for the first element whose tag name
   * equals `tagName`. Only element nodes are inspected and descended into.
   */
  const firstElement = (nodes: typeof tree.children, tagName: string): Element | undefined => {
    for (const candidate of nodes) {
      if (candidate.type !== 'element') continue;
      if (candidate.tagName === tagName) return candidate;
      const nested = firstElement(candidate.children, tagName);
      if (nested) return nested;
    }
    return undefined;
  };

  // The reference is expected to surface as a <sup> somewhere in the tree.
  const supElement = firstElement(tree.children, 'sup');
  expect(supElement).toBeDefined();

  // The footnotes section is expected as a direct child of the root.
  const isSection = (child: (typeof tree.children)[number]): boolean =>
    child.type === 'element' && child.tagName === 'section';
  const sectionElement = tree.children.find(isSection) as Element | undefined;
  expect(sectionElement).toBeDefined();
});
});
});
64 changes: 53 additions & 11 deletions processor/transform/mdxish/tables/mdxish-tables.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { Html, Node, Parents, Root, Table, TableCell, TableRow } from 'mdast';
import type { FootnoteDefinition, Html, Node, Parents, Root, RootContent, Table, TableCell, TableRow } from 'mdast';
import type { Transform } from 'mdast-util-from-markdown';
import type { MdxJsxFlowElement, MdxJsxTextElement } from 'mdast-util-mdx';

Expand Down Expand Up @@ -62,6 +62,29 @@ const buildTableNodeProcessor = (withMdx: boolean) =>
const tableNodeProcessor = buildTableNodeProcessor(true);
const fallbackTableNodeProcessor = buildTableNodeProcessor(false);

/**
 * Gather the unique identifiers of every `footnoteDefinition` node in `tree`.
 *
 * The ids are later used to prime isolated cell re-parses with placeholder
 * definitions so that `[^id]` is recognized as a `footnoteReference`.
 *
 * @param tree - Tree to scan for footnote definitions.
 * @returns De-duplicated identifiers, in first-seen order; definitions with an
 *   empty identifier are ignored.
 */
const collectFootnoteIds = (tree: Node): string[] => {
  const seen = new Set<string>();
  visit(tree, 'footnoteDefinition', (footnote: FootnoteDefinition) => {
    const { identifier } = footnote;
    if (!identifier) return;
    seen.add(identifier);
  });
  return Array.from(seen);
};

/**
 * Append one placeholder footnote definition (`[^id]: x`) per id to `value`.
 *
 * This lets an isolated cell parse tokenize `[^id]` as a `footnoteReference`;
 * `remark-gfm` requires the definition in the same parse context.
 *
 * @param value - Markdown source that is about to be parsed on its own.
 * @param ids - Footnote identifiers to emit placeholder definitions for.
 * @returns `value` unchanged when `ids` is empty; otherwise `value`, a
 *   separator, and the newline-joined placeholder definitions.
 */
const appendFootnotePlaceholders = (value: string, ids: string[]): string => {
  if (!ids.length) return value;

  const definitionLines: string[] = [];
  for (const footnoteId of ids) {
    definitionLines.push('[^' + footnoteId + ']: x');
  }

  // A value that already ends in a newline needs only one more to produce the
  // blank line before the definitions; otherwise two are inserted.
  const gap = value.endsWith('\n') ? '\n' : '\n\n';
  return [value, definitionLines.join('\n')].join(gap);
};

/**
* Parse the HTML node that contains the full table substring
* into the table parts (headers, rows, cells).
Expand All @@ -72,10 +95,12 @@ const parseTableNode = (
processor: typeof tableNodeProcessor,
node: Html,
repair?: { inserts: Insert[]; originalSource: string },
outerFootnoteIds: string[] = [],
): Root | undefined => {
const value = appendFootnotePlaceholders(node.value, outerFootnoteIds);
let parsed: Root;
try {
parsed = processor.runSync(processor.parse(node.value)) as Root;
parsed = processor.runSync(processor.parse(value)) as Root;
} catch {
return undefined;
}
Expand Down Expand Up @@ -143,6 +168,7 @@ const processTableNode = (
index: number,
parent: Parents,
documentPosition?: Node['position'],
outerFootnoteIds: string[] = [],
): void => {
if (node.name !== 'Table' && node.name !== 'table') return;

Expand Down Expand Up @@ -178,10 +204,13 @@ const processTableNode = (
// gate this behind a try/catch to ensure that malformed syntaxes do not
// crash the page
try {
const parsed = tableNodeProcessor.runSync(tableNodeProcessor.parse(textContent)) as Root;
if (parsed.children.length > 0) {
cell.children = parsed.children as MdxJsxTableCell['children'];
if (hasFlowContent(parsed.children as Node[])) {
const inputForParse = appendFootnotePlaceholders(textContent, outerFootnoteIds);
const parsed = tableNodeProcessor.runSync(tableNodeProcessor.parse(inputForParse)) as Root;
// Synthetic placeholder definitions belong to the outer document, not the cell
const cleanedChildren = parsed.children.filter(child => child.type !== 'footnoteDefinition');
if (cleanedChildren.length > 0) {
cell.children = cleanedChildren as MdxJsxTableCell['children'];
if (hasFlowContent(cleanedChildren as Node[])) {
tableHasFlowContent = true;
}
}
Expand Down Expand Up @@ -348,6 +377,7 @@ const processTableNode = (
* is kept as a JSX <Table> element so that remarkRehype can properly handle the flow content.
*/
const mdxishTables = (): Transform => tree => {
const outerFootnoteIds = collectFootnoteIds(tree as Node);
visit(tree, 'html', (_node, index, parent) => {
const node = _node as Html;
if (typeof index !== 'number' || !parent || !('children' in parent)) return;
Expand All @@ -357,7 +387,7 @@ const mdxishTables = (): Transform => tree => {
// Because the processor uses remarkMdx, it is stricter in what it accepts
// and only accepts valid MDX syntax in the table node.
// To get around that, we have some fallback logic after trying to repair the table content.
let parsed = parseTableNode(tableNodeProcessor, node);
let parsed = parseTableNode(tableNodeProcessor, node, undefined, outerFootnoteIds);
if (!parsed) {
// Try a sequence of targeted repairs and re-parse
// after each, stopping at the first that yields a parseable tree:
Expand All @@ -375,7 +405,12 @@ const mdxishTables = (): Transform => tree => {
repairs.some(repair => {
const { value, inserts } = repair(node.value);
if (value !== node.value) {
parsed = parseTableNode(tableNodeProcessor, { ...node, value }, { inserts, originalSource: node.value });
parsed = parseTableNode(
tableNodeProcessor,
{ ...node, value },
{ inserts, originalSource: node.value },
outerFootnoteIds,
);
}
return Boolean(parsed);
});
Expand All @@ -386,16 +421,23 @@ const mdxishTables = (): Transform => tree => {
// to build on the markdown / JSX table
visit(parsed as Node, isMDXElement, (tableNode: MdxJsxFlowElement | MdxJsxTextElement) => {
if (tableNode.name !== 'Table' && tableNode.name !== 'table') return undefined;
processTableNode(tableNode, index, parent as Parents, node.position);
processTableNode(tableNode, index, parent as Parents, node.position, outerFootnoteIds);
return EXIT;
});
} else if (node.value.startsWith('<table')) {
// If the parsing still fails, give an opportunity to the fallback parser
// without remarkMdx to process lowercase tables as it's likely to not
// have needed MDX parsing anyway
const fallback = parseTableNode(fallbackTableNodeProcessor, node);
const fallback = parseTableNode(fallbackTableNodeProcessor, node, undefined, outerFootnoteIds);
if (!fallback || fallback.children.length <= 1) return;
parent.children.splice(index, 1, ...(fallback.children as typeof parent.children));
// Drop synthetic placeholder definitions before merging into the outer tree
const outerFootnoteIdSet = new Set(outerFootnoteIds);
const cleaned = (fallback.children as RootContent[]).filter(child => {
if (child.type !== 'footnoteDefinition') return true;
return !outerFootnoteIdSet.has((child as FootnoteDefinition).identifier);
});
if (cleaned.length <= 1) return;
parent.children.splice(index, 1, ...(cleaned as typeof parent.children));
}
// Otherwise, there's no point in trying to parse the table content further
// More repairs are needed in that case
Expand Down
Loading