diff --git a/__tests__/compilers/html-block.test.ts b/__tests__/compilers/html-block.test.ts index 94880a0b2..7b62977fb 100644 --- a/__tests__/compilers/html-block.test.ts +++ b/__tests__/compilers/html-block.test.ts @@ -1,15 +1,4 @@ -import type { Element } from 'hast'; - -import { mdast, mdx, mdxish } from '../../index'; - -function findHTMLBlock(element: Element): Element | undefined { - if (element.tagName === 'HTMLBlock' || element.tagName === 'html-block') { - return element; - } - return element.children - .filter((child): child is Element => child.type === 'element') - .reduce((found, child) => found || findHTMLBlock(child), undefined); -} +import { mdast, mdx } from '../../index'; describe('html-block compiler', () => { it('compiles html blocks within containers', () => { @@ -51,194 +40,3 @@ const foo = () => { expect(mdx(mdast(markdown)).trim()).toBe(expected.trim()); }); }); - -describe('mdxish html-block compiler', () => { - it('compiles html blocks within containers', () => { - const markdown = ` -> 🚧 It compiles! -> -> {\` -> Hello, World! -> \`} -`; - - const hast = mdxish(markdown.trim()); - const callout = hast.children[0] as Element; - - expect(callout.type).toBe('element'); - expect(callout.tagName).toBe('Callout'); - - // Find HTMLBlock within the callout - const htmlBlock = findHTMLBlock(callout); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - }); - - it('compiles html blocks preserving newlines', () => { - const markdown = ` -{\` -

-const foo = () => {
-  const bar = {
-    baz: 'blammo'
-  }
-
-  return bar
-}
-
-\`}
-`; - - const hast = mdxish(markdown.trim()); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - }); - - it('adds newlines for readability', () => { - const markdown = '{`

Hello, World!

`}
'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - }); - - it('unescapes backticks in HTML content', () => { - const markdown = '{`\\`example\\``}'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - expect(htmlBlock?.tagName).toBe('html-block'); - - // Verify that escaped backticks \` are unescaped to ` in the HTML - const htmlProp = htmlBlock?.properties?.html as string; - expect(htmlProp).toBeDefined(); - expect(htmlProp).toContain('`example`'); - expect(htmlProp).not.toContain('\\`'); - }); - - it('passes safeMode property correctly', () => { - // Test with both JSX expression and string syntax - const markdown = '{`

Content

`}
'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - - const allProps = htmlBlock?.properties; - expect(allProps).toBeDefined(); - - const safeMode = allProps?.safeMode; - expect(safeMode).toBe('true'); - - // Verify that html property is still present (for safeMode to render as escaped text) - const htmlProp = allProps?.html as string; - expect(htmlProp).toBeDefined(); - expect(htmlProp).toContain(''); - expect(htmlProp).toContain('

Content

'); - }); - - it('should handle template literal with variables', () => { - // eslint-disable-next-line quotes - const markdown = `{\`const x = \${variable}\`}`; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - // eslint-disable-next-line no-template-curly-in-string - expect(htmlBlock?.properties?.html).toBe('const x = ${variable}'); - }); - - it('should handle nested template literals', () => { - // Use a regular string to avoid nested template literal syntax error - // The content should be:
```javascript\nconst x = 1;\n```
- const markdown = '{`
\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`}
'; - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - - // Verify that the HTML content is preserved correctly with newlines - const htmlProp = htmlBlock?.properties?.html as string; - expect(htmlProp).toBeDefined(); - - // The expected content should have triple backticks - expect(htmlProp).toBe('
```javascript\nconst x = 1;\n```
'); - }); - - it('expands \\n only inside
/, not in plain text after tags', () => {
-    const markdown = [
-      '{`',
-      '
qerq3er \\n qerreqqe
', - 'qerq3er \\n qerreqqe', - 'hello \\n world', - '`}
', - ].join('\n'); - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - - const htmlProp = htmlBlock?.properties?.html as string; - expect(htmlProp).toBeDefined(); - - // Literal `\n` expands to real newlines only inside
 / .
-    expect(htmlProp).toBe(
-      '
qerq3er \n qerreqqe
\nqerq3er \n qerreqqe\n\nhello \\n world', - ); - // Must not turn the plain-text `hello \n world` into a line break between words. - expect(htmlProp).toContain('hello \\n world'); - expect(htmlProp).not.toMatch(/hello \n world/); // space + LF + space (wrong) - }); - - it('preserves \\n escape sequences inside ', - '', - '`}', - ].join('\n'); - - const hast = mdxish(markdown); - const paragraph = hast.children[0] as Element; - - expect(paragraph.type).toBe('element'); - const htmlBlock = findHTMLBlock(paragraph); - expect(htmlBlock).toBeDefined(); - - const htmlProp = htmlBlock?.properties?.html as string; - expect(htmlProp).toBeDefined(); - - // The `\n` inside the JS string literal must survive as the two-byte escape - // sequence so eval() sees a well-formed JS string. A real LF here would break it. - expect(htmlProp).toContain('var x = "hello\\nworld";'); - expect(htmlProp).not.toContain('var x = "hello\nworld";'); - }); -}); diff --git a/__tests__/lib/mdxish/html-blocks.test.ts b/__tests__/lib/mdxish/html-blocks.test.ts new file mode 100644 index 000000000..8f7c384ed --- /dev/null +++ b/__tests__/lib/mdxish/html-blocks.test.ts @@ -0,0 +1,435 @@ +import type { Element } from 'hast'; +import type { MdxJsxFlowElement } from 'mdast-util-mdx'; + +import { mdxish } from '../../../lib'; +import { collectNodes, findAllElementsByTagName, findElementByTagName, parseMdxishWithSource } from '../../helpers'; + +function expectJsxTableIsParsed(md: string) { + const { tree: mdastTree } = parseMdxishWithSource(md); + // A table containing an carries block-level content, so it is kept + // as a JSX (mdxJsxFlowElement) rather than collapsed to a markdown table. + const tableNodes = collectNodes( + mdastTree, + node => node.type === 'mdxJsxFlowElement' && (node as MdxJsxFlowElement).name === 'Table', + ); + expect(tableNodes).toHaveLength(1); +} + +/** Decoded `html` props of every in the rendered tree, in document order. */ +function htmlBlockPayloads(tree: ReturnType) { + return findAllElementsByTagName(tree, 'html-block').map(node => node.properties?.html); +} + +/** Asserts no raw survived and no protected marker leaked into the tree. */ +function expectFullyConverted(tree: ReturnType) { + expect(findElementByTagName(tree, 'HTMLBlock')).toBeNull(); + expect(JSON.stringify(tree)).not.toContain('RDMX_HTMLBLOCK'); +} + +describe(' parsing', () => { + describe('standalone', () => { + it('renders as with the decoded html prop', () => { + const tree = mdxish('{`
Hello
`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['
Hello
']); + }); + + it('renders between surrounding paragraphs', () => { + const tree = mdxish('text before\n\n{`
x
`}
\n\ntext after'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['
x
']); + const json = JSON.stringify(tree); + expect(json).toContain('text before'); + expect(json).toContain('text after'); + }); + + it('renders after a markdown heading', () => { + const tree = mdxish('# Heading\n\n{`

after heading

`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

after heading

']); + expect(findElementByTagName(tree, 'h1')).not.toBeNull(); + }); + + it('renders two consecutive top-level HTMLBlocks', () => { + const tree = mdxish('{`
one
`}
\n\n{`
two
`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['
one
', '
two
']); + }); + + it('renders inline within a paragraph alongside text', () => { + const tree = mdxish('Inline {`x`} text'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['x']); + const json = JSON.stringify(tree); + expect(json).toContain('Inline'); + expect(json).toContain('text'); + }); + + it('compiles html blocks preserving newlines', () => { + const markdown = `{\` +

+const foo = () => {
+  const bar = {
+    baz: 'blammo'
+  }
+
+  return bar
+}
+
+\`}
`; + + const tree = mdxish(markdown); + expect(htmlBlockPayloads(tree)).toStrictEqual([`

+const foo = () => {
+  const bar = {
+    baz: 'blammo'
+  }
+
+  return bar
+}
+
`]); + }); + + it('handles standalone multiline HTMLBlock with surrounding paragraphs', () => { + const markdown = `Hello + +{\` +

Hello, World!

+\`}
+ +there`; + const tree = mdxish(markdown); + expect(htmlBlockPayloads(tree)).toStrictEqual(['

Hello, World!

']); + }); + + it('handles nested HTMLBlock tags in content', () => { + const tree = mdxish('{`{Hello}`}'); + expect(htmlBlockPayloads(tree)).toStrictEqual(['{Hello}']); + }); + }); + + describe('content formatting', () => { + it('preserves multiline HTML content verbatim', () => { + const tree = mdxish('{`
\n multi\n
`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['
\n multi\n
']); + expectFullyConverted(tree); + }); + + it('preserves raw `}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['']); + // The script must not become a real

Content

`}
'); + expect(htmlBlockPayloads(tree)).toStrictEqual(['

Content

']); + }); + + it('handles template literal with variables', () => { + // eslint-disable-next-line quotes + const tree = mdxish(`{\`const x = \${variable}\`}`); + // eslint-disable-next-line no-template-curly-in-string + expect(htmlBlockPayloads(tree)).toStrictEqual(['const x = ${variable}']); + }); + + it('handles nested template literals', () => { + const tree = mdxish('{`
\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`}
'); + expect(htmlBlockPayloads(tree)).toStrictEqual(['
```javascript\nconst x = 1;\n```
']); + }); + + it('handles trailing whitespace after closing tag', () => { + const tree = mdxish('{`
hello
`}
'); + expect(htmlBlockPayloads(tree)).toStrictEqual(['
hello
']); + }); + }); + + describe('inside generic HTML tags & markdown', () => { + it('renders inside a
with the decoded html prop', () => { + const tree = mdxish('
{`

nested

`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

nested

']); + expect(findElementByTagName(tree, 'div')).not.toBeNull(); + }); + + it('renders inside a
separated by blank lines', () => { + const tree = mdxish('
\n\n{`

n

`}
\n\n
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

n

']); + expect(findElementByTagName(tree, 'section')).not.toBeNull(); + }); + + it('renders inside a blockquote', () => { + const tree = mdxish('> {`

n

`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

n

']); + expect(findElementByTagName(tree, 'blockquote')).not.toBeNull(); + }); + + it('renders inside a list item', () => { + const tree = mdxish('- {`

n

`}
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

n

']); + expect(findElementByTagName(tree, 'li')).not.toBeNull(); + }); + + it('renders inside callout blockquotes', () => { + const md = `> 🚧 It compiles! +> +> {\` +> Hello, World! +> \`}`; + + const tree = mdxish(md); + const callout = tree.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + + expect(htmlBlockPayloads(tree)).toStrictEqual([' Hello, World!']); + }); + + it('does not render inside code blocks', () => { + const md = '```{`

n

`}
```'; + + const tree = mdxish(md); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toBeNull(); + }); + }); + + describe('inside ReadMe components', () => { + describe('callouts', () => { + it('handles HTMLBlock in an empty callout (no title text)', () => { + const markdown = `> 📘 +> +> {\`

body only

\`}
`; + + const tree = mdxish(markdown); + expect((tree.children[0] as Element).tagName).toBe('Callout'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

body only

']); + }); + + it('renders inside a ', () => { + const tree = mdxish( + '\n\n{`
\n

n

\n\n

m

\n
`}
\n\n
', + ); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['
\n

n

\n\n

m

\n
']); + expectFullyConverted(tree); + }); + }); + + it('renders inside an ', () => { + const tree = mdxish('\n\n{`

n

`}
\n\n
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

n

']); + expectFullyConverted(tree); + }); + + it('renders inside /', () => { + const tree = mdxish('\n\n\n{`

n

`}
\n\n
\n
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

n

']); + expectFullyConverted(tree); + }); + + it('renders inside /', () => { + const tree = mdxish('\n\n\n{`

n

`}
\n\n
\n
'); + + expect(htmlBlockPayloads(tree)).toStrictEqual(['

n

']); + expectFullyConverted(tree); + }); + }); + + describe('inside
', () => { + it('renders inside a
cell as with the decoded html prop', () => { + const md = `
+ + + + + + + + + +
NameMarkup
Custom{\`
+

Hello

+ +

World

+
+\`}
`; + + expectJsxTableIsParsed(md); + + const tree = mdxish(md); + + const rawHtmlBlock = findElementByTagName(tree, 'HTMLBlock'); + expect(rawHtmlBlock).toBeNull(); + + // Newlines (including the blank line) inside the content must survive the + // table re-parse and not fragment the HTMLBlock. + expect(htmlBlockPayloads(tree)).toStrictEqual(['
\n

Hello

\n\n

World

\n
']); + }); + + it('still renders markdown in a sibling text cell', () => { + const md = ` + + + + + + + + + +
AB
**bold** here{\`
    +
  • one
  • + +
  • two
  • +
+\`}
`; + + expectJsxTableIsParsed(md); + + const tree = mdxish(md); + // The sibling cell's markdown must still be processed into a . + const strongs = findAllElementsByTagName(tree, 'strong'); + expect(strongs.length).toBeGreaterThan(0); + expect(JSON.stringify(strongs[0])).toContain('bold'); + expect(htmlBlockPayloads(tree)).toStrictEqual(['
    \n
  • one
  • \n\n
  • two
  • \n
']); + }); + + it('renders inside a lowercase cell', () => { + const md = `
+ + + + + +
{\`
+

a

+ +

b

+
+\`}
`; + + const tree = mdxish(md); + expect(htmlBlockPayloads(tree)).toStrictEqual(['
\n

a

\n\n

b

\n
']); + expectFullyConverted(tree); + }); + + it('preserves curly braces in HTMLBlock content inside a table cell', () => { + const md = ` + + + + + +
{\`
{notTemplate}
\`}
`; + + expectJsxTableIsParsed(md); + const tree = mdxish(md); + expect(htmlBlockPayloads(tree)).toStrictEqual(['
{notTemplate}
']); + }); + + it('preserves safeMode and runScripts attributes when nested', () => { + const md = ` + + + + + +
{\`
raw
\`}
`; + + expectJsxTableIsParsed(md); + + const tree = mdxish(md); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + type: 'element', + tagName: 'html-block', + properties: { + html: '
raw
', + safeMode: 'true', + runScripts: false, + }, + }); + }); + + it('renders multiple HTMLBlocks inside the same Table', () => { + const md = ` + + + + + + +
{\`
+one + +uno +
+\`}
{\`two\`}
`; + + expectJsxTableIsParsed(md); + + const tree = mdxish(md); + + const htmlBlocks = findAllElementsByTagName(tree, 'html-block'); + expect(htmlBlocks).toHaveLength(2); + expect(htmlBlocks[0].properties).toMatchObject({ + html: '
\none\n\nuno\n
', + }); + expect(htmlBlocks[1].properties).toMatchObject({ html: 'two' }); + }); + + it('leaves no RDMX_HTMLBLOCK markers or stray comment nodes in the tree', () => { + const md = ` + + + + + +
+ {\`
x
\`}
+
`; + + expectJsxTableIsParsed(md); + + const tree = mdxish(md); + const serialized = JSON.stringify(tree); + + expect(serialized).not.toContain('RDMX_HTMLBLOCK'); + + const htmlBlock = findElementByTagName(tree, 'html-block') as Element; + expect(htmlBlock.children).toStrictEqual([]); + }); + }); +}); diff --git a/__tests__/transformers/mdxish-html-blocks.test.ts b/__tests__/transformers/mdxish-html-blocks.test.ts new file mode 100644 index 000000000..c87f83f05 --- /dev/null +++ b/__tests__/transformers/mdxish-html-blocks.test.ts @@ -0,0 +1,135 @@ +import { mdxish } from '../../lib'; +import { findElementByTagName } from '../helpers'; + +describe('mdxish html blocks transformer', () => { + describe('attribute extraction', () => { + it('extracts safeMode from JSX syntax', () => { + const tree = mdxish('{`

content

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

', safeMode: 'true' }, + }); + }); + + it('extracts safeMode from string syntax', () => { + const tree = mdxish('{`

content

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

', safeMode: 'false' }, + }); + }); + + it('extracts runScripts boolean true', () => { + const tree = mdxish('{`

content

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

', runScripts: true }, + }); + }); + + it('extracts runScripts boolean false', () => { + const tree = mdxish('{`

content

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

', runScripts: false }, + }); + }); + + it('extracts runScripts string value', () => { + const tree = mdxish('{`

content

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

', runScripts: 'afterRender' }, + }); + }); + + it('extracts multiple attributes', () => { + const tree = mdxish( + '{`

content

`}
', + ); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

', safeMode: 'true', runScripts: true }, + }); + }); + + it('omits runScripts and safeMode when absent', () => { + const tree = mdxish('{`

content

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '

content

' }, + }); + }); + }); + + describe('content extraction', () => { + it('strips template literal delimiters', () => { + const tree = mdxish('{`
hello
`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '
hello
' }, + }); + }); + + it('handles content without template literal syntax', () => { + const tree = mdxish('{`plain`}'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: 'plain' }, + }); + }); + + it('unescapes backticks in HTML content', () => { + const tree = mdxish('{`\\`example\\``}'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '`example`' }, + }); + }); + + it('preserves multiline content', () => { + const markdown = `{\` +
    +
  • one
  • +
  • two
  • +
+\`}
`; + const tree = mdxish(markdown); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: '
    \n
  • one
  • \n
  • two
  • \n
' }, + }); + }); + + it('starts indent relative to the HTMLBlock opening tag', () => { + const markdown = ` + {\`first +second + third + fourth + \`}`; + const tree = mdxish(markdown); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + properties: { html: 'first\nsecond\n third\nfourth' }, + }); + }); + }); + + describe('node structure', () => { + it('produces correct node type and hName', () => { + const tree = mdxish('{`

test

`}
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toMatchObject({ + type: 'element', + tagName: 'html-block', + }); + }); + + it('does not transform non-HTMLBlock html nodes', () => { + const tree = mdxish('
just html
'); + const htmlBlock = findElementByTagName(tree, 'html-block'); + expect(htmlBlock).toBeNull(); + }); + }); +}) \ No newline at end of file diff --git a/lib/constants.ts b/lib/constants.ts index 4911d743b..36978809f 100644 --- a/lib/constants.ts +++ b/lib/constants.ts @@ -63,6 +63,7 @@ export const INLINE_COMPONENT_TAGS = new Set(['Anchor', 'Glossary']); /** * PascalCase tags that have their own dedicated tokenizer / transformer * and must not be claimed by the generic `mdxComponent` construct. + * Subject to change as we add more dedicated tokenizers. */ const DEDICATED_COMPONENT_TAGS = ['HTMLBlock', 'Table'] as const; @@ -75,6 +76,15 @@ export const GENERIC_MDX_COMPONENT_EXCLUDED_TAGS = new Set([ ...INLINE_COMPONENT_TAGS, ]); +/** + * Tags the micromark `mdxComponent` tokenizer must not claim, which + * are inline components and those that have their own dedicated tokenizer + */ +export const TOKENIZER_MDX_COMPONENT_EXCLUDED_TAGS = new Set([ + 'Table', + ...INLINE_COMPONENT_TAGS, +]); + /** * Lowercased variant of {@link INLINE_COMPONENT_TAGS} for consumers that * run after rehype (where hast `tagName` is normalized to lowercase). diff --git a/lib/mdxish.ts b/lib/mdxish.ts index 855c10673..1219dfbd8 100644 --- a/lib/mdxish.ts +++ b/lib/mdxish.ts @@ -201,7 +201,7 @@ export function mdxishAstProcessor(mdContent: string, opts: MdxishOpts = {}) { .use(mdxishInlineMdxHtmlBlocks, { safeMode }) .use(restoreSnakeCaseComponentNames, { mapping: snakeCaseMapping }) .use(mdxishTables) - .use(mdxishHtmlBlocks) + .use(mdxishHtmlBlocks) // Convert every shape → html-block // The next few transformers must appear after mdxishMdxComponentBlocks // so nodes produced by the inline re-parse of component bodies // (e.g. code/image/embed inside ) get visited too diff --git a/lib/micromark/mdx-component/syntax.ts b/lib/micromark/mdx-component/syntax.ts index 5d4f7eab9..1468472bb 100644 --- a/lib/micromark/mdx-component/syntax.ts +++ b/lib/micromark/mdx-component/syntax.ts @@ -4,7 +4,7 @@ import type { Code, Construct, Effects, Extension, Resolver, State, TokenizeCont import { markdownLineEnding } from 'micromark-util-character'; import { codes, types } from 'micromark-util-symbol'; -import { GENERIC_MDX_COMPONENT_EXCLUDED_TAGS } from '../../constants'; +import { TOKENIZER_MDX_COMPONENT_EXCLUDED_TAGS } from '../../constants'; declare module 'micromark-util-types' { interface TokenTypeMap { @@ -312,7 +312,7 @@ function createTokenize(mode: 'flow' | 'text') { } // Tag name complete — check exclusions - if (GENERIC_MDX_COMPONENT_EXCLUDED_TAGS.has(tagName)) { + if (TOKENIZER_MDX_COMPONENT_EXCLUDED_TAGS.has(tagName)) { return nok(code); } diff --git a/package.json b/package.json index 727b0a00f..4005436f9 100644 --- a/package.json +++ b/package.json @@ -177,7 +177,7 @@ }, { "path": "dist/main.node.js", - "maxSize": "947KB" + "maxSize": "950KB" } ] }, diff --git a/processor/transform/mdxish/mdxish-html-blocks.ts b/processor/transform/mdxish/mdxish-html-blocks.ts index 26398b11f..c57490619 100644 --- a/processor/transform/mdxish/mdxish-html-blocks.ts +++ b/processor/transform/mdxish/mdxish-html-blocks.ts @@ -1,392 +1,196 @@ import type { HTMLBlock } from '../../../types'; -import type { Paragraph, Parent } from 'mdast'; +import type { Html, Paragraph, Parent, RootContent } from 'mdast'; import type { Transform } from 'mdast-util-from-markdown'; +import type { MdxJsxFlowElement, MdxJsxTextElement } from 'mdast-util-mdx'; import { visit } from 'unist-util-visit'; import { NodeTypes } from '../../../enums'; import { formatHtmlForMdxish } from '../../utils'; -import { base64Decode, HTML_BLOCK_CONTENT_END, HTML_BLOCK_CONTENT_START } from './preprocess-jsx-expressions'; +type HtmlBlockJsx = MdxJsxFlowElement | MdxJsxTextElement; -/** - * Decodes HTMLBlock content that was protected during preprocessing. - * Content is wrapped in - */ -function decodeProtectedContent(content: string): string { - // Escape special regex characters in the markers - const startEscaped = HTML_BLOCK_CONTENT_START.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - const endEscaped = HTML_BLOCK_CONTENT_END.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - const markerRegex = new RegExp(`${startEscaped}([A-Za-z0-9+/=]+)${endEscaped}`, 'g'); - return content.replace(markerRegex, (_match, encoded: string) => { - try { - return base64Decode(encoded); - } catch { - return encoded; - } - }); -} - -/** - * Collects text content from a node and its children recursively - */ -function collectTextContent(node: { children?: unknown[]; lang?: string; type?: string; value?: string }): string { - const parts: string[] = []; - - if (node.type === 'text' && node.value) { - parts.push(node.value); - } else if (node.type === 'html' && node.value) { - parts.push(node.value); - } else if (node.type === 'inlineCode' && node.value) { - parts.push(node.value); - } else if (node.type === 'code' && node.value) { - // Reconstruct code fence syntax (markdown parser consumes opening ```) - const lang = node.lang || ''; - const fence = `\`\`\`${lang ? `${lang}\n` : ''}`; - parts.push(fence); - parts.push(node.value); - // Add newline before closing fence if missing - const closingFence = node.value.endsWith('\n') ? '```' : '\n```'; - parts.push(closingFence); - } else if (node.children && Array.isArray(node.children)) { - node.children.forEach(child => { - if (typeof child === 'object' && child !== null) { - parts.push(collectTextContent(child as { children?: unknown[]; lang?: string; type?: string; value?: string })); - } - }); - } - - return parts.join(''); -} - -/** - * Extracts boolean attribute from HTML tag. Handles JSX (safeMode={true}) and string (safeMode="true") syntax. - * Returns "true"/"false" string to survive rehypeRaw serialization. - */ -function extractBooleanAttr(attrs: string, name: string): string | undefined { - // Try JSX syntax: name={true|false} - const jsxMatch = attrs.match(new RegExp(`${name}=\\{(true|false)\\}`)); - if (jsxMatch) { - return jsxMatch[1]; - } - // Try string syntax: name="true"|true - const stringMatch = attrs.match(new RegExp(`${name}="?(true|false)"?`)); - if (stringMatch) { - return stringMatch[1]; - } - return undefined; -} - -/** - * Extracts runScripts attribute from HTML tag. Returns boolean for "true"/"false", string for other values, or undefined if not found. - */ -function extractRunScriptsAttr(attrs: string): boolean | string | undefined { - const runScriptsMatch = attrs.match(/runScripts="?([^">\s]+)"?/); - if (!runScriptsMatch) { - return undefined; - } - const value = runScriptsMatch[1]; - if (value === 'true') { - return true; - } - if (value === 'false') { - return false; - } - return value; -} +// `{`…`}` embedded inside a raw HTML block (e.g. a +// single-line `
`). CommonMark slurps the whole div as one `html` +// node, so the tokenizer never sees the HTMLBlock — we recover it here. +const RAW_HTML_BLOCK_RE = /]*)>\s*\{\s*`((?:[^`\\]|\\.)*)`\s*\}\s*<\/HTMLBlock>/g; +// Opening `` as its own `html` node — produced inside a paragraph +// when an HTMLBlock appears inline alongside text. +const HTML_BLOCK_OPEN_RE = /^]*)>$/; /** - * Creates an HTMLBlock node from HTML string and optional attributes + * Builds the canonical `html-block` MDAST node the renderer expects. */ -function createHTMLBlockNode( - htmlString: string, +const createHtmlBlockNode = ( + html: string, position: HTMLBlock['position'], runScripts?: boolean | string, safeMode?: string, -): HTMLBlock { - return { - position, - children: [{ type: 'text', value: htmlString }], - type: NodeTypes.htmlBlock, - data: { - hName: 'html-block', - hProperties: { - html: htmlString, - ...(runScripts !== undefined && { runScripts }), - ...(safeMode !== undefined && { safeMode }), - }, +): HTMLBlock => ({ + position, + children: [{ type: 'text', value: html }], + type: NodeTypes.htmlBlock, + data: { + hName: 'html-block', + hProperties: { + html, + ...(runScripts !== undefined && { runScripts }), + ...(safeMode !== undefined && { safeMode }), }, - }; -} + }, +}); /** - * Checks for opening tag only (for split detection) + * Reads the cooked string out of a brace expression wrapping a single template + * literal (`` `

n

` `` → `

n

`). */ -function hasOpeningTagOnly(node: { children?: unknown[]; type?: string; value?: string }): { - attrs: string; - found: boolean; -} { - let hasOpening = false; - let hasClosed = false; - let attrs = ''; +const extractTemplateLiteral = (value: string | undefined): string => { + if (!value) return ''; + const match = value.trim().match(/^`([\s\S]*)`$/); + // Non-template-literal bodies (e.g. `{someVar}`) are malformed mdxish input; + // returning '' beats shipping JS identifier source as an HTML payload. + return match ? match[1] : ''; +}; - const check = (n: { children?: unknown[]; type?: string; value?: string }) => { - if (n.type === 'html' && n.value) { - if (n.value === '') { - hasOpening = true; - } else { - const match = n.value.match(/^]*)?>$/); - if (match) { - hasOpening = true; - attrs = match[1] || ''; - } - } - if (n.value === '' || n.value.includes('
')) { - hasClosed = true; - } - } - if (n.children && Array.isArray(n.children)) { - n.children.forEach(child => { - check(child as { children?: unknown[]; type?: string; value?: string }); - }); - } - }; +const toRunScripts = (raw: string | undefined): boolean | string | undefined => + raw === 'true' ? true : raw === 'false' ? false : raw; - check(node); - // Return true only if opening without closing (split case) - return { attrs, found: hasOpening && !hasClosed }; -} +/** Reads an attribute from a raw `` attribute string. */ +const rawAttr = (attrs: string, name: string): string | undefined => { + const quoted = attrs.match(new RegExp(`\\b${name}\\s*=\\s*"([^"]*)"`)); + if (quoted) return quoted[1]; + const expr = attrs.match(new RegExp(`\\b${name}\\s*=\\s*\\{(true|false)\\}`)); + if (expr) return expr[1]; + return new RegExp(`\\b${name}\\b`).test(attrs) ? 'true' : undefined; +}; + +/** Reads an attribute from a parsed `` JSX element. */ +const jsxAttr = (element: HtmlBlockJsx, name: string): string | undefined => { + const attr = element.attributes.find(a => a.type === 'mdxJsxAttribute' && a.name === name); + if (!attr || attr.type !== 'mdxJsxAttribute') return undefined; + if (typeof attr.value === 'string') return attr.value; + if (attr.value && typeof attr.value === 'object' && 'value' in attr.value) return attr.value.value; + return 'true'; // bare boolean attribute, e.g. +}; + +/** Builds an `html-block` from a raw attribute string and (unparsed) body. */ +const htmlBlockFromRaw = ( + attrs: string, + html: string, + position: HTMLBlock['position'], + openingTagIndent = 0, +): HTMLBlock => + createHtmlBlockNode( + formatHtmlForMdxish(html, openingTagIndent), + position, + toRunScripts(rawAttr(attrs, 'runScripts')), + rawAttr(attrs, 'safeMode'), + ); /** - * Checks if a node contains an HTMLBlock closing tag + * Splits a raw `html` node that embeds one or more ``s into + * `[html before, html-block, html after, …]`. Returns null when there is none. + * + * `String.split` on a regex with capture groups interleaves the captures into + * the result, so segments arrive as `[text, attrs, body, text, attrs, body, …]`. */ -function hasClosingTag(node: { children?: unknown[]; type?: string; value?: string }): boolean { - if (node.type === 'html' && node.value) { - if (node.value === '' || node.value.includes('')) return true; - } - if (node.children && Array.isArray(node.children)) { - return node.children.some(child => hasClosingTag(child as { children?: unknown[]; type?: string; value?: string })); +const splitRawHtmlBlocks = (node: Html): RootContent[] | null => { + const segments = node.value.split(RAW_HTML_BLOCK_RE); + if (segments.length === 1) return null; // no present + + const parts: RootContent[] = []; + for (let i = 0; i < segments.length; i += 3) { + const [text, attrs, body] = segments.slice(i, i + 3); + if (text) parts.push({ type: 'html', value: text }); + if (body !== undefined) { + // The opening tag's column equals the length of the line it starts on + // (the text run since the previous newline preceding the match). + const openingTagIndent = text.slice(text.lastIndexOf('\n') + 1).length; + parts.push(htmlBlockFromRaw(attrs, body, node.position, openingTagIndent)); + } } - return false; -} + return parts; +}; /** - * Transforms HTMLBlock MDX JSX to html-block nodes. Handles {`...`} syntax. + * Converts every `` shape that survives parsing into the canonical + * `html-block` MDAST node, reading the body from the tokenizer's template-literal + * expression. Three shapes occur: + * + * 1. JSX element (`mdxJsxFlowElement`/`mdxJsxTextElement`) — multiline/block + * context and table cells (after their remarkMdx re-parse). + * 2. Raw `html` blob (`splitRawHtmlBlocks`) — single-line top-level, or nested + * in raw HTML like an inline `
`. + * 3. Inline-in-paragraph — split into `html` + expression + `html` siblings. + * + * Runs *after* `mdxishTables` so table cells are re-parsed first; + * `mdxishTables` recognizes the still-JSX `` element when deciding to + * keep a table as a JSX ``. This replaces the old base64-comment marker + * machinery — the #1455 tokenizer hands the body over already parsed. */ const mdxishHtmlBlocks = (): Transform => tree => { - // Handle HTMLBlock split across root children (caused by newlines) - visit(tree, 'root', (root: Parent) => { - const children = root.children; - let i = 0; - - while (i < children.length) { - const child = children[i] as { children?: unknown[]; type?: string; value?: string }; - const { attrs, found: hasOpening } = hasOpeningTagOnly(child); - - if (hasOpening) { - // Find closing tag in subsequent siblings - let closingIdx = -1; - for (let j = i + 1; j < children.length; j += 1) { - if (hasClosingTag(children[j] as { children?: unknown[]; type?: string; value?: string })) { - closingIdx = j; - break; - } - } - - if (closingIdx !== -1) { - // Collect inner content between tags - const contentParts: string[] = []; - for (let j = i; j <= closingIdx; j += 1) { - const node = children[j] as { children?: unknown[]; type?: string; value?: string }; - contentParts.push(collectTextContent(node)); - } - - // Remove the opening/closing tags and template literal syntax from content - let content = contentParts.join(''); - content = content.replace(/^]*>\s*\{?\s*`?/, '').replace(/`?\s*\}?\s*<\/HTMLBlock>$/, ''); - // Decode protected content that was base64 encoded during preprocessing - content = decodeProtectedContent(content); - - const htmlString = formatHtmlForMdxish(content); - const runScripts = extractRunScriptsAttr(attrs); - const safeMode = extractBooleanAttr(attrs, 'safeMode'); - - // Replace range with single HTMLBlock node - const mdNode = createHTMLBlockNode( - htmlString, - (children[i] as { position?: unknown }).position as HTMLBlock['position'], - runScripts, - safeMode, - ); - root.children.splice(i, closingIdx - i + 1, mdNode); - } - } - i += 1; - } - }); + // Shape 1: tokenized JSX element. + visit( + tree, + node => node.type === 'mdxJsxFlowElement' || node.type === 'mdxJsxTextElement', + (node, index, parent: Parent | undefined) => { + const element = node as HtmlBlockJsx; + if (element.name !== 'HTMLBlock' || !parent || index === undefined) return; + + const exprChild = element.children.find( + child => child.type === 'mdxFlowExpression' || child.type === 'mdxTextExpression', + ) as { value?: string } | undefined; + + const openingTagIndent = (element.position?.start.column ?? 1) - 1; + parent.children[index] = createHtmlBlockNode( + formatHtmlForMdxish(extractTemplateLiteral(exprChild?.value), openingTagIndent), + element.position, + toRunScripts(jsxAttr(element, 'runScripts')), + jsxAttr(element, 'safeMode'), + ); + }, + ); - // Handle HTMLBlock parsed as HTML elements (when template literal contains block-level HTML tags) - visit(tree, 'html', (node, index, parent: Parent | undefined) => { + // Shape 2: raw HTML blob. + visit(tree, 'html', (node: Html, index, parent: Parent | undefined) => { if (!parent || index === undefined) return; - - const value = (node as { value?: string }).value; - if (!value) return; - - // Case 1: Full HTMLBlock in single node - const fullMatch = value.match(/^]*)?>([\s\S]*)<\/HTMLBlock>$/); - if (fullMatch) { - const attrs = fullMatch[1] || ''; - let content = fullMatch[2] || ''; - - // Remove template literal syntax if present: {`...`} - content = content.replace(/^\s*\{\s*`/, '').replace(/`\s*\}\s*$/, ''); - // Decode protected content that was base64 encoded during preprocessing - content = decodeProtectedContent(content); - - const htmlString = formatHtmlForMdxish(content); - const runScripts = extractRunScriptsAttr(attrs); - const safeMode = extractBooleanAttr(attrs, 'safeMode'); - - parent.children[index] = createHTMLBlockNode(htmlString, node.position, runScripts, safeMode); - return; - } - - // Case 2: Opening tag only (split by blank lines) - if (value === '' || value.match(/^]*>$/)) { - const siblings = parent.children; - let closingIdx = -1; - - // Find closing tag in siblings - for (let i = index + 1; i < siblings.length; i += 1) { - const sibling = siblings[i]; - if (sibling.type === 'html') { - const sibVal = (sibling as { value?: string }).value; - if (sibVal === '' || sibVal?.includes('')) { - closingIdx = i; - break; - } - } - } - - if (closingIdx === -1) return; - - // Collect content between tags, skipping template literal delimiters - const contentParts: string[] = []; - for (let i = index + 1; i < closingIdx; i += 1) { - const sibling = siblings[i]; - // Skip template literal delimiters - if (sibling.type === 'text') { - const textVal = (sibling as { value?: string }).value; - if (textVal === '{' || textVal === '}' || textVal === '{`' || textVal === '`}') { - // eslint-disable-next-line no-continue - continue; - } - } - contentParts.push(collectTextContent(sibling as { children?: unknown[]; type?: string; value?: string })); - } - - // Decode protected content that was base64 encoded during preprocessing - const decodedContent = decodeProtectedContent(contentParts.join('')); - const htmlString = formatHtmlForMdxish(decodedContent); - const runScripts = extractRunScriptsAttr(value); - const safeMode = extractBooleanAttr(value, 'safeMode'); - - // Replace opening tag with HTMLBlock node, remove consumed siblings - parent.children[index] = createHTMLBlockNode(htmlString, node.position, runScripts, safeMode); - parent.children.splice(index + 1, closingIdx - index); - } + const replacement = splitRawHtmlBlocks(node); + if (replacement) parent.children.splice(index, 1, ...(replacement as typeof parent.children)); }); - // Handle HTMLBlock inside paragraphs (parsed as inline elements) - visit(tree, 'paragraph', (node: Paragraph, index, parent: Parent | undefined) => { - if (!parent || index === undefined) return; - - const children = node.children || []; - - let htmlBlockStartIdx = -1; - let htmlBlockEndIdx = -1; - let templateLiteralStartIdx = -1; - let templateLiteralEndIdx = -1; - + // Shape 3: inline within a paragraph — `` open/close arrive as + // separate `html` siblings with the template-literal expression between them. + visit(tree, 'paragraph', (paragraph: Paragraph) => { + // An html-block is block content, so it isn't a valid PhrasingContent child; + // widen to RootContent (which HTMLBlock belongs to) for the in-place splice. + const children = paragraph.children as RootContent[]; for (let i = 0; i < children.length; i += 1) { - const child = children[i]; - - if (child.type === 'html' && typeof (child as { value?: string }).value === 'string') { - const value = (child as { value: string }).value; - if (value === '' || value.match(/^]*>$/)) { - htmlBlockStartIdx = i; - } else if (value === '') { - htmlBlockEndIdx = i; - } - } - - // Find opening brace after HTMLBlock start - if (htmlBlockStartIdx !== -1 && templateLiteralStartIdx === -1 && child.type === 'text') { - const value = (child as { value?: string }).value; - if (value === '{') { - templateLiteralStartIdx = i; - } - } - - // Find closing brace before HTMLBlock end - if (htmlBlockStartIdx !== -1 && htmlBlockEndIdx === -1 && child.type === 'text') { - const value = (child as { value?: string }).value; - if (value === '}') { - templateLiteralEndIdx = i; - } - } - } - - if ( - htmlBlockStartIdx !== -1 && - htmlBlockEndIdx !== -1 && - templateLiteralStartIdx !== -1 && - templateLiteralEndIdx !== -1 && - templateLiteralStartIdx < templateLiteralEndIdx - ) { - const openingTag = children[htmlBlockStartIdx] as { value?: string }; - - // Collect content between braces (handles code blocks) - const templateContent: string[] = []; - for (let i = templateLiteralStartIdx + 1; i < templateLiteralEndIdx; i += 1) { - const child = children[i]; - templateContent.push( - collectTextContent(child as { children?: unknown[]; lang?: string; type?: string; value?: string }), - ); - } - - // Decode protected content that was base64 encoded during preprocessing - const decodedContent = decodeProtectedContent(templateContent.join('')); - const htmlString = formatHtmlForMdxish(decodedContent); - - const runScripts = openingTag.value ? extractRunScriptsAttr(openingTag.value) : undefined; - const safeMode = openingTag.value ? extractBooleanAttr(openingTag.value, 'safeMode') : undefined; - - const mdNode = createHTMLBlockNode(htmlString, node.position, runScripts, safeMode); - - parent.children[index] = mdNode; - } - }); - - // Ensure html-block nodes have HTML in children as text node - visit(tree, 'html-block', (node: HTMLBlock) => { - const html = node.data?.hProperties?.html; - if ( - html && - (!node.children || - node.children.length === 0 || - (node.children.length === 1 && node.children[0].type === 'text' && node.children[0].value !== html)) - ) { - node.children = [ - { - type: 'text', - value: html, - }, - ]; + const open = children[i]; + const openMatch = open.type === 'html' ? open.value.match(HTML_BLOCK_OPEN_RE) : null; + if (!openMatch) continue; // eslint-disable-line no-continue + + const closeIdx = children.findIndex( + (child, j) => j > i && child.type === 'html' && child.value === '', + ); + if (closeIdx === -1) continue; // eslint-disable-line no-continue + + const body = children + .slice(i + 1, closeIdx) + .map(child => { + if (child.type === 'mdxTextExpression' || child.type === 'mdxFlowExpression') { + return extractTemplateLiteral(child.value); + } + // Preserve raw text from any other phrasing sibling (e.g. stray + // whitespace or content the tokenizer didn't claim) so it isn't + // silently dropped from the html payload. + return 'value' in child && typeof child.value === 'string' ? child.value : ''; + }) + .join(''); + + const openingTagIndent = (open.position?.start.column ?? 1) - 1; + children.splice(i, closeIdx - i + 1, htmlBlockFromRaw(openMatch[1], body, open.position, openingTagIndent)); } }); - - return tree; }; export default mdxishHtmlBlocks; diff --git a/processor/transform/mdxish/preprocess-jsx-expressions.ts b/processor/transform/mdxish/preprocess-jsx-expressions.ts index 41ebb93f7..e64b32b86 100644 --- a/processor/transform/mdxish/preprocess-jsx-expressions.ts +++ b/processor/transform/mdxish/preprocess-jsx-expressions.ts @@ -1,48 +1,6 @@ import { JSX_COMMENT_REGEX } from '../../../lib/micromark/jsx-comment/pattern'; import { protectCodeBlocks, restoreCodeBlocks } from '../../../lib/utils/mdxish/protect-code-blocks'; -// Base64 encode (Node.js + browser compatible) -function base64Encode(str: string): string { - if (typeof Buffer !== 'undefined') { - return Buffer.from(str, 'utf-8').toString('base64'); - } - return btoa(unescape(encodeURIComponent(str))); -} - -// Base64 decode (Node.js + browser compatible) -export function base64Decode(str: string): string { - if (typeof Buffer !== 'undefined') { - return Buffer.from(str, 'base64').toString('utf-8'); - } - return decodeURIComponent(escape(atob(str))); -} - -// Markers for protected HTMLBlock content (HTML comments avoid markdown parsing issues) -export const HTML_BLOCK_CONTENT_START = ''; - -/** - * Base64 encodes HTMLBlock template literal content to prevent markdown parser from consuming `}'; - * protectHTMLBlockContent(input) - * // Returns: '' - * ``` - */ -function protectHTMLBlockContent(content: string): string { - return content.replace( - /(]*>)\{\s*`((?:[^`\\]|\\.)*)`\s*\}(<\/HTMLBlock>)/g, - (_match, openTag: string, templateContent: string, closeTag: string) => { - const encoded = base64Encode(templateContent); - return `${openTag}${HTML_BLOCK_CONTENT_START}${encoded}${HTML_BLOCK_CONTENT_END}${closeTag}`; - }, - ); -} - /** * Removes JSX-style comments (e.g., { /* comment *\/ }) from content. * @@ -208,10 +166,9 @@ function escapeProblematicBraces(content: string): string { * @returns Preprocessed content ready for markdown parsing */ export function preprocessJSXExpressions(content: string): string { - let processed = protectHTMLBlockContent(content); - const { protectedCode, protectedContent } = protectCodeBlocks(processed); + const { protectedCode, protectedContent } = protectCodeBlocks(content); - processed = escapeProblematicBraces(protectedContent); + let processed = escapeProblematicBraces(protectedContent); processed = restoreCodeBlocks(processed, protectedCode); return processed; diff --git a/processor/transform/mdxish/tables/mdxish-tables.ts b/processor/transform/mdxish/tables/mdxish-tables.ts index 46e217ff7..f7825f91a 100644 --- a/processor/transform/mdxish/tables/mdxish-tables.ts +++ b/processor/transform/mdxish/tables/mdxish-tables.ts @@ -10,6 +10,7 @@ import remarkParse from 'remark-parse'; import { unified } from 'unified'; import { EXIT, visit } from 'unist-util-visit'; +import { NodeTypes } from '../../../../enums'; import { gemojiFromMarkdown } from '../../../../lib/mdast-util/gemoji'; import { legacyVariableFromMarkdown } from '../../../../lib/mdast-util/legacy-variable'; import { gemoji } from '../../../../lib/micromark/gemoji'; @@ -151,6 +152,21 @@ const processTableNode = ( let tableHasFlowContent = false; + // An `` (still a JSX element here; converted to `html-block` by + // `mdxishHtmlBlocks` after this transformer) is block-level content that a + // markdown table cell can't represent, so keep the table as a JSX `
`. + visit( + node as Node, + candidate => + candidate.type === NodeTypes.htmlBlock || + ((candidate.type === 'mdxJsxFlowElement' || candidate.type === 'mdxJsxTextElement') && + (candidate as MdxJsxFlowElement | MdxJsxTextElement).name === 'HTMLBlock'), + () => { + tableHasFlowContent = true; + return EXIT; + }, + ); + // Re-parse text-only cells through markdown and detect flow content visit(node as Node, isTableCell, (cell: MdxJsxTableCell) => { if (!isTextOnly(cell.children as unknown[])) return; diff --git a/processor/utils.ts b/processor/utils.ts index afc646390..60309e021 100644 --- a/processor/utils.ts +++ b/processor/utils.ts @@ -161,18 +161,28 @@ export const isMDXEsm = (node: Node): node is MdxjsEsm => { * Takes an HTML string and formats it for display in the editor. Removes leading/trailing newlines * and unindents the HTML. * - * @param {string} html - HTML content from template literal + * @param {string} html - cooked HTML payload (callers strip any template-literal backticks first) + * @param {number} [openingTagIndent=0] - column the `` opening tag sits at, used to + * dedent each content line so its indentation reads relative to the tag, not the line start * @returns {string} processed HTML */ -export function formatHtmlForMdxish(html: string): string { - // Remove leading/trailing backticks if present, since they're used to keep the HTML - // from being parsed prematurely - let processed = html; - if (processed.startsWith('`') && processed.endsWith('`')) { - processed = processed.slice(1, -1); - } +export function formatHtmlForMdxish(html: string, openingTagIndent = 0): string { // Removes the leading/trailing newlines - let cleaned = processed.replace(/^\s*\n|\n\s*$/g, ''); + let cleaned = html.replace(/^\s*\n|\n\s*$/g, ''); + + // Strip / deindent the lines in the HTML string so that the indents are relative + // to the opening HTMLBlock tag, not the literal line start + // Keep any deeper indent + if (openingTagIndent > 0) { + cleaned = cleaned + .split('\n') + .map(line => { + let i = 0; + while (i < openingTagIndent && (line[i] === ' ' || line[i] === '\t')) i += 1; + return line.slice(i); + }) + .join('\n'); + } // Convert literal \n sequences to actual newlines only inside
 and .
   // Because 
 needs to respect the newline visual and