-const foo = () => {
- const bar = {
- baz: 'blammo'
- }
-
- return bar
-}
-
-\`}Hello, World!
`}\\`example\\``}`example`');
- expect(htmlProp).not.toContain('\\`');
- });
-
- it('passes safeMode property correctly', () => {
- // Test with both JSX expression and string syntax
- const markdown = 'Content
`}Content
'); - }); - - it('should handle template literal with variables', () => { - // eslint-disable-next-line quotes - const markdown = `const x = \${variable}\`}const x = ${variable}');
- });
-
- it('should handle nested template literals', () => {
- // Use a regular string to avoid nested template literal syntax error
- // The content should be: ```javascript\nconst x = 1;\n```- const markdown = '
\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\``}
```javascript\nconst x = 1;\n```'); - }); - - it('expands \\n only inside
/, not in plain text after tags', () => {
- const markdown = [
- '{`',
- 'qerq3er \\n qerreqqe
',
- 'qerq3er \\n qerreqqe',
- 'hello \\n world',
- '`} ',
- ].join('\n');
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
-
- const htmlProp = htmlBlock?.properties?.html as string;
- expect(htmlProp).toBeDefined();
-
- // Literal `\n` expands to real newlines only inside / .
- expect(htmlProp).toBe(
- 'qerq3er \n qerreqqe
\nqerq3er \n qerreqqe\n\nhello \\n world',
- );
- // Must not turn the plain-text `hello \n world` into a line break between words.
- expect(htmlProp).toContain('hello \\n world');
- expect(htmlProp).not.toMatch(/hello \n world/); // space + LF + space (wrong)
- });
-
- it('preserves \\n escape sequences inside ',
- '',
- '`}',
- ].join('\n');
-
- const hast = mdxish(markdown);
- const paragraph = hast.children[0] as Element;
-
- expect(paragraph.type).toBe('element');
- const htmlBlock = findHTMLBlock(paragraph);
- expect(htmlBlock).toBeDefined();
-
- const htmlProp = htmlBlock?.properties?.html as string;
- expect(htmlProp).toBeDefined();
-
- // The `\n` inside the JS string literal must survive as the two-byte escape
- // sequence so eval() sees a well-formed JS string. A real LF here would break it.
- expect(htmlProp).toContain('var x = "hello\\nworld";');
- expect(htmlProp).not.toContain('var x = "hello\nworld";');
- });
-});
diff --git a/__tests__/lib/mdxish/html-blocks.test.ts b/__tests__/lib/mdxish/html-blocks.test.ts
new file mode 100644
index 000000000..8f7c384ed
--- /dev/null
+++ b/__tests__/lib/mdxish/html-blocks.test.ts
@@ -0,0 +1,435 @@
+import type { Element } from 'hast';
+import type { MdxJsxFlowElement } from 'mdast-util-mdx';
+
+import { mdxish } from '../../../lib';
+import { collectNodes, findAllElementsByTagName, findElementByTagName, parseMdxishWithSource } from '../../helpers';
+
+/**
+ * Asserts that parsing `md` with `parseMdxishWithSource` yields a tree in which
+ * `collectNodes` finds exactly one `mdxJsxFlowElement` named `Table`.
+ *
+ * @param md - Markdown source to parse.
+ */
+function expectJsxTableIsParsed(md: string) {
+ const { tree: mdastTree } = parseMdxishWithSource(md);
+ // A table containing an `<HTMLBlock>` carries block-level content, so it is kept
+ // as a JSX `<Table>` (mdxJsxFlowElement) rather than collapsed to a markdown table.
+ const tableNodes = collectNodes(
+ mdastTree,
+ node => node.type === 'mdxJsxFlowElement' && (node as MdxJsxFlowElement).name === 'Table',
+ );
+ expect(tableNodes).toHaveLength(1);
+}
+
+/**
+ * Decoded `html` props of every `<html-block>` in the rendered tree, in document order.
+ *
+ * Maps each element returned by `findAllElementsByTagName(tree, 'html-block')`
+ * to its `properties.html` (`undefined` when the element has no `properties`).
+ *
+ * NOTE(review): the parameter type reads as a bare `ReturnType` with no type
+ * argument; it looks truncated (presumably `ReturnType<typeof mdxish>`) — confirm
+ * against the original patch.
+ */
+function htmlBlockPayloads(tree: ReturnType) {
+ return findAllElementsByTagName(tree, 'html-block').map(node => node.properties?.html);
+}
+
+/**
+ * Asserts no raw `<HTMLBlock>` survived and no protected marker leaked into the tree.
+ *
+ * Two checks: `findElementByTagName(tree, 'HTMLBlock')` must be null, and the
+ * JSON-serialized tree must not contain the substring `RDMX_HTMLBLOCK`.
+ *
+ * NOTE(review): the parameter type reads as a bare `ReturnType` with no type
+ * argument; it looks truncated (presumably `ReturnType<typeof mdxish>`) — confirm
+ * against the original patch.
+ */
+function expectFullyConverted(tree: ReturnType) {
+ expect(findElementByTagName(tree, 'HTMLBlock')).toBeNull();
+ expect(JSON.stringify(tree)).not.toContain('RDMX_HTMLBLOCK');
+}
+
+describe(' parsing', () => {
+ describe('standalone', () => {
+ it('renders as with the decoded html prop', () => {
+ const tree = mdxish('{`Hello`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['Hello']);
+ });
+
+ it('renders between surrounding paragraphs', () => {
+ const tree = mdxish('text before\n\n{`x`} \n\ntext after');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['x']);
+ const json = JSON.stringify(tree);
+ expect(json).toContain('text before');
+ expect(json).toContain('text after');
+ });
+
+ it('renders after a markdown heading', () => {
+ const tree = mdxish('# Heading\n\n{`after heading
`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['after heading
']);
+ expect(findElementByTagName(tree, 'h1')).not.toBeNull();
+ });
+
+ it('renders two consecutive top-level HTMLBlocks', () => {
+ const tree = mdxish('{`one`} \n\n{`two`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['one', 'two']);
+ });
+
+ it('renders inline within a paragraph alongside text', () => {
+ const tree = mdxish('Inline {`x`} text');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['x']);
+ const json = JSON.stringify(tree);
+ expect(json).toContain('Inline');
+ expect(json).toContain('text');
+ });
+
+ it('compiles html blocks preserving newlines', () => {
+ const markdown = `{\`
+
+const foo = () => {
+ const bar = {
+ baz: 'blammo'
+ }
+
+ return bar
+}
+
+\`} `;
+
+ const tree = mdxish(markdown);
+ expect(htmlBlockPayloads(tree)).toStrictEqual([`
+const foo = () => {
+ const bar = {
+ baz: 'blammo'
+ }
+
+ return bar
+}
+
`]);
+ });
+
+ it('handles standalone multiline HTMLBlock with surrounding paragraphs', () => {
+ const markdown = `Hello
+
+{\`
+Hello, World!
+\`}
+
+there`;
+ const tree = mdxish(markdown);
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['Hello, World!
']);
+ });
+
+ it('handles nested HTMLBlock tags in content', () => {
+ const tree = mdxish('{`{Hello} `} ');
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['{Hello} ']);
+ });
+ });
+
+ describe('content formatting', () => {
+ it('preserves multiline HTML content verbatim', () => {
+ const tree = mdxish('{`\n multi\n`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['\n multi\n']);
+ expectFullyConverted(tree);
+ });
+
+ it('preserves raw `} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['']);
+ // The script must not become a real Content
`} ');
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['Content
']);
+ });
+
+ it('handles template literal with variables', () => {
+ // eslint-disable-next-line quotes
+ const tree = mdxish(`{\`const x = \${variable}\`} `);
+ // eslint-disable-next-line no-template-curly-in-string
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['const x = ${variable}']);
+ });
+
+ it('handles nested template literals', () => {
+ const tree = mdxish('{`\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`} ');
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['```javascript\nconst x = 1;\n```
']);
+ });
+
+ it('handles trailing whitespace after closing tag', () => {
+ const tree = mdxish('{`hello`} ');
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['hello']);
+ });
+ });
+
+ describe('inside generic HTML tags & markdown', () => {
+ it('renders inside a with the decoded html prop', () => {
+ const tree = mdxish('{`nested
`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['nested
']);
+ expect(findElementByTagName(tree, 'div')).not.toBeNull();
+ });
+
+ it('renders inside a separated by blank lines', () => {
+ const tree = mdxish('\n\n{`n
`} \n\n ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['n
']);
+ expect(findElementByTagName(tree, 'section')).not.toBeNull();
+ });
+
+ it('renders inside a blockquote', () => {
+ const tree = mdxish('> {`n
`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['n
']);
+ expect(findElementByTagName(tree, 'blockquote')).not.toBeNull();
+ });
+
+ it('renders inside a list item', () => {
+ const tree = mdxish('- {`n
`} ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['n
']);
+ expect(findElementByTagName(tree, 'li')).not.toBeNull();
+ });
+
+ it('renders inside callout blockquotes', () => {
+ const md = `> 🚧 It compiles!
+>
+> {\`
+> Hello, World!
+> \`} `;
+
+ const tree = mdxish(md);
+ const callout = tree.children[0] as Element;
+
+ expect(callout.tagName).toBe('Callout');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual([' Hello, World!']);
+ });
+
+ it('does not render inside code blocks', () => {
+ const md = '```{`n
`} ```';
+
+ const tree = mdxish(md);
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toBeNull();
+ });
+ });
+
+ describe('inside ReadMe components', () => {
+ describe('callouts', () => {
+ it('handles HTMLBlock in an empty callout (no title text)', () => {
+ const markdown = `> 📘
+>
+> {\`body only
\`} `;
+
+ const tree = mdxish(markdown);
+ expect((tree.children[0] as Element).tagName).toBe('Callout');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['body only
']);
+ });
+
+ it('renders inside a ', () => {
+ const tree = mdxish(
+ '\n\n{`\n n
\n\n m
\n`} \n\n ',
+ );
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['\n n
\n\n m
\n']);
+ expectFullyConverted(tree);
+ });
+ });
+
+ it('renders inside an ', () => {
+ const tree = mdxish('\n\n{`n
`} \n\n ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['n
']);
+ expectFullyConverted(tree);
+ });
+
+ it('renders inside /', () => {
+ const tree = mdxish('\n\n\n{`n
`} \n\n \n ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['n
']);
+ expectFullyConverted(tree);
+ });
+
+ it('renders inside /', () => {
+ const tree = mdxish('\n\n\n{`n
`} \n\n \n ');
+
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['n
']);
+ expectFullyConverted(tree);
+ });
+ });
+
+ describe('inside ', () => {
+ it('renders inside a cell as with the decoded html prop', () => {
+ const md = `
+
+ Name Markup
+
+
+
+ Custom
+ {\`
+Hello
+
+World
+
+\`}
+
+
+
`;
+
+ expectJsxTableIsParsed(md);
+
+ const tree = mdxish(md);
+
+ const rawHtmlBlock = findElementByTagName(tree, 'HTMLBlock');
+ expect(rawHtmlBlock).toBeNull();
+
+ // Newlines (including the blank line) inside the content must survive the
+ // table re-parse and not fragment the HTMLBlock.
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['\nHello
\n\nWorld
\n']);
+ });
+
+ it('still renders markdown in a sibling text cell', () => {
+ const md = `
+
+ A B
+
+
+
+ **bold** here
+ {\`
+- one
+
+- two
+
+\`}
+
+
+
`;
+
+ expectJsxTableIsParsed(md);
+
+ const tree = mdxish(md);
+ // The sibling cell's markdown must still be processed into a .
+ const strongs = findAllElementsByTagName(tree, 'strong');
+ expect(strongs.length).toBeGreaterThan(0);
+ expect(JSON.stringify(strongs[0])).toContain('bold');
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['\n- one
\n\n- two
\n
']);
+ });
+
+ it('renders inside a lowercase cell', () => {
+ const md = `
+
+
+ {\`
+a
+
+b
+
+\`}
+
+
+
`;
+
+ const tree = mdxish(md);
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['\na
\n\nb
\n ']);
+ expectFullyConverted(tree);
+ });
+
+ it('preserves curly braces in HTMLBlock content inside a table cell', () => {
+ const md = `
+
+
+ {\`{notTemplate}\`}
+
+
+
`;
+
+ expectJsxTableIsParsed(md);
+ const tree = mdxish(md);
+ expect(htmlBlockPayloads(tree)).toStrictEqual(['{notTemplate}']);
+ });
+
+ it('preserves safeMode and runScripts attributes when nested', () => {
+ const md = `
+
+
+ {\`raw\`}
+
+
+
`;
+
+ expectJsxTableIsParsed(md);
+
+ const tree = mdxish(md);
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ type: 'element',
+ tagName: 'html-block',
+ properties: {
+ html: 'raw',
+ safeMode: 'true',
+ runScripts: false,
+ },
+ });
+ });
+
+ it('renders multiple HTMLBlocks inside the same Table', () => {
+ const md = `
+
+
+ {\`
+one
+
+uno
+
+\`}
+ {\`two\`}
+
+
+
`;
+
+ expectJsxTableIsParsed(md);
+
+ const tree = mdxish(md);
+
+ const htmlBlocks = findAllElementsByTagName(tree, 'html-block');
+ expect(htmlBlocks).toHaveLength(2);
+ expect(htmlBlocks[0].properties).toMatchObject({
+ html: '\none\n\nuno\n',
+ });
+ expect(htmlBlocks[1].properties).toMatchObject({ html: 'two' });
+ });
+
+ it('leaves no RDMX_HTMLBLOCK markers or stray comment nodes in the tree', () => {
+ const md = `
+
+
+
+ {\`x\`}
+
+
+
+
`;
+
+ expectJsxTableIsParsed(md);
+
+ const tree = mdxish(md);
+ const serialized = JSON.stringify(tree);
+
+ expect(serialized).not.toContain('RDMX_HTMLBLOCK');
+
+ const htmlBlock = findElementByTagName(tree, 'html-block') as Element;
+ expect(htmlBlock.children).toStrictEqual([]);
+ });
+ });
+});
diff --git a/__tests__/transformers/mdxish-html-blocks.test.ts b/__tests__/transformers/mdxish-html-blocks.test.ts
new file mode 100644
index 000000000..c87f83f05
--- /dev/null
+++ b/__tests__/transformers/mdxish-html-blocks.test.ts
@@ -0,0 +1,135 @@
+import { mdxish } from '../../lib';
+import { findElementByTagName } from '../helpers';
+
+describe('mdxish html blocks transformer', () => {
+ describe('attribute extraction', () => {
+ it('extracts safeMode from JSX syntax', () => {
+ const tree = mdxish('{`content
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
', safeMode: 'true' },
+ });
+ });
+
+ it('extracts safeMode from string syntax', () => {
+ const tree = mdxish('{`content
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
', safeMode: 'false' },
+ });
+ });
+
+ it('extracts runScripts boolean true', () => {
+ const tree = mdxish('{`content
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
', runScripts: true },
+ });
+ });
+
+ it('extracts runScripts boolean false', () => {
+ const tree = mdxish('{`content
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
', runScripts: false },
+ });
+ });
+
+ it('extracts runScripts string value', () => {
+ const tree = mdxish('{`content
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
', runScripts: 'afterRender' },
+ });
+ });
+
+ it('extracts multiple attributes', () => {
+ const tree = mdxish(
+ '{`content
`} ',
+ );
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
', safeMode: 'true', runScripts: true },
+ });
+ });
+
+ it('omits runScripts and safeMode when absent', () => {
+ const tree = mdxish('{`content
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'content
' },
+ });
+ });
+ });
+
+ describe('content extraction', () => {
+ it('strips template literal delimiters', () => {
+ const tree = mdxish('{`hello`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'hello' },
+ });
+ });
+
+ it('handles content without template literal syntax', () => {
+ const tree = mdxish('{`plain`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'plain' },
+ });
+ });
+
+ it('unescapes backticks in HTML content', () => {
+ const tree = mdxish('{`\\`example\\``} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: '`example`' },
+ });
+ });
+
+ it('preserves multiline content', () => {
+ const markdown = `{\`
+
+ - one
+ - two
+
+\`} `;
+ const tree = mdxish(markdown);
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: '\n - one
\n - two
\n
' },
+ });
+ });
+
+ it('starts indent relative to the HTMLBlock opening tag', () => {
+ const markdown = `
+ {\`first
+second
+ third
+ fourth
+ \`} `;
+ const tree = mdxish(markdown);
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ properties: { html: 'first\nsecond\n third\nfourth' },
+ });
+ });
+ });
+
+ describe('node structure', () => {
+ it('produces correct node type and hName', () => {
+ const tree = mdxish('{`test
`} ');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toMatchObject({
+ type: 'element',
+ tagName: 'html-block',
+ });
+ });
+
+ it('does not transform non-HTMLBlock html nodes', () => {
+ const tree = mdxish('just html');
+ const htmlBlock = findElementByTagName(tree, 'html-block');
+ expect(htmlBlock).toBeNull();
+ });
+ });
+})
\ No newline at end of file
diff --git a/lib/constants.ts b/lib/constants.ts
index 4911d743b..36978809f 100644
--- a/lib/constants.ts
+++ b/lib/constants.ts
@@ -63,6 +63,7 @@ export const INLINE_COMPONENT_TAGS = new Set(['Anchor', 'Glossary']);
/**
* PascalCase tags that have their own dedicated tokenizer / transformer
* and must not be claimed by the generic `mdxComponent` construct.
+ * Subject to change as we add more dedicated tokenizers.
*/
const DEDICATED_COMPONENT_TAGS = ['HTMLBlock', 'Table'] as const;
@@ -75,6 +76,15 @@ export const GENERIC_MDX_COMPONENT_EXCLUDED_TAGS = new Set([
...INLINE_COMPONENT_TAGS,
]);
+/**
+ * Tags the micromark `mdxComponent` tokenizer must not claim: the inline
+ * component tags plus `Table`, which has its own dedicated tokenizer /
+ * transformer. `HTMLBlock` is not listed, so this set does not stop the
+ * tokenizer from claiming it.
+ */
+export const TOKENIZER_MDX_COMPONENT_EXCLUDED_TAGS = new Set([
+ 'Table',
+ ...INLINE_COMPONENT_TAGS,
+]);
+
/**
* Lowercased variant of {@link INLINE_COMPONENT_TAGS} for consumers that
* run after rehype (where hast `tagName` is normalized to lowercase).
diff --git a/lib/mdxish.ts b/lib/mdxish.ts
index 855c10673..1219dfbd8 100644
--- a/lib/mdxish.ts
+++ b/lib/mdxish.ts
@@ -201,7 +201,7 @@ export function mdxishAstProcessor(mdContent: string, opts: MdxishOpts = {}) {
.use(mdxishInlineMdxHtmlBlocks, { safeMode })
.use(restoreSnakeCaseComponentNames, { mapping: snakeCaseMapping })
.use(mdxishTables)
- .use(mdxishHtmlBlocks)
+ .use(mdxishHtmlBlocks) // Convert every shape → html-block
// The next few transformers must appear after mdxishMdxComponentBlocks
// so nodes produced by the inline re-parse of component bodies
// (e.g. code/image/embed inside ) get visited too
diff --git a/lib/micromark/mdx-component/syntax.ts b/lib/micromark/mdx-component/syntax.ts
index 5d4f7eab9..1468472bb 100644
--- a/lib/micromark/mdx-component/syntax.ts
+++ b/lib/micromark/mdx-component/syntax.ts
@@ -4,7 +4,7 @@ import type { Code, Construct, Effects, Extension, Resolver, State, TokenizeCont
import { markdownLineEnding } from 'micromark-util-character';
import { codes, types } from 'micromark-util-symbol';
-import { GENERIC_MDX_COMPONENT_EXCLUDED_TAGS } from '../../constants';
+import { TOKENIZER_MDX_COMPONENT_EXCLUDED_TAGS } from '../../constants';
declare module 'micromark-util-types' {
interface TokenTypeMap {
@@ -312,7 +312,7 @@ function createTokenize(mode: 'flow' | 'text') {
}
// Tag name complete — check exclusions
- if (GENERIC_MDX_COMPONENT_EXCLUDED_TAGS.has(tagName)) {
+ if (TOKENIZER_MDX_COMPONENT_EXCLUDED_TAGS.has(tagName)) {
return nok(code);
}
diff --git a/package.json b/package.json
index 727b0a00f..4005436f9 100644
--- a/package.json
+++ b/package.json
@@ -177,7 +177,7 @@
},
{
"path": "dist/main.node.js",
- "maxSize": "947KB"
+ "maxSize": "950KB"
}
]
},
diff --git a/processor/transform/mdxish/mdxish-html-blocks.ts b/processor/transform/mdxish/mdxish-html-blocks.ts
index 26398b11f..c57490619 100644
--- a/processor/transform/mdxish/mdxish-html-blocks.ts
+++ b/processor/transform/mdxish/mdxish-html-blocks.ts
@@ -1,392 +1,196 @@
import type { HTMLBlock } from '../../../types';
-import type { Paragraph, Parent } from 'mdast';
+import type { Html, Paragraph, Parent, RootContent } from 'mdast';
import type { Transform } from 'mdast-util-from-markdown';
+import type { MdxJsxFlowElement, MdxJsxTextElement } from 'mdast-util-mdx';
import { visit } from 'unist-util-visit';
import { NodeTypes } from '../../../enums';
import { formatHtmlForMdxish } from '../../utils';
-import { base64Decode, HTML_BLOCK_CONTENT_END, HTML_BLOCK_CONTENT_START } from './preprocess-jsx-expressions';
+type HtmlBlockJsx = MdxJsxFlowElement | MdxJsxTextElement;
-/**
- * Decodes HTMLBlock content that was protected during preprocessing.
- * Content is wrapped in
- */
-function decodeProtectedContent(content: string): string {
- // Escape special regex characters in the markers
- const startEscaped = HTML_BLOCK_CONTENT_START.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
- const endEscaped = HTML_BLOCK_CONTENT_END.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
- const markerRegex = new RegExp(`${startEscaped}([A-Za-z0-9+/=]+)${endEscaped}`, 'g');
- return content.replace(markerRegex, (_match, encoded: string) => {
- try {
- return base64Decode(encoded);
- } catch {
- return encoded;
- }
- });
-}
-
-/**
- * Collects text content from a node and its children recursively
- */
-function collectTextContent(node: { children?: unknown[]; lang?: string; type?: string; value?: string }): string {
- const parts: string[] = [];
-
- if (node.type === 'text' && node.value) {
- parts.push(node.value);
- } else if (node.type === 'html' && node.value) {
- parts.push(node.value);
- } else if (node.type === 'inlineCode' && node.value) {
- parts.push(node.value);
- } else if (node.type === 'code' && node.value) {
- // Reconstruct code fence syntax (markdown parser consumes opening ```)
- const lang = node.lang || '';
- const fence = `\`\`\`${lang ? `${lang}\n` : ''}`;
- parts.push(fence);
- parts.push(node.value);
- // Add newline before closing fence if missing
- const closingFence = node.value.endsWith('\n') ? '```' : '\n```';
- parts.push(closingFence);
- } else if (node.children && Array.isArray(node.children)) {
- node.children.forEach(child => {
- if (typeof child === 'object' && child !== null) {
- parts.push(collectTextContent(child as { children?: unknown[]; lang?: string; type?: string; value?: string }));
- }
- });
- }
-
- return parts.join('');
-}
-
-/**
- * Extracts boolean attribute from HTML tag. Handles JSX (safeMode={true}) and string (safeMode="true") syntax.
- * Returns "true"/"false" string to survive rehypeRaw serialization.
- */
-function extractBooleanAttr(attrs: string, name: string): string | undefined {
- // Try JSX syntax: name={true|false}
- const jsxMatch = attrs.match(new RegExp(`${name}=\\{(true|false)\\}`));
- if (jsxMatch) {
- return jsxMatch[1];
- }
- // Try string syntax: name="true"|true
- const stringMatch = attrs.match(new RegExp(`${name}="?(true|false)"?`));
- if (stringMatch) {
- return stringMatch[1];
- }
- return undefined;
-}
-
-/**
- * Extracts runScripts attribute from HTML tag. Returns boolean for "true"/"false", string for other values, or undefined if not found.
- */
-function extractRunScriptsAttr(attrs: string): boolean | string | undefined {
- const runScriptsMatch = attrs.match(/runScripts="?([^">\s]+)"?/);
- if (!runScriptsMatch) {
- return undefined;
- }
- const value = runScriptsMatch[1];
- if (value === 'true') {
- return true;
- }
- if (value === 'false') {
- return false;
- }
- return value;
-}
+// `<HTMLBlock>{`…`}</HTMLBlock>` embedded inside a raw HTML block (e.g. a
+// single-line `<div>…</div>`). CommonMark slurps the whole div as one `html`
+// node, so the tokenizer never sees the HTMLBlock — we recover it here.
+// Capture groups, in order: the opening tag's attribute string, then the
+// template-literal body (backslash escapes are kept verbatim).
+// NOTE(review): the literal below begins with `]*)>`, closing a character class
+// and a group that are never opened; its leading part (presumably
+// `<HTMLBlock…([^>`) looks truncated — restore it from the original patch.
+const RAW_HTML_BLOCK_RE = /]*)>\s*\{\s*`((?:[^`\\]|\\.)*)`\s*\}\s*<\/HTMLBlock>/g;
+// Opening `<HTMLBlock …>` tag as its own `html` node — produced inside a paragraph
+// when an HTMLBlock appears inline alongside text.
+// NOTE(review): same truncation as above (the pattern starts with `^]*)>`) — confirm.
+const HTML_BLOCK_OPEN_RE = /^]*)>$/;
/**
- * Creates an HTMLBlock node from HTML string and optional attributes
+ * Builds the canonical `html-block` MDAST node the renderer expects.
*/
-function createHTMLBlockNode(
- htmlString: string,
+const createHtmlBlockNode = (
+ html: string,
position: HTMLBlock['position'],
runScripts?: boolean | string,
safeMode?: string,
-): HTMLBlock {
- return {
- position,
- children: [{ type: 'text', value: htmlString }],
- type: NodeTypes.htmlBlock,
- data: {
- hName: 'html-block',
- hProperties: {
- html: htmlString,
- ...(runScripts !== undefined && { runScripts }),
- ...(safeMode !== undefined && { safeMode }),
- },
+): HTMLBlock => ({
+ position,
+ children: [{ type: 'text', value: html }],
+ type: NodeTypes.htmlBlock,
+ data: {
+ hName: 'html-block',
+ hProperties: {
+ html,
+ ...(runScripts !== undefined && { runScripts }),
+ ...(safeMode !== undefined && { safeMode }),
},
- };
-}
+ },
+});
/**
- * Checks for opening tag only (for split detection)
+ * Reads the cooked string out of a brace expression wrapping a single template
+ * literal (`` `<p>n</p>` `` → `<p>n</p>`).
*/
-function hasOpeningTagOnly(node: { children?: unknown[]; type?: string; value?: string }): {
- attrs: string;
- found: boolean;
-} {
- let hasOpening = false;
- let hasClosed = false;
- let attrs = '';
+const extractTemplateLiteral = (value: string | undefined): string => {
+ // Missing or empty expression source: nothing to extract.
+ if (!value) return '';
+ // After trimming, the whole value must be one backtick-delimited span; the
+ // capture is everything between the first and last backtick, taken verbatim
+ // (no escape sequences are processed here).
+ const match = value.trim().match(/^`([\s\S]*)`$/);
+ // Non-template-literal bodies (e.g. `{someVar}`) are malformed mdxish input;
+ // returning '' beats shipping JS identifier source as an HTML payload.
+ return match ? match[1] : '';
+};
- const check = (n: { children?: unknown[]; type?: string; value?: string }) => {
- if (n.type === 'html' && n.value) {
- if (n.value === '') {
- hasOpening = true;
- } else {
- const match = n.value.match(/^]*)?>$/);
- if (match) {
- hasOpening = true;
- attrs = match[1] || '';
- }
- }
- if (n.value === ' ' || n.value.includes(' ')) {
- hasClosed = true;
- }
- }
- if (n.children && Array.isArray(n.children)) {
- n.children.forEach(child => {
- check(child as { children?: unknown[]; type?: string; value?: string });
- });
- }
- };
+/**
+ * Normalizes a raw `runScripts` attribute value: the strings `'true'` and
+ * `'false'` become booleans; any other value (including `undefined`) is
+ * returned unchanged.
+ */
+const toRunScripts = (raw: string | undefined): boolean | string | undefined =>
+ raw === 'true' ? true : raw === 'false' ? false : raw;
- check(node);
- // Return true only if opening without closing (split case)
- return { attrs, found: hasOpening && !hasClosed };
-}
+/**
+ * Reads an attribute from a raw `<HTMLBlock>` attribute string.
+ *
+ * Tries, in order (whitespace around `=` is allowed):
+ * 1. a double-quoted value (`name="…"`) — returns the text between the quotes;
+ * 2. a brace-wrapped boolean (`name={true}` / `name={false}`) — returns
+ *    `'true'` or `'false'`;
+ * 3. the bare name appearing as a whole word — returns `'true'`.
+ * Returns `undefined` when none of these match.
+ *
+ * `name` is interpolated into the patterns unescaped, so it must not contain
+ * regex metacharacters.
+ *
+ * NOTE(review): single-quoted or unquoted values (`name='false'`, `name=false`)
+ * and brace expressions other than `true`/`false` fall through to step 3 and
+ * yield `'true'`; step 3 also fires when the word appears inside another
+ * attribute's value. Confirm this is intended.
+ *
+ * @param attrs - Attribute text taken from the opening tag.
+ * @param name - Attribute name to look up.
+ * @returns The attribute's string value, or `undefined` if it is absent.
+ */
+const rawAttr = (attrs: string, name: string): string | undefined => {
+ const quoted = attrs.match(new RegExp(`\\b${name}\\s*=\\s*"([^"]*)"`));
+ if (quoted) return quoted[1];
+ const expr = attrs.match(new RegExp(`\\b${name}\\s*=\\s*\\{(true|false)\\}`));
+ if (expr) return expr[1];
+ return new RegExp(`\\b${name}\\b`).test(attrs) ? 'true' : undefined;
+};
+
+/**
+ * Reads an attribute from a parsed `<HTMLBlock>` JSX element.
+ *
+ * Looks for the first `mdxJsxAttribute` whose name equals `name` and returns:
+ * - `undefined` when there is no such attribute;
+ * - the value itself when it is a string (`name="…"`);
+ * - the nested `value` field when the value is an object carrying one
+ *   (presumably the expression source of `name={…}` — see mdast-util-mdx-jsx);
+ * - `'true'` otherwise, i.e. when the attribute has no value.
+ *
+ * @param element - The JSX flow or text element to inspect.
+ * @param name - Attribute name to look up.
+ */
+const jsxAttr = (element: HtmlBlockJsx, name: string): string | undefined => {
+ const attr = element.attributes.find(a => a.type === 'mdxJsxAttribute' && a.name === name);
+ if (!attr || attr.type !== 'mdxJsxAttribute') return undefined;
+ if (typeof attr.value === 'string') return attr.value;
+ if (attr.value && typeof attr.value === 'object' && 'value' in attr.value) return attr.value.value;
+ return 'true'; // bare boolean attribute (no value), e.g. `<HTMLBlock safeMode>`
+};
+
+/**
+ * Builds an `html-block` from a raw attribute string and (unparsed) body.
+ *
+ * The body is passed through `formatHtmlForMdxish(html, openingTagIndent)` and
+ * the result is handed to `createHtmlBlockNode` together with `position`.
+ * `runScripts` is read with `rawAttr` and normalized by `toRunScripts`;
+ * `safeMode` is read with `rawAttr` and passed on as a string.
+ *
+ * @param attrs - Attribute text of the opening tag.
+ * @param html - Template-literal body, before formatting.
+ * @param position - Position to attach to the resulting node.
+ * @param openingTagIndent - Indent of the opening tag, forwarded to
+ *   `formatHtmlForMdxish`; defaults to 0.
+ */
+const htmlBlockFromRaw = (
+ attrs: string,
+ html: string,
+ position: HTMLBlock['position'],
+ openingTagIndent = 0,
+): HTMLBlock =>
+ createHtmlBlockNode(
+ formatHtmlForMdxish(html, openingTagIndent),
+ position,
+ toRunScripts(rawAttr(attrs, 'runScripts')),
+ rawAttr(attrs, 'safeMode'),
+ );
/**
- * Checks if a node contains an HTMLBlock closing tag
+ * Splits a raw `html` node that embeds one or more `<HTMLBlock>`s into
+ * `[html before, html-block, html after, …]`. Returns null when there is none.
+ *
+ * `String.split` on a regex with capture groups interleaves the captures into
+ * the result, so segments arrive as `[text, attrs, body, text, attrs, body, …]`.
*/
-function hasClosingTag(node: { children?: unknown[]; type?: string; value?: string }): boolean {
- if (node.type === 'html' && node.value) {
- if (node.value === ' ' || node.value.includes(' ')) return true;
- }
- if (node.children && Array.isArray(node.children)) {
- return node.children.some(child => hasClosingTag(child as { children?: unknown[]; type?: string; value?: string }));
+const splitRawHtmlBlocks = (node: Html): RootContent[] | null => {
+ const segments = node.value.split(RAW_HTML_BLOCK_RE);
+ if (segments.length === 1) return null; // no `<HTMLBlock>` present
+
+ const parts: RootContent[] = [];
+ // Walk the segments in groups of three: [text, attrs, body]. The final group
+ // holds only the trailing text, so its attrs/body are undefined.
+ for (let i = 0; i < segments.length; i += 3) {
+ const [text, attrs, body] = segments.slice(i, i + 3);
+ // Non-empty surrounding HTML stays a plain `html` node (no position attached).
+ if (text) parts.push({ type: 'html', value: text });
+ if (body !== undefined) {
+ // The opening tag's column equals the length of the line it starts on
+ // (the text run since the previous newline preceding the match).
+ const openingTagIndent = text.slice(text.lastIndexOf('\n') + 1).length;
+ // Every extracted block reuses the position of the whole original html node.
+ parts.push(htmlBlockFromRaw(attrs, body, node.position, openingTagIndent));
+ }
 }
- return false;
-}
+ return parts;
+};
/**
- * Transforms HTMLBlock MDX JSX to html-block nodes. Handles {`...`} syntax.
+ * Converts every `<HTMLBlock>` shape that survives parsing into the canonical
+ * `html-block` MDAST node, reading the body from the tokenizer's template-literal
+ * expression. Three shapes occur:
+ *
+ * 1. JSX element (`mdxJsxFlowElement`/`mdxJsxTextElement`) — multiline/block
+ * context and table cells (after their remarkMdx re-parse).
+ * 2. Raw `html` blob (`splitRawHtmlBlocks`) — single-line top-level, or nested
+ * in raw HTML like an inline `<div>`.
+ * 3. Inline-in-paragraph — split into `html` + expression + `html` siblings.
+ *
+ * Runs *after* `mdxishTables` so table cells are re-parsed first;
+ * `mdxishTables` recognizes the still-JSX `<HTMLBlock>` element when deciding to
+ * keep a table as a JSX `<Table>`. This replaces the old base64-comment marker
+ * machinery — the #1455 tokenizer hands the body over already parsed.
*/
const mdxishHtmlBlocks = (): Transform => tree => {
- // Handle HTMLBlock split across root children (caused by newlines)
- visit(tree, 'root', (root: Parent) => {
- const children = root.children;
- let i = 0;
-
- while (i < children.length) {
- const child = children[i] as { children?: unknown[]; type?: string; value?: string };
- const { attrs, found: hasOpening } = hasOpeningTagOnly(child);
-
- if (hasOpening) {
- // Find closing tag in subsequent siblings
- let closingIdx = -1;
- for (let j = i + 1; j < children.length; j += 1) {
- if (hasClosingTag(children[j] as { children?: unknown[]; type?: string; value?: string })) {
- closingIdx = j;
- break;
- }
- }
-
- if (closingIdx !== -1) {
- // Collect inner content between tags
- const contentParts: string[] = [];
- for (let j = i; j <= closingIdx; j += 1) {
- const node = children[j] as { children?: unknown[]; type?: string; value?: string };
- contentParts.push(collectTextContent(node));
- }
-
- // Remove the opening/closing tags and template literal syntax from content
- let content = contentParts.join('');
- content = content.replace(/^]*>\s*\{?\s*`?/, '').replace(/`?\s*\}?\s*<\/HTMLBlock>$/, '');
- // Decode protected content that was base64 encoded during preprocessing
- content = decodeProtectedContent(content);
-
- const htmlString = formatHtmlForMdxish(content);
- const runScripts = extractRunScriptsAttr(attrs);
- const safeMode = extractBooleanAttr(attrs, 'safeMode');
-
- // Replace range with single HTMLBlock node
- const mdNode = createHTMLBlockNode(
- htmlString,
- (children[i] as { position?: unknown }).position as HTMLBlock['position'],
- runScripts,
- safeMode,
- );
- root.children.splice(i, closingIdx - i + 1, mdNode);
- }
- }
- i += 1;
- }
- });
+ // Shape 1: tokenized JSX element.
+ visit(
+ tree,
+ node => node.type === 'mdxJsxFlowElement' || node.type === 'mdxJsxTextElement',
+ (node, index, parent: Parent | undefined) => {
+ const element = node as HtmlBlockJsx;
+ if (element.name !== 'HTMLBlock' || !parent || index === undefined) return;
+
+ const exprChild = element.children.find(
+ child => child.type === 'mdxFlowExpression' || child.type === 'mdxTextExpression',
+ ) as { value?: string } | undefined;
+
+ const openingTagIndent = (element.position?.start.column ?? 1) - 1;
+ parent.children[index] = createHtmlBlockNode(
+ formatHtmlForMdxish(extractTemplateLiteral(exprChild?.value), openingTagIndent),
+ element.position,
+ toRunScripts(jsxAttr(element, 'runScripts')),
+ jsxAttr(element, 'safeMode'),
+ );
+ },
+ );
- // Handle HTMLBlock parsed as HTML elements (when template literal contains block-level HTML tags)
- visit(tree, 'html', (node, index, parent: Parent | undefined) => {
+ // Shape 2: raw HTML blob.
+ visit(tree, 'html', (node: Html, index, parent: Parent | undefined) => {
if (!parent || index === undefined) return;
-
- const value = (node as { value?: string }).value;
- if (!value) return;
-
- // Case 1: Full HTMLBlock in single node
- const fullMatch = value.match(/^]*)?>([\s\S]*)<\/HTMLBlock>$/);
- if (fullMatch) {
- const attrs = fullMatch[1] || '';
- let content = fullMatch[2] || '';
-
- // Remove template literal syntax if present: {`...`}
- content = content.replace(/^\s*\{\s*`/, '').replace(/`\s*\}\s*$/, '');
- // Decode protected content that was base64 encoded during preprocessing
- content = decodeProtectedContent(content);
-
- const htmlString = formatHtmlForMdxish(content);
- const runScripts = extractRunScriptsAttr(attrs);
- const safeMode = extractBooleanAttr(attrs, 'safeMode');
-
- parent.children[index] = createHTMLBlockNode(htmlString, node.position, runScripts, safeMode);
- return;
- }
-
- // Case 2: Opening tag only (split by blank lines)
- if (value === '' || value.match(/^]*>$/)) {
- const siblings = parent.children;
- let closingIdx = -1;
-
- // Find closing tag in siblings
- for (let i = index + 1; i < siblings.length; i += 1) {
- const sibling = siblings[i];
- if (sibling.type === 'html') {
- const sibVal = (sibling as { value?: string }).value;
- if (sibVal === ' ' || sibVal?.includes(' ')) {
- closingIdx = i;
- break;
- }
- }
- }
-
- if (closingIdx === -1) return;
-
- // Collect content between tags, skipping template literal delimiters
- const contentParts: string[] = [];
- for (let i = index + 1; i < closingIdx; i += 1) {
- const sibling = siblings[i];
- // Skip template literal delimiters
- if (sibling.type === 'text') {
- const textVal = (sibling as { value?: string }).value;
- if (textVal === '{' || textVal === '}' || textVal === '{`' || textVal === '`}') {
- // eslint-disable-next-line no-continue
- continue;
- }
- }
- contentParts.push(collectTextContent(sibling as { children?: unknown[]; type?: string; value?: string }));
- }
-
- // Decode protected content that was base64 encoded during preprocessing
- const decodedContent = decodeProtectedContent(contentParts.join(''));
- const htmlString = formatHtmlForMdxish(decodedContent);
- const runScripts = extractRunScriptsAttr(value);
- const safeMode = extractBooleanAttr(value, 'safeMode');
-
- // Replace opening tag with HTMLBlock node, remove consumed siblings
- parent.children[index] = createHTMLBlockNode(htmlString, node.position, runScripts, safeMode);
- parent.children.splice(index + 1, closingIdx - index);
- }
+ const replacement = splitRawHtmlBlocks(node);
+ if (replacement) parent.children.splice(index, 1, ...(replacement as typeof parent.children));
});
- // Handle HTMLBlock inside paragraphs (parsed as inline elements)
- visit(tree, 'paragraph', (node: Paragraph, index, parent: Parent | undefined) => {
- if (!parent || index === undefined) return;
-
- const children = node.children || [];
-
- let htmlBlockStartIdx = -1;
- let htmlBlockEndIdx = -1;
- let templateLiteralStartIdx = -1;
- let templateLiteralEndIdx = -1;
-
+ // Shape 3: inline within a paragraph — `<HTMLBlock>` open/close arrive as
+ // separate `html` siblings with the template-literal expression between them.
+ visit(tree, 'paragraph', (paragraph: Paragraph) => {
+ // An html-block is block content, so it isn't a valid PhrasingContent child;
+ // widen to RootContent (which HTMLBlock belongs to) for the in-place splice.
+ const children = paragraph.children as RootContent[];
for (let i = 0; i < children.length; i += 1) {
- const child = children[i];
-
- if (child.type === 'html' && typeof (child as { value?: string }).value === 'string') {
- const value = (child as { value: string }).value;
- if (value === '' || value.match(/^]*>$/)) {
- htmlBlockStartIdx = i;
- } else if (value === ' ') {
- htmlBlockEndIdx = i;
- }
- }
-
- // Find opening brace after HTMLBlock start
- if (htmlBlockStartIdx !== -1 && templateLiteralStartIdx === -1 && child.type === 'text') {
- const value = (child as { value?: string }).value;
- if (value === '{') {
- templateLiteralStartIdx = i;
- }
- }
-
- // Find closing brace before HTMLBlock end
- if (htmlBlockStartIdx !== -1 && htmlBlockEndIdx === -1 && child.type === 'text') {
- const value = (child as { value?: string }).value;
- if (value === '}') {
- templateLiteralEndIdx = i;
- }
- }
- }
-
- if (
- htmlBlockStartIdx !== -1 &&
- htmlBlockEndIdx !== -1 &&
- templateLiteralStartIdx !== -1 &&
- templateLiteralEndIdx !== -1 &&
- templateLiteralStartIdx < templateLiteralEndIdx
- ) {
- const openingTag = children[htmlBlockStartIdx] as { value?: string };
-
- // Collect content between braces (handles code blocks)
- const templateContent: string[] = [];
- for (let i = templateLiteralStartIdx + 1; i < templateLiteralEndIdx; i += 1) {
- const child = children[i];
- templateContent.push(
- collectTextContent(child as { children?: unknown[]; lang?: string; type?: string; value?: string }),
- );
- }
-
- // Decode protected content that was base64 encoded during preprocessing
- const decodedContent = decodeProtectedContent(templateContent.join(''));
- const htmlString = formatHtmlForMdxish(decodedContent);
-
- const runScripts = openingTag.value ? extractRunScriptsAttr(openingTag.value) : undefined;
- const safeMode = openingTag.value ? extractBooleanAttr(openingTag.value, 'safeMode') : undefined;
-
- const mdNode = createHTMLBlockNode(htmlString, node.position, runScripts, safeMode);
-
- parent.children[index] = mdNode;
- }
- });
-
- // Ensure html-block nodes have HTML in children as text node
- visit(tree, 'html-block', (node: HTMLBlock) => {
- const html = node.data?.hProperties?.html;
- if (
- html &&
- (!node.children ||
- node.children.length === 0 ||
- (node.children.length === 1 && node.children[0].type === 'text' && node.children[0].value !== html))
- ) {
- node.children = [
- {
- type: 'text',
- value: html,
- },
- ];
+ const open = children[i];
+ const openMatch = open.type === 'html' ? open.value.match(HTML_BLOCK_OPEN_RE) : null;
+ if (!openMatch) continue; // eslint-disable-line no-continue
+
+ const closeIdx = children.findIndex(
+ (child, j) => j > i && child.type === 'html' && child.value === ' ',
+ );
+ if (closeIdx === -1) continue; // eslint-disable-line no-continue
+
+ const body = children
+ .slice(i + 1, closeIdx)
+ .map(child => {
+ if (child.type === 'mdxTextExpression' || child.type === 'mdxFlowExpression') {
+ return extractTemplateLiteral(child.value);
+ }
+ // Preserve raw text from any other phrasing sibling (e.g. stray
+ // whitespace or content the tokenizer didn't claim) so it isn't
+ // silently dropped from the html payload.
+ return 'value' in child && typeof child.value === 'string' ? child.value : '';
+ })
+ .join('');
+
+ const openingTagIndent = (open.position?.start.column ?? 1) - 1;
+ children.splice(i, closeIdx - i + 1, htmlBlockFromRaw(openMatch[1], body, open.position, openingTagIndent));
}
});
-
- return tree;
};
export default mdxishHtmlBlocks;
diff --git a/processor/transform/mdxish/preprocess-jsx-expressions.ts b/processor/transform/mdxish/preprocess-jsx-expressions.ts
index 41ebb93f7..e64b32b86 100644
--- a/processor/transform/mdxish/preprocess-jsx-expressions.ts
+++ b/processor/transform/mdxish/preprocess-jsx-expressions.ts
@@ -1,48 +1,6 @@
import { JSX_COMMENT_REGEX } from '../../../lib/micromark/jsx-comment/pattern';
import { protectCodeBlocks, restoreCodeBlocks } from '../../../lib/utils/mdxish/protect-code-blocks';
-// Base64 encode (Node.js + browser compatible)
-function base64Encode(str: string): string {
- if (typeof Buffer !== 'undefined') {
- return Buffer.from(str, 'utf-8').toString('base64');
- }
- return btoa(unescape(encodeURIComponent(str)));
-}
-
-// Base64 decode (Node.js + browser compatible)
-export function base64Decode(str: string): string {
- if (typeof Buffer !== 'undefined') {
- return Buffer.from(str, 'base64').toString('utf-8');
- }
- return decodeURIComponent(escape(atob(str)));
-}
-
-// Markers for protected HTMLBlock content (HTML comments avoid markdown parsing issues)
-export const HTML_BLOCK_CONTENT_START = '';
-
-/**
- * Base64 encodes HTMLBlock template literal content to prevent markdown parser from consuming `} ';
- * protectHTMLBlockContent(input)
- * // Returns: ' '
- * ```
- */
-function protectHTMLBlockContent(content: string): string {
- return content.replace(
- /(]*>)\{\s*`((?:[^`\\]|\\.)*)`\s*\}(<\/HTMLBlock>)/g,
- (_match, openTag: string, templateContent: string, closeTag: string) => {
- const encoded = base64Encode(templateContent);
- return `${openTag}${HTML_BLOCK_CONTENT_START}${encoded}${HTML_BLOCK_CONTENT_END}${closeTag}`;
- },
- );
-}
-
/**
* Removes JSX-style comments (e.g., { /* comment *\/ }) from content.
*
@@ -208,10 +166,9 @@ function escapeProblematicBraces(content: string): string {
* @returns Preprocessed content ready for markdown parsing
*/
export function preprocessJSXExpressions(content: string): string {
- let processed = protectHTMLBlockContent(content);
- const { protectedCode, protectedContent } = protectCodeBlocks(processed);
+ const { protectedCode, protectedContent } = protectCodeBlocks(content);
- processed = escapeProblematicBraces(protectedContent);
+ let processed = escapeProblematicBraces(protectedContent);
processed = restoreCodeBlocks(processed, protectedCode);
return processed;
diff --git a/processor/transform/mdxish/tables/mdxish-tables.ts b/processor/transform/mdxish/tables/mdxish-tables.ts
index 46e217ff7..f7825f91a 100644
--- a/processor/transform/mdxish/tables/mdxish-tables.ts
+++ b/processor/transform/mdxish/tables/mdxish-tables.ts
@@ -10,6 +10,7 @@ import remarkParse from 'remark-parse';
import { unified } from 'unified';
import { EXIT, visit } from 'unist-util-visit';
+import { NodeTypes } from '../../../../enums';
import { gemojiFromMarkdown } from '../../../../lib/mdast-util/gemoji';
import { legacyVariableFromMarkdown } from '../../../../lib/mdast-util/legacy-variable';
import { gemoji } from '../../../../lib/micromark/gemoji';
@@ -151,6 +152,21 @@ const processTableNode = (
let tableHasFlowContent = false;
+ // An `<HTMLBlock>` (still a JSX element here; converted to `html-block` by
+ // `mdxishHtmlBlocks` after this transformer) is block-level content that a
+ // markdown table cell can't represent, so keep the table as a JSX `<Table>`.
+ visit(
+ node as Node,
+ candidate =>
+ candidate.type === NodeTypes.htmlBlock ||
+ ((candidate.type === 'mdxJsxFlowElement' || candidate.type === 'mdxJsxTextElement') &&
+ (candidate as MdxJsxFlowElement | MdxJsxTextElement).name === 'HTMLBlock'),
+ () => {
+ tableHasFlowContent = true;
+ return EXIT;
+ },
+ );
+
// Re-parse text-only cells through markdown and detect flow content
visit(node as Node, isTableCell, (cell: MdxJsxTableCell) => {
if (!isTextOnly(cell.children as unknown[])) return;
diff --git a/processor/utils.ts b/processor/utils.ts
index afc646390..60309e021 100644
--- a/processor/utils.ts
+++ b/processor/utils.ts
@@ -161,18 +161,28 @@ export const isMDXEsm = (node: Node): node is MdxjsEsm => {
* Takes an HTML string and formats it for display in the editor. Removes leading/trailing newlines
* and unindents the HTML.
*
- * @param {string} html - HTML content from template literal
+ * @param {string} html - cooked HTML payload (callers strip any template-literal backticks first)
+ * @param {number} [openingTagIndent=0] - column the `<HTMLBlock>` opening tag sits at, used to
+ * dedent each content line so its indentation reads relative to the tag, not the line start
* @returns {string} processed HTML
*/
-export function formatHtmlForMdxish(html: string): string {
- // Remove leading/trailing backticks if present, since they're used to keep the HTML
- // from being parsed prematurely
- let processed = html;
- if (processed.startsWith('`') && processed.endsWith('`')) {
- processed = processed.slice(1, -1);
- }
+export function formatHtmlForMdxish(html: string, openingTagIndent = 0): string {
// Removes the leading/trailing newlines
- let cleaned = processed.replace(/^\s*\n|\n\s*$/g, '');
+ let cleaned = html.replace(/^\s*\n|\n\s*$/g, '');
+
+ // Strip / deindent the lines in the HTML string so that the indents are relative
+ // to the opening HTMLBlock tag, not the literal line start
+ // Keep any deeper indent
+ if (openingTagIndent > 0) {
+ cleaned = cleaned
+ .split('\n')
+ .map(line => {
+ let i = 0;
+ while (i < openingTagIndent && (line[i] === ' ' || line[i] === '\t')) i += 1;
+ return line.slice(i);
+ })
+ .join('\n');
+ }
// Convert literal \n sequences to actual newlines only inside and .
// Because needs to respect the newline visual and