Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions __tests__/lib/mdxish/mdxish-jsx-to-mdast.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,18 @@ describe('mdxish-jsx-to-mdast transformer', () => {
expect(imageNode.data?.hProperties?.border).toBeUndefined();
});

// An unquoted URL contains ':' and '/'; the tag must still become a single image block
// whose src/alt properties carry the raw values.
it('should parse Image with unquoted attributes containing special characters', () => {
  const { children } = processWithNewTypes('<Image src=https://example.com/image.png alt=test />');

  expect(children).toHaveLength(1);
  expect(children[0].type).toBe(NodeTypes.imageBlock);

  const props = (children[0] as ImageBlock).data?.hProperties;
  expect(props?.src).toBe('https://example.com/image.png');
  expect(props?.alt).toBe('test');
});

it('should parse caption with markdown and HTML entities into children', () => {
const md = '<Image src="test.png" alt="test" caption="With **Default Handling** enabled, the `default` value &#x22;Buster&#x22; is used." />';
const ast = processWithNewTypes(md);
Expand Down Expand Up @@ -131,6 +143,20 @@ This is a warning message.
expect(calloutNode.children).toBeDefined();
expect(calloutNode.children.length).toBeGreaterThan(0);
});

// A multi-line Callout whose opening tag uses an unquoted emoji and an unquoted word
// must parse into exactly one callout node exposing both values.
it('should parse Callout with unquoted attributes containing special characters', () => {
  const source = ['<Callout icon=📘 theme=info>', 'content', '</Callout>'].join('\n');
  const { children } = processWithNewTypes(source);

  expect(children).toHaveLength(1);
  expect(children[0].type).toBe(NodeTypes.callout);

  const props = (children[0] as Callout).data?.hProperties;
  expect(props?.icon).toBe('📘');
  expect(props?.theme).toBe('info');
});
});

describe('Embed component', () => {
Expand Down Expand Up @@ -197,6 +223,18 @@ This is a warning message.
expect(embedNode.data?.hProperties?.url).toBe(url);
});
});

// Self-closing Embed with an unquoted URL and title: expect one embed block carrying both.
it('should parse Embed with unquoted attributes containing special characters', () => {
  const { children } = processWithNewTypes('<Embed url=https://example.com title=Example />');

  expect(children).toHaveLength(1);
  expect(children[0].type).toBe(NodeTypes.embedBlock);

  const props = (children[0] as EmbedBlock).data?.hProperties;
  expect(props?.url).toBe('https://example.com');
  expect(props?.title).toBe('Example');
});
});

describe('Anchor component', () => {
Expand Down Expand Up @@ -257,6 +295,17 @@ This is a warning message.
expect(anchorNode.data?.hProperties?.href).toBe('https://readme.com');
expect(anchorNode.children).toHaveLength(0);
});

// Anchor is looked up inside the first top-level node (cast to Paragraph); its unquoted
// href must survive intact and its text content must be preserved as a child.
it('should parse Anchor with unquoted attributes containing special characters', () => {
  const { children } = processWithNewTypes('<Anchor href=https://readme.com>ReadMe</Anchor>');

  const paragraph = children[0] as Paragraph;
  const anchorNode = paragraph.children.find(child => child.type === NodeTypes.anchor) as Anchor;

  expect(anchorNode).toBeDefined();
  expect(anchorNode.data?.hProperties?.href).toBe('https://readme.com');
  expect(anchorNode.children[0]).toMatchObject({ type: 'text', value: 'ReadMe' });
});
});

describe('Recipe component', () => {
Expand All @@ -282,6 +331,18 @@ This is a warning message.
expect(recipeNode.emoji).toBe('🍳');
expect(recipeNode.backgroundColor).toBe('#fff');
});

// Recipe with three unquoted attributes (one of them a URL): expect a single recipe node
// whose slug and title match the raw values.
it('should parse Recipe with unquoted attributes containing special characters', () => {
  const { children } = processWithNewTypes(
    '<Recipe slug=my-recipe title=Recipe link=https://example.com/recipe />',
  );

  expect(children).toHaveLength(1);
  expect(children[0].type).toBe(NodeTypes.recipe);

  const { slug, title } = children[0] as Recipe;
  expect(slug).toBe('my-recipe');
  expect(title).toBe('Recipe');
});
});

describe('unknown components', () => {
Expand Down
128 changes: 128 additions & 0 deletions __tests__/transformers/normalize-component-attributes.test.ts
Comment thread
maximilianfalco marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { normalizeComponentAttributes } from '../../processor/transform/mdxish/normalize-component-attributes';

describe('normalize-component-attributes', () => {
// One-line, self-closing tags: every unquoted value should come back double-quoted and the
// closing slash should be kept.
describe('self-closing block components', () => {
  it('quotes unquoted URL in Image src', () => {
    const output = normalizeComponentAttributes('<Image src=https://example.com/image.png alt=test />');

    expect(output).toBe('<Image src="https://example.com/image.png" alt="test" />');
  });

  it('quotes unquoted URL in Embed', () => {
    const output = normalizeComponentAttributes('<Embed url=https://example.com title=Example />');

    expect(output).toBe('<Embed url="https://example.com" title="Example" />');
  });

  it('quotes unquoted attributes in Recipe', () => {
    const output = normalizeComponentAttributes(
      '<Recipe slug=my-recipe title=Recipe link=https://example.com/recipe />',
    );

    expect(output).toBe('<Recipe slug="my-recipe" title="Recipe" link="https://example.com/recipe" />');
  });
});

// Opening tags of components that wrap content: only the opening tag line is rewritten,
// the body and closing tag pass through untouched.
describe('block components with children', () => {
  const rawOpenTag = '<Callout icon=📘 theme=info>';
  const quotedOpenTag = '<Callout icon="📘" theme="info">';

  it('quotes unquoted emoji and simple values in Callout', () => {
    expect(normalizeComponentAttributes(rawOpenTag)).toBe(quotedOpenTag);
  });

  it('quotes unquoted attributes in multi-line Callout', () => {
    const rest = ['content', '</Callout>'];
    const input = [rawOpenTag, ...rest].join('\n');
    const expected = [quotedOpenTag, ...rest].join('\n');

    expect(normalizeComponentAttributes(input)).toBe(expected);
  });
});

// An inline-style component that happens to start the line is still normalized.
describe('inline components', () => {
  it('quotes unquoted URL in Anchor at line start', () => {
    const output = normalizeComponentAttributes('<Anchor href=https://readme.com>ReadMe</Anchor>');

    expect(output).toBe('<Anchor href="https://readme.com">ReadMe</Anchor>');
  });
});

// Values that already carry quotes: double quotes are a no-op, single quotes are rewritten
// to double quotes.
describe('already-quoted attributes', () => {
  it('leaves double-quoted attributes unchanged', () => {
    const alreadyQuoted = '<Image src="https://example.com/image.png" alt="test" />';

    expect(normalizeComponentAttributes(alreadyQuoted)).toBe(alreadyQuoted);
  });

  it('normalizes single-quoted attributes to double quotes', () => {
    const output = normalizeComponentAttributes("<Callout icon='📘' theme='info'>");

    expect(output).toBe('<Callout icon="📘" theme="info">');
  });
});

// A bare attribute name (no `=value`) must be emitted as-is, without gaining quotes.
describe('boolean attributes', () => {
  it('preserves boolean attributes without values', () => {
    const output = normalizeComponentAttributes('<Image src=test.png border />');

    expect(output).toBe('<Image src="test.png" border />');
  });
});

// A tag mixing quoted and unquoted values ends up with every value double-quoted.
describe('mixed quoted and unquoted attributes', () => {
  it('only quotes the unquoted ones', () => {
    const output = normalizeComponentAttributes('<Image src="test.png" alt=test />');

    expect(output).toBe('<Image src="test.png" alt="test" />');
  });
});

// Inputs the normalizer must return byte-for-byte: lowercase HTML, code (fenced or
// indented), and PascalCase tags that do not start the line.
describe('does not affect non-component content', () => {
  it('ignores lowercase HTML tags', () => {
    const input = '<div class=container>';
    expect(normalizeComponentAttributes(input)).toBe(input);
  });

  it('ignores content inside fenced code blocks', () => {
    const input = `\`\`\`html
<Image src=https://example.com/image.png alt=test />
\`\`\``;
    expect(normalizeComponentAttributes(input)).toBe(input);
  });

  it('ignores content inside indented code blocks', () => {
    // Four leading spaces are required here: the tag matcher accepts 0–3 spaces of indent,
    // so anything shallower is a regular block-level tag and WOULD be normalized.
    const input = '    <Image src=https://example.com/image.png alt=test />';
    expect(normalizeComponentAttributes(input)).toBe(input);
  });

  it('ignores PascalCase tags not at line start (inline)', () => {
    const input = 'Use <Anchor href=https://readme.com>ReadMe</Anchor> for links.';
    expect(normalizeComponentAttributes(input)).toBe(input);
  });
});

// Whole-document inputs: several component tags separated by blank lines, and a tag whose
// attributes continue on following lines.
describe('multi-line documents', () => {
  it('normalizes multiple component tags independently', () => {
    const input = [
      '<Image src=https://example.com/a.png alt=first />',
      '',
      '<Recipe slug=my-recipe title=Recipe />',
      '',
      '<Callout icon=📘 theme=info>',
      'content',
      '</Callout>',
    ].join('\n');
    const expected = [
      '<Image src="https://example.com/a.png" alt="first" />',
      '',
      '<Recipe slug="my-recipe" title="Recipe" />',
      '',
      '<Callout icon="📘" theme="info">',
      'content',
      '</Callout>',
    ].join('\n');

    expect(normalizeComponentAttributes(input)).toBe(expected);
  });

  it('does not affect multi-line JSX expression tags', () => {
    // The opening tag is not completed on its first line, so nothing should be rewritten.
    const input = ['<AdvancedTable', 'data={[', "{ 'code': '<INPUT_CODE_1>' }", ']}', '/>'].join('\n');

    expect(normalizeComponentAttributes(input)).toBe(input);
  });
});
});
2 changes: 2 additions & 0 deletions lib/mdxish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import mdxishJsxToMdast from '../processor/transform/mdxish/mdxish-jsx-to-mdast'
import mdxishMermaidTransformer from '../processor/transform/mdxish/mdxish-mermaid';
import { processSnakeCaseComponent } from '../processor/transform/mdxish/mdxish-snake-case-components';
import mdxishTables from '../processor/transform/mdxish/mdxish-tables';
import { normalizeComponentAttributes } from '../processor/transform/mdxish/normalize-component-attributes';
import normalizeEmphasisAST from '../processor/transform/mdxish/normalize-malformed-md-syntax';
import { normalizeTableSeparator } from '../processor/transform/mdxish/normalize-table-separator';
import {
Expand Down Expand Up @@ -107,6 +108,7 @@ function preprocessContent(
let result = normalizeTableSeparator(content);
result = terminateHtmlFlowBlocks(result);
result = safeMode ? result : preprocessJSXExpressions(result, jsxContext);
result = normalizeComponentAttributes(result);

return processSnakeCaseComponent(result, { knownComponents });
}
Expand Down
39 changes: 39 additions & 0 deletions processor/transform/mdxish/normalize-component-attributes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { protectCodeBlocks, restoreCodeBlocks } from '../../../lib/utils/mdxish/protect-code-blocks';

import { parseAttributes } from './mdxish-component-blocks';

// Matches single-line PascalCase tags at line start (up to 3 spaces indent per CommonMark).
// Only block-level HTML is affected, inline HTML already allows unquoted attribute values.
//
// Flags: `g` so every tag in the document is visited, `m` so `^` anchors at each line start.
//
// Capture groups:
//   1. leading indent (0–3 spaces)
//   2. tag name: an uppercase ASCII letter followed by letters, digits or `_`
//   3. raw attribute text, matched lazily, so the match ends at the first `>` after the name
//   4. the self-closing `/`, or an empty string when absent
//
// NOTE(review): the trailing `\s*` also matches line breaks, so whitespace between the
// attributes and `>` may span lines even though group 3 cannot — confirm this is intended.
const SINGLE_LINE_PASCAL_TAG_RE = /^([ ]{0,3})<([A-Z][A-Za-z0-9_]*)([^\n]*?)(\/?)\s*>/gm;

/**
 * Wraps a raw attribute value in quotes, choosing a delimiter that cannot collide with it.
 *
 * Double quotes are used whenever possible. A value that itself contains `"` is wrapped in
 * single quotes instead, and a value containing both quote characters has its double
 * quotes escaped as `&quot;` so the emitted attribute is always well-formed.
 */
function quoteAttributeValue(value: string): string {
  if (!value.includes('"')) return `"${value}"`;
  if (!value.includes("'")) return `'${value}'`;
  return `"${value.replace(/"/g, '&quot;')}"`;
}

/**
 * Wraps unquoted attribute values in PascalCase component tags with quotes.
 *
 * Micromark's HTML block tokenizer rejects tags whose unquoted attribute values
 * contain characters like `:` or `/` (e.g. `src=https://example.com`), and GFM
 * autolinks then fragment the URLs into link nodes. By quoting these values before
 * parsing, the tags are recognized as valid HTML blocks and flow through to the
 * component-block transformer unchanged.
 *
 * Code blocks are masked before the rewrite and restored afterwards, so tags that
 * appear inside code are never touched. Attributes without a value (e.g. `border`)
 * are emitted as bare names.
 *
 * @param content - Raw document source.
 * @returns The source with attribute values of matching tags quoted; everything else
 *   is returned as written.
 */
export function normalizeComponentAttributes(content: string): string {
  const { protectedContent, protectedCode } = protectCodeBlocks(content);

  const normalized = protectedContent.replace(
    SINGLE_LINE_PASCAL_TAG_RE,
    (match: string, indent: string, tagName: string, attrsPart: string, closing: string) => {
      const attrs = parseAttributes(attrsPart);
      // Nothing to re-quote: hand back the tag exactly as written. Rebuilding it from the
      // capture groups would silently drop any whitespace that preceded `>`.
      if (!attrs.length) return match;

      const rebuilt = attrs
        .map(attr => {
          // A null value marks a boolean attribute, which must stay a bare name.
          if (attr.value === null) return ` ${attr.name}`;
          return ` ${attr.name}=${quoteAttributeValue(String(attr.value))}`;
        })
        .join('');

      // Self-closing tags get a single space before the slash (`<Tag a="b" />`).
      return `${indent}<${tagName}${rebuilt}${closing ? ` ${closing}` : ''}>`;
    },
  );

  return restoreCodeBlocks(normalized, protectedCode);
}
Loading