diff --git a/__tests__/lib/mdxish/mdxish-jsx-to-mdast.test.ts b/__tests__/lib/mdxish/mdxish-jsx-to-mdast.test.ts index 1f184315a..6474aca33 100644 --- a/__tests__/lib/mdxish/mdxish-jsx-to-mdast.test.ts +++ b/__tests__/lib/mdxish/mdxish-jsx-to-mdast.test.ts @@ -89,6 +89,33 @@ describe('mdxish-jsx-to-mdast transformer', () => { expect(imageNode.data?.hProperties?.border).toBeUndefined(); }); + it('should parse Image with unquoted attributes containing special characters', () => { + const md = 'test'; + const ast = processWithNewTypes(md); + + expect(ast.children).toHaveLength(1); + expect(ast.children[0].type).toBe(NodeTypes.imageBlock); + + const imageNode = ast.children[0] as ImageBlock; + expect(imageNode.data?.hProperties?.src).toBe('https://example.com/image.png'); + expect(imageNode.data?.hProperties?.alt).toBe('test'); + }); + + it('should parse Image with unquoted attributes spanning multiple lines', () => { + const md = `test`; + const ast = processWithNewTypes(md); + + expect(ast.children).toHaveLength(1); + expect(ast.children[0].type).toBe(NodeTypes.imageBlock); + + const imageNode = ast.children[0] as ImageBlock; + expect(imageNode.data?.hProperties?.src).toBe('https://example.com/image.png'); + expect(imageNode.data?.hProperties?.alt).toBe('test'); + }); + it('should parse caption with markdown and HTML entities into children', () => { const md = 'test'; const ast = processWithNewTypes(md); @@ -131,6 +158,20 @@ This is a warning message. expect(calloutNode.children).toBeDefined(); expect(calloutNode.children.length).toBeGreaterThan(0); }); + + it('should parse Callout with unquoted attributes containing special characters', () => { + const md = ` +content +`; + const ast = processWithNewTypes(md); + + expect(ast.children).toHaveLength(1); + expect(ast.children[0].type).toBe(NodeTypes.callout); + + const calloutNode = ast.children[0] as Callout; + expect(calloutNode.data?.hProperties?.icon).toBe('📘'); + expect(calloutNode.data?.hProperties?.theme).toBe('info'); + }); }); describe('Embed component', () => { @@ -197,6 +238,18 @@ This is a warning message. expect(embedNode.data?.hProperties?.url).toBe(url); }); }); + + it('should parse Embed with unquoted attributes containing special characters', () => { + const md = ''; + const ast = processWithNewTypes(md); + + expect(ast.children).toHaveLength(1); + expect(ast.children[0].type).toBe(NodeTypes.embedBlock); + + const embedNode = ast.children[0] as EmbedBlock; + expect(embedNode.data?.hProperties?.url).toBe('https://example.com'); + expect(embedNode.data?.hProperties?.title).toBe('Example'); + }); }); describe('Anchor component', () => { @@ -257,6 +310,17 @@ This is a warning message. expect(anchorNode.data?.hProperties?.href).toBe('https://readme.com'); expect(anchorNode.children).toHaveLength(0); }); + + it('should parse Anchor with unquoted attributes containing special characters', () => { + const md = 'ReadMe'; + const ast = processWithNewTypes(md); + + const para = ast.children[0] as Paragraph; + const anchorNode = para.children.find(c => c.type === NodeTypes.anchor) as Anchor; + expect(anchorNode).toBeDefined(); + expect(anchorNode.data?.hProperties?.href).toBe('https://readme.com'); + expect(anchorNode.children[0]).toMatchObject({ type: 'text', value: 'ReadMe' }); + }); }); describe('Recipe component', () => { @@ -282,6 +346,18 @@ This is a warning message. expect(recipeNode.emoji).toBe('🍳'); expect(recipeNode.backgroundColor).toBe('#fff'); }); + + it('should parse Recipe with unquoted attributes containing special characters', () => { + const md = ''; + const ast = processWithNewTypes(md); + + expect(ast.children).toHaveLength(1); + expect(ast.children[0].type).toBe(NodeTypes.recipe); + + const recipeNode = ast.children[0] as Recipe; + expect(recipeNode.slug).toBe('my-recipe'); + expect(recipeNode.title).toBe('Recipe'); + }); }); describe('unknown components', () => { diff --git a/__tests__/transformers/mdxish-component-blocks.test.ts b/__tests__/transformers/mdxish-component-blocks.test.ts index b0d972fcf..73ba3d348 100644 --- a/__tests__/transformers/mdxish-component-blocks.test.ts +++ b/__tests__/transformers/mdxish-component-blocks.test.ts @@ -585,6 +585,36 @@ Some content }); }); + describe('closing tag inside inline code span', () => { + it('should not treat a closing tag inside backticks as the real closing tag', () => { + const markdown = ` +Here is some code: \`\` and more text. +`; + const tree = parseWithPlugin(markdown); + + const mdxNodes = findNodesByType(tree, 'mdxJsxFlowElement'); + expect(mdxNodes).toHaveLength(1); + expect(mdxNodes[0]).toMatchObject({ + type: 'mdxJsxFlowElement', + name: 'Callout', + }); + }); + + it('should skip closing tags inside multi-backtick code spans', () => { + const markdown = ` +Here is some code: \`\`\`\` and more text. +`; + const tree = parseWithPlugin(markdown); + + const mdxNodes = findNodesByType(tree, 'mdxJsxFlowElement'); + expect(mdxNodes).toHaveLength(1); + expect(mdxNodes[0]).toMatchObject({ + type: 'mdxJsxFlowElement', + name: 'Callout', + }); + }); + }); + describe('Anchor component (inline, excluded)', () => { it('should NOT convert to mdxJsxFlowElement', () => { // Anchor is an inline component and must remain as raw html nodes so that diff --git a/lib/mdast-util/jsx-component/index.ts b/lib/mdast-util/jsx-component/index.ts new file mode 100644 index 000000000..b8ba90f89 --- /dev/null +++ b/lib/mdast-util/jsx-component/index.ts @@ -0,0 +1,58 @@ +import type { HTML } from 'mdast'; +import type { CompileContext, Extension as FromMarkdownExtension, Handle, Token } from 'mdast-util-from-markdown'; + +const contextMap = new WeakMap(); + +function findBlockToken(this: CompileContext): Token | undefined { + const events = this.tokenStack; + for (let i = events.length - 1; i >= 0; i -= 1) { + if (events[i][0].type === 'jsxComponentBlock') return events[i][0]; + } + return undefined; +} + +function enterBlock(this: CompileContext, token: Parameters[0]): void { + contextMap.set(token, { chunks: [] }); + this.enter({ type: 'html', value: '' } as HTML, token); +} + +function exitBlockData(this: CompileContext, token: Parameters[0]): void { + const blockToken = findBlockToken.call(this); + if (!blockToken) return; + const ctx = contextMap.get(blockToken); + if (ctx) ctx.chunks.push(this.sliceSerialize(token)); +} + +function exitBlock(this: CompileContext, token: Parameters[0]): void { + const ctx = contextMap.get(token); + const node = this.stack[this.stack.length - 1] as HTML; + if (ctx) { + node.value = ctx.chunks.join('\n'); + contextMap.delete(token); + } + this.exit(token); +} + +function enterText(this: CompileContext, token: Parameters[0]): void { + this.enter({ type: 'html', value: '' } as HTML, token); +} + +function exitText(this: CompileContext, token: Parameters[0]): void { + const node = this.stack[this.stack.length - 1] as HTML; + node.value = this.sliceSerialize(token); + this.exit(token); +} + +export function jsxComponentBlockFromMarkdown(): FromMarkdownExtension { + return { + enter: { + jsxComponentBlock: enterBlock, + jsxComponentText: enterText, + }, + exit: { + jsxComponentBlockData: exitBlockData, + jsxComponentBlock: exitBlock, + jsxComponentText: exitText, + }, + }; +} diff --git a/lib/mdxish.ts b/lib/mdxish.ts index 2fa7038c7..4a73afa57 100644 --- a/lib/mdxish.ts +++ b/lib/mdxish.ts @@ -57,9 +57,11 @@ import variablesTextTransformer from '../processor/transform/mdxish/variables-te import tailwindTransformer from '../processor/transform/tailwind'; import { emptyTaskListItemFromMarkdown } from './mdast-util/empty-task-list-item'; +import { jsxComponentBlockFromMarkdown } from './mdast-util/jsx-component'; import { jsxTableFromMarkdown } from './mdast-util/jsx-table'; import { legacyVariableFromMarkdown } from './mdast-util/legacy-variable'; import { magicBlockFromMarkdown } from './mdast-util/magic-block'; +import { jsxComponentBlock } from './micromark/jsx-component'; import { jsxTable } from './micromark/jsx-table'; import { legacyVariable } from './micromark/legacy-variable'; import { looseHtmlEntity, looseHtmlEntityFromMarkdown } from './micromark/loose-html-entities'; @@ -155,13 +157,14 @@ export function mdxishAstProcessor(mdContent: string, opts: MdxishOpts = {}) { .data( 'micromarkExtensions', safeMode - ? [jsxTable(), magicBlock(), legacyVariable(), looseHtmlEntity()] - : [jsxTable(), magicBlock(), mdxExprTextOnly, legacyVariable(), looseHtmlEntity()], + ? [jsxComponentBlock(), jsxTable(), magicBlock(), legacyVariable(), looseHtmlEntity()] + : [jsxComponentBlock(), jsxTable(), magicBlock(), mdxExprTextOnly, legacyVariable(), looseHtmlEntity()], ) .data( 'fromMarkdownExtensions', safeMode ? [ + jsxComponentBlockFromMarkdown(), jsxTableFromMarkdown(), magicBlockFromMarkdown(), legacyVariableFromMarkdown(), @@ -169,6 +172,7 @@ export function mdxishAstProcessor(mdContent: string, opts: MdxishOpts = {}) { looseHtmlEntityFromMarkdown(), ] : [ + jsxComponentBlockFromMarkdown(), jsxTableFromMarkdown(), magicBlockFromMarkdown(), mdxExpressionFromMarkdown(), diff --git a/lib/micromark/jsx-component/index.ts b/lib/micromark/jsx-component/index.ts new file mode 100644 index 000000000..b6d8b1a31 --- /dev/null +++ b/lib/micromark/jsx-component/index.ts @@ -0,0 +1 @@ +export { jsxComponentBlock } from './syntax'; diff --git a/lib/micromark/jsx-component/syntax.ts b/lib/micromark/jsx-component/syntax.ts new file mode 100644 index 000000000..cae4bf240 --- /dev/null +++ b/lib/micromark/jsx-component/syntax.ts @@ -0,0 +1,454 @@ +/* eslint-disable @typescript-eslint/no-use-before-define */ +import type { Code, Construct, Effects, Extension, Resolver, State, TokenizeContext } from 'micromark-util-types'; + +import { markdownLineEnding } from 'micromark-util-character'; +import { codes, types } from 'micromark-util-symbol'; + +import { nonLazyContinuationStart } from '../non-lazy-continuation'; + +import { matchSequence, suffixForFirstChar } from './tags'; + +declare module 'micromark-util-types' { + interface TokenTypeMap { + jsxComponentBlock: 'jsxComponentBlock'; + jsxComponentBlockData: 'jsxComponentBlockData'; + jsxComponentText: 'jsxComponentText'; + } +} + +function resolveToBlock(events: Parameters[0]) { + let index = events.length; + + while (index > 0) { + index -= 1; + if (events[index][0] === 'enter' && events[index][1].type === 'jsxComponentBlock') { + break; + } + } + + if (index > 1 && events[index - 2][1].type === types.linePrefix) { + events[index][1].start = events[index - 2][1].start; + events[index + 1][1].start = events[index - 2][1].start; + events.splice(index - 2, 2); + } + + return events; +} + +const flowConstruct: Construct = { + name: 'jsxComponentBlock', + tokenize: tokenizeFlow, + resolveTo: resolveToBlock, + concrete: true, +}; + +const textConstruct: Construct = { + name: 'jsxComponentText', + tokenize: tokenizeText, +}; + +function createFirstChar( + tagName: Code[], + effects: Effects, + afterTagName: State, + nok: State, +): State { + return ((code: Code): State | undefined => { + const suffix = suffixForFirstChar(code); + if (!suffix) return nok(code); + tagName.push(code); + effects.consume(code); + return matchSequence(suffix, 0, tagName, effects, afterTagName, nok); + }) as State; +} + +function createInDoubleQuote(effects: Effects, nok: State, returnTo: State): State { + const self: State = ((code: Code): State | undefined => { + if (code === null || markdownLineEnding(code)) return nok(code); + effects.consume(code); + return code === codes.quotationMark ? returnTo : self; + }) as State; + return self; +} + +function createInSingleQuote(effects: Effects, nok: State, returnTo: State): State { + const self: State = ((code: Code): State | undefined => { + if (code === null || markdownLineEnding(code)) return nok(code); + effects.consume(code); + return code === codes.apostrophe ? returnTo : self; + }) as State; + return self; +} + +// --------------------------------------------------------------------------- +// Flow construct: block-level component tags +// +// Matches known ReadMe component/tag names (Image, img, Callout, Embed, +// Recipe, Anchor) with relaxed attribute parsing so unquoted URLs aren't +// fragmented by GFM autolinks. Supports multi-line attribute lists. +// +// Self-closing tags () are captured as single-line blocks. +// Opening tags () must be alone on their line; the body is +// scanned until the matching closing tag, following the jsxTable pattern. +// --------------------------------------------------------------------------- + +function tokenizeFlow(this: TokenizeContext, effects: Effects, ok: State, nok: State) { + const tagName: Code[] = []; + let closingTagIndex = 0; + let depth = 1; + let codeSpanOpenSize = 0; + let codeSpanCloseSize = 0; + + const inDoubleQuote = createInDoubleQuote(effects, nok, inTag); + const inSingleQuote = createInSingleQuote(effects, nok, inTag); + + return start; + + function start(code: Code): State | undefined { + if (code !== codes.lessThan) return nok(code); + effects.enter('jsxComponentBlock'); + effects.enter('jsxComponentBlockData'); + effects.consume(code); + return createFirstChar(tagName, effects, afterTagName, nok); + } + + function afterTagName(code: Code): State | undefined { + if (code === codes.space || code === codes.horizontalTab) { + effects.consume(code); + return inTag; + } + if (code === codes.greaterThan) { + effects.consume(code); + return afterOpeningGt; + } + if (code === codes.slash) { + effects.consume(code); + return selfCloseGt; + } + if (markdownLineEnding(code)) { + effects.exit('jsxComponentBlockData'); + return tagAtLineEnding(code); + } + return nok(code); + } + + function inTag(code: Code): State | undefined { + if (code === null) return nok(code); + if (markdownLineEnding(code)) { + effects.exit('jsxComponentBlockData'); + return tagAtLineEnding(code); + } + if (code === codes.greaterThan) { + effects.consume(code); + return afterOpeningGt; + } + if (code === codes.quotationMark) { + effects.consume(code); + return inDoubleQuote; + } + if (code === codes.apostrophe) { + effects.consume(code); + return inSingleQuote; + } + if (code === codes.slash) { + effects.consume(code); + return maybeSlashClose; + } + effects.consume(code); + return inTag; + } + + function maybeSlashClose(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + return afterSelfClose; + } + return inTag(code); + } + + function selfCloseGt(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + return afterSelfClose; + } + return nok(code); + } + + function afterSelfClose(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + effects.exit('jsxComponentBlockData'); + effects.exit('jsxComponentBlock'); + return ok(code); + } + if (code === codes.space || code === codes.horizontalTab) { + effects.consume(code); + return afterSelfClose; + } + return nok(code); + } + + function tagAtLineEnding(code: Code): State | undefined { + if (code === null) return nok(code); + return effects.check(nonLazyContinuationStart, tagContinuationNonLazy, nok)(code); + } + + function tagContinuationNonLazy(code: Code): State | undefined { + effects.enter(types.lineEnding); + effects.consume(code); + effects.exit(types.lineEnding); + return tagContinuationBefore; + } + + function tagContinuationBefore(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + return tagAtLineEnding(code); + } + effects.enter('jsxComponentBlockData'); + return inTag(code); + } + + function afterOpeningGt(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + effects.exit('jsxComponentBlockData'); + return bodyAtLineEnding(code); + } + if (code === codes.space || code === codes.horizontalTab) { + effects.consume(code); + return afterOpeningGt; + } + return nok(code); + } + + function bodyAtLineEnding(code: Code): State | undefined { + if (code === null) { + effects.exit('jsxComponentBlock'); + return ok(code); + } + return effects.check(nonLazyContinuationStart, bodyContinuationNonLazy, bodyContinuationAfter)(code); + } + + function bodyContinuationNonLazy(code: Code): State | undefined { + effects.enter(types.lineEnding); + effects.consume(code); + effects.exit(types.lineEnding); + return bodyContinuationBefore; + } + + function bodyContinuationBefore(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + return bodyAtLineEnding(code); + } + effects.enter('jsxComponentBlockData'); + return body(code); + } + + function bodyContinuationAfter(code: Code): State | undefined { + if (code === null) { + return nok(code); + } + effects.exit('jsxComponentBlock'); + return ok(code); + } + + function body(code: Code): State | undefined { + if (code === null) return nok(code); + if (markdownLineEnding(code)) { + effects.exit('jsxComponentBlockData'); + return bodyAtLineEnding(code); + } + if (code === codes.lessThan) { + effects.consume(code); + return bodyAfterLt; + } + if (code === codes.graveAccent) { + codeSpanOpenSize = 0; + return countOpenTicks(code); + } + effects.consume(code); + return body; + } + + function countOpenTicks(code: Code): State | undefined { + if (code === codes.graveAccent) { + codeSpanOpenSize += 1; + effects.consume(code); + return countOpenTicks; + } + return inCodeSpan(code); + } + + function inCodeSpan(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) return body(code); + if (code === codes.graveAccent) { + codeSpanCloseSize = 0; + return countCloseTicks(code); + } + effects.consume(code); + return inCodeSpan; + } + + function countCloseTicks(code: Code): State | undefined { + if (code === codes.graveAccent) { + codeSpanCloseSize += 1; + effects.consume(code); + return countCloseTicks; + } + return codeSpanCloseSize === codeSpanOpenSize ? body(code) : inCodeSpan(code); + } + + function bodyAfterLt(code: Code): State | undefined { + if (code === codes.slash) { + effects.consume(code); + closingTagIndex = 0; + return closingTagMatch; + } + if (tagName.length > 0 && code === tagName[0]) { + effects.consume(code); + closingTagIndex = 1; + return nestedOpenTagMatch; + } + return body(code); + } + + function nestedOpenTagMatch(code: Code): State | undefined { + if (closingTagIndex < tagName.length && code === tagName[closingTagIndex]) { + closingTagIndex += 1; + effects.consume(code); + return nestedOpenTagMatch; + } + if ( + closingTagIndex === tagName.length && + (code === codes.greaterThan || code === codes.space || code === codes.horizontalTab || code === codes.slash) + ) { + depth += 1; + effects.consume(code); + return body; + } + return body(code); + } + + function closingTagMatch(code: Code): State | undefined { + if (closingTagIndex < tagName.length && code === tagName[closingTagIndex]) { + closingTagIndex += 1; + effects.consume(code); + return closingTagMatch; + } + if (closingTagIndex === tagName.length && code === codes.greaterThan) { + depth -= 1; + effects.consume(code); + if (depth === 0) { + return afterBlockClose; + } + return body; + } + return body(code); + } + + function afterBlockClose(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) { + effects.exit('jsxComponentBlockData'); + effects.exit('jsxComponentBlock'); + return ok(code); + } + effects.consume(code); + return afterBlockClose; + } +} + +// --------------------------------------------------------------------------- +// Text construct: inline component tags (single-line only) +// --------------------------------------------------------------------------- + +function tokenizeText(this: TokenizeContext, effects: Effects, ok: State, nok: State) { + const tagName: Code[] = []; + + const inDoubleQuote = createInDoubleQuote(effects, nok, inTag); + const inSingleQuote = createInSingleQuote(effects, nok, inTag); + + return start; + + function start(code: Code): State | undefined { + if (code !== codes.lessThan) return nok(code); + effects.enter('jsxComponentText'); + effects.consume(code); + return createFirstChar(tagName, effects, afterTagName, nok); + } + + function afterTagName(code: Code): State | undefined { + if (code === codes.space || code === codes.horizontalTab) { + effects.consume(code); + return inTag; + } + if (code === codes.greaterThan) { + effects.consume(code); + effects.exit('jsxComponentText'); + return ok(code); + } + if (code === codes.slash) { + effects.consume(code); + return selfCloseGt; + } + return nok(code); + } + + function inTag(code: Code): State | undefined { + if (code === null || markdownLineEnding(code)) return nok(code); + if (code === codes.greaterThan) { + effects.consume(code); + effects.exit('jsxComponentText'); + return ok(code); + } + if (code === codes.quotationMark) { + effects.consume(code); + return inDoubleQuote; + } + if (code === codes.apostrophe) { + effects.consume(code); + return inSingleQuote; + } + if (code === codes.slash) { + effects.consume(code); + return maybeSlashClose; + } + effects.consume(code); + return inTag; + } + + function maybeSlashClose(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + effects.exit('jsxComponentText'); + return ok(code); + } + return inTag(code); + } + + function selfCloseGt(code: Code): State | undefined { + if (code === codes.greaterThan) { + effects.consume(code); + effects.exit('jsxComponentText'); + return ok(code); + } + return nok(code); + } +} + +/** + * Micromark extension that tokenizes known ReadMe component tags (Image, img, + * Callout, Embed, Recipe, Anchor) with relaxed attribute parsing. + * + * Micromark's built-in HTML block tokenizer rejects tags whose unquoted + * attribute values contain characters like `:` or `/`. GFM autolinks then + * fragment the URLs into link nodes, breaking component rendering. This + * extension bypasses the strict validation for these specific tag names. + */ +export function jsxComponentBlock(): Extension { + return { + flow: { + [codes.lessThan]: [flowConstruct], + }, + text: { + [codes.lessThan]: [textConstruct], + }, + }; +} diff --git a/lib/micromark/jsx-component/tags.ts b/lib/micromark/jsx-component/tags.ts new file mode 100644 index 000000000..07bce0130 --- /dev/null +++ b/lib/micromark/jsx-component/tags.ts @@ -0,0 +1,78 @@ +import type { Code, Effects, State } from 'micromark-util-types'; + +import { codes } from 'micromark-util-symbol'; + +export const IMAGE_SUFFIX: Code[] = [codes.lowercaseM, codes.lowercaseA, codes.lowercaseG, codes.lowercaseE]; +export const IMG_SUFFIX: Code[] = [codes.lowercaseM, codes.lowercaseG]; +export const CALLOUT_SUFFIX: Code[] = [ + codes.lowercaseA, + codes.lowercaseL, + codes.lowercaseL, + codes.lowercaseO, + codes.lowercaseU, + codes.lowercaseT, +]; +export const EMBED_SUFFIX: Code[] = [codes.lowercaseM, codes.lowercaseB, codes.lowercaseE, codes.lowercaseD]; +export const RECIPE_SUFFIX: Code[] = [ + codes.lowercaseE, + codes.lowercaseC, + codes.lowercaseI, + codes.lowercaseP, + codes.lowercaseE, +]; +export const ANCHOR_SUFFIX: Code[] = [ + codes.lowercaseN, + codes.lowercaseC, + codes.lowercaseH, + codes.lowercaseO, + codes.lowercaseR, +]; + +/** + * Look up the suffix for a known tag given its first character code. + * Returns undefined if the character doesn't start any known tag. + * + * `I` and `i` both start known names but are unambiguous because `I` → Image + * (uppercase) and `i` → img (lowercase) never conflict. + */ +/** + * Build a state chain that matches a sequence of character codes. + * On match calls onMatch; on any mismatch calls nok. + */ +export function matchSequence( + chars: Code[], + idx: number, + tagName: Code[], + effects: Effects, + onMatch: State, + nok: State, +): State { + if (idx >= chars.length) return onMatch; + return ((code: Code): State | undefined => { + if (code === chars[idx]) { + tagName.push(code); + effects.consume(code); + return matchSequence(chars, idx + 1, tagName, effects, onMatch, nok); + } + return nok(code); + }) as State; +} + +export function suffixForFirstChar(code: Code): Code[] | undefined { + switch (code) { + case codes.uppercaseI: + return IMAGE_SUFFIX; + case codes.lowercaseI: + return IMG_SUFFIX; + case codes.uppercaseC: + return CALLOUT_SUFFIX; + case codes.uppercaseE: + return EMBED_SUFFIX; + case codes.uppercaseR: + return RECIPE_SUFFIX; + case codes.uppercaseA: + return ANCHOR_SUFFIX; + default: + return undefined; + } +} diff --git a/lib/micromark/jsx-table/syntax.ts b/lib/micromark/jsx-table/syntax.ts index 35ef8ac4c..8e2ad7ba7 100644 --- a/lib/micromark/jsx-table/syntax.ts +++ b/lib/micromark/jsx-table/syntax.ts @@ -4,6 +4,8 @@ import type { Code, Construct, Effects, Extension, Resolver, State, TokenizeCont import { markdownLineEnding } from 'micromark-util-character'; import { codes, types } from 'micromark-util-symbol'; +import { nonLazyContinuationStart } from '../non-lazy-continuation'; + declare module 'micromark-util-types' { interface TokenTypeMap { jsxTable: 'jsxTable'; @@ -11,10 +13,6 @@ declare module 'micromark-util-types' { } } -const nonLazyContinuationStart: Construct = { - tokenize: tokenizeNonLazyContinuationStart, - partial: true, -}; function resolveToJsxTable(events: Parameters[0]) { let index = events.length; @@ -222,30 +220,6 @@ function tokenizeJsxTable(this: TokenizeContext, effects: Effects, ok: State, no } } -function tokenizeNonLazyContinuationStart(this: TokenizeContext, effects: Effects, ok: State, nok: State) { - // eslint-disable-next-line @typescript-eslint/no-this-alias - const self = this; - - return start; - - function start(code: Code): State | undefined { - if (markdownLineEnding(code)) { - effects.enter(types.lineEnding); - effects.consume(code); - effects.exit(types.lineEnding); - return after; - } - return nok(code); - } - - function after(code: Code): State | undefined { - if (self.parser.lazy[self.now().line]) { - return nok(code); - } - return ok(code); - } -} - /** * Micromark extension that tokenizes `...
` as a single flow block. * diff --git a/lib/micromark/non-lazy-continuation.ts b/lib/micromark/non-lazy-continuation.ts new file mode 100644 index 000000000..7ce8d31bd --- /dev/null +++ b/lib/micromark/non-lazy-continuation.ts @@ -0,0 +1,34 @@ +/* eslint-disable @typescript-eslint/no-use-before-define */ +import type { Code, Construct, Effects, State, TokenizeContext } from 'micromark-util-types'; + +import { markdownLineEnding } from 'micromark-util-character'; +import { types } from 'micromark-util-symbol'; + +function tokenizeNonLazyContinuationStart(this: TokenizeContext, effects: Effects, ok: State, nok: State) { + // eslint-disable-next-line @typescript-eslint/no-this-alias + const self = this; + + return start; + + function start(code: Code): State | undefined { + if (markdownLineEnding(code)) { + effects.enter(types.lineEnding); + effects.consume(code); + effects.exit(types.lineEnding); + return after; + } + return nok(code); + } + + function after(code: Code): State | undefined { + if (self.parser.lazy[self.now().line]) { + return nok(code); + } + return ok(code); + } +} + +export const nonLazyContinuationStart: Construct = { + tokenize: tokenizeNonLazyContinuationStart, + partial: true, +}; diff --git a/package.json b/package.json index 2c37e71a7..ba6a5c598 100644 --- a/package.json +++ b/package.json @@ -169,7 +169,7 @@ }, { "path": "dist/main.node.js", - "maxSize": "855KB" + "maxSize": "860KB" } ] }, diff --git a/processor/transform/mdxish/mdxish-component-blocks.ts b/processor/transform/mdxish/mdxish-component-blocks.ts index 65098ca92..0f00b2147 100644 --- a/processor/transform/mdxish/mdxish-component-blocks.ts +++ b/processor/transform/mdxish/mdxish-component-blocks.ts @@ -37,6 +37,44 @@ const inlineMdProcessor = unified() const isClosingTag = (value: string, tag: string) => value.trim() === ``; +/** + * Find the index of a closing tag in a string, skipping over backtick code spans. + * Returns -1 if not found outside of code spans. + */ +const findClosingTagIndex = (str: string, closingTag: string): number => { + let i = 0; + while (i < str.length) { + if (str[i] === '`') { + let tickCount = 0; + while (i < str.length && str[i] === '`') { + tickCount += 1; + i += 1; + } + // Skip until matching closing backticks + let found = false; + while (i < str.length && !found) { + if (str[i] === '`') { + let closeCount = 0; + while (i < str.length && str[i] === '`') { + closeCount += 1; + i += 1; + } + if (closeCount === tickCount) { + found = true; + } + } else { + i += 1; + } + } + } else if (str.startsWith(closingTag, i)) { + return i; + } else { + i += 1; + } + } + return -1; +}; + /** * Parse markdown content into mdast children nodes. */ @@ -186,8 +224,8 @@ const scanForClosingTag = (parent: Parent, startIndex: number, tag: string): Sca } // Embedded closing tag (closing tag within HTML block content) - if (siblingValue.includes(closingTagStr)) { - const closeTagPos = siblingValue.indexOf(closingTagStr); + const closeTagPos = findClosingTagIndex(siblingValue, closingTagStr); + if (closeTagPos !== -1) { const contentBeforeClose = siblingValue.substring(0, closeTagPos).trim(); const contentAfterClose = siblingValue.substring(closeTagPos + closingTagStr.length).trim(); const extraChildren = contentBeforeClose @@ -322,9 +360,8 @@ const mdxishComponentBlocks: Plugin<[], Parent> = () => tree => { } // Case 2: Self-contained block (closing tag in content) - if (contentAfterTag.includes(closingTagStr)) { - // Find the first closing tag - const closingTagIndex = contentAfterTag.indexOf(closingTagStr); + const closingTagIndex = findClosingTagIndex(contentAfterTag, closingTagStr); + if (closingTagIndex !== -1) { const componentInnerContent = contentAfterTag.substring(0, closingTagIndex).trim(); const contentAfterClose = contentAfterTag.substring(closingTagIndex + closingTagStr.length).trim(); const componentNode = createComponentNode({