-
Notifications
You must be signed in to change notification settings - Fork 18
fix(stripComments): stop leaving empty lines when entire line is a comment #1500
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: next
Are you sure you want to change the base?
Changes from 4 commits
026c279
1d46c93
421762c
1a96e93
d171fdb
a6f87ba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
maximilianfalco marked this conversation as resolved.
|
|
maximilianfalco marked this conversation as resolved.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -177,7 +177,7 @@ | |
| }, | ||
| { | ||
| "path": "dist/main.node.js", | ||
| "maxSize": "947KB" | ||
| "maxSize": "950KB" | ||
| } | ||
| ] | ||
| }, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -61,6 +61,15 @@ const buildTableNodeProcessor = (withMdx: boolean) => | |
| const tableNodeProcessor = buildTableNodeProcessor(true); | ||
| const fallbackTableNodeProcessor = buildTableNodeProcessor(false); | ||
|
|
||
| const BLANK_LINE_REGEX = /(\r?\n)[ \t]*\r?\n(?![ \t]*\r?\n)/g; | ||
|
|
||
| /** | ||
| * Collapses one blank line per match so it doesn't terminate the CommonMark | ||
| * type-6 block. Non-greedy: runs of multiple blank lines lose just one. | ||
| */ | ||
| export const collapseBlankLines = (value: string): string => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do any blank lines between HTML elements in a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. From my understanding, yes. IIRC, the micromark HTML block rule states that a blank line terminates the block, which is what causes the table to split. |
||
| value.replace(BLANK_LINE_REGEX, '$1'); | ||
|
|
||
| /** | ||
| * Parse the HTML node that contains the full table substring | ||
| * into the table parts (headers, rows, cells). | ||
|
|
@@ -355,8 +364,9 @@ const mdxishTables = (): Transform => tree => { | |
| } else if (node.value.startsWith('<table')) { | ||
| // If the parsing still fails, give an opportunity to the fallback parser | ||
| // without remarkMdx to process lowercase tables as it's likely to not | ||
| // have needed MDX parsing anyway | ||
| const fallback = parseTableNode(fallbackTableNodeProcessor, node); | ||
| // have needed MDX parsing anyway. | ||
| const sanitizedValue = collapseBlankLines(node.value); | ||
| const fallback = parseTableNode(fallbackTableNodeProcessor, { ...node, value: sanitizedValue }); | ||
| if (!fallback || fallback.children.length <= 1) return; | ||
| parent.children.splice(index, 1, ...(fallback.children as typeof parent.children)); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,9 @@ | |
| import { visit, SKIP } from 'unist-util-visit'; | ||
|
|
||
| const HTML_COMMENT_REGEX = /<!--[\s\S]*?-->/g; | ||
| // Indented whole-line comment plus trailing newline; removing the whole line | ||
| // avoids leaving a whitespace-only line that terminates the surrounding block. | ||
| const WHOLE_LINE_HTML_COMMENT_REGEX = /^[ \t]+<!--[\s\S]*?-->[ \t]*(?:\r?\n|$)/gm; | ||
|
maximilianfalco marked this conversation as resolved.
|
||
| export const MDX_COMMENT_REGEX = /\/\*(?:(?!\*\/)[\s\S])*\*\//g; | ||
|
|
||
| /** | ||
|
|
@@ -14,7 +17,10 @@ | |
| if (parent && typeof index === 'number') { | ||
| // Remove HTML comments | ||
| if (node.type === 'html' && HTML_COMMENT_REGEX.test(node.value)) { | ||
| const newValue = node.value.replace(HTML_COMMENT_REGEX, '').trim(); | ||
| const newValue = node.value | ||
| .replace(WHOLE_LINE_HTML_COMMENT_REGEX, '') | ||
| .replace(HTML_COMMENT_REGEX, '') | ||
Check failure: Code scanning / CodeQL: Incomplete multi-character sanitization (High)
This string may still contain
<!-- Error loading related location Loading |
||
|
Comment on lines
+20
to
+22
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm thinking of ignoring this, to be honest; it seems very unlikely and very much unnecessary. CodeQL's concern is that the result may still contain `<!--`: the regex matches the inner complete comment, removes it, and the surrounding bits collapse into a new comment opener. I feel like this is fine: as far as I can see, there's no untrusted-input boundary being crossed, so ignoring it saves us from these complexities. What do you think, @eaglethrost @kevinports? |
||
| .trim(); | ||
| if (newValue) { | ||
| node.value = newValue; | ||
| } else { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.