From 0252681313caf55d12641f5922f8c45a7b8523c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?=
 <43938777+GermanJablo@users.noreply.github.com>
Date: Mon, 13 Jan 2025 11:51:26 -0300
Subject: [PATCH] fix(richtext-lexical): combine 2 normalizeMarkdown
 implementations and fix code block regex (#10470)

This should fix https://github.com/payloadcms/payload/issues/10387.

I don't know why we had 2 different copies of normalizeMarkdown.

Also, the most up-to-date copy still had a bug: lines outside code
blocks were treated as if they were inside one.

How I tested that it works:

1. I copied the `normalizeMarkdown` implementation from this PR into the
website repo, and made sure it is called before the conversion to
editorState.
2. In the admin panel, sync docs.
3. In the admin panel, refresh mdx to lexical (new button, below sync
docs).
4. Look for the examples from bug #10387 and verify that they have been
resolved.

An extra pair of eyes would be nice to make sure I'm not getting
confused with the imports.
---
 .../blocks/client/markdownTransformer.ts      |   6 +-
 .../blocks/server/markdownTransformer.ts      |   2 +-
 .../experimental_table/markdownTransformer.ts |   2 +-
 packages/richtext-lexical/src/index.ts        |   6 +-
 .../@lexical/markdown/MarkdownTransformers.ts |  44 +++++--
 .../src/packages/@lexical/markdown/index.ts   |   2 +-
 .../src/utilities/jsx/lexicalMarkdownCopy.ts  | 112 ------------------
 test/lexical-mdx/int.spec.ts                  |   2 -
 8 files changed, 44 insertions(+), 132 deletions(-)
 delete mode 100644 packages/richtext-lexical/src/utilities/jsx/lexicalMarkdownCopy.ts

diff --git a/packages/richtext-lexical/src/features/blocks/client/markdownTransformer.ts b/packages/richtext-lexical/src/features/blocks/client/markdownTransformer.ts
index 7bae3ca5b83..5b2d7a45f8c 100644
--- a/packages/richtext-lexical/src/features/blocks/client/markdownTransformer.ts
+++ b/packages/richtext-lexical/src/features/blocks/client/markdownTransformer.ts
@@ -6,10 +6,12 @@ import { createHeadlessEditor } from '@lexical/headless'
 import type { Transformer } from '../../../packages/@lexical/markdown/index.js'
 import type { MultilineElementTransformer } from '../../../packages/@lexical/markdown/MarkdownTransformers.js'
 
-import { $convertToMarkdownString } from '../../../packages/@lexical/markdown/index.js'
+import {
+  $convertFromMarkdownString,
+  $convertToMarkdownString,
+} from '../../../packages/@lexical/markdown/index.js'
 import { extractPropsFromJSXPropsString } from '../../../utilities/jsx/extractPropsFromJSXPropsString.js'
 import { propsToJSXString } from '../../../utilities/jsx/jsx.js'
-import { $convertFromMarkdownString } from '../../../utilities/jsx/lexicalMarkdownCopy.js'
 import { $createBlockNode, $isBlockNode, BlockNode } from './nodes/BlocksNode.js'
 
 function createTagRegexes(tagName: string) {
diff --git a/packages/richtext-lexical/src/features/blocks/server/markdownTransformer.ts b/packages/richtext-lexical/src/features/blocks/server/markdownTransformer.ts
index 9162e69a3c1..ae40bc73513 100644
--- a/packages/richtext-lexical/src/features/blocks/server/markdownTransformer.ts
+++ b/packages/richtext-lexical/src/features/blocks/server/markdownTransformer.ts
@@ -8,6 +8,7 @@ import type { NodeWithHooks } from '../../typesServer.js'
 
 import { getEnabledNodesFromServerNodes } from '../../../lexical/nodes/index.js'
 import {
+  $convertFromMarkdownString,
   $convertToMarkdownString,
   type MultilineElementTransformer,
   type TextMatchTransformer,
@@ -15,7 +16,6 @@ import {
 } from '../../../packages/@lexical/markdown/index.js'
 import { extractPropsFromJSXPropsString } from '../../../utilities/jsx/extractPropsFromJSXPropsString.js'
 import { propsToJSXString } from '../../../utilities/jsx/jsx.js'
-import { $convertFromMarkdownString } from '../../../utilities/jsx/lexicalMarkdownCopy.js'
 import { linesFromStartToContentAndPropsString } from './linesFromMatchToContentAndPropsString.js'
 import { $createServerBlockNode, $isServerBlockNode, ServerBlockNode } from './nodes/BlocksNode.js'
 import {
diff --git a/packages/richtext-lexical/src/features/experimental_table/markdownTransformer.ts b/packages/richtext-lexical/src/features/experimental_table/markdownTransformer.ts
index 4b7836a24c7..7776842a2d3 100644
--- a/packages/richtext-lexical/src/features/experimental_table/markdownTransformer.ts
+++ b/packages/richtext-lexical/src/features/experimental_table/markdownTransformer.ts
@@ -15,11 +15,11 @@ import {
 import { $isParagraphNode, $isTextNode } from 'lexical'
 
 import {
+  $convertFromMarkdownString,
   $convertToMarkdownString,
   type ElementTransformer,
   type Transformer,
 } from '../../packages/@lexical/markdown/index.js'
-import { $convertFromMarkdownString } from '../../utilities/jsx/lexicalMarkdownCopy.js'
 
 // Very primitive table setup
 const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/
diff --git a/packages/richtext-lexical/src/index.ts b/packages/richtext-lexical/src/index.ts
index f2f9cc8f3e6..0bd0deb04c3 100644
--- a/packages/richtext-lexical/src/index.ts
+++ b/packages/richtext-lexical/src/index.ts
@@ -1010,14 +1010,15 @@ export { sanitizeUrl, validateUrl } from './lexical/utils/url.js'
 
 export type * from './nodeTypes.js'
 
-export { defaultRichTextValue } from './populateGraphQL/defaultValue.js'
+export { $convertFromMarkdownString } from './packages/@lexical/markdown/index.js'
 
+export { defaultRichTextValue } from './populateGraphQL/defaultValue.js'
 export { populate } from './populateGraphQL/populate.js'
 export type { LexicalEditorProps, LexicalRichTextAdapter } from './types.js'
+
 export { createServerFeature } from './utilities/createServerFeature.js'
 
 export type { FieldsDrawerProps } from './utilities/fieldsDrawer/Drawer.js'
-
 export { extractPropsFromJSXPropsString } from './utilities/jsx/extractPropsFromJSXPropsString.js'
 export {
   extractFrontmatter,
@@ -1025,5 +1026,4 @@ export {
   objectToFrontmatter,
   propsToJSXString,
 } from './utilities/jsx/jsx.js'
-export { $convertFromMarkdownString } from './utilities/jsx/lexicalMarkdownCopy.js'
 export { upgradeLexicalData } from './utilities/upgradeLexicalData/index.js'
diff --git a/packages/richtext-lexical/src/packages/@lexical/markdown/MarkdownTransformers.ts b/packages/richtext-lexical/src/packages/@lexical/markdown/MarkdownTransformers.ts
index a7a04302f8e..a0b11e666ab 100644
--- a/packages/richtext-lexical/src/packages/@lexical/markdown/MarkdownTransformers.ts
+++ b/packages/richtext-lexical/src/packages/@lexical/markdown/MarkdownTransformers.ts
@@ -185,16 +185,19 @@ export type TextMatchTransformer = Readonly<{
   type: 'text-match'
 }>
 
+const EMPTY_OR_WHITESPACE_ONLY = /^[\t ]*$/
 const ORDERED_LIST_REGEX = /^(\s*)(\d+)\.\s/
 const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/
 const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i
 const HEADING_REGEX = /^(#{1,6})\s/
 const QUOTE_REGEX = /^>\s/
-const CODE_START_REGEX = /^[ \t]*```(\w+)?/
-const CODE_END_REGEX = /[ \t]*```$/
+const CODE_START_REGEX = /^[ \t]*(\\`\\`\\`|```)(\w+)?/
+const CODE_END_REGEX = /[ \t]*(\\`\\`\\`|```)$/
 const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/
 const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/
 const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/
+const TAG_START_REGEX = /^[ \t]*<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i
+const TAG_END_REGEX = /^[ \t]*<\/[a-z_][\w-]*\s*>/i
 
 const createBlockNode = (
   createNode: (match: Array<string>) => ElementNode,
@@ -433,10 +436,11 @@ export const ITALIC_UNDERSCORE: TextFormatTransformer = {
   tag: '_',
 }
 
-export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = false): string {
+export function normalizeMarkdown(input: string, shouldMergeAdjacentLines: boolean): string {
   const lines = input.split('\n')
   let inCodeBlock = false
   const sanitizedLines: string[] = []
+  let nestedDeepCodeBlock = 0
 
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i]
@@ -448,9 +452,24 @@ export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = fals
       continue
     }
 
-    // Detect the start or end of a code block
-    if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
-      inCodeBlock = !inCodeBlock
+    if (CODE_END_REGEX.test(line)) {
+      if (nestedDeepCodeBlock === 0) {
+        inCodeBlock = true
+      }
+      if (nestedDeepCodeBlock === 1) {
+        inCodeBlock = false
+      }
+      if (nestedDeepCodeBlock > 0) {
+        nestedDeepCodeBlock--
+      }
+      sanitizedLines.push(line)
+      continue
+    }
+
+    // An opening code fence: enter code-block state and increase the nesting depth
+    if (CODE_START_REGEX.test(line)) {
+      inCodeBlock = true
+      nestedDeepCodeBlock++
       sanitizedLines.push(line)
       continue
     }
@@ -464,8 +483,8 @@ export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = fals
     // In markdown the concept of "empty paragraphs" does not exist.
     // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
     if (
-      line === '' ||
-      lastLine === '' ||
+      EMPTY_OR_WHITESPACE_ONLY.test(line) ||
+      EMPTY_OR_WHITESPACE_ONLY.test(lastLine) ||
       !lastLine ||
       HEADING_REGEX.test(lastLine) ||
       HEADING_REGEX.test(line) ||
@@ -475,11 +494,16 @@ export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = fals
       CHECK_LIST_REGEX.test(line) ||
       TABLE_ROW_REG_EXP.test(line) ||
       TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
-      !shouldMergeAdjacentLines
+      !shouldMergeAdjacentLines ||
+      TAG_START_REGEX.test(line) ||
+      TAG_END_REGEX.test(line) ||
+      TAG_START_REGEX.test(lastLine) ||
+      TAG_END_REGEX.test(lastLine) ||
+      CODE_END_REGEX.test(lastLine)
     ) {
       sanitizedLines.push(line)
     } else {
-      sanitizedLines[sanitizedLines.length - 1] = lastLine + line
+      sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trim()
     }
   }
 
diff --git a/packages/richtext-lexical/src/packages/@lexical/markdown/index.ts b/packages/richtext-lexical/src/packages/@lexical/markdown/index.ts
index 5fb75782571..bd8a2c41e9b 100644
--- a/packages/richtext-lexical/src/packages/@lexical/markdown/index.ts
+++ b/packages/richtext-lexical/src/packages/@lexical/markdown/index.ts
@@ -82,7 +82,7 @@ function $convertFromMarkdownString(
   transformers: Array<Transformer> = TRANSFORMERS,
   node?: ElementNode,
   shouldPreserveNewLines = false,
-  shouldMergeAdjacentLines = false,
+  shouldMergeAdjacentLines = true,
 ): void {
   const sanitizedMarkdown = shouldPreserveNewLines
     ? markdown
diff --git a/packages/richtext-lexical/src/utilities/jsx/lexicalMarkdownCopy.ts b/packages/richtext-lexical/src/utilities/jsx/lexicalMarkdownCopy.ts
deleted file mode 100644
index dc3e20305a9..00000000000
--- a/packages/richtext-lexical/src/utilities/jsx/lexicalMarkdownCopy.ts
+++ /dev/null
@@ -1,112 +0,0 @@
-/* eslint-disable regexp/no-unused-capturing-group */
-
-import type { ElementNode } from 'lexical'
-
-import type {
-  MultilineElementTransformer as _MultilineElementTransformer,
-  Transformer,
-} from '../../packages/@lexical/markdown/index.js'
-
-import {
-  $convertFromMarkdownString as $originalConvertFromMarkdownString,
-  TRANSFORMERS,
-} from '../../packages/@lexical/markdown/index.js'
-
-const EMPTY_OR_WHITESPACE_ONLY = /^[\t ]*$/
-const ORDERED_LIST_REGEX = /^(\s*)(\d+)\.\s/
-const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/
-const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i
-const HEADING_REGEX = /^(#{1,6})\s/
-const QUOTE_REGEX = /^>\s/
-// Match start of ``` or escaped \`\`\` code blocks
-const CODE_START_REGEX = /^[ \t]*(\\`\\`\\`|```)(\w+)?/
-// Match end of ``` or escaped \`\`\` code blocks
-const CODE_END_REGEX = /[ \t]*(\\`\\`\\`|```)$/
-const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/
-const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/
-const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/
-const TAG_START_REGEX = /^[ \t]*<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i
-const TAG_END_REGEX = /^[ \t]*<\/[a-z_][\w-]*\s*>/i
-
-export function normalizeMarkdown(input: string, shouldMergeAdjacentLines: boolean): string {
-  const lines = input.split('\n')
-  let inCodeBlock = false
-  const sanitizedLines: string[] = []
-
-  for (let i = 0; i < lines.length; i++) {
-    const line = lines[i]
-    const lastLine = sanitizedLines[sanitizedLines.length - 1]
-
-    // Code blocks of ```single line``` don't toggle the inCodeBlock flag
-    if (CODE_SINGLE_LINE_REGEX.test(line)) {
-      sanitizedLines.push(line)
-      continue
-    }
-
-    // Toggle inCodeBlock state when encountering start or end of a code block
-    if (CODE_START_REGEX.test(line)) {
-      inCodeBlock = true
-      sanitizedLines.push(line)
-      continue
-    }
-
-    if (CODE_END_REGEX.test(line)) {
-      inCodeBlock = false
-      sanitizedLines.push(line)
-      continue
-    }
-
-    // If we are inside a code block, keep the line unchanged
-    if (inCodeBlock) {
-      sanitizedLines.push(line)
-      continue
-    }
-
-    // In markdown the concept of "empty paragraphs" does not exist.
-    // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
-    if (
-      EMPTY_OR_WHITESPACE_ONLY.test(line) ||
-      EMPTY_OR_WHITESPACE_ONLY.test(lastLine) ||
-      !lastLine ||
-      HEADING_REGEX.test(lastLine) ||
-      HEADING_REGEX.test(line) ||
-      QUOTE_REGEX.test(line) ||
-      ORDERED_LIST_REGEX.test(line) ||
-      UNORDERED_LIST_REGEX.test(line) ||
-      CHECK_LIST_REGEX.test(line) ||
-      TABLE_ROW_REG_EXP.test(line) ||
-      TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
-      !shouldMergeAdjacentLines ||
-      TAG_START_REGEX.test(line) ||
-      TAG_END_REGEX.test(line) ||
-      TAG_START_REGEX.test(lastLine) ||
-      TAG_END_REGEX.test(lastLine)
-    ) {
-      sanitizedLines.push(line)
-    } else {
-      sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trim()
-    }
-  }
-
-  return sanitizedLines.join('\n')
-}
-
-/**
- * Renders markdown from a string. The selection is moved to the start after the operation.
- *
- *  @param {boolean} [shouldPreserveNewLines] By setting this to true, new lines will be preserved between conversions
- *  @param {boolean} [shouldMergeAdjacentLines] By setting this to true, adjacent non empty lines will be merged according to commonmark spec: https://spec.commonmark.org/0.24/#example-177. Not applicable if shouldPreserveNewLines = true.
- */
-export function $convertFromMarkdownString(
-  markdown: string,
-  transformers: Array<Transformer> = TRANSFORMERS,
-  node?: ElementNode,
-  shouldPreserveNewLines = false,
-  shouldMergeAdjacentLines = true,
-): void {
-  const sanitizedMarkdown = shouldPreserveNewLines
-    ? markdown
-    : normalizeMarkdown(markdown, shouldMergeAdjacentLines)
-
-  return $originalConvertFromMarkdownString(sanitizedMarkdown, transformers, node) // shouldPreserveNewLines to true, as we do our own, modified markdown normalization here.
-}
diff --git a/test/lexical-mdx/int.spec.ts b/test/lexical-mdx/int.spec.ts
index c4bdc4f8ea9..76b9d0e3115 100644
--- a/test/lexical-mdx/int.spec.ts
+++ b/test/lexical-mdx/int.spec.ts
@@ -174,8 +174,6 @@ describe('Lexical MDX', () => {
           ? (sanitizedInputAfterConvertFromEditorJSON ?? sanitizedInput).replace(/\s/g, '')
           : (sanitizedInputAfterConvertFromEditorJSON ?? sanitizedInput)
 
-        console.log('resultNoSpace', resultNoSpace)
-        console.log('inputNoSpace', inputNoSpace)
         expect(resultNoSpace).toBe(inputNoSpace)
       })
     }