// klz-cables.com/src/payload/utils/lexicalParser.ts

/**
 * Converts a Markdown+JSX string into a Lexical AST node array.
 * Specifically adapted for klz-cables.com custom Component Blocks.
 */

/**
 * Reads the raw value of a single JSX prop out of a tag/chunk string.
 *
 * Supported forms are `prop="value"`, `prop='value'` and `prop={value}`.
 * The quoted forms are tried first; the brace form is only a fallback and
 * stops at the first `}`, so nested objects/arrays are not supported here.
 *
 * @param chunk - Source text containing the JSX tag.
 * @param prop - Exact prop name to look up.
 * @returns The text between the delimiters, or `''` when the prop is absent
 *          or empty.
 */
function propValue(chunk: string, prop: string): string {
  // Escape the name so it is matched literally, whatever characters it holds.
  const name = prop.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The name must start at a prop boundary, otherwise `title` would also hit
  // `subtitle="..."` or `data-title="..."`.
  const boundary = '(?<![\\w-])';

  // The closing quote must be the same character as the opening one, so the
  // other quote kind may appear inside the value (e.g. title="It's here").
  const quoted = chunk.match(new RegExp(`${boundary}${name}=(["'])([\\s\\S]*?)\\1`));
  if (quoted) return quoted[2] ?? '';

  const braced = chunk.match(new RegExp(`${boundary}${name}=\\{([^}]+)\\}`));
  return braced?.[1] ?? '';
}

/**
 * Returns the index of the `]` that closes the `[` located at `start`, or -1
 * when the brackets are unbalanced. Brackets inside string literals
 * (single, double or backtick quoted) are ignored.
 */
function findItemsArrayEnd(source: string, start: number): number {
  let depth = 0;
  let quote = '';
  for (let i = start; i < source.length; i++) {
    const ch = source.charAt(i);
    if (quote) {
      if (ch === '\\') {
        i++; // skip the escaped character
      } else if (ch === quote) {
        quote = '';
      }
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch;
    } else if (ch === '[') {
      depth++;
    } else if (ch === ']') {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Extracts and evaluates the array literal passed as `items={[ ... ]}`.
 *
 * The array is located by bracket matching rather than a lazy regex, so
 * nested arrays (`tags: ['a', 'b']`) and strings containing `]` or `}` do not
 * cut the expression short.
 *
 * SECURITY: the array text is executed with `new Function`, i.e. it runs as
 * arbitrary JavaScript. Only feed this function trusted, repository-owned MDX;
 * never content submitted by end users.
 *
 * @param chunk - Source text containing the JSX tag.
 * @param startTag - Tag name, used only in warning messages.
 * @returns The evaluated array, or `[]` when the prop is missing, malformed,
 *          fails to evaluate, or does not evaluate to an array.
 */
function extractItemsProp(chunk: string, startTag: string): any[] {
  const opener = /(?<![\w-])items=\{\s*(?=\[)/.exec(chunk);
  if (!opener) return [];

  const arrayStart = opener.index + opener[0].length;
  const arrayEnd = findItemsArrayEnd(chunk, arrayStart);
  // The array must be the whole prop expression: `]` followed by the closing `}`.
  if (arrayEnd === -1 || !/^\s*\}/.test(chunk.slice(arrayEnd + 1))) {
    console.warn(
      `Could not parse items array for block ${startTag}:`,
      'unbalanced or unsupported items expression',
    );
    return [];
  }

  const arrayString = chunk.slice(arrayStart, arrayEnd + 1);
  try {
    // The MDX passes plain JS object arrays (unquoted keys, trailing commas),
    // which JSON.parse cannot read, hence evaluation as JavaScript.
    const evaluated: unknown = new Function(`return ${arrayString};`)();
    return Array.isArray(evaluated) ? evaluated : [];
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`Could not parse items array for block ${startTag}:`, reason);
    return [];
  }
}

/**
 * Wraps block fields in a `block` node envelope.
 *
 * `blockType` is written first and the caller's fields are spread after it,
 * so a `blockType` key inside `fields` takes precedence.
 */
function blockNode(blockType: string, fields: Record<string, any>) {
  const mergedFields = { blockType, ...fields };
  return {
    type: 'block',
    format: '',
    version: 2,
    fields: mergedFields,
  };
}

/**
 * Guarantees a non-empty child list for a root node.
 *
 * A non-empty input is returned as-is (same array instance). An empty input
 * is replaced by a single paragraph holding one single-space text node, since
 * a root without children fails validation.
 */
function ensureChildren(parsedNodes: any[]): any[] {
  if (parsedNodes.length > 0) {
    return parsedNodes;
  }

  const blankText = { mode: 'normal', type: 'text', text: ' ', version: 1 };
  const blankParagraph = {
    type: 'paragraph',
    format: '',
    indent: 0,
    version: 1,
    children: [blankText],
  };
  return [blankParagraph];
}

/**
 * Splits a line of text into text nodes, applying bold and italic formatting.
 *
 * Recognised markers: `**bold**`, `__bold__`, `*italic*`, `_italic_`.
 * Nested or combined emphasis (e.g. `***x***`) is not interpreted.
 *
 * - Underscore italics must not be glued to word characters, so identifiers
 *   and file names such as `my_image_file.png` stay untouched.
 * - An emphasis pair with nothing inside (`____`, `****`) is kept as literal
 *   text instead of being dropped.
 *
 * @param text - Raw inline markdown.
 * @returns At least one text node; format is 0 (plain), 1 (bold) or 2 (italic).
 */
function parseInlineMarkdown(text: string): any[] {
  const FORMAT_BOLD = 1;
  const FORMAT_ITALIC = 2;

  const createTextNode = (content: string, format = 0) => ({
    detail: 0,
    format,
    mode: 'normal',
    style: '',
    text: content,
    type: 'text',
    version: 1,
  });

  // Groups: 1 = **bold**, 2 = __bold__, 3 = *italic*, 4 = _italic_
  const emphasisPattern =
    /\*\*(.*?)\*\*|__(.*?)__|(?<!\*)\*(?!\*)(.*?)\*|(?<!\w)_(?!_)(.*?)_(?!\w)/g;

  const nodes: any[] = [];
  let lastIndex = 0;

  for (const m of text.matchAll(emphasisPattern)) {
    const offset = m.index ?? 0;

    // Plain text between the previous match and this one
    if (offset > lastIndex) {
      nodes.push(createTextNode(text.slice(lastIndex, offset)));
    }

    const boldContent = m[1] ?? m[2];
    const italicContent = m[3] ?? m[4];

    if (boldContent) {
      nodes.push(createTextNode(boldContent, FORMAT_BOLD));
    } else if (italicContent) {
      nodes.push(createTextNode(italicContent, FORMAT_ITALIC));
    } else {
      // Empty emphasis: keep the markers so no input characters are lost
      nodes.push(createTextNode(m[0]));
    }

    lastIndex = offset + m[0].length;
  }

  // Trailing plain text
  if (lastIndex < text.length) {
    nodes.push(createTextNode(text.slice(lastIndex)));
  }

  return nodes.length > 0 ? nodes : [createTextNode(text)];
}

/**
 * Bookkeeping for wrapper blocks that were cut out of the text and replaced
 * by `__BLOCK_PLACEHOLDER_<n>__` tokens. One instance is shared by a whole
 * parse, including the recursive parsing of wrapper contents, so ids are
 * unique and a token can be resolved at any nesting depth.
 */
type BlockPlaceholderRegistry = {
  blocks: Map<string, any>;
  nextIndex: number;
};

/**
 * Converts a Markdown+JSX string into an array of Lexical-style nodes.
 *
 * Processing steps:
 *  1. A leading `--- ... ---` frontmatter section is removed.
 *  2. A few raw HTML tags (`section`, `h3`, `p`, `strong`, `&nbsp;`) are
 *     rewritten to markdown / plain text.
 *  3. Wrapper components with children (`HighlightBox`, `ChatBubble`,
 *     `Callout`, `ProductTabs`) are extracted; their inner text is parsed
 *     recursively into a nested root.
 *  4. The remaining text is split on blank lines; each chunk becomes a
 *     component block, a heading, or a paragraph.
 *
 * @param markdown - Source document.
 * @returns The top-level nodes; may be empty for empty input.
 */
export function parseMarkdownToLexical(markdown: string): any[] {
  return parseMarkdownWithRegistry(markdown, {
    blocks: new Map<string, any>(),
    nextIndex: 0,
  });
}

/**
 * Implementation of {@link parseMarkdownToLexical}. The registry is threaded
 * through recursive calls so a placeholder created while parsing an outer
 * document is still resolvable inside a wrapper's inner content.
 */
function parseMarkdownWithRegistry(markdown: string, registry: BlockPlaceholderRegistry): any[] {
  const paragraphNode = (text: string) => ({
    type: 'paragraph',
    format: '',
    indent: 0,
    version: 1,
    direction: 'ltr',
    children: parseInlineMarkdown(text),
  });

  // Rich-text value for a wrapper's children: a root node holding the
  // recursively parsed inner markdown (never empty, see ensureChildren).
  const nestedContent = (inner: string) => ({
    root: {
      type: 'root',
      format: '',
      indent: 0,
      version: 1,
      direction: 'ltr',
      children: ensureChildren(parseMarkdownWithRegistry(inner.trim(), registry)),
    },
  });

  const nodes: any[] = [];
  let content = markdown;

  // Strip frontmatter (the pattern is anchored, so the match starts at index 0)
  const fm = content.match(/^---\s*\n[\s\S]*?\n---/);
  if (fm) content = content.slice(fm[0].length).trim();

  // 1. EXTRACT MULTILINE WRAPPERS BEFORE CHUNKING
  // Their children may contain blank lines, which would otherwise split them
  // into separate chunks in step 2.
  const extractBlocks = [
    {
      tag: 'HighlightBox',
      regex: /<HighlightBox([^>]*)>([\s\S]*?)<\/HighlightBox>/g,
      build: (props: string, inner: string) =>
        blockNode('highlightBox', {
          title: propValue(`<Tag ${props}>`, 'title'),
          color: propValue(`<Tag ${props}>`, 'color') || 'primary',
          content: nestedContent(inner),
        }),
    },
    {
      tag: 'ChatBubble',
      regex: /<ChatBubble([^>]*)>([\s\S]*?)<\/ChatBubble>/g,
      build: (props: string, inner: string) =>
        blockNode('chatBubble', {
          author: propValue(`<Tag ${props}>`, 'author') || 'KLZ Team',
          avatar: propValue(`<Tag ${props}>`, 'avatar'),
          role: propValue(`<Tag ${props}>`, 'role') || 'Assistant',
          align: propValue(`<Tag ${props}>`, 'align') || 'left',
          content: nestedContent(inner),
        }),
    },
    {
      tag: 'Callout',
      regex: /<Callout([^>]*)>([\s\S]*?)<\/Callout>/g,
      build: (props: string, inner: string) =>
        blockNode('callout', {
          type: propValue(`<Tag ${props}>`, 'type') || 'info',
          title: propValue(`<Tag ${props}>`, 'title'),
          content: nestedContent(inner),
        }),
    },
    {
      tag: 'ProductTabs',
      regex: /<ProductTabs([^>]*)>([\s\S]*?)<\/ProductTabs>/g,
      build: (props: string, inner: string) => {
        const fullTag = `<ProductTabs ${props}>`;
        // `data={ {...json...} }` must be the last prop. The tag is matched
        // as an open/close pair, so the `/` before `>` is optional.
        const dataMatch = fullTag.match(/data=\{({[\s\S]*?})\}\s*\/?>/);
        let technicalItems: any[] = [];
        let voltageTables: any[] = [];

        if (dataMatch) {
          try {
            const parsedData = JSON.parse(dataMatch[1]);
            technicalItems = parsedData.technicalItems || [];
            voltageTables = parsedData.voltageTables || [];

            // Normalise primitive cells to `{ value: string }` objects
            voltageTables.forEach((vt: any) => {
              vt.rows?.forEach((row: any) => {
                if (row.cells) {
                  row.cells = row.cells.map((c: any) =>
                    typeof c !== 'object' ? { value: String(c) } : c,
                  );
                }
              });
            });
          } catch (e) {
            console.warn('Failed to parse ProductTabs JSON data:', e);
          }
        }

        return blockNode('productTabs', {
          technicalItems,
          voltageTables,
          content: nestedContent(inner),
        });
      },
    },
  ];

  function cleanMdxContent(text: string): string {
    return text
      .replace(/<section[^>]*>/g, '')
      .replace(/<\/section>/g, '')
      .replace(/<h3[^>]*>(.*?)<\/h3>/g, '### $1\n\n')
      .replace(/<p[^>]*>(.*?)<\/p>/g, '$1\n\n')
      .replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**')
      .replace(/&nbsp;/g, ' ')
      .trim();
  }

  content = cleanMdxContent(content);

  for (const block of extractBlocks) {
    content = content.replace(
      block.regex,
      (_match: string, propsMatch: string, innerMatch: string) => {
        const id = `__BLOCK_PLACEHOLDER_${registry.nextIndex++}__`;
        registry.blocks.set(id, block.build(propsMatch, innerMatch));
        // Blank lines around the token make it a chunk of its own in step 2
        return `\n\n${id}\n\n`;
      },
    );
  }

  // 2. CHUNK THE REST
  const appendChunk = (rawChunk: string): void => {
    const chunk = rawChunk.trim();
    if (!chunk) return;

    if (chunk.startsWith('__BLOCK_PLACEHOLDER_')) {
      const extracted = registry.blocks.get(chunk);
      if (extracted !== undefined) {
        nodes.push(extracted);
        return;
      }
      // Not one of our tokens: fall through and treat it like ordinary text
      // rather than pushing `undefined` into the node list.
    }

    if (chunk.includes('<StickyNarrative')) {
      nodes.push(
        blockNode('stickyNarrative', {
          title: propValue(chunk, 'title'),
          items: extractItemsProp(chunk, 'StickyNarrative'),
        }),
      );
      return;
    }

    if (chunk.includes('<ComparisonGrid')) {
      nodes.push(
        blockNode('comparisonGrid', {
          title: propValue(chunk, 'title'),
          leftLabel: propValue(chunk, 'leftLabel'),
          rightLabel: propValue(chunk, 'rightLabel'),
          items: extractItemsProp(chunk, 'ComparisonGrid'),
        }),
      );
      return;
    }

    if (chunk.includes('<VisualLinkPreview')) {
      nodes.push(
        blockNode('visualLinkPreview', {
          url: propValue(chunk, 'url'),
          title: propValue(chunk, 'title'),
          summary: propValue(chunk, 'summary'),
          image: propValue(chunk, 'image'),
        }),
      );
      return;
    }

    if (chunk.includes('<TechnicalGrid')) {
      nodes.push(
        blockNode('technicalGrid', {
          title: propValue(chunk, 'title'),
          items: extractItemsProp(chunk, 'TechnicalGrid'),
        }),
      );
      return;
    }

    if (chunk.includes('<AnimatedImage')) {
      // Accepts both width="300" style digits after `=` and width={300}
      const widthMatch = chunk.match(/width=\{?(\d+)\}?/);
      const heightMatch = chunk.match(/height=\{?(\d+)\}?/);
      nodes.push(
        blockNode('animatedImage', {
          src: propValue(chunk, 'src'),
          alt: propValue(chunk, 'alt'),
          width: widthMatch ? parseInt(widthMatch[1], 10) : undefined,
          height: heightMatch ? parseInt(heightMatch[1], 10) : undefined,
        }),
      );
      return;
    }

    if (chunk.includes('<PowerCTA')) {
      nodes.push(
        blockNode('powerCTA', {
          locale: propValue(chunk, 'locale') || 'de',
        }),
      );
      return;
    }

    const headingMatch = chunk.match(/^(#{1,6})\s+(.*)/);
    if (headingMatch) {
      // Markdown levels are shifted down by one (`#` -> h2), capped at h6
      const level = Math.min(headingMatch[1].length + 1, 6);
      nodes.push({
        type: 'heading',
        tag: `h${level}`,
        format: '',
        indent: 0,
        version: 1,
        direction: 'ltr',
        children: parseInlineMarkdown(headingMatch[2]),
      });
      // Lines that follow the heading without a blank line in between belong
      // to the same chunk; process them too instead of dropping them.
      appendChunk(chunk.slice(headingMatch[0].length));
      return;
    }

    // Everything else, including standalone `![alt](src)` images, is emitted
    // as a paragraph of inline-parsed text.
    nodes.push(paragraphNode(chunk));
  };

  for (const rawChunk of content.split(/\n\s*\n/)) {
    appendChunk(rawChunk);
  }

  return nodes;
}