Files
klz-cables.com/src/payload/utils/lexicalParser.ts
2026-02-24 15:52:16 +01:00

359 lines
10 KiB
TypeScript

/**
* Converts a Markdown+JSX string into a Lexical AST node array.
* Specifically adapted for klz-cables.com custom Component Blocks.
*/
/**
 * Extracts the raw value of a single JSX prop from a tag/chunk string.
 *
 * Supported forms are `prop="value"`, `prop='value'` and `prop={value}`.
 * Quoted forms are searched first; the braced form is only a fallback.
 * The braced form stops at the first `}`, so nested objects or arrays
 * cannot be read with this helper.
 *
 * @param chunk Source text containing the JSX tag.
 * @param prop Exact name of the prop to look up.
 * @returns The value without its quotes/braces, or '' when the prop is absent or empty.
 */
function propValue(chunk: string, prop: string): string {
  // Escape regex metacharacters so the prop name is always matched literally.
  const name = prop.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The lookbehind keeps `title` from matching inside `subtitle=` or `data-title=`.
  const start = `(?<![\\w-])${name}=`;

  // Each quote style gets its own alternative so an apostrophe inside a
  // double-quoted value (title="It's ...") does not end the value early.
  const quoted = chunk.match(new RegExp(`${start}(?:"([^"]*)"|'([^']*)')`));
  if (quoted) return quoted[1] ?? quoted[2] ?? '';

  const braced = chunk.match(new RegExp(`${start}\\{([^}]+)\\}`));
  return braced?.[1] ?? '';
}
/**
 * Extracts and evaluates the array literal passed as `items={[ ... ]}`.
 *
 * The array is located with a bracket-balanced scan that ignores brackets
 * inside string literals, so nested arrays (`tags: ['a', 'b']`) and strings
 * containing `]` are captured completely.
 *
 * SECURITY: the captured text is evaluated with `new Function`, i.e. it is
 * executed as JavaScript (needed for unquoted keys and trailing commas).
 * Only feed this function MDX from trusted sources.
 *
 * @param chunk Source text containing the JSX tag.
 * @param startTag Component name, used only in the warning message.
 * @returns The evaluated array, or [] when no items prop exists or it cannot be parsed.
 */
function extractItemsProp(chunk: string, startTag: string): any[] {
  const opener = /items=\{\s*\[/.exec(chunk);
  if (!opener) return [];

  // Index of the '[' that opens the array literal.
  const arrayStart = opener.index + opener[0].length - 1;
  let arrayEnd = -1;
  let depth = 0;
  let quote = '';

  for (let i = arrayStart; i < chunk.length; i++) {
    const ch = chunk.charAt(i);
    if (quote) {
      if (ch === '\\') {
        i++; // skip the escaped character
      } else if (ch === quote) {
        quote = '';
      }
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch;
    } else if (ch === '[') {
      depth++;
    } else if (ch === ']') {
      depth--;
      if (depth === 0) {
        arrayEnd = i;
        break;
      }
    }
  }

  if (arrayEnd === -1) {
    console.warn(`Could not parse items array for block ${startTag}:`, 'unterminated array literal');
    return [];
  }

  const arrayString = chunk.slice(arrayStart, arrayEnd + 1);
  try {
    // Evaluated rather than JSON.parse'd because the MDX uses plain JS object syntax.
    const result: unknown = new Function(`return ${arrayString};`)();
    return Array.isArray(result) ? result : [];
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`Could not parse items array for block ${startTag}:`, reason);
    return [];
  }
}
/**
 * Builds a Lexical `block` node.
 *
 * `blockType` is written first and the caller's fields are spread after it,
 * so a `blockType` key inside `fields` takes precedence.
 *
 * @param blockType Identifier stored in `fields.blockType`.
 * @param fields Additional field values copied onto the node's `fields`.
 * @returns The block node object.
 */
function blockNode(blockType: string, fields: Record<string, any>) {
  const mergedFields = { blockType, ...fields };
  return {
    type: 'block',
    format: '',
    version: 2,
    fields: mergedFields,
  };
}
/**
 * Guarantees a non-empty child list for a Lexical root node.
 *
 * A non-empty input is returned as-is (same array instance). An empty input
 * is replaced by a single paragraph holding one space, because root nodes
 * need at least one child to pass validation.
 *
 * @param parsedNodes Nodes produced by the parser.
 * @returns `parsedNodes` itself, or a one-element fallback array.
 */
function ensureChildren(parsedNodes: any[]): any[] {
  if (parsedNodes.length > 0) return parsedNodes;

  const blankText = { mode: 'normal', type: 'text', text: ' ', version: 1 };
  const blankParagraph = {
    type: 'paragraph',
    format: '',
    indent: 0,
    version: 1,
    children: [blankText],
  };
  return [blankParagraph];
}
/**
 * Splits a line of text into Lexical text nodes, applying bold and italic.
 *
 * Recognised markers: `**bold**`, `__bold__`, `*italic*`, `_italic_`.
 * Format codes written to the nodes: 0 = plain, 1 = bold, 2 = italic.
 *
 * Single underscores only count as emphasis when they are not surrounded by
 * letters, digits or underscores, so identifiers and paths such as
 * `my_image_file.png` stay intact. Markers with nothing between them
 * (`____`, `****`) are kept as literal text instead of being removed.
 *
 * @param text Raw inline Markdown.
 * @returns At least one text node; the whole input as a plain node when nothing matched.
 */
function parseInlineMarkdown(text: string): any[] {
  const createTextNode = (content: string, format = 0) => ({
    detail: 0,
    format,
    mode: 'normal',
    style: '',
    text: content,
    type: 'text',
    version: 1,
  });

  // Groups: 2 = **bold**, 3 = __bold__, 4 = *italic*, 5 = _italic_.
  // The `u` flag enables \p{...} so umlauts etc. also count as word characters.
  const emphasis =
    /(\*\*(.*?)\*\*|__(.*?)__|(?<!\*)\*(?!\*)(.*?)\*|(?<![\p{L}\p{N}_])_(?!_)(.*?)_(?![\p{L}\p{N}_]))/gu;

  const nodes: any[] = [];
  let cursor = 0;

  for (const m of text.matchAll(emphasis)) {
    const bold = m[2] || m[3];
    const italic = m[4] || m[5];
    const content = bold || italic;
    // Empty emphasis: leave the cursor alone so the raw markers end up in the
    // next plain-text slice rather than disappearing.
    if (!content) continue;

    const offset = m.index ?? 0;
    if (offset > cursor) {
      nodes.push(createTextNode(text.slice(cursor, offset)));
    }
    nodes.push(createTextNode(content, bold ? 1 : 2));
    cursor = offset + m[0].length;
  }

  // Trailing plain text after the last emphasis span.
  if (cursor < text.length) {
    nodes.push(createTextNode(text.slice(cursor)));
  }

  return nodes.length > 0 ? nodes : [createTextNode(text)];
}
/**
 * Converts a Markdown+JSX (MDX) string into an array of Lexical AST nodes.
 *
 * Processing order (applied at every nesting level):
 *  1. A leading `--- ... ---` frontmatter block is stripped.
 *  2. A few raw HTML tags (`<section>`, `<h3>`, `<p>`, `<strong>`) and `&nbsp;`
 *     are rewritten to Markdown / plain text.
 *  3. Wrapper components (HighlightBox, ChatBubble, Callout, ProductTabs) are
 *     swapped for placeholders; their inner content is parsed recursively
 *     into a nested Lexical root.
 *  4. The remaining text is split on blank lines; each chunk becomes a
 *     component block, a heading, or a paragraph.
 *
 * Headings are shifted one level down (`#` -> h2) and capped at h6.
 * Image syntax is not converted; such chunks are emitted as paragraphs.
 *
 * @param markdown MDX source text.
 * @returns Lexical nodes in document order.
 */
export function parseMarkdownToLexical(markdown: string): any[] {
  // One registry and one counter are shared by every nesting level. Wrappers
  // are extracted type by type over the whole text, so the inner content of a
  // later wrapper may already contain placeholders of earlier ones; with
  // per-call registries those resolved to `undefined` nodes.
  const placeholders = new Map<string, any>();
  let placeholderIdx = 0;

  const paragraphNode = (text: string) => ({
    type: 'paragraph',
    format: '',
    indent: 0,
    version: 1,
    direction: 'ltr',
    children: parseInlineMarkdown(text),
  });

  // Wraps recursively parsed inner content in the nested rich-text root shape.
  const nestedContent = (inner: string) => ({
    root: {
      type: 'root',
      format: '',
      indent: 0,
      version: 1,
      direction: 'ltr',
      children: ensureChildren(parseFragment(inner.trim())),
    },
  });

  // Rewrites the handful of raw HTML tags found in the MDX to Markdown.
  function cleanMdxContent(text: string): string {
    return text
      .replace(/<section[^>]*>/g, '')
      .replace(/<\/section>/g, '')
      .replace(/<h3[^>]*>(.*?)<\/h3>/g, '### $1\n\n')
      .replace(/<p[^>]*>(.*?)<\/p>/g, '$1\n\n')
      .replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**')
      .replace(/&nbsp;/g, ' ')
      .trim();
  }

  // Parses one nesting level; wrapper builders call back into it for their inner content.
  function parseFragment(source: string): any[] {
    const nodes: any[] = [];
    let content = source;

    // Strip frontmatter
    const fm = content.match(/^---\s*\n[\s\S]*?\n---/);
    if (fm) content = content.replace(fm[0], '').trim();

    // 1. EXTRACT MULTILINE WRAPPERS BEFORE CHUNKING
    // The regex literals are created per call, so nested parses never share regex state.
    const extractBlocks = [
      {
        tag: 'HighlightBox',
        regex: /<HighlightBox([^>]*)>([\s\S]*?)<\/HighlightBox>/g,
        build: (props: string, inner: string) =>
          blockNode('highlightBox', {
            title: propValue(`<Tag ${props}>`, 'title'),
            color: propValue(`<Tag ${props}>`, 'color') || 'primary',
            content: nestedContent(inner),
          }),
      },
      {
        tag: 'ChatBubble',
        regex: /<ChatBubble([^>]*)>([\s\S]*?)<\/ChatBubble>/g,
        build: (props: string, inner: string) =>
          blockNode('chatBubble', {
            author: propValue(`<Tag ${props}>`, 'author') || 'KLZ Team',
            avatar: propValue(`<Tag ${props}>`, 'avatar'),
            role: propValue(`<Tag ${props}>`, 'role') || 'Assistant',
            align: propValue(`<Tag ${props}>`, 'align') || 'left',
            content: nestedContent(inner),
          }),
      },
      {
        tag: 'Callout',
        regex: /<Callout([^>]*)>([\s\S]*?)<\/Callout>/g,
        build: (props: string, inner: string) =>
          blockNode('callout', {
            type: propValue(`<Tag ${props}>`, 'type') || 'info',
            title: propValue(`<Tag ${props}>`, 'title'),
            content: nestedContent(inner),
          }),
      },
      {
        tag: 'ProductTabs',
        regex: /<ProductTabs([^>]*)>([\s\S]*?)<\/ProductTabs>/g,
        build: (props: string, inner: string) => {
          const fullTag = `<ProductTabs ${props}>`;
          // NOTE(review): only matches when the data prop is directly followed by `/>`,
          // i.e. a self-closing element inside the captured props — confirm against real MDX.
          const dataMatch = fullTag.match(/data=\{({[\s\S]*?})\}\s*\/>/);
          let technicalItems: any[] = [];
          let voltageTables: any[] = [];
          if (dataMatch) {
            try {
              const parsedData = JSON.parse(dataMatch[1]);
              technicalItems = parsedData.technicalItems || [];
              voltageTables = parsedData.voltageTables || [];
              // Primitive cells are wrapped as { value: string }; object cells pass through.
              voltageTables.forEach((vt: any) => {
                vt.rows?.forEach((row: any) => {
                  if (row.cells) {
                    row.cells = row.cells.map((c: any) =>
                      typeof c !== 'object' ? { value: String(c) } : c,
                    );
                  }
                });
              });
            } catch (e) {
              console.warn('Failed to parse ProductTabs JSON data:', e);
            }
          }
          return blockNode('productTabs', {
            technicalItems,
            voltageTables,
            content: nestedContent(inner),
          });
        },
      },
    ];

    content = cleanMdxContent(content);

    for (const block of extractBlocks) {
      content = content.replace(
        block.regex,
        (_match: string, propsMatch: string, innerMatch: string) => {
          const id = `__BLOCK_PLACEHOLDER_${placeholderIdx++}__`;
          placeholders.set(id, block.build(propsMatch, innerMatch));
          // Blank lines around the id make it a chunk of its own in step 2.
          return `\n\n${id}\n\n`;
        },
      );
    }

    // 2. CHUNK THE REST
    // Index loop because a heading may queue the rest of its chunk for processing.
    const pending = content.split(/\n\s*\n/);
    for (let i = 0; i < pending.length; i++) {
      const chunk = pending[i].trim();
      if (!chunk) continue;

      // Only registered ids resolve; any other text falls through to the
      // regular handling, so `undefined` is never pushed.
      const placed = placeholders.get(chunk);
      if (placed !== undefined) {
        nodes.push(placed);
        continue;
      }

      if (chunk.includes('<StickyNarrative')) {
        nodes.push(
          blockNode('stickyNarrative', {
            title: propValue(chunk, 'title'),
            items: extractItemsProp(chunk, 'StickyNarrative'),
          }),
        );
        continue;
      }

      if (chunk.includes('<ComparisonGrid')) {
        nodes.push(
          blockNode('comparisonGrid', {
            title: propValue(chunk, 'title'),
            leftLabel: propValue(chunk, 'leftLabel'),
            rightLabel: propValue(chunk, 'rightLabel'),
            items: extractItemsProp(chunk, 'ComparisonGrid'),
          }),
        );
        continue;
      }

      if (chunk.includes('<VisualLinkPreview')) {
        nodes.push(
          blockNode('visualLinkPreview', {
            url: propValue(chunk, 'url'),
            title: propValue(chunk, 'title'),
            summary: propValue(chunk, 'summary'),
            image: propValue(chunk, 'image'),
          }),
        );
        continue;
      }

      if (chunk.includes('<TechnicalGrid')) {
        nodes.push(
          blockNode('technicalGrid', {
            title: propValue(chunk, 'title'),
            items: extractItemsProp(chunk, 'TechnicalGrid'),
          }),
        );
        continue;
      }

      if (chunk.includes('<AnimatedImage')) {
        const widthMatch = chunk.match(/width=\{?(\d+)\}?/);
        const heightMatch = chunk.match(/height=\{?(\d+)\}?/);
        nodes.push(
          blockNode('animatedImage', {
            src: propValue(chunk, 'src'),
            alt: propValue(chunk, 'alt'),
            width: widthMatch ? parseInt(widthMatch[1], 10) : undefined,
            height: heightMatch ? parseInt(heightMatch[1], 10) : undefined,
          }),
        );
        continue;
      }

      if (chunk.includes('<PowerCTA')) {
        nodes.push(
          blockNode('powerCTA', {
            locale: propValue(chunk, 'locale') || 'de',
          }),
        );
        continue;
      }

      const headingMatch = chunk.match(/^(#{1,6})\s+(.*)/);
      if (headingMatch) {
        // Shift one level down (# -> h2), capped at h6.
        const level = Math.min(headingMatch[1].length + 1, 6);
        nodes.push({
          type: 'heading',
          tag: `h${level}`,
          format: '',
          indent: 0,
          version: 1,
          direction: 'ltr',
          children: parseInlineMarkdown(headingMatch[2]),
        });
        // Lines following the heading without a blank line in between used to
        // be dropped; queue them as the next chunk instead.
        const rest = chunk.slice(headingMatch[0].length).trim();
        if (rest) pending.splice(i + 1, 0, rest);
        continue;
      }

      // Everything else, including Markdown image syntax, becomes a paragraph.
      nodes.push(paragraphNode(chunk));
    }

    return nodes;
  }

  return parseFragment(markdown);
}