359 lines
10 KiB
TypeScript
359 lines
10 KiB
TypeScript
/**
 * Converts a Markdown+JSX string into a Lexical AST node array.
 * Specifically adapted for klz-cables.com custom Component Blocks.
 */
|
|
|
|
function propValue(chunk: string, prop: string): string {
|
|
// Match prop="value" or prop='value' or prop={value}
|
|
// and also multiline props like prop={\n [\n {...}\n ]\n}
|
|
// For arrays or complex objects passed as props, basic regex might fail,
|
|
// but the MDX in klz-cables usually uses simpler props or children.
|
|
const match =
|
|
chunk.match(new RegExp(`${prop}=["']([^"']+)["']`)) ||
|
|
chunk.match(new RegExp(`${prop}=\\{([^}]+)\\}`));
|
|
return match ? match[1] : '';
|
|
}
|
|
|
|
function extractItemsProp(chunk: string, startTag: string): any[] {
|
|
// Match items={ [ ... ] } robustly without stopping at inner object braces
|
|
const itemsMatch = chunk.match(/items=\{\s*(\[[\s\S]*?\])\s*\}/);
|
|
if (itemsMatch) {
|
|
try {
|
|
const arrayString = itemsMatch[1].trim();
|
|
// Since klz-cables MDX passes pure JS object arrays like `items={[{title: 'A', content: 'B'}]}`,
|
|
// parsing it via Regex to JSON is extremely brittle due to unquoted keys and trailing commas.
|
|
// Using `new Function` safely evaluates the array AST directly in this Node script environment.
|
|
const fn = new Function(`return ${arrayString};`);
|
|
return fn();
|
|
} catch (_e: any) {
|
|
console.warn(`Could not parse items array for block ${startTag}:`, _e.message);
|
|
return [];
|
|
}
|
|
}
|
|
return [];
|
|
}
|
|
|
|
function blockNode(blockType: string, fields: Record<string, any>) {
|
|
return { type: 'block', format: '', version: 2, fields: { blockType, ...fields } };
|
|
}
|
|
|
|
function ensureChildren(parsedNodes: any[]): any[] {
|
|
// Lexical root nodes require at least one child node, or validation fails
|
|
if (parsedNodes.length === 0) {
|
|
return [
|
|
{
|
|
type: 'paragraph',
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
children: [{ mode: 'normal', type: 'text', text: ' ', version: 1 }],
|
|
},
|
|
];
|
|
}
|
|
return parsedNodes;
|
|
}
|
|
|
|
function parseInlineMarkdown(text: string): any[] {
|
|
// Simple regex-based inline parser for bold and italic
|
|
// Matches **bold**, __bold__, *italic*, _italic_
|
|
const regex = /(\*\*|__|TextNode)(.*?)\1|(\*|_)(.*?)\3/g;
|
|
const nodes: any[] = [];
|
|
let lastIndex = 0;
|
|
let match;
|
|
|
|
const createTextNode = (content: string, format = 0) => ({
|
|
detail: 0,
|
|
format,
|
|
mode: 'normal',
|
|
style: '',
|
|
text: content,
|
|
type: 'text',
|
|
version: 1,
|
|
});
|
|
|
|
const rawMatch = text.matchAll(
|
|
/(\*\*(.*?)\*\*|__(.*?)__|(?<!\*)\*(?!\*)(.*?)\*|(?<!_)_(?!_)(.*?)_)/g,
|
|
);
|
|
|
|
for (const m of rawMatch) {
|
|
const offset = m.index!;
|
|
// Leading plain text
|
|
if (offset > lastIndex) {
|
|
nodes.push(createTextNode(text.slice(lastIndex, offset)));
|
|
}
|
|
|
|
const boldContent = m[2] || m[3];
|
|
const italicContent = m[4] || m[5];
|
|
|
|
if (boldContent) {
|
|
nodes.push(createTextNode(boldContent, 1)); // 1 = Bold
|
|
} else if (italicContent) {
|
|
nodes.push(createTextNode(italicContent, 2)); // 2 = Italic
|
|
}
|
|
|
|
lastIndex = offset + m[0].length;
|
|
}
|
|
|
|
// Trailing plain text
|
|
if (lastIndex < text.length) {
|
|
nodes.push(createTextNode(text.slice(lastIndex)));
|
|
}
|
|
|
|
return nodes.length > 0 ? nodes : [createTextNode(text)];
|
|
}
|
|
|
|
export function parseMarkdownToLexical(markdown: string): any[] {
|
|
const paragraphNode = (text: string) => ({
|
|
type: 'paragraph',
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
direction: 'ltr',
|
|
children: parseInlineMarkdown(text),
|
|
});
|
|
|
|
const nodes: any[] = [];
|
|
let content = markdown;
|
|
|
|
// Strip frontmatter
|
|
const fm = content.match(/^---\s*\n[\s\S]*?\n---/);
|
|
if (fm) content = content.replace(fm[0], '').trim();
|
|
|
|
// 1. EXTRACT MULTILINE WRAPPERS BEFORE CHUNKING
|
|
const extractBlocks = [
|
|
{
|
|
tag: 'HighlightBox',
|
|
regex: /<HighlightBox([^>]*)>([\s\S]*?)<\/HighlightBox>/g,
|
|
build: (props: string, inner: string) =>
|
|
blockNode('highlightBox', {
|
|
title: propValue(`<Tag ${props}>`, 'title'),
|
|
color: propValue(`<Tag ${props}>`, 'color') || 'primary',
|
|
content: {
|
|
root: {
|
|
type: 'root',
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
direction: 'ltr',
|
|
children: ensureChildren(parseMarkdownToLexical(inner.trim())),
|
|
},
|
|
},
|
|
}),
|
|
},
|
|
{
|
|
tag: 'ChatBubble',
|
|
regex: /<ChatBubble([^>]*)>([\s\S]*?)<\/ChatBubble>/g,
|
|
build: (props: string, inner: string) =>
|
|
blockNode('chatBubble', {
|
|
author: propValue(`<Tag ${props}>`, 'author') || 'KLZ Team',
|
|
avatar: propValue(`<Tag ${props}>`, 'avatar'),
|
|
role: propValue(`<Tag ${props}>`, 'role') || 'Assistant',
|
|
align: propValue(`<Tag ${props}>`, 'align') || 'left',
|
|
content: {
|
|
root: {
|
|
type: 'root',
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
direction: 'ltr',
|
|
children: ensureChildren(parseMarkdownToLexical(inner.trim())),
|
|
},
|
|
},
|
|
}),
|
|
},
|
|
{
|
|
tag: 'Callout',
|
|
regex: /<Callout([^>]*)>([\s\S]*?)<\/Callout>/g,
|
|
build: (props: string, inner: string) =>
|
|
blockNode('callout', {
|
|
type: propValue(`<Tag ${props}>`, 'type') || 'info',
|
|
title: propValue(`<Tag ${props}>`, 'title'),
|
|
content: {
|
|
root: {
|
|
type: 'root',
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
direction: 'ltr',
|
|
children: ensureChildren(parseMarkdownToLexical(inner.trim())),
|
|
},
|
|
},
|
|
}),
|
|
},
|
|
{
|
|
tag: 'ProductTabs',
|
|
regex: /<ProductTabs([^>]*)>([\s\S]*?)<\/ProductTabs>/g,
|
|
build: (props: string, inner: string) => {
|
|
const fullTag = `<ProductTabs ${props}>`;
|
|
const dataMatch = fullTag.match(/data=\{({[\s\S]*?})\}\s*\/>/);
|
|
let technicalItems = [];
|
|
let voltageTables = [];
|
|
|
|
if (dataMatch) {
|
|
try {
|
|
const parsedData = JSON.parse(dataMatch[1]);
|
|
technicalItems = parsedData.technicalItems || [];
|
|
voltageTables = parsedData.voltageTables || [];
|
|
|
|
voltageTables.forEach((vt: any) => {
|
|
vt.rows?.forEach((row: any) => {
|
|
if (row.cells) {
|
|
row.cells = row.cells.map((c: any) =>
|
|
typeof c !== 'object' ? { value: String(c) } : c,
|
|
);
|
|
}
|
|
});
|
|
});
|
|
} catch (e) {
|
|
console.warn('Failed to parse ProductTabs JSON data:', e);
|
|
}
|
|
}
|
|
|
|
return blockNode('productTabs', {
|
|
technicalItems,
|
|
voltageTables,
|
|
content: {
|
|
root: {
|
|
type: 'root',
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
direction: 'ltr',
|
|
children: ensureChildren(parseMarkdownToLexical(inner.trim())),
|
|
},
|
|
},
|
|
});
|
|
},
|
|
},
|
|
];
|
|
|
|
function cleanMdxContent(text: string): string {
|
|
return text
|
|
.replace(/<section[^>]*>/g, '')
|
|
.replace(/<\/section>/g, '')
|
|
.replace(/<h3[^>]*>(.*?)<\/h3>/g, '### $1\n\n')
|
|
.replace(/<p[^>]*>(.*?)<\/p>/g, '$1\n\n')
|
|
.replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**')
|
|
.replace(/ /g, ' ')
|
|
.trim();
|
|
}
|
|
|
|
content = cleanMdxContent(content);
|
|
|
|
const placeholders = new Map<string, any>();
|
|
let placeholderIdx = 0;
|
|
|
|
for (const block of extractBlocks) {
|
|
content = content.replace(block.regex, (match, propsMatch, innerMatch) => {
|
|
const id = `__BLOCK_PLACEHOLDER_${placeholderIdx++}__`;
|
|
placeholders.set(id, block.build(propsMatch, innerMatch));
|
|
return `\n\n${id}\n\n`;
|
|
});
|
|
}
|
|
|
|
// 2. CHUNK THE REST
|
|
const rawChunks = content.split(/\n\s*\n/);
|
|
|
|
for (let chunk of rawChunks) {
|
|
chunk = chunk.trim();
|
|
if (!chunk) continue;
|
|
|
|
if (chunk.startsWith('__BLOCK_PLACEHOLDER_')) {
|
|
nodes.push(placeholders.get(chunk));
|
|
continue;
|
|
}
|
|
|
|
if (chunk.includes('<StickyNarrative')) {
|
|
nodes.push(
|
|
blockNode('stickyNarrative', {
|
|
title: propValue(chunk, 'title'),
|
|
items: extractItemsProp(chunk, 'StickyNarrative'),
|
|
}),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
if (chunk.includes('<ComparisonGrid')) {
|
|
nodes.push(
|
|
blockNode('comparisonGrid', {
|
|
title: propValue(chunk, 'title'),
|
|
leftLabel: propValue(chunk, 'leftLabel'),
|
|
rightLabel: propValue(chunk, 'rightLabel'),
|
|
items: extractItemsProp(chunk, 'ComparisonGrid'),
|
|
}),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
if (chunk.includes('<VisualLinkPreview')) {
|
|
nodes.push(
|
|
blockNode('visualLinkPreview', {
|
|
url: propValue(chunk, 'url'),
|
|
title: propValue(chunk, 'title'),
|
|
summary: propValue(chunk, 'summary'),
|
|
image: propValue(chunk, 'image'),
|
|
}),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
if (chunk.includes('<TechnicalGrid')) {
|
|
nodes.push(
|
|
blockNode('technicalGrid', {
|
|
title: propValue(chunk, 'title'),
|
|
items: extractItemsProp(chunk, 'TechnicalGrid'),
|
|
}),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
if (chunk.includes('<AnimatedImage')) {
|
|
const widthMatch = chunk.match(/width=\{?(\d+)\}?/);
|
|
const heightMatch = chunk.match(/height=\{?(\d+)\}?/);
|
|
nodes.push(
|
|
blockNode('animatedImage', {
|
|
src: propValue(chunk, 'src'),
|
|
alt: propValue(chunk, 'alt'),
|
|
width: widthMatch ? parseInt(widthMatch[1], 10) : undefined,
|
|
height: heightMatch ? parseInt(heightMatch[1], 10) : undefined,
|
|
}),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
if (chunk.includes('<PowerCTA')) {
|
|
nodes.push(
|
|
blockNode('powerCTA', {
|
|
locale: propValue(chunk, 'locale') || 'de',
|
|
}),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
const headingMatch = chunk.match(/^(#{1,6})\s+(.*)/);
|
|
if (headingMatch) {
|
|
const level = Math.min(headingMatch[1].length + 1, 6);
|
|
nodes.push({
|
|
type: 'heading',
|
|
tag: `h${level}`,
|
|
format: '',
|
|
indent: 0,
|
|
version: 1,
|
|
direction: 'ltr',
|
|
children: parseInlineMarkdown(headingMatch[2]),
|
|
});
|
|
continue;
|
|
}
|
|
|
|
const imageMatch = chunk.match(/^!\[([^\]]*)\]\(([^)]+)\)$/);
|
|
if (imageMatch) {
|
|
nodes.push(paragraphNode(chunk));
|
|
continue;
|
|
}
|
|
|
|
nodes.push(paragraphNode(chunk));
|
|
}
|
|
|
|
return nodes;
|
|
}
|