This commit is contained in:
2026-01-06 13:55:04 +01:00
parent 297de69928
commit f991ea6b9b
393 changed files with 41362 additions and 4811 deletions

View File

@@ -3,6 +3,8 @@
* Handles HTML entities, formatting, and class conversions from WordPress exports
*/
import { getMediaById } from './data';
/**
* Process HTML content from WordPress
* - Sanitizes dangerous content
@@ -84,7 +86,17 @@ function replaceHTMLEntities(html: string): string {
'“': '"',
'”': '"',
'•': '•',
'·': '·'
'·': '·',
// Additional common entities that might appear in WordPress exports
'„': '"', // Double low-reversed-9 quote
'‟': '"', // Double high-reversed-9 quote
'': "'", // Prime
'″': '"', // Double prime
'': '<', // Single left-pointing angle quotation mark
'': '>', // Single right-pointing angle quotation mark
'†': '†', // Dagger
'‡': '‡', // Double dagger
'‰': '‰', // Per mille
};
let processed = html;
@@ -137,6 +149,65 @@ function sanitizeHTML(html: string): string {
return processed;
}
/**
 * Remove markup left behind by content pasted from ChatGPT.
 *
 * The clean-up runs as an ordered pipeline:
 *  1. drop every `class` attribute whose value mentions `react-scroll-to-bottom--css`
 *     (the whole attribute is removed, including any other classes it lists);
 *  2. drop `data-message-*` attributes together with the whitespace before them;
 *  3. squash every run of whitespace in the string (newlines and tabs included)
 *     into a single space;
 *  4. drop `class` attributes that are empty or whitespace-only.
 *
 * @param html - Raw HTML string to clean.
 * @returns The cleaned HTML string.
 */
function stripChatGPTArtifacts(html: string): string {
  // Order matters: whitespace is normalised before empty class attributes are removed.
  const cleanupSteps: Array<[RegExp, string]> = [
    [/class=["'][^"']*react-scroll-to-bottom--css[^"']*["']/gi, ''],
    [/\s+data-message-[^\s=]*=["'][^"']*["']/gi, ''],
    [/\s+/g, ' '],
    [/\s+class=["']\s*["']/gi, ''],
  ];

  return cleanupSteps.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    html,
  );
}
/**
 * Expand `[split_line_heading ...]...[/split_line_heading]` shortcodes into a split `<h2>`.
 *
 * The shortcode body is split on `|`; each trimmed piece becomes a
 * `<span class="line">`, with a `<span class="line-separator">` between
 * consecutive pieces. The body is inserted as-is (it may already contain HTML).
 *
 * Alignment comes from an `align="..."` attribute and defaults to `left`.
 * Exactly one alignment class (`text-left`, `text-center` or `text-right`) is
 * emitted for those three values; any other value adds no alignment class.
 * The wrapper no longer carries a hard-coded `text-center`, which previously
 * conflicted with (or duplicated) the requested alignment.
 *
 * @param html - HTML that may contain split_line_heading shortcodes.
 * @returns The HTML with every matched shortcode replaced by its markup.
 */
function processSplitLineHeading(html: string): string {
  const shortcodePattern = /\[split_line_heading([^\]]*)\]([\s\S]*?)\[\/split_line_heading\]/g;

  return html.replace(shortcodePattern, (_match: string, attrs: string, content: string) => {
    // Extract alignment from the shortcode attributes (defaults to left).
    const alignMatch = attrs.match(/align=["']([^"']*)["']/i);
    const align = alignMatch ? alignMatch[1] : 'left';

    // Only the three supported alignments map to a utility class.
    const classes = ['split-line-heading'];
    if (align === 'left' || align === 'center' || align === 'right') {
      classes.push(`text-${align}`);
    }

    // Body format: "Line 1|Line 2" or "Line 1|Line 2|Line 3".
    const lineSpans = content
      .split('|')
      .map((line) => `<span class="line">${line.trim()}</span>`)
      .join('<span class="line-separator"></span>');

    return `<div class="${classes.join(' ')}"><h2 class="split-heading">${lineSpans}</h2></div>`;
  });
}
/**
* Process WordPress shortcodes by converting them to HTML with proper styling
* Also handles mixed scenarios where some content is already HTML with WordPress classes
@@ -145,62 +216,104 @@ export function processShortcodes(html: string): string {
let processed = html;
try {
// Step 1: Convert any existing HTML with WordPress classes back to shortcode format
// This ensures we have a consistent format to work with
// Step 0: Decode HTML entities in shortcode attributes
processed = replaceHTMLEntities(processed);
// Handle vc_row and vc_row_inner
processed = processed.replace(/<div[^>]*class=["'][^"']*(?:vc-row|vc_row|vc_row_inner)[^"']*["'][^>]*>/gi, (match) => {
const attrs = extractAttributesFromHTML(match);
const isInner = match.includes('vc_row_inner') || match.includes('vc-row-inner');
return `[${isInner ? 'vc_row_inner' : 'vc_row'} ${attrs}]`;
});
// Step 0.5: Strip ChatGPT artifacts (react-scroll-*, data-message-*)
processed = stripChatGPTArtifacts(processed);
// Handle vc_column and vc_column_inner
processed = processed.replace(/<div[^>]*class=["'][^"']*(?:vc-column|vc_column|vc_column_inner)[^"']*["'][^>]*>/gi, (match) => {
const attrs = extractAttributesFromHTML(match);
const isInner = match.includes('vc_column_inner') || match.includes('vc-column-inner');
return `[${isInner ? 'vc_column_inner' : 'vc_column'} ${attrs}]`;
});
// Step 0.6: Process split_line_heading shortcode
processed = processSplitLineHeading(processed);
// Handle vc_column_text
processed = processed.replace(/<div[^>]*class=["'][^"']*(?:vc-column-text|vc_column_text)[^"']*["'][^>]*>/gi, (match) => {
const attrs = extractAttributesFromHTML(match);
return `[vc_column_text ${attrs}]`;
});
// Check if input has divs that need conversion
const hasDivs = /<div[^>]*class=["'][^"']*(?:vc-row|vc_row|vc_row_inner|vc-column|vc_column|vc_column_inner|vc-column-text|vc_column_text)[^"']*["'][^>]*>/i.test(processed);
const hasShortcodes = /\[(vc_row|vc_column|vc_column_text|vc_single_image|vc_btn|vc_separator|vc_video)/i.test(processed);
// Handle vc_single_image
processed = processed.replace(/<img[^>]*class=["'][^"']*(?:vc-single-image|vc_single_image)[^"']*["'][^>]*>/gi, (match) => {
const attrs = extractAttributesFromHTML(match);
const imageId = extractAttribute(attrs, 'data-wp-image-id') || extractAttribute(attrs, 'src');
const width = extractAttribute(attrs, 'data-width') || '';
return `[vc_single_image src="${imageId}" width="${width}"]`;
});
// Only convert divs to shortcodes if there are divs but no existing shortcodes
if (hasDivs && !hasShortcodes) {
// Use a stack-based approach to handle nested divs
const stack: string[] = [];
let result = '';
let i = 0;
// Handle vc_btn
processed = processed.replace(/<a[^>]*class=["'][^"']*(?:vc-btn|vc_btn)[^"']*["'][^>]*>(.*?)<\/a>/gi, (match, content) => {
const attrs = extractAttributesFromHTML(match);
const href = extractAttribute(attrs, 'href');
const title = content;
return `[vc_btn href="${href}" title="${title}"]`;
});
while (i < processed.length) {
// Check for opening div tags
const openDivMatch = processed.slice(i).match(/^<div[^>]*class=["'][^"']*(?:vc-row|vc_row|vc_row_inner|vc-column|vc_column|vc_column_inner|vc-column-text|vc_column_text)[^"']*["'][^>]*>/i);
if (openDivMatch) {
const attrs = extractAttributesFromHTML(openDivMatch[0]);
let tag: string;
if (openDivMatch[0].includes('vc_row_inner') || openDivMatch[0].includes('vc-row-inner')) {
tag = 'vc_row_inner';
} else if (openDivMatch[0].includes('vc-row') || openDivMatch[0].includes('vc_row')) {
tag = 'vc_row';
} else if (openDivMatch[0].includes('vc_column_inner') || openDivMatch[0].includes('vc-column-inner')) {
tag = 'vc_column_inner';
} else if (openDivMatch[0].includes('vc-column') || openDivMatch[0].includes('vc_column')) {
tag = 'vc_column';
} else if (openDivMatch[0].includes('vc-column-text') || openDivMatch[0].includes('vc_column_text')) {
tag = 'vc_column_text';
} else {
// Unknown tag, skip
result += openDivMatch[0];
i += openDivMatch[0].length;
continue;
}
stack.push(tag);
result += `[${tag} ${attrs}]`;
i += openDivMatch[0].length;
continue;
}
// Handle vc_separator
processed = processed.replace(/<hr[^>]*class=["'][^"']*(?:vc-separator|vc_separator)[^"']*["'][^>]*>/gi, (match) => {
const attrs = extractAttributesFromHTML(match);
return `[vc_separator ${attrs}]`;
});
// Check for closing div
if (processed.slice(i, i+6) === '</div>') {
if (stack.length > 0) {
const tag = stack.pop();
result += `[/${tag}]`;
i += 6;
continue;
}
}
// Handle closing div tags by looking for matching opening shortcode tags
// This is more complex, so we'll handle it carefully
processed = processed.replace(/<\/div>/gi, (match, offset) => {
const beforeContent = processed.substring(0, offset);
const lastOpenTag = beforeContent.match(/\[(vc_row(?:_inner)?|vc_column(?:_inner)?|vc_column_text)\s*[^\]]*\]$/i);
if (lastOpenTag) {
return `[/${lastOpenTag[1]}]`;
// Check for img tags (vc_single_image)
const imgMatch = processed.slice(i).match(/^<img[^>]*class=["'][^"']*(?:vc-single-image|vc_single_image)[^"']*["'][^>]*>/i);
if (imgMatch) {
const attrs = extractAttributesFromHTML(imgMatch[0]);
const imageId = extractAttribute(attrs, 'data-wp-image-id') || extractAttribute(attrs, 'src');
const width = extractAttribute(attrs, 'data-width') || '';
result += `[vc_single_image src="${imageId}" width="${width}"]`;
i += imgMatch[0].length;
continue;
}
// Check for anchor tags (vc_btn)
const anchorMatch = processed.slice(i).match(/^<a[^>]*class=["'][^"']*(?:vc-btn|vc_btn)[^"']*["'][^>]*>(.*?)<\/a>/i);
if (anchorMatch) {
const attrs = extractAttributesFromHTML(anchorMatch[0]);
const href = extractAttribute(attrs, 'href');
const title = anchorMatch[1]; // Content between tags
result += `[vc_btn href="${href}" title="${title}"]`;
i += anchorMatch[0].length;
continue;
}
// Check for hr tags (vc_separator)
const hrMatch = processed.slice(i).match(/^<hr[^>]*class=["'][^"']*(?:vc-separator|vc_separator)[^"']*["'][^>]*>/i);
if (hrMatch) {
const attrs = extractAttributesFromHTML(hrMatch[0]);
result += `[vc_separator ${attrs}]`;
i += hrMatch[0].length;
continue;
}
// Regular character
result += processed[i];
i++;
}
// If no matching shortcode, keep the div closing tag
return match;
});
processed = result;
}
// Step 2: Process shortcode blocks into HTML
processed = processVcRowShortcodes(processed);
@@ -219,11 +332,10 @@ export function processShortcodes(html: string): string {
}
// Clean up any remaining shortcode artifacts
// Only remove shortcodes that weren't processed
processed = processed.replace(/\[[^\]]*\]/g, '');
// Step 4: Clean up any remaining empty div tags
processed = processed.replace(/<div[^>]*>\s*<\/div>/g, '');
processed = processed.replace(/<div>\s*<\/div>/g, '');
return processed;
} catch (error) {
@@ -268,27 +380,28 @@ function processVcRowShortcodes(html: string): string {
const classes = ['vc-row', 'flex', 'flex-wrap', '-mx-4'];
// Parse attributes for background colors, images, etc.
const bgImage = extractAttribute(attrs, 'bg_image');
const bgColor = extractAttribute(attrs, 'bg_color');
const colorOverlay = extractAttribute(attrs, 'color_overlay');
const colorOverlay2 = extractAttribute(attrs, 'color_overlay_2');
const overlayStrength = extractAttribute(attrs, 'overlay_strength');
const enableGradient = extractAttribute(attrs, 'enable_gradient');
const gradientDirection = extractAttribute(attrs, 'gradient_direction');
const topPadding = extractAttribute(attrs, 'top_padding');
const bottomPadding = extractAttribute(attrs, 'bottom_padding');
const fullScreen = extractAttribute(attrs, 'full_screen_row_position');
const videoBg = extractAttribute(attrs, 'video_bg');
const videoMp4 = extractAttribute(attrs, 'video_mp4');
const videoWebm = extractAttribute(attrs, 'video_webm');
const textAlign = extractAttribute(attrs, 'text_align');
const textColor = extractAttribute(attrs, 'text_color');
// Support both snake_case (shortcode) and camelCase (from data attributes)
const bgImage = extractAttribute(attrs, 'bg_image') || extractAttribute(attrs, 'bgImage');
const bgColor = extractAttribute(attrs, 'bg_color') || extractAttribute(attrs, 'bgColor');
const colorOverlay = extractAttribute(attrs, 'color_overlay') || extractAttribute(attrs, 'colorOverlay');
const colorOverlay2 = extractAttribute(attrs, 'color_overlay_2') || extractAttribute(attrs, 'colorOverlay2');
const overlayStrength = extractAttribute(attrs, 'overlay_strength') || extractAttribute(attrs, 'overlayStrength');
const enableGradient = extractAttribute(attrs, 'enable_gradient') || extractAttribute(attrs, 'enableGradient');
const gradientDirection = extractAttribute(attrs, 'gradient_direction') || extractAttribute(attrs, 'gradientDirection');
const topPadding = extractAttribute(attrs, 'top_padding') || extractAttribute(attrs, 'topPadding');
const bottomPadding = extractAttribute(attrs, 'bottom_padding') || extractAttribute(attrs, 'bottomPadding');
const fullScreen = extractAttribute(attrs, 'full_screen_row_position') || extractAttribute(attrs, 'fullScreenRowPosition');
const videoBg = extractAttribute(attrs, 'video_bg') || extractAttribute(attrs, 'videoBg');
const videoMp4 = extractAttribute(attrs, 'video_mp4') || extractAttribute(attrs, 'videoMp4');
const videoWebm = extractAttribute(attrs, 'video_webm') || extractAttribute(attrs, 'videoWebm');
const textAlign = extractAttribute(attrs, 'text_align') || extractAttribute(attrs, 'textAlign');
const textColor = extractAttribute(attrs, 'text_color') || extractAttribute(attrs, 'textColor');
const overflow = extractAttribute(attrs, 'overflow');
const equalHeight = extractAttribute(attrs, 'equal_height');
const contentPlacement = extractAttribute(attrs, 'content_placement');
const columnDirection = extractAttribute(attrs, 'column_direction');
const rowBorderRadius = extractAttribute(attrs, 'row_border_radius');
const rowBorderRadiusApplies = extractAttribute(attrs, 'row_border_radius_applies');
const equalHeight = extractAttribute(attrs, 'equal_height') || extractAttribute(attrs, 'equalHeight');
const contentPlacement = extractAttribute(attrs, 'content_placement') || extractAttribute(attrs, 'contentPlacement');
const columnDirection = extractAttribute(attrs, 'column_direction') || extractAttribute(attrs, 'columnDirection');
const rowBorderRadius = extractAttribute(attrs, 'row_border_radius') || extractAttribute(attrs, 'rowBorderRadius');
const rowBorderRadiusApplies = extractAttribute(attrs, 'row_border_radius_applies') || extractAttribute(attrs, 'rowBorderRadiusApplies');
// Build style string
let style = '';
@@ -323,17 +436,30 @@ function processVcRowShortcodes(html: string): string {
wrapperClasses.push('rounded-none');
}
// Handle background image
// Handle background image - FIXED: Support both numeric IDs and local paths
if (bgImage) {
// Try to get media by ID first
const mediaId = parseInt(bgImage);
if (!isNaN(mediaId)) {
// This will be handled by ContentRenderer with data attributes
// Check if it's already a local path (from processed data)
if (bgImage.startsWith('/media/')) {
wrapperClasses.push('bg-cover', 'bg-center');
style += `background-image: url(/media/${bgImage}.webp); `;
} else {
// Assume it's a direct URL
style += `background-image: url(${bgImage}); `;
} else {
// Try to parse as numeric ID
const mediaId = parseInt(bgImage);
if (!isNaN(mediaId)) {
// Use getMediaById to get the actual file path
const media = getMediaById(mediaId);
if (media) {
wrapperClasses.push('bg-cover', 'bg-center');
style += `background-image: url(${media.localPath}); `;
} else {
// Fallback if media not found
wrapperClasses.push('bg-cover', 'bg-center');
style += `background-image: url(/media/${bgImage}.webp); `;
}
} else {
// Assume it's a direct URL
style += `background-image: url(${bgImage}); `;
}
}
style += `background-size: cover; `;
style += `background-position: center; `;
@@ -350,7 +476,7 @@ function processVcRowShortcodes(html: string): string {
wrapperClasses.push('relative', 'overflow-hidden');
style += `position: relative; `;
// Create video background structure
// Create video background structure with data attributes
const videoAttrs = [];
if (videoMp4) videoAttrs.push(`data-video-mp4="${videoMp4}"`);
if (videoWebm) videoAttrs.push(`data-video-webm="${videoWebm}"`);
@@ -361,8 +487,37 @@ function processVcRowShortcodes(html: string): string {
</div>`;
}
// Handle video attributes even if not using video_bg flag (enhanced preservation)
if (videoMp4 || videoWebm) {
// Add video attributes to wrapper for preservation
if (videoMp4) wrapperClasses.push(`has-video-mp4`);
if (videoWebm) wrapperClasses.push(`has-video-webm`);
// Store video URLs in data attributes for later use
const videoDataAttrs = [];
if (videoMp4) videoDataAttrs.push(`data-video-mp4="${videoMp4}"`);
if (videoWebm) videoDataAttrs.push(`data-video-webm="${videoWebm}"`);
// If there's no other special handling, just add the attributes to the div
if (!bgImage && !bgColor && !colorOverlay && !enableGradient) {
return `<div class="${wrapperClasses.join(' ')}" style="${style}" ${videoDataAttrs.join(' ')}>
<div class="relative flex flex-wrap -mx-4 w-full">${content}</div>
</div>`;
}
// For complex backgrounds with video, add video attrs to existing structure
// We'll handle this in the ContentRenderer
const existingHtml = `<div class="${wrapperClasses.join(' ')}" style="${style}">
<div class="relative flex flex-wrap -mx-4 w-full">${content}</div>
</div>`;
// Add video attributes as data attributes on the wrapper
return existingHtml.replace('<div class="', `<div ${videoDataAttrs.join(' ')} class="`);
}
// Handle color overlay (single or gradient)
if (colorOverlay || colorOverlay2 || enableGradient === 'true' || enableGradient === '1') {
const hasOverlay = colorOverlay || colorOverlay2 || enableGradient === 'true' || enableGradient === '1';
if (hasOverlay) {
style += `position: relative; `;
wrapperClasses.push('relative');
@@ -394,10 +549,11 @@ function processVcRowShortcodes(html: string): string {
overlayStyle = `background-color: ${colorOverlay}; opacity: ${opacity};`;
}
return `<div class="${wrapperClasses.join(' ')}" style="${style}">
const resultHtml = `<div class="${wrapperClasses.join(' ')}" style="${style}">
<div class="absolute inset-0" style="${overlayStyle}"></div>
<div class="relative flex flex-wrap -mx-4 w-full">${content}</div>
</div>`;
return resultHtml;
}
// Handle gradient (without overlay)
@@ -420,6 +576,11 @@ function processVcRowShortcodes(html: string): string {
wrapperClasses.push('min-h-screen', 'flex', 'items-center');
}
// Don't return empty divs
if (!content || content.trim() === '') {
return '';
}
return `<div class="${wrapperClasses.join(' ')}" style="${style}">${content}</div>`;
});
}
@@ -574,18 +735,43 @@ function processVcVideoShortcodes(html: string): string {
*/
function processBackgroundShortcodes(html: string): string {
  // Rewrites shortcode-style background attributes into data-* attributes.
  // Superseded statements (the digits-only bg_image handler and the untrimmed
  // video_mp4/video_webm handlers) have been dropped so that each attribute is
  // rewritten exactly once and the function is syntactically complete.

  // Trim the captured URL and strip one stray quote character from either end.
  const cleanUrl = (url: string): string => url.trim().replace(/^["']|["']$/g, '');

  let processed = html;

  // Background image: numeric IDs, local `/media/` paths and any other value
  // are all carried over unchanged into data-bg-image.
  // NOTE(review): numeric IDs are presumably resolved later by ContentRenderer — confirm.
  processed = processed.replace(
    /bg_image="([^"]+)"/g,
    (_match: string, imageValue: string) => `data-bg-image="${imageValue}"`,
  );

  // Video background flag and sources.
  processed = processed.replace(/video_bg="use_video"/g, 'data-video-bg="true"');
  processed = processed.replace(
    /video_mp4="([^"]+)"/g,
    (_match: string, url: string) => `data-video-mp4="${cleanUrl(url)}"`,
  );
  processed = processed.replace(
    /video_webm="([^"]+)"/g,
    (_match: string, url: string) => `data-video-webm="${cleanUrl(url)}"`,
  );

  // Parallax flag.
  processed = processed.replace(/parallax_bg="true"/g, 'data-parallax="true"');

  // Bare `mp4=` / `webm=` attributes (no video_bg flag). The required leading
  // whitespace keeps these from re-matching the data-video-* attributes above.
  processed = processed.replace(
    /\s+mp4="([^"]+)"/g,
    (_match: string, url: string) => ` data-video-mp4="${url}"`,
  );
  processed = processed.replace(
    /\s+webm="([^"]+)"/g,
    (_match: string, url: string) => ` data-video-webm="${url}"`,
  );

  return processed;
}