1262 lines
48 KiB
JavaScript
Executable File
1262 lines
48 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
||
|
||
/**
 * WordPress → Next.js Data Processing Pipeline (Enhanced)
 * Transforms raw WordPress data into Next.js compatible format
 * Includes bg_image ID resolution
 */
|
||
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
// Root of the pipeline's data folder (sibling of this script's directory).
const DATA_DIR = path.join(__dirname, '..', 'data');
// Raw exports are read from here; each export lives in its own sub-directory.
const RAW_DIR = path.join(DATA_DIR, 'raw');
// Processed output (and asset-map.json) lives here.
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');

// Create processed directory. With `recursive: true` mkdirSync does not throw
// when the directory already exists, so no separate existence check is needed.
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
|
||
|
||
/**
 * Find the latest export directory inside RAW_DIR.
 *
 * "Latest" means the directory whose name sorts last in default string order
 * (presumably the names are timestamps — verify against the exporter).
 *
 * @returns {string} Absolute path of the selected export directory.
 * @throws {Error} If RAW_DIR contains no sub-directories.
 */
function getLatestExportDir() {
  const exportDirs = fs
    .readdirSync(RAW_DIR)
    .filter((entry) => fs.statSync(path.join(RAW_DIR, entry)).isDirectory())
    .sort();

  // Fail with a clear message instead of letting path.join choke on undefined.
  if (exportDirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }

  return path.join(RAW_DIR, exportDirs[exportDirs.length - 1]);
}
|
||
|
||
/**
 * Load the media mapping used for ID resolution.
 *
 * Reads `media.json` from the latest export directory and builds a lookup from
 * media ID to local path (`/media/<filename>`).
 *
 * @returns {Object<string, string>} Map of media ID -> local path; empty when
 *   media.json is missing or does not contain an array.
 */
function loadMediaMapping() {
  const mediaJsonPath = path.join(getLatestExportDir(), 'media.json');

  if (!fs.existsSync(mediaJsonPath)) {
    console.warn('⚠️ No media.json found for ID resolution');
    return {};
  }

  const mediaData = JSON.parse(fs.readFileSync(mediaJsonPath, 'utf8'));
  if (!Array.isArray(mediaData)) {
    console.warn('⚠️ media.json does not contain an array; skipping ID resolution');
    return {};
  }

  const mapping = {};
  for (const item of mediaData) {
    // Entries without a filename would otherwise resolve to "/media/undefined";
    // leaving them unmapped keeps the original ID visible in the output.
    if (item && item.id && item.filename) {
      mapping[item.id] = `/media/${item.filename}`;
    }
  }

  return mapping;
}
|
||
|
||
/**
 * Load the asset map used for URL replacement.
 *
 * @returns {Object} Parsed contents of `asset-map.json` in PROCESSED_DIR, or an
 *   empty object when that file does not exist.
 */
function loadAssetMap() {
  const mapFile = path.join(PROCESSED_DIR, 'asset-map.json');

  return fs.existsSync(mapFile)
    ? JSON.parse(fs.readFileSync(mapFile, 'utf8'))
    : {};
}
|
||
|
||
// Quote styles a shortcode attribute value may be wrapped in, as regex source.
// "typographic" covers curly-quoted values, either still entity-encoded
// (&#8221; ... &#8243;) or already decoded to U+201D ... U+2033.
const MEDIA_ID_QUOTE_STYLES = {
  straight: { open: '"', close: '"' },
  typographic: { open: '(?:&#8221;|\\u201D)', close: '(?:&#8243;|\\u2033)' },
  single: { open: "'", close: "'" },
};

// Attributes holding a single numeric media ID, with the quote styles that are
// recognised for each of them (in the order they are applied).
const SINGLE_MEDIA_ID_ATTRIBUTES = [
  { name: 'bg_image', styles: ['straight', 'typographic', 'single'] },
  { name: 'layer_one_image', styles: ['straight', 'typographic'] },
  { name: 'image_url', styles: ['straight', 'typographic'] },
];

/**
 * Replace numeric media IDs in shortcode attributes with local paths.
 *
 * Handles `bg_image`, `layer_one_image` and `image_url` (one ID each) and the
 * comma-separated `images` list. Matching is case-insensitive; rewritten
 * attributes are always emitted lower-case with straight double quotes.
 * A single-ID attribute whose ID is not in the mapping is left untouched;
 * unknown or non-numeric entries of an `images` list are kept as-is.
 *
 * @param {string} html - Content containing shortcodes.
 * @param {Object<string, string>} mediaMapping - Media ID -> local path.
 * @returns {string} Content with resolvable IDs replaced; falsy input is
 *   returned unchanged.
 */
function replaceBgImageIds(html, mediaMapping) {
  if (!html) return html;

  const mapping = mediaMapping || {};
  let processed = html;

  for (const { name, styles } of SINGLE_MEDIA_ID_ATTRIBUTES) {
    for (const style of styles) {
      const { open, close } = MEDIA_ID_QUOTE_STYLES[style];
      const pattern = new RegExp(`${name}=${open}(\\d+)${close}`, 'gi');
      processed = processed.replace(pattern, (match, id) => {
        const localPath = mapping[id];
        return localPath ? `${name}="${localPath}"` : match;
      });
    }
  }

  // images="ID,ID,ID" (for vc_gallery): resolve each numeric entry separately.
  const resolveIdList = (idList) =>
    idList
      .split(',')
      .map((entry) => entry.trim())
      .map((entry) => (/^\d+$/.test(entry) && mapping[entry]) || entry)
      .join(',');
  const rewriteImages = (match, idList) => `images="${resolveIdList(idList)}"`;

  processed = processed.replace(/images="([^"]+)"/gi, rewriteImages);

  // Typographic-quote variant. The value may not contain quote characters or
  // entity markers, so a match can never run on into a following attribute.
  const { open, close } = MEDIA_ID_QUOTE_STYLES.typographic;
  processed = processed.replace(
    new RegExp(`images=${open}([^&#"\\u201D\\u2033]+)${close}`, 'gi'),
    rewriteImages
  );

  return processed;
}
|
||
|
||
/**
 * Replace URLs with local paths using the asset map.
 *
 * Every occurrence (case-insensitive) of each asset-map key is replaced by its
 * mapped value. Longer URLs are processed first so that a URL which is a
 * prefix of another one cannot corrupt the longer URL.
 *
 * @param {string} html - Content to rewrite.
 * @param {Object<string, string>} assetMap - Remote URL -> local path.
 * @returns {string} Rewritten content; falsy input is returned unchanged.
 */
function replaceUrlsWithLocalPaths(html, assetMap) {
  if (!html || !assetMap) return html;

  const urls = Object.keys(assetMap)
    // An empty key would compile to a regex that matches at every position.
    .filter((url) => url.length > 0)
    .sort((a, b) => b.length - a.length);

  let processed = html;
  for (const url of urls) {
    const localPath = assetMap[url];
    // Escape special regex characters in URL
    const escapedUrl = url.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // A function replacer inserts the path literally; a string replacement
    // would interpret sequences such as "$&" or "$1" inside the path.
    processed = processed.replace(new RegExp(escapedUrl, 'gi'), () => localPath);
  }

  return processed;
}
|
||
|
||
/**
 * Download a video file from an external URL into `public/media/videos`.
 *
 * The body is streamed to a temporary `<filename>.part` file and only renamed
 * to its final name after the transfer completed, so an interrupted or failed
 * download can never be mistaken for an existing video on a later run.
 * Redirects are not followed: any status other than 200 rejects.
 *
 * @param {string} url - HTTPS URL of the video.
 * @param {string} filename - File name to store the video under.
 * @returns {Promise<string>} Public path of the video (`/media/videos/<filename>`).
 */
async function downloadVideoFile(url, filename) {
  const https = require('https');
  const { pipeline } = require('stream');

  const videoDir = path.join(__dirname, '..', 'public', 'media', 'videos');
  fs.mkdirSync(videoDir, { recursive: true });

  const filePath = path.join(videoDir, filename);
  const publicPath = `/media/videos/${filename}`;

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Video already exists: ${filename}`);
    return publicPath;
  }

  const tempPath = `${filePath}.part`;

  return new Promise((resolve, reject) => {
    // Remove whatever was written so far, then report the failure.
    const fail = (err) => {
      fs.unlink(tempPath, () => {});
      reject(err);
    };

    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the response so the socket is released; nothing is written.
        res.resume();
        reject(new Error(`Failed to download video: ${res.statusCode}`));
        return;
      }

      // pipeline() reports errors from either stream and premature closes.
      pipeline(res, fs.createWriteStream(tempPath), (streamErr) => {
        if (streamErr) {
          fail(streamErr);
          return;
        }

        fs.rename(tempPath, filePath, (renameErr) => {
          if (renameErr) {
            fail(renameErr);
            return;
          }
          console.log(`✅ Downloaded video: ${filename}`);
          resolve(publicPath);
        });
      });
    }).on('error', fail);
  });
}
|
||
|
||
// Build a stable file name for a video URL so repeated pipeline runs reuse the
// already-downloaded file instead of fetching it again under a new name.
function videoFilenameForUrl(url, extension) {
  const { createHash } = require('crypto');
  const digest = createHash('sha1').update(url).digest('hex').slice(0, 16);
  return `video-${digest}.${extension}`;
}

/**
 * Extract and download video files from vc_row attributes.
 *
 * For every `[vc_row ...]` tag carrying `video_bg="use_video"`, the
 * `video_mp4` and `video_webm` URLs (in any order, either one optional) are
 * downloaded via downloadVideoFile and every occurrence of a successfully
 * downloaded URL in the content is replaced by its local path. A failed
 * download is logged and leaves that URL untouched without affecting others.
 *
 * @param {string} html - Content containing shortcodes.
 * @returns {Promise<{html: string, videoMap: Object<string, string>}>} The
 *   rewritten content and the map of remote URL -> local path.
 */
async function processVideoAttributes(html) {
  if (!html) return { html, videoMap: {} };

  const videoMap = {};
  const rowTags = html.match(/\[vc_row[^\]]*\]/gi) || [];

  for (const rowTag of rowTags) {
    if (!/video_bg="use_video"/i.test(rowTag)) continue;

    for (const extension of ['mp4', 'webm']) {
      const found = rowTag.match(new RegExp(`video_${extension}="([^"]*)"`, 'i'));
      const url = found ? found[1] : '';

      // Skip empty values, values without a scheme (e.g. paths that were
      // already localised) and URLs handled for an earlier row.
      if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(url)) continue;
      if (Object.prototype.hasOwnProperty.call(videoMap, url)) continue;

      try {
        videoMap[url] = await downloadVideoFile(url, videoFilenameForUrl(url, extension));
      } catch (error) {
        console.warn(`⚠️ Failed to download video files: ${error.message}`);
      }
    }
  }

  // Replace URLs in the HTML, longest first so a URL that is a prefix of
  // another one cannot corrupt the longer URL.
  let processed = html;
  const urls = Object.keys(videoMap).sort((a, b) => b.length - a.length);
  for (const url of urls) {
    const localPath = videoMap[url];
    const escapedUrl = url.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    processed = processed.replace(new RegExp(escapedUrl, 'gi'), () => localPath);
  }

  return { html: processed, videoMap };
}
|
||
|
||
// vc_row shortcode attributes that are preserved on the generated <div>, in
// output order. Each one becomes `data-<name with "_" replaced by "-">`.
const SANITIZED_ROW_ATTRIBUTES = [
  'bg_image',
  'video_bg',
  'video_mp4',
  'video_webm',
  'bg_color',
  'color_overlay',
  'overlay_strength',
  'enable_gradient',
  'gradient_direction',
  'color_overlay_2',
  'parallax_bg',
  'parallax_bg_speed',
  'bg_image_animation',
  'top_padding',
  'bottom_padding',
  'text_align',
  'text_color',
  'shape_type',
  'scene_position',
  'full_screen_row_position',
  'full_screen',
  'equal_height',
  'content_placement',
  'column_direction',
  'row_border_radius',
  'row_border_radius_applies',
];

// Read one double-quoted attribute from a shortcode tag. The leading \b stops
// e.g. `text_color` from matching inside `custom_text_color`.
function readSanitizedRowAttribute(tag, name) {
  const found = tag.match(new RegExp(`\\b${name}="([^"]*)"`, 'i'));
  return found ? found[1] : null;
}

// Turn one opening [vc_row ...] / [vc_row_inner ...] tag into its <div>.
function renderSanitizedRow(tag) {
  // Check for full-width classes
  const classes = ['vc-row'];
  if (tag.includes('full_width_background')) classes.push('full-width-bg');
  if (tag.includes('in_container')) classes.push('in-container');
  if (tag.includes('full_width_content')) classes.push('full-width-content');

  const dataAttrs = [];
  for (const name of SANITIZED_ROW_ATTRIBUTES) {
    let value = readSanitizedRowAttribute(tag, name);
    // Rows may carry the camelCase `bgImage` spelling instead of `bg_image`.
    if (name === 'bg_image' && !value) {
      value = readSanitizedRowAttribute(tag, 'bgImage');
    }
    // Empty values are dropped, as are attributes that are absent.
    if (value) dataAttrs.push(`data-${name.replace(/_/g, '-')}="${value}"`);
  }

  const dataAttrsString = dataAttrs.length > 0 ? ` ${dataAttrs.join(' ')}` : '';
  return `<div class="${classes.join(' ')}"${dataAttrsString}>`;
}

/**
 * HTML sanitization - preserve content but clean dangerous elements.
 *
 * Steps, in order:
 *  1. remove <script> elements and inline `on*=` event-handler attributes;
 *  2. convert vc_row / vc_row_inner to `<div class="vc-row ...">`, keeping the
 *     row attributes listed in SANITIZED_ROW_ATTRIBUTES as data-* attributes;
 *  3. convert vc_column / vc_column_inner to `<div class="vc-column">` and
 *     vc_column_text to `<div class="vc-text">`;
 *  4. unwrap [nectar_cta] blocks, drop every other shortcode;
 *  5. remove empty <p>/<div> elements and collapse whitespace.
 *
 * NOTE(review): this is regex-based clean-up, not a full HTML sanitizer.
 *
 * @param {string} html - Raw content HTML including shortcodes.
 * @returns {string} Sanitized HTML; '' for falsy input.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  let sanitized = html;

  // Remove script tags and inline handlers (security). Handlers are removed
  // whether their value is double-quoted, single-quoted or unquoted.
  sanitized = sanitized.replace(/<script.*?>.*?<\/script>/gis, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"']+)/gi, '');

  // Rows. Inner rows are rendered like regular rows; their closing tags are
  // converted as well so every opened <div> is closed again.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, (tag) => renderSanitizedRow(tag));
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');

  // Columns. The \b keeps [vc_column_text ...] out of this pattern.
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // Handle Nectar shortcodes - remove them but keep any text content
  // [nectar_cta] blocks often contain text we want to preserve
  sanitized = sanitized.replace(/\[nectar_cta[^\]]*\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar[^\]]*\]/gi, '');

  // Remove all remaining shortcodes
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure
  sanitized = sanitized.replace(/\s+/g, ' ').trim();

  return sanitized;
}
|
||
|
||
// Typographic characters normalised in excerpts, keyed by code point. The same
// table serves numeric entities (&#8221;) and the literal characters.
// NOTE(review): the low quotes (8218, 8222) map to ',' exactly as before;
// confirm that is intended for German-style opening quotes.
const EXCERPT_CHAR_REPLACEMENTS = new Map([
  [34, '"'],
  [39, "'"],
  [8211, '-'], // en dash
  [8212, '\u2014'], // em dash (kept)
  [8216, "'"], // left single quote
  [8217, "'"], // right single quote
  [8218, ','], // single low quote
  [8219, '`'], // single high reversed quote
  [8220, '"'], // left double quote
  [8221, '"'], // right double quote
  [8222, ','], // low double quote
  [8223, '"'], // high double quote
  [8226, '\u2022'], // bullet (kept)
  [8230, '\u2026'], // ellipsis (kept)
  [8242, "'"], // prime
  [8243, '"'], // double prime
  [8364, '\u20AC'], // euro (kept)
]);

// Named entities decoded in excerpts.
const EXCERPT_NAMED_ENTITIES = new Map([
  ['quot', '"'],
  ['apos', "'"],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['ndash', '-'],
  ['mdash', '\u2014'],
  ['hellip', '\u2026'],
  ['bull', '\u2022'],
  ['euro', '\u20AC'],
]);

// vc_row attributes copied to data-* attributes, in output order. Entries with
// `skipEmpty` are dropped when their value is empty; the others are emitted
// whenever the attribute is present.
const EXCERPT_ROW_ATTRIBUTES = [
  { attr: 'bg_image', data: 'data-bg-image', skipEmpty: true },
  { attr: 'bgImage', data: 'data-bg-image', skipEmpty: true },
  { attr: 'video_bg', data: 'data-video-bg', skipEmpty: true },
  { attr: 'video_mp4', data: 'data-video-mp4', skipEmpty: true },
  { attr: 'video_webm', data: 'data-video-webm', skipEmpty: true },
  ...[
    'bg_color',
    'color_overlay',
    'overlay_strength',
    'enable_gradient',
    'gradient_direction',
    'color_overlay_2',
    'parallax_bg',
    'parallax_bg_speed',
    'bg_image_animation',
    'top_padding',
    'bottom_padding',
    'text_align',
    'text_color',
    'shape_type',
    'scene_position',
    'full_screen_row_position',
    'full_screen',
    'equal_height',
    'content_placement',
    'column_direction',
    'row_border_radius',
    'row_border_radius_applies',
  ].map((attr) => ({ attr, data: `data-${attr.replace(/_/g, '-')}`, skipEmpty: false })),
];

// Column width fractions that are translated to `col-<n>-<d>` classes.
const EXCERPT_COLUMN_FRACTIONS = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

// Decode the quote/dash entities and typographic characters listed above.
function decodeExcerptEntities(text) {
  return text
    .replace(/&#(\d+);/g, (entity, code) => {
      const replacement = EXCERPT_CHAR_REPLACEMENTS.get(Number(code));
      return replacement === undefined ? entity : replacement;
    })
    .replace(/[\u2013\u2018-\u201F\u2032\u2033]/g, (ch) => EXCERPT_CHAR_REPLACEMENTS.get(ch.charCodeAt(0)))
    .replace(
      /&(quot|apos|lsquo|rsquo|ldquo|rdquo|ndash|mdash|hellip|bull|euro);/g,
      (entity, name) => EXCERPT_NAMED_ENTITIES.get(name)
    );
}

// Read one double-quoted attribute; \b prevents matching inside a longer name
// (e.g. `text` inside `link_text`). Returns null when the attribute is absent.
function readExcerptAttribute(attrs, name) {
  const found = attrs.match(new RegExp(`\\b${name}="([^"]*)"`, 'i'));
  return found ? found[1] : null;
}

// Render the opening <div> for a [vc_row ...] tag from its attribute string.
function renderExcerptRow(attrs) {
  const classes = ['vc-row'];
  if (attrs.includes('full_width_background')) classes.push('full-width-bg');
  if (attrs.includes('in_container')) classes.push('in-container');
  if (attrs.includes('full_width_content')) classes.push('full-width-content');

  const dataAttrs = [];
  for (const { attr, data, skipEmpty } of EXCERPT_ROW_ATTRIBUTES) {
    const value = readExcerptAttribute(attrs, attr);
    if (value === null || (skipEmpty && !value)) continue;
    dataAttrs.push(`${data}="${value}"`);
  }

  const dataAttrsString = dataAttrs.length > 0 ? ` ${dataAttrs.join(' ')}` : '';
  return `<div class="${classes.join(' ')}"${dataAttrsString}>`;
}

// Render the opening <div> for a [vc_column ...] tag from its attribute string.
function renderExcerptColumn(attrs) {
  const classes = ['vc-column'];
  for (const fraction of EXCERPT_COLUMN_FRACTIONS) {
    // Reject neighbouring digits/slashes so "1/12" is not read as "1/1".
    const pattern = new RegExp(`(?<![\\d/])${fraction}(?![\\d/])`);
    if (pattern.test(attrs)) classes.push(`col-${fraction.replace('/', '-')}`);
  }
  return `<div class="${classes.join(' ')}">`;
}

// Replace self-contained `[shortcode ...]` tags by markup built from the named
// attributes (in any order). Tags missing one of them are left for the generic
// shortcode removal.
function convertExcerptTag(input, shortcode, names, render) {
  const pattern = new RegExp(`\\[${shortcode}\\b([^\\]]*)\\]`, 'gi');
  return input.replace(pattern, (tag, attrs) => {
    const values = names.map((name) => readExcerptAttribute(attrs, name));
    return values.some((value) => value === null) ? tag : render(...values);
  });
}

/**
 * Process excerpts specifically to handle shortcodes comprehensively.
 *
 * Decodes typographic entities, converts the known WPBakery / Nectar
 * shortcodes to plain HTML, removes every remaining shortcode, drops empty
 * <p>/<div> elements and collapses whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML including shortcodes.
 * @returns {string} Processed excerpt; '' for falsy input.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // First, decode HTML entities to regular characters
  let processed = decodeExcerptEntities(excerptHtml);

  // vc_row - a row wrapped in its own <p> is handled first so the paragraph
  // is replaced together with the tag.
  processed = processed
    .replace(
      /<p>\[vc_row\b([^\]]*)\]<\/p>|\[vc_row\b([^\]]*)\]/gi,
      (match, wrappedAttrs, bareAttrs) =>
        renderExcerptRow(wrappedAttrs !== undefined ? wrappedAttrs : bareAttrs)
    )
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with classes
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) => renderExcerptColumn(attrs))
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // nectar_cta - convert to button
  processed = convertExcerptTag(processed, 'nectar_cta', ['link_text', 'url'],
    (linkText, url) => `<a href="${url}" class="nectar-cta">${linkText}</a>`);

  processed = processed
    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text\b[^\]]*\]([\s\S]*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$1</span>')

    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text\b[^\]]*\]([\s\S]*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$1</span>')

    // nectar_icon_list - convert to ul (\b keeps list items out of this one)
    .replace(/\[nectar_icon_list\b[^\]]*\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>');

  // nectar_icon_list_item - convert to li
  processed = convertExcerptTag(processed, 'nectar_icon_list_item', ['header', 'text'],
    (header, text) => `<li><strong>${header}</strong>: ${text}</li>`);

  // nectar_btn - convert to button
  processed = convertExcerptTag(processed, 'nectar_btn', ['text', 'url'],
    (text, url) => `<a href="${url}" class="nectar-btn">${text}</a>`);

  // split_line_heading - convert to heading
  processed = convertExcerptTag(processed, 'split_line_heading', ['text_content'],
    (text) => `<h2 class="split-line-heading">${text}</h2>`);

  processed = processed
    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner\b[^\]]*\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')

    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner\b[^\]]*\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider\b[^\]]*\]/gi, '<hr class="divider" />')

    // Placeholders for gallery / raw JS / map.
    // NOTE(review): the bracketed label is stripped by the generic shortcode
    // removal below and the then-empty <div> is removed by the clean-up, so
    // these elements currently vanish from the excerpt altogether.
    .replace(/\[vc_gallery\b[^\]]*\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\]([\s\S]*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap\b[^\]]*\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up any HTML that might be broken
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  processed = processed.replace(/\s+/g, ' ').trim();

  return processed;
}
|
||
|
||
/**
 * Extract a plain-text excerpt from content.
 *
 * Strips everything that looks like an HTML tag and truncates the remaining
 * text to `maxLength` characters, appending '...' when it was cut.
 *
 * @param {string} content - HTML content; null/undefined/'' yield ''.
 * @param {number} [maxLength=200] - Maximum text length before truncation.
 * @returns {string} The excerpt text.
 */
function generateExcerpt(content, maxLength = 200) {
  // Pages without content would otherwise crash on `.replace`.
  if (!content) return '';

  const text = String(content).replace(/<[^>]*>/g, '');
  return text.length <= maxLength ? text : `${text.substring(0, maxLength)}...`;
}
|
||
|
||
/**
 * Transform raw English and German pages into the processed page records.
 *
 * For every page the content and excerpt HTML are entity-decoded, passed
 * through `replaceBgImageIds` and `replaceUrlsWithLocalPaths`, and the content
 * is additionally passed through `processVideoAttributes` (awaited; its
 * `.html` result is used). English pages are emitted first, then German pages,
 * each in input order.
 *
 * @param {Array<Object>} pagesEN - Raw English pages.
 * @param {Array<Object>} pagesDE - Raw German pages.
 * @param {Object} translationMapping - Mapping whose `pages` property is keyed
 *   by slug and holds `{ de, en }` counterpart ids. A missing `pages` property
 *   is treated as "no translations".
 * @param {Object} mediaMapping - Passed through to `replaceBgImageIds`.
 * @param {Object} assetMap - Passed through to `replaceUrlsWithLocalPaths`.
 * @returns {Promise<Array<Object>>} Processed page records.
 */
async function processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap) {
    // Decimal entities that are normalised to plain punctuation instead of
    // being decoded to the typographic character they denote.
    const typographicEntities = new Map([
        [8221, '"'], // &#8221; right double quote
        [8220, '"'], // &#8220; left double quote
        [8217, "'"], // &#8217; right single quote
        [8216, "'"], // &#8216; left single quote
        [8211, '-'], // &#8211; en dash
        [8212, '\u2014'], // &#8212; em dash (kept as em dash)
        [8230, '\u2026'], // &#8230; ellipsis (kept as ellipsis)
        [8243, '"'], // &#8243; double prime
        [8242, "'"] // &#8242; prime
    ]);

    // Literal typographic characters normalised the same way.
    const typographicChars = {
        '\u201D': '"',
        '\u201C': '"',
        '\u2018': "'",
        '\u2019': "'",
        '\u2013': '-',
        '\u2033': '"',
        '\u2032': "'"
    };

    // Helper to decode HTML entities in content.
    // Order matters: known typographic entities first, then literal
    // characters, then named entities, and only then every remaining decimal
    // entity, so the generic pass cannot re-introduce curly quotes.
    const decodeContent = (html) => {
        if (!html) return html;

        return html
            .replace(/&#(\d+);/g, (match, digits) => {
                const codePoint = parseInt(digits, 10);
                return typographicEntities.has(codePoint) ? typographicEntities.get(codePoint) : match;
            })
            .replace(/[\u201C\u201D\u2018\u2019\u2013\u2033\u2032]/g, (ch) => typographicChars[ch])
            .replace(/&quot;/g, '"')
            .replace(/&apos;/g, "'")
            .replace(/&#(\d+);/g, (match, digits) => {
                const codePoint = parseInt(digits, 10);
                // fromCodePoint handles astral characters (emoji); values
                // beyond the Unicode range are left untouched.
                return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
            });
    };

    // Tolerate a mapping without a `pages` section (e.g. the file failed to load).
    const translations = (translationMapping && translationMapping.pages) || {};
    const findTranslation = (slug) =>
        Object.prototype.hasOwnProperty.call(translations, slug) ? translations[slug] : undefined;

    // Build one processed record; `otherLocale` is the locale of the counterpart page.
    const buildPage = async (page, locale, pathPrefix, otherLocale) => {
        const match = findTranslation(page.slug);

        // Decode HTML entities first, then replace bg_image IDs and URLs
        let contentHtml = decodeContent(page.contentHtml);
        contentHtml = replaceBgImageIds(contentHtml, mediaMapping);
        contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);

        // Process video attributes (the original comment notes this also downloads videos)
        const videoResult = await processVideoAttributes(contentHtml);
        contentHtml = videoResult.html;

        let excerptHtml = decodeContent(page.excerptHtml);
        excerptHtml = replaceBgImageIds(excerptHtml, mediaMapping);
        excerptHtml = replaceUrlsWithLocalPaths(excerptHtml, assetMap);

        return {
            id: page.id,
            translationKey: page.slug,
            locale,
            slug: page.slug,
            path: `${pathPrefix}/${page.slug}`,
            title: page.titleHtml.replace(/<[^>]*>/g, ''),
            titleHtml: page.titleHtml,
            contentHtml: sanitizeHTML(contentHtml),
            excerptHtml: processExcerptShortcodes(excerptHtml) || generateExcerpt(contentHtml),
            featuredImage: page.featuredImage,
            updatedAt: page.updatedAt,
            translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
        };
    };

    const processed = [];

    // Sequential on purpose: processVideoAttributes is awaited one page at a time.
    for (const page of pagesEN) {
        processed.push(await buildPage(page, 'en', '', 'de'));
    }
    for (const page of pagesDE) {
        processed.push(await buildPage(page, 'de', '/de', 'en'));
    }

    return processed;
}
|
||
|
||
/**
 * Transform raw English and German posts into the processed post records.
 *
 * For every post the content and excerpt HTML are entity-decoded, passed
 * through `replaceBgImageIds` and `replaceUrlsWithLocalPaths`, and the content
 * is additionally passed through `processVideoAttributes` (awaited; its
 * `.html` result is used). English posts are emitted first, then German posts,
 * each in input order.
 *
 * @param {Array<Object>} postsEN - Raw English posts.
 * @param {Array<Object>} postsDE - Raw German posts.
 * @param {Object} translationMapping - Mapping whose `posts` property is keyed
 *   by slug and holds `{ de, en }` counterpart ids. A missing `posts` property
 *   is treated as "no translations".
 * @param {Object} mediaMapping - Passed through to `replaceBgImageIds`.
 * @param {Object} assetMap - Passed through to `replaceUrlsWithLocalPaths`.
 * @returns {Promise<Array<Object>>} Processed post records.
 */
async function processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap) {
    // Decimal entities normalised to plain punctuation.
    const typographicEntities = new Map([
        [8221, '"'], // &#8221; right double quote
        [8243, '"'], // &#8243; double prime
        [8220, '"'], // &#8220; left double quote
        [8222, '"'], // &#8222; low double quote
        [8216, "'"], // &#8216; left single quote
        [8217, "'"], // &#8217; right single quote
        [8211, '-'], // &#8211; en dash
        [8212, '\u2014'], // &#8212; em dash (kept as em dash)
        [8230, '\u2026'] // &#8230; ellipsis (kept as ellipsis)
    ]);

    // NOTE(review): the original had a second group of four quote patterns
    // (right/left double, left/right single) whose entity spelling could not be
    // recovered; the named forms are assumed here — confirm against real data.
    const namedEntities = {
        '&rdquo;': '"',
        '&ldquo;': '"',
        '&lsquo;': "'",
        '&rsquo;': "'"
    };

    // Literal typographic characters normalised the same way.
    const typographicChars = {
        '\u201D': '"',
        '\u201C': '"',
        '\u2018': "'",
        '\u2019': "'",
        '\u2013': '-',
        '\u2033': '"',
        '\u2032': "'"
    };

    // Helper to decode HTML entities in content.
    // NOTE(review): unlike processPages there is no generic "decode every
    // remaining numeric entity" pass here; the original had none either.
    const decodeContent = (html) => {
        if (!html) return html;

        return html
            .replace(/&#(\d+);/g, (match, digits) => {
                const codePoint = parseInt(digits, 10);
                return typographicEntities.has(codePoint) ? typographicEntities.get(codePoint) : match;
            })
            .replace(/&(?:rdquo|ldquo|lsquo|rsquo);/g, (entity) => namedEntities[entity])
            .replace(/[\u201C\u201D\u2018\u2019\u2013\u2033\u2032]/g, (ch) => typographicChars[ch])
            .replace(/&quot;/g, '"')
            .replace(/&(?:apos|#39);/g, "'");
    };

    // Tolerate a mapping without a `posts` section (e.g. the file failed to load).
    const translations = (translationMapping && translationMapping.posts) || {};
    const findTranslation = (slug) =>
        Object.prototype.hasOwnProperty.call(translations, slug) ? translations[slug] : undefined;

    // Build one processed record; `otherLocale` is the locale of the counterpart post.
    const buildPost = async (post, locale, pathPrefix, otherLocale) => {
        const match = findTranslation(post.slug);

        // Decode HTML entities first, then replace bg_image IDs and URLs
        let contentHtml = decodeContent(post.contentHtml);
        contentHtml = replaceBgImageIds(contentHtml, mediaMapping);
        contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);

        // Process video attributes (the original comment notes this also downloads videos)
        const videoResult = await processVideoAttributes(contentHtml);
        contentHtml = videoResult.html;

        let excerptHtml = decodeContent(post.excerptHtml);
        excerptHtml = replaceBgImageIds(excerptHtml, mediaMapping);
        excerptHtml = replaceUrlsWithLocalPaths(excerptHtml, assetMap);

        return {
            id: post.id,
            translationKey: post.slug,
            locale,
            slug: post.slug,
            path: `${pathPrefix}/blog/${post.slug}`,
            title: post.titleHtml.replace(/<[^>]*>/g, ''),
            titleHtml: post.titleHtml,
            contentHtml: sanitizeHTML(contentHtml),
            excerptHtml: processExcerptShortcodes(excerptHtml) || generateExcerpt(contentHtml),
            featuredImage: post.featuredImage,
            datePublished: post.datePublished,
            updatedAt: post.updatedAt,
            translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
        };
    };

    const processed = [];

    // Sequential on purpose: processVideoAttributes is awaited one post at a time.
    for (const post of postsEN) {
        processed.push(await buildPost(post, 'en', '', 'de'));
    }
    for (const post of postsDE) {
        processed.push(await buildPost(post, 'de', '/de', 'en'));
    }

    return processed;
}
|
||
|
||
/**
 * Transform raw English and German products into the processed product records.
 *
 * English products are emitted first, then German products, each in input
 * order. `descriptionHtml` is passed through `sanitizeHTML`; every other field
 * is copied as-is.
 *
 * @param {Array<Object>} productsEN - Raw English products.
 * @param {Array<Object>} productsDE - Raw German products.
 * @param {Object} translationMapping - Mapping whose `products` property is
 *   keyed by slug and holds `{ de, en }` counterpart ids. A missing `products`
 *   property is treated as "no translations".
 * @returns {Array<Object>} Processed product records.
 */
function processProducts(productsEN, productsDE, translationMapping) {
    // Tolerate a mapping without a `products` section (e.g. the file failed to load).
    const translations = (translationMapping && translationMapping.products) || {};
    const findTranslation = (slug) =>
        Object.prototype.hasOwnProperty.call(translations, slug) ? translations[slug] : undefined;

    // Build one processed record; `otherLocale` is the locale of the counterpart product.
    const buildProduct = (product, locale, pathPrefix, otherLocale) => {
        const match = findTranslation(product.slug);

        return {
            id: product.id,
            translationKey: product.slug,
            locale,
            slug: product.slug,
            path: `${pathPrefix}/product/${product.slug}`,
            name: product.name,
            shortDescriptionHtml: product.shortDescriptionHtml,
            descriptionHtml: sanitizeHTML(product.descriptionHtml),
            images: product.images,
            featuredImage: product.featuredImage,
            sku: product.sku,
            regularPrice: product.regularPrice,
            salePrice: product.salePrice,
            currency: product.currency,
            stockStatus: product.stockStatus,
            categories: product.categories,
            attributes: product.attributes,
            variations: product.variations,
            updatedAt: product.updatedAt,
            translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
        };
    };

    return [
        ...productsEN.map((product) => buildProduct(product, 'en', '', 'de')),
        ...productsDE.map((product) => buildProduct(product, 'de', '/de', 'en'))
    ];
}
|
||
|
||
/**
 * Transform raw English and German product categories into processed records.
 *
 * English categories are emitted first, then German categories, each in input
 * order. All fields are copied as-is; only `path`, `locale` and `translation`
 * are derived.
 *
 * @param {Array<Object>} categoriesEN - Raw English categories.
 * @param {Array<Object>} categoriesDE - Raw German categories.
 * @param {Object} translationMapping - Mapping whose `productCategories`
 *   property is keyed by slug and holds `{ de, en }` counterpart ids. A missing
 *   `productCategories` property is treated as "no translations".
 * @returns {Array<Object>} Processed category records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
    // Tolerate a mapping without a `productCategories` section.
    const translations = (translationMapping && translationMapping.productCategories) || {};
    const findTranslation = (slug) =>
        Object.prototype.hasOwnProperty.call(translations, slug) ? translations[slug] : undefined;

    // Build one processed record; `otherLocale` is the locale of the counterpart category.
    const buildCategory = (category, locale, pathPrefix, otherLocale) => {
        const match = findTranslation(category.slug);

        return {
            id: category.id,
            translationKey: category.slug,
            locale,
            slug: category.slug,
            name: category.name,
            path: `${pathPrefix}/product-category/${category.slug}`,
            description: category.description,
            count: category.count,
            translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
        };
    };

    return [
        ...categoriesEN.map((category) => buildCategory(category, 'en', '', 'de')),
        ...categoriesDE.map((category) => buildCategory(category, 'de', '/de', 'en'))
    ];
}
|
||
|
||
/**
 * Build the media manifest from the raw media list.
 *
 * Each entry keeps the identifying fields of the raw item, gains a
 * `localPath` of `/media/<filename>`, and exposes the raw `mime_type` as
 * `mimeType`.
 *
 * @param {Array<Object>} media - Raw media items.
 * @returns {Array<Object>} Manifest entries; `[]` when `media` is not an array.
 */
function processMedia(media) {
    if (!Array.isArray(media)) return [];

    return media.map(({ id, filename, url, alt, width, height, mime_type: mimeType }) => ({
        id,
        filename,
        url,
        localPath: `/media/${filename}`,
        alt,
        width,
        height,
        mimeType
    }));
}
|
||
|
||
/**
 * Build a lookup from a media item's remote URL to its local `/media/` path.
 *
 * Items lacking a `url` or a `filename` are skipped, so the map never contains
 * a `/media/undefined` target.
 *
 * @param {Array<Object>} media - Raw media items.
 * @returns {Object<string, string>} Map of remote URL to local path; `{}` when
 *   `media` is not an array.
 */
function generateAssetMap(media) {
    const map = {};
    if (!Array.isArray(media)) return map;

    for (const item of media) {
        if (item && item.url && item.filename) {
            map[item.url] = `/media/${item.filename}`;
        }
    }

    return map;
}
|
||
|
||
/**
 * Main processing function.
 *
 * Reads the raw JSON files from the latest export directory, runs every
 * content type through its processor, writes the combined
 * `wordpress-data.json` plus one file per content type into `PROCESSED_DIR`,
 * and prints a summary to the console.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const exportDir = getLatestExportDir();
    console.log('🔄 Processing WordPress Data for Next.js (with bg_image support)');
    console.log('===============================================================\n');

    // Load media mapping and asset map
    // NOTE(review): loadAssetMap reads the asset-map.json written at the end of
    // a previous run, so URL replacement may see an empty map on a first run —
    // confirm whether another script is expected to create it beforehand.
    const mediaMapping = loadMediaMapping();
    const assetMap = loadAssetMap();

    console.log(`📊 Media mapping loaded: ${Object.keys(mediaMapping).length} IDs`);
    console.log(`🔗 Asset map loaded: ${Object.keys(assetMap).length} URLs\n`);

    // Load raw data. `fallback` must match the shape of the file (array vs
    // object) so that later property access does not fail on a missing file.
    const loadJSON = (file, fallback = []) => {
        try {
            return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
        } catch (e) {
            console.error(`❌ Failed to load ${file}:`, e.message);
            return fallback;
        }
    };

    const translationMapping = loadJSON('translation-mapping.json', {});
    const pagesEN = loadJSON('pages.en.json');
    const pagesDE = loadJSON('pages.de.json');
    const postsEN = loadJSON('posts.en.json');
    const postsDE = loadJSON('posts.de.json');
    const productsEN = loadJSON('products.en.json');
    const productsDE = loadJSON('products.de.json');
    const categoriesEN = loadJSON('product-categories.en.json');
    const categoriesDE = loadJSON('product-categories.de.json');
    const media = loadJSON('media.json');
    const redirects = loadJSON('redirects.json');
    const siteInfo = loadJSON('site-info.json', {});

    console.log('📊 Processing content types...\n');

    // Process each content type with bg_image resolution
    const pages = await processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap);
    const posts = await processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap);
    const products = processProducts(productsEN, productsDE, translationMapping);
    const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
    const processedMedia = processMedia(media);
    const finalAssetMap = generateAssetMap(media);

    // Create processed data structure
    const processedData = {
        site: {
            title: siteInfo.siteTitle,
            description: siteInfo.siteDescription,
            baseUrl: siteInfo.baseUrl,
            defaultLocale: siteInfo.defaultLocale || 'en',
            locales: ['en', 'de']
        },
        content: {
            pages,
            posts,
            products,
            categories
        },
        assets: {
            media: processedMedia,
            map: finalAssetMap
        },
        redirects,
        exportDate: new Date().toISOString()
    };

    // Save the combined file plus individual files for easier access
    const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
    const individualOutputs = [
        ['pages.json', pages],
        ['posts.json', posts],
        ['products.json', products],
        ['categories.json', categories],
        ['media.json', processedMedia],
        ['asset-map.json', finalAssetMap]
    ];

    fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));
    individualOutputs.forEach(([file, data]) => {
        fs.writeFileSync(path.join(PROCESSED_DIR, file), JSON.stringify(data, null, 2));
    });

    // Summary
    console.log('✅ Data Processing Complete\n');
    console.log('📦 Processed Content:');
    console.log(` Pages: ${pages.length} (with translations)`);
    console.log(` Posts: ${posts.length} (with translations)`);
    console.log(` Products: ${products.length} (with translations)`);
    console.log(` Categories: ${categories.length} (with translations)`);
    console.log(` Media: ${processedMedia.length} files`);
    console.log(` Redirects: ${redirects.length} rules\n`);

    console.log('📁 Output Files:');
    console.log(` ${outputPath}`);
    individualOutputs.forEach(([file], index) => {
        const isLast = index === individualOutputs.length - 1;
        console.log(` ${path.join(PROCESSED_DIR, file)}${isLast ? '\n' : ''}`);
    });

    // Check for bg_image replacements: a numeric ID still present in the
    // processed HTML means that ID was NOT replaced with a local path.
    const idPatterns = [
        ['bg_image', /bg_image="(\d+)"/g],
        ['layer_one_image', /layer_one_image="(\d+)"/g],
        ['image_url', /image_url="(\d+)"/g]
    ];
    const remaining = idPatterns.map(([label, pattern]) => {
        const count = [...pages, ...posts].reduce(
            (sum, item) => sum + ((item.contentHtml || '').match(pattern) || []).length,
            0
        );
        return [label, count];
    });
    const totalRemaining = remaining.reduce((sum, [, count]) => sum + count, 0);

    console.log('🖼️ bg_image Resolution:');
    if (totalRemaining > 0) {
        remaining.forEach(([label, count]) => {
            console.log(` ${label} IDs still unresolved: ${count}`);
        });
        console.warn(' ⚠️ Some numeric image IDs were not replaced with local paths\n');
    } else {
        console.log(' ✅ All bg_image IDs have been replaced with local paths\n');
    }

    // Sample data
    if (pages.length > 0) {
        console.log('📄 Sample Page:');
        console.log(` Title: ${pages[0].title}`);
        console.log(` Path: ${pages[0].path}`);
        console.log(` Locale: ${pages[0].locale}`);
        console.log(` Translation: ${pages[0].translation ? 'Yes' : 'No'}\n`);
    }

    if (posts.length > 0) {
        console.log('📝 Sample Post:');
        console.log(` Title: ${posts[0].title}`);
        console.log(` Path: ${posts[0].path}`);
        console.log(` Locale: ${posts[0].locale}`);
        console.log(` Date: ${posts[0].datePublished}\n`);
    }

    console.log('💡 Next: Ready for Next.js project setup!');
}
|
||
|
||
if (require.main === module) {
|
||
main().catch(console.error);
|
||
}
|
||
|
||
module.exports = {
|
||
processPages,
|
||
processPosts,
|
||
processProducts,
|
||
processProductCategories,
|
||
processMedia,
|
||
generateAssetMap,
|
||
replaceBgImageIds,
|
||
replaceUrlsWithLocalPaths,
|
||
processVideoAttributes
|
||
}; |