This commit is contained in:
2026-01-16 18:24:45 +01:00
parent 815c410092
commit 36e2a84a54
223 changed files with 2 additions and 272264 deletions

View File

@@ -1,240 +0,0 @@
#!/usr/bin/env node
/**
* WordPress Export Analysis Script
* Quickly analyzes exported data without loading large files entirely
*/
const fs = require('fs');
const path = require('path');
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
/**
 * Locate the newest export directory inside DATA_DIR.
 *
 * Sub-directory names are compared as plain strings, so the "latest" one is
 * the name that sorts last (export folders are named by timestamp).
 * Terminates the process with exit code 1 when DATA_DIR is missing or
 * contains no sub-directory.
 *
 * @returns {string} Path of the newest export directory.
 */
function getLatestExportDir() {
  const dataDirMissing = !fs.existsSync(DATA_DIR);
  if (dataDirMissing) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  // Keep only the entries that are directories.
  const exportNames = [];
  for (const entry of fs.readdirSync(DATA_DIR)) {
    const entryPath = path.join(DATA_DIR, entry);
    if (fs.statSync(entryPath).isDirectory()) {
      exportNames.push(entry);
    }
  }

  if (exportNames.length === 0) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Ascending string sort: the last element is the latest timestamp.
  exportNames.sort();
  const newest = exportNames[exportNames.length - 1];
  return path.join(DATA_DIR, newest);
}
/**
 * Summarise one exported JSON file.
 *
 * The file is read and parsed in full (it is not streamed).
 *
 * Shape handling:
 *  - top-level array: `count` is its length, `sample` its first items;
 *  - object with a `menus` array: the same, applied to `menus`;
 *  - any other object (e.g. site info, translation mapping): `count` is 0 and
 *    `sample` is the object itself, so callers can inspect its properties;
 *  - `null` or a primitive: `count` is 0 and `sample` is `[]`.
 *
 * @param {string} filePath - Path of the JSON file to inspect.
 * @param {number} [sampleSize=3] - Maximum number of items copied into `sample`.
 * @returns {null | {error: string} | {size: number, sizeHuman: string,
 *   lines: number, count: number, sample: *, isArray: boolean}}
 *   `null` when the file does not exist, `{ error }` when it is not valid JSON,
 *   otherwise the summary.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const content = fs.readFileSync(filePath, 'utf8');

  let data;
  try {
    data = JSON.parse(content);
  } catch (e) {
    return { error: 'Invalid JSON' };
  }

  const isArray = Array.isArray(data);
  // `typeof null === 'object'`, so null has to be excluded explicitly.
  const isObject = !isArray && data !== null && typeof data === 'object';

  // The list that is counted and sampled, when the file has one.
  let items = null;
  if (isArray) {
    items = data;
  } else if (isObject && Array.isArray(data.menus)) {
    items = data.menus;
  }

  let count = 0;
  let sample = [];
  if (items) {
    count = items.length;
    sample = items.slice(0, sampleSize);
  } else if (isObject) {
    // Single-object files have nothing to count; expose the object itself.
    sample = data;
  }

  return {
    size: stats.size,
    sizeHuman: `${(stats.size / 1024).toFixed(1)} KB`,
    lines: content.split('\n').length,
    count,
    sample,
    isArray
  };
}
/**
 * Print a report for the most recent WordPress export: per-type content
 * counts, translation pairs, file sizes, one sample item per content type,
 * data-quality checks and follow-up recommendations.
 *
 * Everything is written to stdout; nothing is returned.
 */
function main() {
  const exportDir = getLatestExportDir();

  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const exportFiles = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json'
  ];

  // Analyse every known export file; files that do not exist are left out.
  const results = {};
  for (const name of exportFiles) {
    const report = analyzeFile(path.join(exportDir, name), 2);
    if (report) {
      results[name] = report;
    }
  }

  // Accessors that tolerate files missing from `results`.
  const countOf = (name) => results[name]?.count || 0;
  const firstSampleOf = (name) => results[name]?.sample?.[0];

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  // Content counts
  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  console.log(` Pages: EN: ${pagesEN} | DE: ${pagesDE} | Total: ${pagesEN + pagesDE}`);
  console.log(` Posts: EN: ${postsEN} | DE: ${postsDE} | Total: ${postsEN + postsDE}`);
  console.log(` Products: EN: ${productsEN} | DE: ${productsDE} | Total: ${productsEN + productsDE}`);
  console.log(` Categories: EN: ${categoriesEN} | DE: ${categoriesDE} | Total: ${categoriesEN + categoriesDE}`);
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping: use the first sample entry when there is one,
  // otherwise the sample value itself.
  const mappingSample = results['translation-mapping.json']?.sample;
  const translation = mappingSample?.[0] || mappingSample;
  if (translation) {
    const pairsIn = (section) => Object.keys(translation[section] || {}).length;
    const pagePairs = pairsIn('pages');
    const postPairs = pairsIn('posts');
    const productPairs = pairsIn('products');
    const categoryPairs = pairsIn('productCategories');
    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes
  console.log('💾 File Sizes:');
  for (const [file, data] of Object.entries(results)) {
    console.log(` ${file.padEnd(30)} ${data.sizeHuman}`);
  }

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');

  const page = firstSampleOf('pages.en.json');
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }

  const post = firstSampleOf('posts.en.json');
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }

  const product = firstSampleOf('products.en.json');
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }

  const menu = firstSampleOf('menus.en.json');
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks
  console.log('\n✅ Data Quality Checks:');
  const bothLanguagesPresent = pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0;
  const translationPairsExist =
    mappingSample && Object.keys(mappingSample.pages || {}).length > 0;
  const siteInfoComplete = results['site-info.json']?.sample?.siteTitle !== undefined;

  const checks = [
    { name: 'Both languages present', pass: bothLanguagesPresent },
    { name: 'Translation pairs exist', pass: translationPairsExist },
    { name: 'Media files downloaded', pass: media > 0 },
    { name: 'Redirects generated', pass: redirects > 0 },
    { name: 'Site info complete', pass: siteInfoComplete }
  ];
  for (const check of checks) {
    console.log(` ${check.pass ? '✅' : '❌'} ${check.name}`);
  }

  // Recommendations
  console.log('\n💡 Recommendations:');
  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }
  if (mappingSample && Object.keys(mappingSample.posts || {}).length === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }
  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  console.log(' 1. Review sample data above for accuracy');
  console.log(' 2. Check translation mapping for completeness');
  console.log(' 3. Verify media files are properly named');
  console.log(' 4. Proceed to Next.js data processing');
}

// Only run the report when this file is executed directly.
if (require.main === module) {
  main();
}

View File

@@ -1,75 +0,0 @@
const pages = require('../data/processed/pages.json');
const cheerio = require('cheerio');
/**
 * Look up a processed page by slug and parse its HTML with cheerio.
 *
 * When the slug is not present in the processed data, an error is logged,
 * the process exit code is set to 1 and `null` is returned, so the caller can
 * skip that section instead of crashing on `undefined.contentHtml`.
 *
 * @param {string} slug - Page slug to look up in `pages`.
 * @returns {Function|null} cheerio root function for the page, or null.
 */
function loadPageDom(slug) {
  const page = pages.find(p => p.slug === slug);
  if (!page) {
    console.error(`❌ Page not found in processed data: ${slug}`);
    process.exitCode = 1;
    return null;
  }
  // A page without content is analysed as an empty document.
  return cheerio.load(page.contentHtml || '');
}

// Get home page (corporate-3-landing-2)
const $ = loadPageDom('corporate-3-landing-2');
console.log('=== HOME PAGE (corporate-3-landing-2) DETAILED ANALYSIS ===\n');

if ($) {
  // Analyze each vc-row
  $('.vc-row').each((i, row) => {
    const $row = $(row);
    const $cols = $row.find('> .vc-column');
    const colCount = $cols.length;
    console.log(`Row ${i + 1}:`);
    console.log(` Columns: ${colCount}`);
    console.log(` Classes: ${$row.attr('class')}`);

    // Check for specific patterns
    const hasH1 = $row.find('h1').length > 0;
    const hasH2 = $row.find('h2').length > 0;
    const hasH3 = $row.find('h3').length > 0;
    const hasH4 = $row.find('h4').length > 0;
    const hasH6 = $row.find('h6').length > 0;
    const hasP = $row.find('p').length > 0;
    const hasImg = $row.find('img').length > 0;
    // Count of nested rows; 0 is falsy, so it doubles as the "has nested" flag.
    const hasNested = $row.find('.vc-row').length;

    // H1-H4 text is previewed (first 50 characters); H6 text is printed in full.
    if (hasH1) console.log(` Has H1: ${$row.find('h1').text().substring(0, 50)}...`);
    if (hasH2) console.log(` Has H2: ${$row.find('h2').text().substring(0, 50)}...`);
    if (hasH3) console.log(` Has H3: ${$row.find('h3').text().substring(0, 50)}...`);
    if (hasH4) console.log(` Has H4: ${$row.find('h4').text().substring(0, 50)}...`);
    if (hasH6) console.log(` Has H6: ${$row.find('h6').text()}`);
    if (hasP) console.log(` Has P: ${$row.find('p').length} paragraphs`);
    if (hasImg) console.log(` Has Images: ${hasImg}`);
    if (hasNested) console.log(` Has Nested Rows: ${hasNested}`);

    // Check column structure
    if (colCount > 0) {
      $cols.each((j, col) => {
        const $col = $(col);
        const colClasses = $col.attr('class') || '';
        const colH3 = $col.find('h3').text().trim();
        const colH4 = $col.find('h4').text().trim();
        const colH6 = $col.find('h6').text().trim();
        const colP = $col.find('p').text().trim().substring(0, 30);
        console.log(` Column ${j + 1}: ${colClasses}`);
        if (colH3) console.log(` H3: ${colH3}`);
        if (colH4) console.log(` H4: ${colH4}`);
        if (colH6) console.log(` H6: ${colH6}`);
        if (colP) console.log(` P: ${colP}...`);
      });
    }
    console.log('');
  });
}

// Also check team page for testimonials
console.log('\n=== TEAM PAGE TESTIMONIALS ANALYSIS ===\n');
const $team = loadPageDom('team');

if ($team) {
  $team('.vc-row').each((i, row) => {
    const $row = $team(row);
    const text = $row.text();
    const hasQuotes = text.includes('„') || text.includes('“');
    // Rows are reported when they contain German-style quotation marks or one
    // of the two keywords.
    if (hasQuotes || text.includes('Expertise') || text.includes('Experience')) {
      console.log(`Row ${i + 1}:`);
      console.log(` Content: ${text.substring(0, 100)}...`);
      console.log(` Has quotes: ${hasQuotes}`);
      console.log('');
    }
  });
}

View File

@@ -1,122 +0,0 @@
const pages = require('../data/processed/pages.json');
const cheerio = require('cheerio');
// --- Helpers -----------------------------------------------------------

// Number of elements matching `selector` for which `test(el)` is truthy.
const countWhere = ($, selector, test) => $(selector).filter((i, el) => test(el)).length;

// True when `text` contains at least one of the given substrings.
const includesAny = (text, needles) => needles.some((needle) => text.includes(needle));

// "key=value, key=value" for every pattern whose value is greater than zero.
const formatPatterns = (patterns) =>
  Object.entries(patterns)
    .filter(([, value]) => value > 0)
    .map(([key, value]) => `${key}=${value}`)
    .join(', ');

// --- Analyze each page ---------------------------------------------------

const analysis = pages.map((page) => {
  const html = page.contentHtml || '';
  const $ = cheerio.load(html);
  const textOf = (el) => $(el).text();
  const classOf = (el) => $(el).attr('class') || '';
  const styleOf = (el) => $(el).attr('style') || '';

  const patterns = {
    vcRows: $('.vc-row').length,
    vcColumns: $('.vc-column').length,
    hasHero: $('.vc-row h1, .vc-row h2').length > 0,
    hasCards: $('.vc-row .vc-column h3, .vc-row .vc-column h4').length >= 2,
    hasNumberedFeatures: $('.vc-row h6').length > 0,
    hasForm: $('.frm_forms').length > 0 || $('form').length > 0,
    hasGrid: $('.vc-row > .vc-column').length >= 2,
    hasImages: $('img').length,
    hasLinks: $('a').length,
    hasTables: $('table').length,
    hasLists: $('ul, ol').length,
    hasTestimonials: countWhere($, '.vc-row', (el) =>
      includesAny(textOf(el), ['„', '“', 'Meet the team'])),
    hasAnimations: countWhere($, '.vc-row', (el) =>
      includesAny(classOf(el), ['nectar', 'animation', 'fade'])),
    hasSpecialColumns: countWhere($, '.vc-row > .vc-column', (el) =>
      includesAny(classOf(el), ['vc_col-md-', 'vc_col-lg-'])),
    hasNestedRows: $('.vc-row .vc-row').length,
    hasBackgrounds: countWhere($, '.vc-row', (el) =>
      styleOf(el).includes('background') || includesAny(classOf(el), ['bg-', 'full-width'])),
    hasQuotes: countWhere($, 'blockquote, h2', (el) =>
      includesAny(textOf(el), ['„', '“', 'Expertise', 'Experience'])),
    hasPDFs: $('a[href$=".pdf"]').length,
    hasContactInfo: countWhere($, '.vc-row', (el) =>
      includesAny(textOf(el), ['@', 'Raiffeisenstraße', 'KLZ Cables']))
  };

  return {
    slug: page.slug,
    locale: page.locale,
    translationKey: page.translationKey,
    title: page.title,
    patterns: patterns,
    rawHtml: html.substring(0, 200) + '...'
  };
});

// --- Print detailed analysis ---------------------------------------------

console.log('=== DETAILED PAGE ANALYSIS ===\n');
for (const entry of analysis) {
  console.log(`📄 ${entry.locale.toUpperCase()}: ${entry.slug} (${entry.title})`);
  console.log(` Translation Key: ${entry.translationKey}`);
  console.log(' Patterns Found:');
  for (const [key, value] of Object.entries(entry.patterns)) {
    // Booleans take part too: `true > 0` holds, `false > 0` does not.
    if (value > 0) {
      console.log(` - ${key}: ${value}`);
    }
  }
  console.log('');
}

// --- Summary by translation key ------------------------------------------

console.log('=== SUMMARY BY TRANSLATION KEY ===\n');
const byKey = {};
for (const entry of analysis) {
  const key = entry.translationKey;
  if (!byKey[key]) {
    byKey[key] = [];
  }
  byKey[key].push(entry);
}
for (const key of Object.keys(byKey).sort()) {
  console.log(`${key}:`);
  for (const member of byKey[key]) {
    console.log(` ${member.locale}: ${member.slug} [${formatPatterns(member.patterns)}]`);
  }
  console.log('');
}

// --- Priority analysis ---------------------------------------------------

console.log('=== PRIORITY PAGES ANALYSIS ===\n');
const priority = {
  'Home': ['corporate-3-landing-2', 'start'],
  'Contact': ['contact', 'kontakt'],
  'About/Legal/Privacy': ['legal-notice', 'impressum', 'privacy-policy', 'datenschutz', 'terms', 'agbs'],
  'Team': ['team'],
  'Products': ['products', 'produkte'],
  'Blog': ['blog'],
  'Thanks': ['thanks', 'danke']
};
for (const [category, slugs] of Object.entries(priority)) {
  console.log(`${category}:`);
  for (const slug of slugs) {
    // Only the first analysed page with this slug is reported.
    const match = analysis.find((entry) => entry.slug === slug);
    if (match) {
      console.log(` ${match.locale}/${match.slug}: ${formatPatterns(match.patterns) || 'No patterns'}`);
    }
  }
  console.log('');
}

View File

@@ -1,194 +0,0 @@
#!/usr/bin/env node
/**
* Script to check WooCommerce product attributes for high-voltage cables
* This will query the API directly to see if attributes exist but weren't captured
*/
const https = require('https');
const path = require('path');
require('dotenv').config();
// WooCommerce REST API connection settings, taken from environment variables:
//   url    - site base URL (the `/wp-json/wc/v3` prefix is appended per request)
//   key    - API consumer key
//   secret - API consumer secret
// Any of them is `undefined` when the corresponding variable is not set.
const CONFIG = {
url: process.env.WOOCOMMERCE_URL,
key: process.env.WOOCOMMERCE_CONSUMER_KEY,
secret: process.env.WOOCOMMERCE_CONSUMER_SECRET
};
// IDs of the high-voltage products that are missing attributes; these are the
// products this script queries.
const HIGH_VOLTAGE_IDS = [46773, 46771, 46769, 46767, 46765, 46763, 46761];
/**
 * Build the HTTP Basic `Authorization` header value from the consumer key and
 * secret held in CONFIG.
 *
 * @returns {string} `Basic <base64 of "key:secret">`.
 */
function buildAuthHeader() {
  const { key, secret } = CONFIG;
  const encoded = Buffer.from(`${key}:${secret}`).toString('base64');
  return 'Basic ' + encoded;
}
/**
 * GET a WooCommerce REST API (`/wp-json/wc/v3`) endpoint.
 *
 * @param {string} endpoint - Path below `/wp-json/wc/v3`, e.g. `/products/42`.
 * @returns {Promise<*>} Resolves with the parsed JSON body for 2xx responses,
 *   or with the raw body text when it is not valid JSON. Rejects with
 *   `Error("HTTP <status>: <body>")` for any other status, and with the
 *   underlying error on request/response failures.
 */
function makeRequest(endpoint) {
  return new Promise((resolve, reject) => {
    const url = `${CONFIG.url}/wp-json/wc/v3${endpoint}`;
    const options = {
      headers: {
        'Authorization': buildAuthHeader(),
        'Content-Type': 'application/json',
        'User-Agent': 'KLZ-Attribute-Checker/1.0'
      }
    };
    console.log(`🌐 Fetching: ${endpoint}`);
    https
      .get(url, options, (res) => {
        // Decode as a UTF-8 stream. Appending raw Buffer chunks to a string
        // decodes each chunk on its own and corrupts multi-byte characters
        // that are split across chunk boundaries.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          if (res.statusCode >= 200 && res.statusCode < 300) {
            try {
              resolve(JSON.parse(data));
            } catch (e) {
              // Not JSON: hand the raw text back to the caller.
              resolve(data);
            }
          } else {
            reject(new Error(`HTTP ${res.statusCode}: ${data}`));
          }
        });
      })
      .on('error', reject);
  });
}
/**
 * Query every product in HIGH_VOLTAGE_IDS and report whether it has
 * attributes.
 *
 * Products are requested one at a time with a 200 ms pause after each. For a
 * variable product without attributes, the first variation is fetched as well
 * and a warning is printed when that variation does have attributes. A failed
 * request is recorded as an entry with an `error` message instead of aborting
 * the run. The collected results are printed as a summary and written to
 * `../data/attribute-check-results.json` relative to this script.
 *
 * @returns {Promise<Array<Object>>} One result entry per product ID.
 */
async function checkProductAttributes() {
  console.log('🔍 Checking WooCommerce Product Attributes\n');
  console.log('Target URL:', CONFIG.url);
  console.log('Products to check:', HIGH_VOLTAGE_IDS.length);
  console.log('');

  const pause = (ms) => new Promise((done) => setTimeout(done, ms));
  const results = [];

  for (const productId of HIGH_VOLTAGE_IDS) {
    try {
      const product = await makeRequest(`/products/${productId}`);
      const attributes = product.attributes;
      const variations = product.variations;

      console.log(`\n📦 Product ID: ${productId}`);
      console.log(`Name: ${product.name}`);
      console.log(`SKU: ${product.sku}`);
      console.log(`Type: ${product.type}`);

      if (attributes && attributes.length > 0) {
        console.log(`✅ Attributes found: ${attributes.length}`);
        // Show sample attributes
        for (const attr of attributes.slice(0, 5)) {
          console.log(` - ${attr.name}: ${attr.options?.length || 0} options`);
        }
        const notShown = attributes.length - 5;
        if (notShown > 0) {
          console.log(` ... and ${notShown} more`);
        }
        results.push({
          id: productId,
          name: product.name,
          hasAttributes: true,
          count: attributes.length,
          attributes: attributes
        });
      } else {
        console.log(`❌ No attributes found`);
        // A variable product may carry its attributes on the variations.
        const isVariable = product.type === 'variable';
        if (isVariable && variations && variations.length > 0) {
          console.log(` Variable product with ${variations.length} variations`);
          // Check first variation for attributes
          const firstVar = await makeRequest(`/products/${productId}/variations/${variations[0]}`);
          if (firstVar.attributes && firstVar.attributes.length > 0) {
            console.log(`⚠️ Variations have attributes, but parent product doesn't`);
          }
        }
        results.push({
          id: productId,
          name: product.name,
          hasAttributes: false,
          count: 0,
          attributes: []
        });
      }

      // Also check product categories
      const categories = product.categories;
      if (categories && categories.length > 0) {
        console.log(`Categories: ${categories.map((c) => c.name).join(', ')}`);
      }
    } catch (error) {
      console.log(`❌ Error fetching product ${productId}: ${error.message}`);
      results.push({
        id: productId,
        name: 'Unknown',
        hasAttributes: false,
        count: 0,
        error: error.message
      });
    }

    // Rate limiting
    await pause(200);
  }

  // Summary
  const rule = '='.repeat(60);
  console.log('\n' + rule);
  console.log('📊 SUMMARY');
  console.log(rule);

  const withAttrs = [];
  const withoutAttrs = [];
  for (const entry of results) {
    (entry.hasAttributes ? withAttrs : withoutAttrs).push(entry);
  }

  console.log(`Products checked: ${results.length}`);
  console.log(`✅ With attributes: ${withAttrs.length}`);
  console.log(`❌ Without attributes: ${withoutAttrs.length}`);

  if (withAttrs.length > 0) {
    console.log('\nProducts WITH attributes:');
    for (const p of withAttrs) {
      console.log(` - ${p.name} (${p.count} attributes)`);
    }
  }
  if (withoutAttrs.length > 0) {
    console.log('\nProducts WITHOUT attributes:');
    for (const p of withoutAttrs) {
      const errorNote = p.error ? ` (Error: ${p.error})` : '';
      console.log(` - ${p.name}${errorNote}`);
    }
  }

  // Save detailed results
  const fs = require('fs');
  const outputPath = path.join(__dirname, '..', 'data', 'attribute-check-results.json');
  fs.writeFileSync(outputPath, JSON.stringify(results, null, 2));
  console.log(`\n💾 Detailed results saved to: ${outputPath}`);

  return results;
}
// Command-line entry point: runs only when this file is executed directly,
// not when it is required by another module.
if (require.main === module) {
  const credentialsMissing = [CONFIG.url, CONFIG.key, CONFIG.secret].some((value) => !value);
  if (credentialsMissing) {
    console.error('❌ Missing WooCommerce credentials in environment variables');
    process.exit(1);
  }

  const reportSuccess = () => {
    console.log('\n✅ Attribute check complete');
    process.exit(0);
  };
  const reportFailure = (error) => {
    console.error('\n❌ Attribute check failed:', error.message);
    process.exit(1);
  };

  checkProductAttributes().then(reportSuccess).catch(reportFailure);
}

module.exports = { checkProductAttributes };

View File

@@ -1,58 +0,0 @@
#!/usr/bin/env node
// Debug what entities are actually in the raw data.
//
// NOTE(review): several regex literals in this script had lost their
// character and read `//g`, which JavaScript parses as the start of a line
// comment, so the script did not even parse. The missing characters are
// written below as \u escapes (U+2013, U+2018, U+2019, U+2032), chosen to
// match the character class used in the Unicode check. The "current function"
// patterns are written as numeric entities, which is what the inline comment
// on the first of them implies. Confirm both against the real decoder.
const rawExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none”…</p>';

console.log('=== Raw Data Analysis ===');
console.log('Original excerpt:');
console.log(rawExcerpt);

console.log('\n=== Entity Analysis ===');
// Check for numeric entities
const numericEntities = rawExcerpt.match(/&#\d+;/g);
console.log('Numeric entities found:', numericEntities);
// Check for Unicode characters
const unicodeChars = rawExcerpt.match(/[”“‘’–—″′]/g);
console.log('Unicode characters found:', unicodeChars);

// Test what each numeric entity represents
if (numericEntities) {
  console.log('\n=== Numeric Entity Decoding ===');
  const uniqueEntities = [...new Set(numericEntities)];
  uniqueEntities.forEach(entity => {
    const code = Number.parseInt(entity.replace(/[&#;]/g, ''), 10);
    // fromCodePoint also handles code points above 0xFFFF.
    const char = String.fromCodePoint(code);
    console.log(`${entity} (code ${code}) → "${char}"`);
  });
}

// Test manual decoding: typographic characters replaced with ASCII ones.
console.log('\n=== Manual Decoding Test ===');
const decoded = rawExcerpt
  .replace(/”/g, '"')
  .replace(/“/g, '"')
  .replace(/\u2013/g, '-') // en dash
  .replace(/—/g, '—')
  .replace(/\u2018/g, "'") // left single quotation mark
  .replace(/\u2019/g, "'") // right single quotation mark
  .replace(/″/g, '"')
  .replace(/\u2032/g, "'") // prime
  .replace(/…/g, '…');
console.log('After manual decoding:');
console.log(decoded);

// Test the current function approach: it looks for numeric entities, which the
// raw excerpt does not contain, so the text is expected to come out unchanged.
console.log('\n=== Current Function Test ===');
const processed = rawExcerpt
  .replace(/&#8221;/g, '"') // This won't work because raw has ”
  .replace(/&#8220;/g, '"')
  .replace(/&#8211;/g, '-')
  .replace(/&#8212;/g, '—')
  .replace(/&#8216;/g, "'")
  .replace(/&#8217;/g, "'");
console.log('After current function (which won\'t work):');
console.log(processed);

View File

@@ -1,9 +0,0 @@
const { processShortcodes } = require('../lib/html-compat.ts');
// Manual smoke test: run one vc_row shortcode whose attribute values are
// wrapped in typographic quotes through processShortcodes and print what
// comes back, including the contents of the first style="..." attribute.
const shortcode = '[vc_row bg_image="”10440″" color_overlay="“#000000”"]content[/vc_row]';
console.log('Input:', shortcode);

const html = processShortcodes(shortcode);
console.log('Result:', html);
console.log('Contains bg image?', html.includes('background-image'));

const styleMatch = html.match(/style="([^"]*)"/);
console.log('Style attribute:', styleMatch?.[1]);

View File

@@ -1,153 +0,0 @@
#!/usr/bin/env node
/**
* Script to download missing videos and PDFs
* Downloads videos referenced in processed data and PDFs linked in pages
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
const http = require('http');
// Configuration: both directories are resolved relative to this script's parent
// directory. MEDIA_DIR receives the downloaded files; PROCESSED_DIR holds
// asset-map.json, which main() updates.
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
// Videos to download (from home pages). Each entry gives the remote URL and the
// file name to store it under inside MEDIA_DIR.
const VIDEOS_TO_DOWNLOAD = [
{
url: 'https://klz-cables.com/wp-content/uploads/2025/02/header.mp4',
filename: 'header.mp4'
},
{
url: 'https://klz-cables.com/wp-content/uploads/2025/02/header.webm',
filename: 'header.webm'
}
];
// PDFs to download (from terms pages); same entry shape as the videos.
const PDFS_TO_DOWNLOAD = [
{
url: 'https://klz-cables.com/wp-content/uploads/2025/01/agbs.pdf',
filename: 'agbs.pdf'
}
];
// Create media directory if it doesn't exist. This runs at module load, i.e.
// also when the file is only required by another script.
if (!fs.existsSync(MEDIA_DIR)) {
fs.mkdirSync(MEDIA_DIR, { recursive: true });
}
/**
 * Download `url` into MEDIA_DIR under `filename`.
 *
 * An existing file is treated as already downloaded and left untouched.
 * Redirects (301/302/303/307/308) are followed up to `redirectsLeft` times,
 * and a relative `Location` header is resolved against the current URL.
 *
 * The body is streamed to `<filename>.part` and renamed into place only after
 * it has been written completely. The destination file is therefore never
 * created for a redirect or an error response, and a failed or interrupted
 * download cannot later be mistaken for an existing file.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {string} filename - File name to store the download under.
 * @param {number} [redirectsLeft=5] - Remaining redirects that may be followed.
 * @returns {Promise<string>} Resolves with the path of the local file.
 */
function downloadFile(url, filename, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const filePath = path.join(MEDIA_DIR, filename);

    // Check if file already exists
    if (fs.existsSync(filePath)) {
      console.log(`✅ Already exists: ${filename}`);
      resolve(filePath);
      return;
    }

    console.log(`📥 Downloading: ${filename} from ${url}`);
    const protocol = url.startsWith('https') ? https : http;

    protocol.get(url, (res) => {
      const { statusCode, headers } = res;

      // Handle redirects
      if ([301, 302, 303, 307, 308].includes(statusCode)) {
        res.resume(); // discard the body so the socket is released
        if (!headers.location) {
          reject(new Error(`Redirect without location: ${statusCode}`));
          return;
        }
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while downloading ${filename}`));
          return;
        }
        console.log(`🔄 Redirected to: ${headers.location}`);
        const nextUrl = new URL(headers.location, url).toString();
        downloadFile(nextUrl, filename, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        res.resume();
        reject(new Error(`Failed to download: HTTP ${statusCode}`));
        return;
      }

      const partialPath = `${filePath}.part`;
      const file = fs.createWriteStream(partialPath);
      let settled = false;

      // Abort the transfer, remove the partial file, then reject once.
      const fail = (err) => {
        if (settled) return;
        settled = true;
        res.destroy();
        file.destroy();
        fs.unlink(partialPath, () => reject(err));
      };

      res.on('error', fail);
      // The connection closed before the full body arrived.
      res.on('close', () => {
        if (!res.complete) fail(new Error(`Download interrupted: ${filename}`));
      });
      file.on('error', fail);
      file.on('finish', () => {
        fs.rename(partialPath, filePath, (err) => {
          if (err) {
            fail(err);
            return;
          }
          settled = true;
          console.log(`✅ Downloaded: ${filename}`);
          resolve(filePath);
        });
      });

      res.pipe(file);
    }).on('error', reject);
  });
}
/**
 * Download every configured video and PDF, then merge the resulting
 * "remote URL -> /media/<file>" entries into asset-map.json, provided that
 * file already exists in PROCESSED_DIR.
 *
 * A failed download is reported as a warning and does not stop the run.
 * Progress and a final summary are printed to the console.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔍 Downloading Missing Assets');
  console.log('==============================');
  console.log(`Output: ${MEDIA_DIR}`);
  console.log('');

  const assetMap = {};
  const downloaded = [];

  // Fetch one group of assets sequentially; `kind` only labels the warning.
  const fetchGroup = async (assets, kind) => {
    for (const asset of assets) {
      try {
        await downloadFile(asset.url, asset.filename);
        assetMap[asset.url] = `/media/${asset.filename}`;
        downloaded.push(asset.filename);
      } catch (error) {
        console.warn(`⚠️ Failed to download ${kind} ${asset.filename}:`, error.message);
      }
    }
  };

  // Download videos
  console.log('🎬 Videos:');
  await fetchGroup(VIDEOS_TO_DOWNLOAD, 'video');
  console.log('');

  // Download PDFs
  console.log('📄 PDFs:');
  await fetchGroup(PDFS_TO_DOWNLOAD, 'PDF');

  // Update asset-map.json with new entries (entries from this run win).
  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  if (fs.existsSync(assetMapPath)) {
    const existingMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
    const mergedMap = Object.assign({}, existingMap, assetMap);
    fs.writeFileSync(assetMapPath, JSON.stringify(mergedMap, null, 2));
    console.log(`\n✅ Updated asset-map.json with ${Object.keys(assetMap).length} new entries`);
  }

  console.log('\n🎉 Asset Download Complete!');
  console.log('==============================');
  console.log(`📥 Downloaded: ${downloaded.length} files`);
  console.log(`📁 Directory: public/media/`);
  console.log('');
  console.log('Files downloaded:');
  for (const name of downloaded) {
    console.log(` - ${name}`);
  }
}
// Command-line entry point: runs only when this file is executed directly.
if (require.main === module) {
  const exitWithFailure = (error) => {
    console.error('\n❌ Script failed:', error.message);
    process.exit(1);
  };
  main().catch(exitWithFailure);
}

// Exposed so other scripts can reuse the downloader.
module.exports = { downloadFile, main };

View File

@@ -1,216 +0,0 @@
#!/usr/bin/env node
/**
* Script to fetch specific missing media IDs from WordPress
* Uses the WordPress REST API to get media URLs and download them
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
// Load environment variables
require('dotenv').config();
// Site base URL and WordPress application password, both read from the
// environment.
const BASE_URL = process.env.WOOCOMMERCE_URL;
const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD;
// Validate environment.
// NOTE(review): this check runs at module load, so requiring this file from
// another script also terminates that process when a variable is missing.
if (!BASE_URL || !APP_PASSWORD) {
console.error('❌ Missing required environment variables');
console.error('Please check .env file for:');
console.error(' - WOOCOMMERCE_URL');
console.error(' - WORDPRESS_APP_PASSWORD');
process.exit(1);
}
// Configuration
// WordPress media IDs to fetch.
const MISSING_MEDIA_IDS = [10432, 10440, 10382, 10616, 10615, 45569, 10638, 5767];
// Downloaded files are stored here.
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
// Raw export directory whose media.json is updated. The directory name is
// hard-coded to one specific export timestamp.
const RAW_DATA_DIR = path.join(__dirname, '..', 'data', 'raw', '2025-12-30T15-21-49-331Z');
// Create media directory if it doesn't exist
if (!fs.existsSync(MEDIA_DIR)) {
fs.mkdirSync(MEDIA_DIR, { recursive: true });
}
/**
 * Build the request headers for authenticated WordPress REST calls.
 *
 * Uses HTTP Basic auth with the fixed user name `admin` and the application
 * password from APP_PASSWORD.
 *
 * @returns {{Authorization: string, 'Content-Type': string}} Header object.
 */
function buildWordPressAuth() {
  const credentials = `admin:${APP_PASSWORD}`;
  const encoded = Buffer.from(credentials).toString('base64');
  const headers = {
    'Authorization': `Basic ${encoded}`,
    'Content-Type': 'application/json'
  };
  return headers;
}
/**
 * GET `url` over HTTPS and return the response body.
 *
 * @param {string} url - Absolute https URL.
 * @param {Object} [headers={}] - Extra request headers; they override the
 *   default `User-Agent` when they contain one.
 * @returns {Promise<*>} Resolves with the parsed JSON body for 2xx responses,
 *   or with the raw body text when it is not valid JSON. Rejects with
 *   `Error("HTTP <status>: <body>")` for any other status, and with the
 *   underlying error on request/response failures.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-Missing-Media-Fetcher/1.0',
        ...headers
      }
    };
    https
      .get(url, options, (res) => {
        // Decode as a UTF-8 stream. Appending raw Buffer chunks to a string
        // decodes each chunk on its own and corrupts multi-byte characters
        // that are split across chunk boundaries.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          if (res.statusCode >= 200 && res.statusCode < 300) {
            try {
              resolve(JSON.parse(data));
            } catch (e) {
              // Not JSON: hand the raw text back to the caller.
              resolve(data);
            }
          } else {
            reject(new Error(`HTTP ${res.statusCode}: ${data}`));
          }
        });
      })
      .on('error', reject);
  });
}
/**
 * Fetch the WordPress REST record (`/wp-json/wp/v2/media/<id>`) for one media
 * item.
 *
 * @param {number} mediaId - WordPress media ID.
 * @returns {Promise<Object|null>} The response from makeRequest, or `null`
 *   when the request fails (the failure is logged, not thrown).
 */
async function fetchMedia(mediaId) {
  const endpoint = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;
  try {
    console.log(`📥 Fetching media ${mediaId}...`);
    return await makeRequest(endpoint, buildWordPressAuth());
  } catch (error) {
    console.error(`❌ Error fetching media ${mediaId}:`, error.message);
    return null;
  }
}
/**
 * Download one media file over HTTPS into MEDIA_DIR.
 *
 * An existing file is treated as already downloaded and left untouched.
 *
 * The body is streamed to `<filename>.part` and renamed into place only after
 * it has been written completely. The destination file is therefore never
 * created for an error response, and a failed or interrupted download cannot
 * later be mistaken for an existing file.
 *
 * @param {string} url - Absolute https URL of the media file.
 * @param {string} filename - File name to store the download under.
 * @returns {Promise<string>} Resolves with the path of the local file. Rejects
 *   when the status is not 200 or the transfer or write fails.
 */
function downloadMedia(url, filename) {
  return new Promise((resolve, reject) => {
    const filePath = path.join(MEDIA_DIR, filename);

    // Check if file already exists
    if (fs.existsSync(filePath)) {
      console.log(`✅ Media already exists: ${filename}`);
      resolve(filePath);
      return;
    }

    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // discard the body so the socket is released
        reject(new Error(`Failed to download: ${res.statusCode}`));
        return;
      }

      const partialPath = `${filePath}.part`;
      const file = fs.createWriteStream(partialPath);
      let settled = false;

      // Abort the transfer, remove the partial file, then reject once.
      const fail = (err) => {
        if (settled) return;
        settled = true;
        res.destroy();
        file.destroy();
        fs.unlink(partialPath, () => reject(err));
      };

      res.on('error', fail);
      // The connection closed before the full body arrived.
      res.on('close', () => {
        if (!res.complete) fail(new Error(`Download interrupted: ${filename}`));
      });
      file.on('error', fail);
      file.on('finish', () => {
        fs.rename(partialPath, filePath, (err) => {
          if (err) {
            fail(err);
            return;
          }
          settled = true;
          console.log(`✅ Downloaded: ${filename}`);
          resolve(filePath);
        });
      });

      res.pipe(file);
    }).on('error', reject);
  });
}
/**
 * Fetch the REST record of every ID in MISSING_MEDIA_IDS, download the
 * referenced files into MEDIA_DIR and record them in media.json inside
 * RAW_DATA_DIR.
 *
 * Metadata is requested one ID at a time; each file download is started as
 * soon as its metadata arrives and all downloads are awaited together. A
 * failed fetch or download is reported as a warning and does not stop the run.
 *
 * media.json entries are merged by `id`: an existing entry with the same id
 * as a freshly fetched item is replaced, so running the script again does not
 * pile up duplicates.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔍 Fetching Missing Media IDs');
  console.log('==============================');
  console.log(`Target: ${BASE_URL}`);
  console.log(`Output: ${MEDIA_DIR}`);
  console.log(`Missing IDs: ${MISSING_MEDIA_IDS.join(', ')}`);
  console.log('');

  const mediaManifest = [];
  const downloadPromises = [];

  for (const mediaId of MISSING_MEDIA_IDS) {
    const media = await fetchMedia(mediaId);
    if (media && media.source_url) {
      const originalFilename = media.source_url.split('/').pop();
      // Prefix with the ID so the local name is unique and traceable.
      const filename = `${mediaId}-${originalFilename}`;

      // Add to manifest
      mediaManifest.push({
        id: mediaId,
        url: media.source_url,
        filename: filename,
        alt: media.alt_text || '',
        width: media.media_details?.width,
        height: media.media_details?.height,
        mime_type: media.mime_type
      });

      // Download file; a failure is only a warning so Promise.all never rejects.
      downloadPromises.push(
        downloadMedia(media.source_url, filename).catch(err => {
          console.warn(`⚠️ Failed to download media ${mediaId}:`, err.message);
        })
      );
    } else {
      console.warn(`⚠️ Could not fetch media ${mediaId}`);
    }
  }

  // Wait for all downloads
  await Promise.all(downloadPromises);

  // Update media.json
  const mediaJsonPath = path.join(RAW_DATA_DIR, 'media.json');
  if (fs.existsSync(mediaJsonPath)) {
    const existingMedia = JSON.parse(fs.readFileSync(mediaJsonPath, 'utf8'));
    // Drop stale copies of the items fetched in this run before appending.
    const fetchedIds = new Set(mediaManifest.map(item => item.id));
    const retained = existingMedia.filter(entry => !fetchedIds.has(entry?.id));
    const updatedMedia = [...retained, ...mediaManifest];
    fs.writeFileSync(
      mediaJsonPath,
      JSON.stringify(updatedMedia, null, 2)
    );
    console.log(`✅ Updated media.json with ${mediaManifest.length} new items`);
  } else {
    console.warn('⚠️ media.json not found, creating new file');
    // The export directory itself may not exist yet.
    fs.mkdirSync(RAW_DATA_DIR, { recursive: true });
    fs.writeFileSync(
      mediaJsonPath,
      JSON.stringify(mediaManifest, null, 2)
    );
  }

  // assets.json is only displayed, never modified.
  const assetsJsonPath = path.join(RAW_DATA_DIR, 'assets.json');
  if (fs.existsSync(assetsJsonPath)) {
    const assets = JSON.parse(fs.readFileSync(assetsJsonPath, 'utf8'));
    console.log('✅ Current assets.json:', assets);
  }

  console.log('\n🎉 Missing Media Fetch Complete!');
  console.log('==============================');
  console.log(`📥 Fetched: ${mediaManifest.length} items`);
  console.log(`📁 Directory: public/media/`);
  // Derived from RAW_DATA_DIR so the message cannot drift from the real path.
  console.log(`📄 Updated: ${path.relative(path.join(__dirname, '..'), mediaJsonPath)}`);
  console.log('');
  console.log('Media items fetched:');
  mediaManifest.forEach(item => {
    console.log(` - ${item.id}: ${item.filename}`);
  });
}
// Command-line entry point: runs only when this file is executed directly.
if (require.main === module) {
  const exitWithFailure = (error) => {
    console.error('\n❌ Script failed:', error.message);
    process.exit(1);
  };
  main().catch(exitWithFailure);
}

// Exposed so other scripts can reuse the fetch and download helpers.
module.exports = { fetchMedia, downloadMedia, main };

View File

@@ -1,230 +0,0 @@
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const ASSET_MAP_PATH = path.join(PROCESSED_DIR, 'asset-map.json');

// Load asset map (original URL -> local path).
const assetMap = JSON.parse(fs.readFileSync(ASSET_MAP_PATH, 'utf8'));

// Create ID to path mapping. The media ID is taken from the first of these
// URL shapes that matches: "/<id>-", "/<id>." or "id=<id>". When several URLs
// yield the same ID, the last one processed wins.
const idToPath = {};
Object.entries(assetMap).forEach(([wpUrl, localPath]) => {
  const idMatch =
    wpUrl.match(/\/(\d+)-/) || wpUrl.match(/\/(\d+)\./) || wpUrl.match(/id=(\d+)/);
  if (idMatch) {
    idToPath[idMatch[1]] = localPath;
  }
});

// Add manual mappings; these override anything derived from the asset map.
Object.assign(idToPath, {
  '45569': '/media/45569-Still-2025-02-10-104337_1.1.1.webp',
  '10648': '/media/10648-low-voltage-scaled.webp',
  '6486': '/media/6486-Low-Voltage.svg',
  '10649': '/media/10649-medium-voltage-scaled.webp',
  '6487': '/media/6487-Medium-Voltage.svg',
  '46786': '/media/46786-na2xsfl2y-rendered.webp',
  '6485': '/media/6485-High-Voltage.svg',
  '46359': '/media/46359-3.webp',
  '6484': '/media/6484-Solar.svg',
  '6527': '/media/6527-high-voltage-category.webp',
  '6519': '/media/6519-solar-category.webp',
  '6521': '/media/6521-low-voltage-category.webp',
  '6517': '/media/6517-medium-voltage-category.webp'
});

console.log('Found', Object.keys(idToPath).length, 'media ID mappings');
/**
 * Decode HTML character references in a string.
 *
 * Handles decimal (`&#8221;`) and hexadecimal (`&#x201D;`) references for the
 * full Unicode range, plus the small set of named entities listed below.
 * Typographic quotes are deliberately flattened to ASCII quotes so that
 * shortcode attributes such as `bg_image=&#8221;123&#8243;` end up as
 * `bg_image="123"` and can be matched by plain string search afterwards.
 *
 * Unknown named entities and numeric references outside the valid code point
 * range are left untouched. `&amp;`, `&lt;`, `&gt;` and `&quot;` are NOT
 * decoded: doing so could turn escaped text into live markup.
 *
 * @param {string} text - Raw text or HTML; falsy values are accepted.
 * @returns {string} The decoded text, or '' when `text` is falsy.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  // A Map (not a plain object) so that names like "constructor" cannot
  // resolve to inherited properties.
  const namedEntities = new Map([
    ['nbsp', ' '],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['Prime', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC'],
    ['copy', '\u00A9'],
    ['reg', '\u00AE'],
    ['trade', '\u2122'],
    ['deg', '\u00B0'],
    ['plusmn', '\u00B1'],
    ['times', '\u00D7'],
    ['divide', '\u00F7'],
    ['cent', '\u00A2'],
    ['pound', '\u00A3'],
    ['yen', '\u00A5'],
    ['sect', '\u00A7'],
    ['para', '\u00B6'],
    ['micro', '\u00B5'],
    ['laquo', '\u00AB'],
    ['raquo', '\u00BB'],
    ['middot', '\u00B7'],
  ]);

  // String.fromCodePoint (unlike fromCharCode) handles code points above
  // 0xFFFF, but throws on out-of-range input, so validate first.
  const fromCodePoint = (codePoint, fallback) =>
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : fallback;

  // Single pass over all references, so already-decoded output is never
  // decoded a second time.
  const decoded = text.replace(
    /&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|([a-zA-Z][a-zA-Z0-9]*));/g,
    (match, hex, dec, name) => {
      if (hex !== undefined) return fromCodePoint(parseInt(hex, 16), match);
      if (dec !== undefined) return fromCodePoint(parseInt(dec, 10), match);
      return namedEntities.has(name) ? namedEntities.get(name) : match;
    }
  );

  // Flatten curly double quotes and the double prime (U+201C, U+201D, U+2033)
  // to a plain '"', whether they came from a reference or were literal.
  return decoded.replace(/[\u201C\u201D\u2033]/g, '"');
}
// Rewrite each processed JSON file in place: decode entities in the HTML
// fields, then swap numeric media IDs in shortcode attributes for local paths.
const files = ['pages.json', 'posts.json', 'products.json'];

// Attributes in contentHtml whose numeric media ID is replaced.
const CONTENT_ID_ATTRIBUTES = [
  'bg_image',
  'background_image',
  'image_url',
  'custom_icon_image',
  'poster',
  'column_background_image',
];

// Replace every literal occurrence of `search` in item[field].
// Returns true when `search` was present (even if the text ends up unchanged).
function replaceLiteral(item, field, search, replacement) {
  if (!item[field].includes(search)) return false;
  item[field] = item[field].split(search).join(replacement);
  return true;
}

// [search, replacement] pairs applied to contentHtml for one media ID.
// The last three cover typographic-quote variants (U+201D, U+2033) of bg_image.
const contentSwaps = (id, localPath) => {
  const bgReplacement = `bg_image="${localPath}"`;
  return [
    ...CONTENT_ID_ATTRIBUTES.map((attr) => [`${attr}="${id}"`, `${attr}="${localPath}"`]),
    [`bg_image=\u201D${id}\u2033`, bgReplacement],
    [`bg_image=\u201D${id}\u201D`, bgReplacement],
    [`bg_image="${id}"`, bgReplacement],
  ];
};

// [search, replacement] pairs applied to excerptHtml for one media ID
// (only bg_image is handled there).
const excerptSwaps = (id, localPath) =>
  [
    `bg_image="${id}"`,
    `bg_image="${id}"`,
    `bg_image="${id}\u2033`,
    `bg_image="${id}"`,
  ].map((search) => [search, `bg_image="${localPath}"`]);

for (const file of files) {
  const filePath = path.join(PROCESSED_DIR, file);
  if (!fs.existsSync(filePath)) continue;

  const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  let updated = false;
  let updateCount = 0; // items whose content/excerpt changed
  let decodeCount = 0; // items whose contentHtml changed through entity decoding

  for (const item of items) {
    let contentChanged = false;
    let wasDecoded = false;

    if (item.contentHtml) {
      const before = item.contentHtml;
      item.contentHtml = decodeHTMLEntities(before);
      if (item.contentHtml !== before) {
        wasDecoded = true;
        decodeCount += 1;
      }
      for (const [id, localPath] of Object.entries(idToPath)) {
        for (const [search, replacement] of contentSwaps(id, localPath)) {
          if (replaceLiteral(item, 'contentHtml', search, replacement)) {
            contentChanged = true;
          }
        }
      }
    }

    if (item.excerptHtml) {
      const before = item.excerptHtml;
      item.excerptHtml = decodeHTMLEntities(before);
      for (const [id, localPath] of Object.entries(idToPath)) {
        for (const [search, replacement] of excerptSwaps(id, localPath)) {
          if (replaceLiteral(item, 'excerptHtml', search, replacement)) {
            contentChanged = true;
          }
        }
      }
      // A pure decode of the excerpt also counts as a content change.
      if (item.excerptHtml !== before) contentChanged = true;
    }

    if (contentChanged || wasDecoded) updated = true;
    if (contentChanged) updateCount += 1;
  }

  if (updated) {
    fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
    console.log(`✅ Updated ${file} (${updateCount} replacements, ${decodeCount} decoded)`);
  } else {
    console.log(` No changes for ${file}`);
  }
}
// Verification: re-read pages.json and report on the two home pages.
const pages = JSON.parse(fs.readFileSync(path.join(PROCESSED_DIR, 'pages.json'), 'utf8'));
const findPage = (slug, locale) => pages.find((p) => p.slug === slug && p.locale === locale);
const countMediaRefs = (page) => (page?.contentHtml?.match(/\/media\//g) || []).length;

const homeEn = findPage('corporate-3-landing-2', 'en');
const homeDe = findPage('start', 'de');

console.log('\n✅ Verification:');
console.log('EN home images:', countMediaRefs(homeEn));
console.log('DE home images:', countMediaRefs(homeDe));

// Any bg_image that still holds a bare numeric ID was not mapped.
const remainingIds = homeEn?.contentHtml?.match(/bg_image="\d+"/g) || [];
console.log('Remaining IDs in EN:', remainingIds.length > 0 ? remainingIds : 'None');

// Print up to three bg_image attributes as a spot check.
const examples = homeEn?.contentHtml ? homeEn.contentHtml.match(/bg_image="[^"]+"/g) : null;
if (examples) {
  console.log('\nEN bg_image examples:', examples.slice(0, 3));
}

View File

@@ -1,353 +0,0 @@
#!/usr/bin/env node
/**
* Script to fix missing attributes for high-voltage cables
* Creates a manual attribute mapping based on product specifications
*/
const fs = require('fs');
const path = require('path');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const BACKUP_DIR = path.join(__dirname, '..', 'data', 'backup');

// Make sure the backup directory exists before anything is written to it.
const backupDirMissing = !fs.existsSync(BACKUP_DIR);
if (backupDirMissing) {
  fs.mkdirSync(BACKUP_DIR, { recursive: true });
}
/**
 * Manual attribute mappings, keyed by product slug.
 *
 * Each entry holds an `en` and a `de` array of `{ name, options }` records.
 * addMissingAttributes() looks an entry up by `product.slug` and uses the
 * `en` list for locale 'en' and the `de` list for every other locale.
 *
 * The values are described as "typical specifications" for these cable types,
 * i.e. they are hand-written, not taken from the export.
 * Most entries are commented as high-voltage cables; 'h1z2z2-k' is a solar cable.
 */
const MANUAL_ATTRIBUTES = {
// High Voltage Cables - Aluminum conductor, XLPE insulation
'na2xsfl2y-3': { // NA2XS(FL)2Y high voltage
en: [
{ name: 'Conductor', options: ['Aluminum'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['PE'] },
{ name: 'Screen', options: ['Copper wire + tape'] },
{ name: 'Water blocking', options: ['Yes'] },
{ name: 'Voltage rating', options: ['6/10 kV', '12/20 kV', '18/30 kV'] },
{ name: 'Installation', options: ['Underground', 'Cable ducts', 'Outdoor'] },
{ name: 'Standard', options: ['IEC 60840', 'DIN VDE 0276-620'] },
{ name: 'Conductor material', options: ['Aluminum'] },
{ name: 'Conductor type', options: ['Compacted stranded'] },
{ name: 'Insulation material', options: ['XLPE'] },
{ name: 'Sheath material', options: ['PE'] },
{ name: 'Armour', options: ['None'] },
{ name: 'Max operating temperature', options: ['+90 °C'] },
{ name: 'Short circuit temperature', options: ['+250 °C'] },
{ name: 'Bending radius', options: ['Min. 15x diameter'] }
],
de: [
{ name: 'Leiter', options: ['Aluminium'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['PE'] },
{ name: 'Abschirmung', options: ['Kupferdraht + Band'] },
{ name: 'Wassersperre', options: ['Ja'] },
{ name: 'Spannungsbereich', options: ['6/10 kV', '12/20 kV', '18/30 kV'] },
{ name: 'Installation', options: ['Unterirdisch', 'Kabelrohre', 'Außen'] },
{ name: 'Norm', options: ['IEC 60840', 'DIN VDE 0276-620'] },
{ name: 'Leitermaterial', options: ['Aluminium'] },
// NOTE(review): the English list says 'Compacted stranded' here, while the
// 'n2xsfl2y' entry uses the same German value for plain 'Stranded' — confirm wording.
{ name: 'Leitertyp', options: ['Verdrillt'] },
{ name: 'Isolationsmaterial', options: ['XLPE'] },
{ name: 'Mantelmaterial', options: ['PE'] },
{ name: 'Bewehrung', options: ['Keine'] },
{ name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
{ name: 'Kurzschlusstemperatur', options: ['+250 °C'] },
{ name: 'Biegeradius', options: ['Min. 15x Durchmesser'] }
]
},
'n2xsfl2y': { // N2XS(FL)2Y high voltage
en: [
{ name: 'Conductor', options: ['Copper'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['PE'] },
{ name: 'Screen', options: ['Copper wire + tape'] },
{ name: 'Water blocking', options: ['Yes'] },
{ name: 'Voltage rating', options: ['6/10 kV', '12/20 kV', '18/30 kV'] },
{ name: 'Installation', options: ['Underground', 'Cable ducts', 'Outdoor'] },
{ name: 'Standard', options: ['IEC 60840', 'DIN VDE 0276-620'] },
{ name: 'Conductor material', options: ['Copper'] },
{ name: 'Conductor type', options: ['Stranded'] },
{ name: 'Insulation material', options: ['XLPE'] },
{ name: 'Sheath material', options: ['PE'] },
{ name: 'Armour', options: ['None'] },
{ name: 'Max operating temperature', options: ['+90 °C'] },
{ name: 'Short circuit temperature', options: ['+250 °C'] },
{ name: 'Bending radius', options: ['Min. 15x diameter'] }
],
de: [
{ name: 'Leiter', options: ['Kupfer'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['PE'] },
{ name: 'Abschirmung', options: ['Kupferdraht + Band'] },
{ name: 'Wassersperre', options: ['Ja'] },
{ name: 'Spannungsbereich', options: ['6/10 kV', '12/20 kV', '18/30 kV'] },
{ name: 'Installation', options: ['Unterirdisch', 'Kabelrohre', 'Außen'] },
{ name: 'Norm', options: ['IEC 60840', 'DIN VDE 0276-620'] },
{ name: 'Leitermaterial', options: ['Kupfer'] },
{ name: 'Leitertyp', options: ['Verdrillt'] },
{ name: 'Isolationsmaterial', options: ['XLPE'] },
{ name: 'Mantelmaterial', options: ['PE'] },
{ name: 'Bewehrung', options: ['Keine'] },
{ name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
{ name: 'Kurzschlusstemperatur', options: ['+250 °C'] },
{ name: 'Biegeradius', options: ['Min. 15x Durchmesser'] }
]
},
'h1z2z2-k': { // H1Z2Z2-K solar cable
en: [
{ name: 'Conductor', options: ['Tinned copper'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['XLPE'] },
{ name: 'Voltage rating', options: ['1.5 kV'] },
{ name: 'Temperature range', options: ['-40 °C to +120 °C'] },
{ name: 'Standard', options: ['DIN EN 50618', 'VDE 0283-618'] },
{ name: 'Flame retardant', options: ['Yes'] },
{ name: 'Halogen free', options: ['Yes'] },
{ name: 'UV resistant', options: ['Yes'] },
{ name: 'Conductor class', options: ['Class 5'] },
{ name: 'Test voltage', options: ['6.5 kV'] },
{ name: 'CPR class', options: ['Eca'] }
],
de: [
{ name: 'Leiter', options: ['Verzinntes Kupfer'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['XLPE'] },
{ name: 'Spannungsbereich', options: ['1.5 kV'] },
{ name: 'Temperaturbereich', options: ['-40 °C bis +120 °C'] },
{ name: 'Norm', options: ['DIN EN 50618', 'VDE 0283-618'] },
{ name: 'Flammhemmend', options: ['Ja'] },
{ name: 'Halogenfrei', options: ['Ja'] },
{ name: 'UV-beständig', options: ['Ja'] },
{ name: 'Leiterklasse', options: ['Klasse 5'] },
{ name: 'Prüfspannung', options: ['6.5 kV'] },
{ name: 'CPR-Klasse', options: ['Eca'] }
]
},
// NOTE(review): this entry is identical to 'n2xfk2y' below and lists a copper
// conductor, whereas the other NA-prefixed entry ('na2xsfl2y-3') lists
// aluminum — confirm the conductor (and sheath) values against the datasheet.
'na2xfk2y': { // NA2X(F)K2Y high voltage
en: [
{ name: 'Conductor', options: ['Copper'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['PVC'] },
{ name: 'Screen', options: ['Copper wire'] },
{ name: 'Voltage rating', options: ['64/110 kV'] },
{ name: 'Installation', options: ['Underground', 'Cable ducts'] },
{ name: 'Standard', options: ['IEC 60502-2'] },
{ name: 'Conductor material', options: ['Copper'] },
{ name: 'Insulation material', options: ['XLPE'] },
{ name: 'Sheath material', options: ['PVC'] },
{ name: 'Max operating temperature', options: ['+90 °C'] },
{ name: 'Short circuit temperature', options: ['+250 °C'] }
],
de: [
{ name: 'Leiter', options: ['Kupfer'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['PVC'] },
{ name: 'Abschirmung', options: ['Kupferdraht'] },
{ name: 'Spannungsbereich', options: ['64/110 kV'] },
{ name: 'Installation', options: ['Unterirdisch', 'Kabelrohre'] },
{ name: 'Norm', options: ['IEC 60502-2'] },
{ name: 'Leitermaterial', options: ['Kupfer'] },
{ name: 'Isolationsmaterial', options: ['XLPE'] },
{ name: 'Mantelmaterial', options: ['PVC'] },
{ name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
{ name: 'Kurzschlusstemperatur', options: ['+250 °C'] }
]
},
'n2xfk2y': { // N2X(F)K2Y high voltage
en: [
{ name: 'Conductor', options: ['Copper'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['PVC'] },
{ name: 'Screen', options: ['Copper wire'] },
{ name: 'Voltage rating', options: ['64/110 kV'] },
{ name: 'Installation', options: ['Underground', 'Cable ducts'] },
{ name: 'Standard', options: ['IEC 60502-2'] },
{ name: 'Conductor material', options: ['Copper'] },
{ name: 'Insulation material', options: ['XLPE'] },
{ name: 'Sheath material', options: ['PVC'] },
{ name: 'Max operating temperature', options: ['+90 °C'] },
{ name: 'Short circuit temperature', options: ['+250 °C'] }
],
de: [
{ name: 'Leiter', options: ['Kupfer'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['PVC'] },
{ name: 'Abschirmung', options: ['Kupferdraht'] },
{ name: 'Spannungsbereich', options: ['64/110 kV'] },
{ name: 'Installation', options: ['Unterirdisch', 'Kabelrohre'] },
{ name: 'Norm', options: ['IEC 60502-2'] },
{ name: 'Leitermaterial', options: ['Kupfer'] },
{ name: 'Isolationsmaterial', options: ['XLPE'] },
{ name: 'Mantelmaterial', options: ['PVC'] },
{ name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
{ name: 'Kurzschlusstemperatur', options: ['+250 °C'] }
]
},
// NOTE(review): identical to 'n2xfkld2y' below and lists copper although the
// slug is NA-prefixed like 'na2xsfl2y-3' (aluminum) — confirm against the datasheet.
'na2xfkld2y': { // NA2X(F)KLD2Y high voltage
en: [
{ name: 'Conductor', options: ['Copper'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['PE'] },
{ name: 'Screen', options: ['Copper wire + tape'] },
{ name: 'Voltage rating', options: ['64/110 kV'] },
{ name: 'Installation', options: ['Direct burial', 'Cable tray'] },
{ name: 'Standard', options: ['IEC 60502-2'] },
{ name: 'Conductor material', options: ['Copper'] },
{ name: 'Insulation material', options: ['XLPE'] },
{ name: 'Sheath material', options: ['PE'] },
{ name: 'Armour', options: ['Aluminum tape'] },
{ name: 'Max operating temperature', options: ['+90 °C'] },
{ name: 'Short circuit temperature', options: ['+250 °C'] }
],
de: [
{ name: 'Leiter', options: ['Kupfer'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['PE'] },
{ name: 'Abschirmung', options: ['Kupferdraht + Band'] },
{ name: 'Spannungsbereich', options: ['64/110 kV'] },
// NOTE(review): 'Kabeltragg' looks like a truncated word (counterpart of
// 'Cable tray') — TODO confirm the intended German term.
{ name: 'Installation', options: ['Direktverlegung', 'Kabeltragg'] },
{ name: 'Norm', options: ['IEC 60502-2'] },
{ name: 'Leitermaterial', options: ['Kupfer'] },
{ name: 'Isolationsmaterial', options: ['XLPE'] },
{ name: 'Mantelmaterial', options: ['PE'] },
{ name: 'Bewehrung', options: ['Aluminiumband'] },
{ name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
{ name: 'Kurzschlusstemperatur', options: ['+250 °C'] }
]
},
'n2xfkld2y': { // N2X(F)KLD2Y high voltage
en: [
{ name: 'Conductor', options: ['Copper'] },
{ name: 'Insulation', options: ['XLPE'] },
{ name: 'Sheath', options: ['PE'] },
{ name: 'Screen', options: ['Copper wire + tape'] },
{ name: 'Voltage rating', options: ['64/110 kV'] },
{ name: 'Installation', options: ['Direct burial', 'Cable tray'] },
{ name: 'Standard', options: ['IEC 60502-2'] },
{ name: 'Conductor material', options: ['Copper'] },
{ name: 'Insulation material', options: ['XLPE'] },
{ name: 'Sheath material', options: ['PE'] },
{ name: 'Armour', options: ['Aluminum tape'] },
{ name: 'Max operating temperature', options: ['+90 °C'] },
{ name: 'Short circuit temperature', options: ['+250 °C'] }
],
de: [
{ name: 'Leiter', options: ['Kupfer'] },
{ name: 'Isolation', options: ['XLPE'] },
{ name: 'Mantel', options: ['PE'] },
{ name: 'Abschirmung', options: ['Kupferdraht + Band'] },
{ name: 'Spannungsbereich', options: ['64/110 kV'] },
// NOTE(review): 'Kabeltragg' looks truncated here as well — TODO confirm.
{ name: 'Installation', options: ['Direktverlegung', 'Kabeltragg'] },
{ name: 'Norm', options: ['IEC 60502-2'] },
{ name: 'Leitermaterial', options: ['Kupfer'] },
{ name: 'Isolationsmaterial', options: ['XLPE'] },
{ name: 'Mantelmaterial', options: ['PE'] },
{ name: 'Bewehrung', options: ['Aluminiumband'] },
{ name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
{ name: 'Kurzschlusstemperatur', options: ['+250 °C'] }
]
}
};
/**
 * Fill in attributes for products in data/processed/products.json that have
 * none, using the hand-written MANUAL_ATTRIBUTES table (keyed by slug).
 *
 * A timestamped backup of the current file is written to BACKUP_DIR first,
 * then products.json is overwritten. Products that already have attributes,
 * or whose slug has no manual entry, are passed through unchanged.
 * Returns nothing; progress and a summary are printed to the console.
 */
function addMissingAttributes() {
  console.log('🔧 Fixing missing product attributes\n');

  const productsPath = path.join(PROCESSED_DIR, 'products.json');
  if (!fs.existsSync(productsPath)) {
    console.error('❌ products.json not found');
    return;
  }

  const products = JSON.parse(fs.readFileSync(productsPath, 'utf8'));
  console.log(`📊 Loaded ${products.length} products`);

  // Snapshot the untouched data before overwriting anything.
  const backupPath = path.join(BACKUP_DIR, `products-${Date.now()}.json`);
  fs.writeFileSync(backupPath, JSON.stringify(products, null, 2));
  console.log(`💾 Backup created: ${backupPath}`);

  // Expand a compact { name, options } record into the full attribute shape;
  // the list index doubles as id and position.
  const expand = ({ name, options }, index) => ({
    id: index,
    name,
    slug: name.toLowerCase().replace(/\s+/g, '-'),
    position: index,
    visible: true,
    variation: true,
    options,
  });

  const tally = { fixed: 0, alreadyFixed: 0 };
  const updatedProducts = [];

  for (const product of products) {
    const hasAttributes = product.attributes && product.attributes.length > 0;
    if (hasAttributes) {
      tally.alreadyFixed += 1;
      updatedProducts.push(product);
      continue;
    }

    const manualSet = MANUAL_ATTRIBUTES[product.slug];
    if (!manualSet) {
      // No manual mapping for this slug; leave the product as it is.
      updatedProducts.push(product);
      continue;
    }

    // Every locale other than 'en' receives the German list.
    const attributes = product.locale === 'en' ? manualSet.en : manualSet.de;
    console.log(`✅ Fixed: ${product.name} (${product.locale})`);
    console.log(` Added ${attributes.length} attributes`);
    tally.fixed += 1;
    updatedProducts.push({ ...product, attributes: attributes.map(expand) });
  }

  fs.writeFileSync(productsPath, JSON.stringify(updatedProducts, null, 2));

  const divider = '='.repeat(60);
  console.log('\n' + divider);
  console.log('📊 SUMMARY');
  console.log(divider);
  console.log(`Total products: ${products.length}`);
  console.log(`Already had attributes: ${tally.alreadyFixed}`);
  console.log(`Fixed with manual mapping: ${tally.fixed}`);
  console.log(`Still missing: ${products.length - tally.alreadyFixed - tally.fixed}`);

  // List whatever still has no attributes so it can be handled by hand.
  const stillMissing = updatedProducts.filter((p) => !p.attributes || p.attributes.length === 0);
  if (stillMissing.length > 0) {
    console.log('\n⚠ Products still missing attributes:');
    for (const p of stillMissing) {
      console.log(` - ${p.name} (${p.slug}) [ID: ${p.id}, Locale: ${p.locale}]`);
    }
  }

  console.log(`\n✅ Attribute fix complete!`);
  console.log(`💾 Updated file: ${productsPath}`);
}
// Exposed so other scripts can reuse the fixer and the mapping table.
module.exports = { addMissingAttributes, MANUAL_ATTRIBUTES };

// Command-line entry point: run only when executed directly.
if (require.main === module) {
  addMissingAttributes();
}

View File

@@ -1,144 +0,0 @@
#!/usr/bin/env node
/**
* Script to move video attributes from excerptHtml to contentHtml
* This fixes the issue where video background attributes are in excerptHtml
* but ContentRenderer never sees them because it processes contentHtml
*/
const fs = require('fs');
const path = require('path');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
/**
 * Pull video background settings out of an excerpt.
 *
 * For each of mp4 / webm / bg, the shortcode-style attribute (`video_mp4="…"`)
 * is tried first and the data attribute (`data-video-mp4="…"`) second; the
 * first non-empty value wins. Matching is case-insensitive.
 *
 * @param {string} excerptHtml - Excerpt markup; falsy values are accepted.
 * @returns {{videoMp4: string, videoWebm: string, videoBg: string}|null}
 *   The three values ('' when absent), or null when none of them was found.
 */
function extractVideoAttributes(excerptHtml) {
  if (!excerptHtml) return null;

  // First non-empty capture among the given patterns, or ''.
  const firstValue = (...patterns) => {
    for (const pattern of patterns) {
      const value = excerptHtml.match(pattern)?.[1];
      if (value) return value;
    }
    return '';
  };

  const videoMp4 = firstValue(/video_mp4="([^"]*)"/i, /data-video-mp4="([^"]*)"/i);
  const videoWebm = firstValue(/video_webm="([^"]*)"/i, /data-video-webm="([^"]*)"/i);
  const videoBg = firstValue(/video_bg="([^"]*)"/i, /data-video-bg="([^"]*)"/i);

  const foundAny = videoMp4 || videoWebm || videoBg;
  return foundAny ? { videoMp4, videoWebm, videoBg } : null;
}
/**
 * Add video attributes to the first `<div class="vc-row…">` tag of an HTML string.
 *
 * For every non-empty value in `videoAttrs`:
 *  - the shortcode-style attribute (`video_mp4`, `video_webm`, `video_bg`) is
 *    added unless the tag already carries it or its data-attribute twin;
 *  - the data attribute (`data-video-mp4`, …) is added unless already present.
 *
 * The new attributes are spliced in front of the tag's closing `>` by
 * position. Earlier this was done with `String.prototype.replace` and a string
 * replacement, which interprets `$&`, `$1`, … inside the attribute values and
 * could place later attributes inside an earlier value.
 *
 * @param {string} contentHtml - Markup to patch; returned as-is when falsy.
 * @param {{videoMp4: string, videoWebm: string, videoBg: string}|null} videoAttrs
 *   Values to merge; when falsy, `contentHtml` is returned unchanged.
 * @returns {string} The patched markup, or the input when there is no vc-row
 *   tag or nothing needs to be added.
 */
function mergeVideoAttributes(contentHtml, videoAttrs) {
  if (!contentHtml || !videoAttrs) return contentHtml;

  // Only the first vc-row opening tag is patched.
  const match = contentHtml.match(/<div class="vc-row[^"]*"[^>]*>/i);
  if (!match) return contentHtml;

  const openTag = match[0];
  const present = (attrName) => openTag.includes(`${attrName}=`);
  const sources = [
    ['mp4', videoAttrs.videoMp4],
    ['webm', videoAttrs.videoWebm],
    ['bg', videoAttrs.videoBg],
  ];

  const additions = [];
  // Shortcode-style attributes first ...
  for (const [suffix, value] of sources) {
    if (value && !present(`video_${suffix}`) && !present(`data-video-${suffix}`)) {
      additions.push(` video_${suffix}="${value}"`);
    }
  }
  // ... then the data-* twins for better compatibility.
  for (const [suffix, value] of sources) {
    if (value && !present(`data-video-${suffix}`)) {
      additions.push(` data-video-${suffix}="${value}"`);
    }
  }
  if (additions.length === 0) return contentHtml;

  // The regex guarantees the tag ends with its only '>', so drop that last
  // character, append the attributes and close the tag again.
  const patchedTag = `${openTag.slice(0, -1)}${additions.join('')}>`;
  const start = match.index;
  return contentHtml.slice(0, start) + patchedTag + contentHtml.slice(start + openTag.length);
}
/**
 * Copy video background attributes from each page's excerptHtml into the
 * first vc-row of its contentHtml, then write the result back to disk.
 *
 * Always rewrites data/processed/pages.json. wordpress-data.json is rewritten
 * only when it exists and contains `content.pages`. The final summary lists
 * only the files that were actually written.
 * Exits the process with code 1 when pages.json is missing.
 */
function main() {
  console.log('🎬 Fixing video attributes in processed data...\n');

  const pagesPath = path.join(PROCESSED_DIR, 'pages.json');
  if (!fs.existsSync(pagesPath)) {
    console.error('❌ pages.json not found');
    process.exit(1);
  }

  const pages = JSON.parse(fs.readFileSync(pagesPath, 'utf8'));
  let fixedCount = 0;

  // Pages are patched in place.
  for (const page of pages) {
    const videoAttrs = extractVideoAttributes(page.excerptHtml);
    if (!videoAttrs) continue;

    console.log(`📄 Page: ${page.slug} (${page.locale})`);
    console.log(` Found video attrs in excerpt: mp4="${videoAttrs.videoMp4}" webm="${videoAttrs.videoWebm}"`);

    const originalContent = page.contentHtml;
    page.contentHtml = mergeVideoAttributes(page.contentHtml, videoAttrs);
    if (page.contentHtml !== originalContent) {
      console.log(` ✅ Merged into contentHtml`);
      fixedCount++;
    } else {
      console.log(` ⚠️ Already present or no vc-row found`);
    }
    console.log('');
  }

  const writtenFiles = [];
  fs.writeFileSync(pagesPath, JSON.stringify(pages, null, 2));
  writtenFiles.push(pagesPath);

  // Keep the combined export in sync when it is there.
  const wordpressDataPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  if (fs.existsSync(wordpressDataPath)) {
    const wordpressData = JSON.parse(fs.readFileSync(wordpressDataPath, 'utf8'));
    if (wordpressData.content && wordpressData.content.pages) {
      wordpressData.content.pages = pages;
      fs.writeFileSync(wordpressDataPath, JSON.stringify(wordpressData, null, 2));
      writtenFiles.push(wordpressDataPath);
    }
  }

  console.log(`✅ Fixed ${fixedCount} pages with video attributes`);
  console.log('📁 Files updated:');
  for (const file of writtenFiles) {
    console.log(` ${file}`);
  }
}
// Run the fix only when this file is executed directly, not when it is required.
if (require.main === module) {
main();
}

View File

@@ -1,246 +0,0 @@
#!/usr/bin/env node
/**
* Improved Translation Mapping Script
* Creates translation pairs by analyzing content similarity and patterns
*/
const fs = require('fs');
const path = require('path');
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
/**
 * Locate the most recent export directory inside DATA_DIR.
 *
 * Sub-directories are compared by name; the lexicographically greatest one is
 * treated as the latest (presumably the names are timestamps — confirm).
 *
 * @returns {string} Absolute path of the latest export directory.
 * @throws {Error} When DATA_DIR does not exist or contains no sub-directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(DATA_DIR)) {
    throw new Error(`No data directory found: ${DATA_DIR}`);
  }

  const dirs = fs
    .readdirSync(DATA_DIR)
    .filter((name) => fs.statSync(path.join(DATA_DIR, name)).isDirectory())
    .sort();

  // Without this guard path.join() would receive undefined and fail with an
  // unrelated-looking TypeError.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${DATA_DIR}`);
  }
  return path.join(DATA_DIR, dirs[dirs.length - 1]);
}
/**
 * Similarity of two strings in the range 0..1, based on edit distance:
 * (length of the longer string - edit distance) / length of the longer string.
 * Two empty strings count as identical (1.0).
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} 1 for identical strings, 0 when nothing lines up.
 */
function similarity(str1, str2) {
  const [longer, shorter] = str1.length > str2.length ? [str1, str2] : [str2, str1];
  if (longer.length === 0) return 1.0;

  // Row-by-row edit distance: `previous` holds the distances between the
  // first i-1 characters of `longer` and every prefix of `shorter`.
  let previous = Array.from({ length: shorter.length + 1 }, (_, j) => j);
  for (let i = 1; i <= longer.length; i++) {
    const current = [i];
    for (let j = 1; j <= shorter.length; j++) {
      const substitution =
        previous[j - 1] + (longer.charAt(i - 1) === shorter.charAt(j - 1) ? 0 : 1);
      current[j] = Math.min(substitution, previous[j] + 1, current[j - 1] + 1);
    }
    previous = current;
  }

  const distance = previous[shorter.length];
  return (longer.length - distance) / longer.length;
}
/**
 * Reduce a slug to its significant words: split on '-', keep words longer
 * than three characters, sort them, and join them with '-' again.
 *
 * @param {string} slug
 * @returns {string} Sorted keyword string; '' when no word is long enough.
 */
function extractKeywords(slug) {
  const keywords = [];
  for (const word of slug.split('-')) {
    if (word.length > 3) keywords.push(word);
  }
  keywords.sort();
  return keywords.join('-');
}
/**
 * Pair English items with German items by textual similarity.
 *
 * For every EN item (in input order) the best still-unused DE item is chosen
 * greedily. The score is a weighted sum:
 *   40% slug keywords, 40% title text, 20% first 200 characters of content.
 * A pair is only accepted when its score is strictly above `threshold`.
 *
 * When both slugs contain no keyword (no word longer than three characters),
 * the raw slugs are compared instead. Comparing the two empty keyword strings
 * would report a perfect match for unrelated short slugs.
 *
 * @param {Array<object>} itemsEN - Items with `id`, `slug` and optional
 *   `titleHtml` / `contentHtml`.
 * @param {Array<object>} itemsDE - Same shape as `itemsEN`.
 * @param {number} [threshold=0.6] - Minimum combined score (exclusive).
 * @returns {Array<{translationKey: string, en: *, de: *, score: number,
 *   enSlug: string, deSlug: string}>} One entry per accepted pair.
 */
function findTranslationPairs(itemsEN, itemsDE, threshold = 0.6) {
  if (itemsEN.length === 0 || itemsDE.length === 0) return [];

  const toPlainText = (html) => html.replace(/<[^>]*>/g, '').toLowerCase();

  // Everything needed for scoring, derived once per item instead of once per
  // EN×DE combination. `null` marks a missing title/content.
  const profile = (item) => ({
    item,
    keywords: extractKeywords(item.slug),
    title: item.titleHtml ? toPlainText(item.titleHtml) : null,
    preview: item.contentHtml ? toPlainText(item.contentHtml.substring(0, 200)) : null,
  });

  const candidatesDE = itemsDE.map(profile);
  const usedDE = new Set();
  const pairs = [];

  for (const enItem of itemsEN) {
    const en = profile(enItem);
    let bestMatch = null;
    let bestScore = 0;

    for (const de of candidatesDE) {
      if (usedDE.has(de.item.id)) continue;

      // Strategy 1: keyword similarity (raw slugs when neither has keywords).
      const keywordScore =
        en.keywords === '' && de.keywords === ''
          ? similarity(enItem.slug, de.item.slug)
          : similarity(en.keywords, de.keywords);

      // Strategy 2: title similarity, when both sides have a title.
      const titleScore =
        en.title !== null && de.title !== null ? similarity(en.title, de.title) : 0;

      // Strategy 3: content preview similarity, when both sides have content.
      const contentScore =
        en.preview !== null && de.preview !== null ? similarity(en.preview, de.preview) : 0;

      const combinedScore = keywordScore * 0.4 + titleScore * 0.4 + contentScore * 0.2;
      if (combinedScore > bestScore && combinedScore > threshold) {
        bestScore = combinedScore;
        bestMatch = de.item;
      }
    }

    if (bestMatch) {
      usedDE.add(bestMatch.id);
      pairs.push({
        translationKey: `${enItem.slug}`,
        en: enItem.id,
        de: bestMatch.id,
        score: bestScore,
        enSlug: enItem.slug,
        deSlug: bestMatch.slug,
      });
    }
  }

  return pairs;
}
/**
 * Build an improved EN↔DE translation mapping for the latest export.
 *
 * Loads the per-locale JSON files for pages, posts, products and product
 * categories, pairs them with findTranslationPairs(), writes the result to
 * `translation-mapping-improved.json` inside the export directory and prints
 * a summary.
 *
 * Unmatched counts are computed per content type as `items - pairs`. Pooling
 * the IDs of all types into a single set miscounts as soon as two types share
 * an ID.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔧 Improving Translation Mapping');
  console.log('================================\n');

  // Best-effort loader: a missing file simply means "no items of that kind".
  // Any other failure (e.g. invalid JSON) is reported but not fatal.
  const loadJSON = (file) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      if (e.code !== 'ENOENT') {
        console.warn(`⚠️ Could not load ${file}: ${e.message}`);
      }
      return [];
    }
  };

  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');

  console.log('📊 Content loaded:');
  console.log(` Pages: ${pagesEN.length} EN, ${pagesDE.length} DE`);
  console.log(` Posts: ${postsEN.length} EN, ${postsDE.length} DE`);
  console.log(` Products: ${productsEN.length} EN, ${productsDE.length} DE`);
  console.log(` Categories: ${categoriesEN.length} EN, ${categoriesDE.length} DE\n`);

  // Find pairs (products use a stricter threshold than the other types).
  console.log('🔍 Finding translation pairs...\n');
  const pagePairs = findTranslationPairs(pagesEN, pagesDE, 0.5);
  const postPairs = findTranslationPairs(postsEN, postsDE, 0.5);
  const productPairs = findTranslationPairs(productsEN, productsDE, 0.6);
  const categoryPairs = findTranslationPairs(categoriesEN, categoriesDE, 0.5);

  // Build mapping: translationKey -> { en, de, score } per content type.
  const toMapping = (pairs) => {
    const result = {};
    pairs.forEach((pair) => {
      result[pair.translationKey] = { en: pair.en, de: pair.de, score: pair.score };
    });
    return result;
  };
  const mapping = {
    pages: toMapping(pagePairs),
    posts: toMapping(postPairs),
    products: toMapping(productPairs),
    productCategories: toMapping(categoryPairs),
  };

  // Save improved mapping next to the raw export.
  const outputPath = path.join(exportDir, 'translation-mapping-improved.json');
  fs.writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  // Summary
  console.log('✅ Translation Mapping Complete\n');
  console.log('Pairs found:');
  console.log(` Pages: ${pagePairs.length}`);
  console.log(` Posts: ${postPairs.length}`);
  console.log(` Products: ${productPairs.length}`);
  console.log(` Categories: ${categoryPairs.length}`);
  console.log(` Total: ${pagePairs.length + postPairs.length + productPairs.length + categoryPairs.length}\n`);

  // Show some examples
  if (postPairs.length > 0) {
    console.log('📝 Sample Post Pairs:');
    postPairs.slice(0, 3).forEach((pair) => {
      console.log(` ${pair.enSlug} (${pair.score.toFixed(2)})`);
      console.log(`${pair.deSlug}`);
      console.log('');
    });
  }
  if (productPairs.length > 0) {
    console.log('📦 Sample Product Pairs:');
    productPairs.slice(0, 3).forEach((pair) => {
      console.log(` ${pair.enSlug} (${pair.score.toFixed(2)})`);
      console.log(`${pair.deSlug}`);
      console.log('');
    });
  }

  // Each pair uses exactly one EN item and one distinct DE item of its own
  // type, so the number left over is simply items minus pairs.
  const countUnmatched = (items, pairs) => items.length - pairs.length;
  const unmatchedEN = {
    pages: countUnmatched(pagesEN, pagePairs),
    posts: countUnmatched(postsEN, postPairs),
    products: countUnmatched(productsEN, productPairs),
    categories: countUnmatched(categoriesEN, categoryPairs),
  };
  const unmatchedDE = {
    pages: countUnmatched(pagesDE, pagePairs),
    posts: countUnmatched(postsDE, postPairs),
    products: countUnmatched(productsDE, productPairs),
    categories: countUnmatched(categoriesDE, categoryPairs),
  };

  console.log('🔍 Unmatched Items (may need manual review):');
  console.log(` EN: ${unmatchedEN.pages} pages, ${unmatchedEN.posts} posts, ${unmatchedEN.products} products, ${unmatchedEN.categories} categories`);
  console.log(` DE: ${unmatchedDE.pages} pages, ${unmatchedDE.posts} posts, ${unmatchedDE.products} products, ${unmatchedDE.categories} categories`);
  console.log('\n💾 File saved:', outputPath);
  console.log('\n💡 Next steps:');
  console.log(' 1. Review the improved mapping for accuracy');
  console.log(' 2. Manually add any missing pairs');
  console.log(' 3. Use this mapping for Next.js i18n implementation');
}
// Build the mapping only when this file is executed directly, not when it is required.
if (require.main === module) {
main();
}

View File

@@ -1,563 +0,0 @@
#!/usr/bin/env node
/**
* WordPress → Next.js Data Processing Pipeline
* Transforms raw WordPress data into Next.js compatible format
*/
const fs = require('fs');
const path = require('path');
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');

// Make sure the output directory exists before any processed file is written.
const processedDirMissing = !fs.existsSync(PROCESSED_DIR);
if (processedDirMissing) {
  fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
/**
 * Locate the most recent raw export directory inside RAW_DIR.
 *
 * Sub-directories are compared by name; the lexicographically greatest one is
 * treated as the latest (presumably the names are timestamps — confirm).
 *
 * @returns {string} Absolute path of the latest export directory.
 * @throws {Error} When RAW_DIR does not exist or contains no sub-directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`No raw data directory found: ${RAW_DIR}`);
  }

  const dirs = fs
    .readdirSync(RAW_DIR)
    .filter((name) => fs.statSync(path.join(RAW_DIR, name)).isDirectory())
    .sort();

  // Without this guard path.join() would receive undefined and fail with an
  // unrelated-looking TypeError.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }
  return path.join(RAW_DIR, dirs[dirs.length - 1]);
}
/**
 * Clean WordPress/WPBakery markup for rendering.
 *
 * - Removes <script> elements and inline `on…=` handlers (double-quoted,
 *   single-quoted and unquoted values).
 * - Turns the structural shortcodes into divs:
 *     [vc_row] / [vc_row_inner]       -> <div class="vc-row">
 *     [vc_column] / [vc_column_inner] -> <div class="vc-column">
 *     [vc_column_text]                -> <div class="vc-text">
 *   and their closing tags into </div>, so the divs stay balanced.
 * - Unwraps [nectar_cta]…[/nectar_cta], drops every other shortcode.
 * - Removes empty <p>/<div> elements and collapses whitespace.
 *
 * This is a regex-based best-effort cleanup, not a full HTML sanitizer.
 *
 * @param {string} html - Raw markup; falsy values are accepted.
 * @returns {string} Cleaned markup, or '' when `html` is falsy.
 */
function sanitizeHTML(html) {
  if (!html) return '';
  let sanitized = html;

  // Security: scripts and inline event handlers.
  sanitized = sanitized.replace(/<script.*?>.*?<\/script>/gis, '');
  sanitized = sanitized.replace(/\son\w+=(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Structural shortcodes. The lookahead pins the exact tag name, so
  // "vc_column" no longer swallows "vc_column_text", and the *_inner variants
  // are converted together with their closing tags.
  sanitized = sanitized.replace(/\[vc_column_text(?=[\s\]])[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?(?=[\s\]])[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?(?=[\s\]])[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Nectar shortcodes: keep the text inside [nectar_cta] blocks, drop the rest.
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcodes.
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs.
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure.
  sanitized = sanitized.replace(/\s+/g, ' ').trim();
  return sanitized;
}
// Process excerpts specifically to handle shortcodes comprehensively.
//
// excerptHtml: raw excerpt markup (may be null/undefined/empty).
// Returns a single-line HTML string ('' for falsy input) in which
//   1. typographic HTML entities / characters are normalised,
//   2. known WPBakery / Nectar shortcodes are converted to HTML,
//   3. every other [shortcode] is removed.
//
// NOTE(review): the entity patterns were reconstructed from the per-line
// comments of the previous revision (the literal "&#NNNN;" / "&name;" text had
// been lost); confirm the list against the raw export data.
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  const lookup = (table, key, fallback) =>
    (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : fallback);

  // Numeric entities (&#NNNN;) commonly found in WordPress raw data
  const numericEntities = {
    8221: '"', // Right double quote
    8220: '"', // Left double quote
    8222: ',', // Low double quote (mapped to a comma, as before)
    8223: '"', // High double quote
    8216: "'", // Left single quote
    8217: "'", // Right single quote
    8211: '-', // En dash
    8212: '\u2014', // Em dash
    8230: '\u2026', // Ellipsis
    8243: '"', // Inches/Prime
    8242: "'", // Feet/Prime
    8218: ',', // Single low quote
    8219: '`', // Single high reversed quote
    8226: '\u2022', // Bullet
    8364: '\u20AC', // Euro
    34: '"',
    39: "'"
  };

  // Literal Unicode characters (from rendered content)
  const literalChars = {
    '\u201D': '"', // Right double quote
    '\u201C': '"', // Left double quote
    '\u201E': ',', // Low double quote
    '\u201F': '"', // High double quote
    '\u2018': "'", // Left single quote
    '\u2019': "'", // Right single quote
    '\u2013': '-', // En dash
    '\u2033': '"', // Inches/Prime
    '\u2032': "'" // Feet/Prime
  };

  // Named HTML entities
  const namedEntities = {
    quot: '"',
    apos: "'",
    lsquo: "'",
    rsquo: "'",
    ldquo: '"',
    rdquo: '"',
    ndash: '-',
    mdash: '\u2014',
    hellip: '\u2026',
    bull: '\u2022',
    euro: '\u20AC'
  };

  // Entities must be decoded first: shortcode attributes in the raw data use
  // entity-encoded quotes, and the attribute patterns below expect plain ".
  let processed = excerptHtml
    .replace(/&#0*(\d+);/g, (entity, code) => lookup(numericEntities, code, entity))
    .replace(/[\u2013\u2018\u2019\u201C-\u201F\u2032\u2033]/g, (ch) => literalChars[ch])
    .replace(/&([a-zA-Z]+);/g, (entity, name) => lookup(namedEntities, name, entity));

  // Shortcodes rendered as a bracketed placeholder. They are swapped for a
  // bracket-free token first, otherwise the generic "[...]" removal and the
  // empty-div clean-up further down would erase the placeholder again.
  const placeholders = [
    [/\[vc_gallery\b[^\]]*\]/gi, '<div class="vc-gallery">[Gallery]</div>'],
    [/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>'],
    [/\[nectar_gmap\b[^\]]*\]/gi, '<div class="nectar-gmap">[Google Map]</div>']
  ];
  placeholders.forEach(([pattern], index) => {
    processed = processed.replace(pattern, `\u0000${index}\u0000`);
  });

  const rowFlags = [
    ['full_width_background', 'full-width-bg'],
    ['in_container', 'in-container'],
    ['full_width_content', 'full-width-content']
  ];
  const columnWidths = [
    ['1/1', 'col-1-1'],
    ['1/2', 'col-1-2'],
    ['1/3', 'col-1-3'],
    ['2/3', 'col-2-3'],
    ['1/4', 'col-1-4'],
    ['3/4', 'col-3-4'],
    ['5/12', 'col-5-12'],
    ['7/12', 'col-7-12']
  ];
  // Base class plus one extra class per rule whose needle occurs in attrs
  const classList = (base, attrs, rules) =>
    [base]
      .concat(rules.filter(([needle]) => attrs.includes(needle)).map(([, cls]) => cls))
      .join(' ');

  // Every opening pattern ends its tag name with \b so that e.g. the vc_row
  // rule no longer swallows [vc_row_inner] and the nectar_icon_list rule no
  // longer swallows [nectar_icon_list_item].
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) =>
      `<div class="${classList('vc-row', attrs, rowFlags)}">`)
    .replace(/\[\/vc_row\]/gi, '</div>')
    // vc_column - convert to div with classes
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) =>
      `<div class="${classList('vc-column', attrs, columnWidths)}">`)
    .replace(/\[\/vc_column\]/gi, '</div>')
    // vc_column_text - convert to div
    .replace(/\[vc_column_text\b([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')
    // nectar_cta - convert to link
    .replace(/\[nectar_cta\b([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')
    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text\b([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')
    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text\b([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')
    // nectar_icon_list_item - convert to li
    .replace(/\[nectar_icon_list_item\b([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')
    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')
    // nectar_btn - convert to link
    .replace(/\[nectar_btn\b([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')
    // split_line_heading - convert to heading
    .replace(/\[split_line_heading\b([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')
    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner\b([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')
    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner\b([^\]]*)\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')
    // divider - convert to hr
    .replace(/\[divider\b([^\]]*)\]/gi, '<hr class="divider" />');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up any HTML that might be broken
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  processed = processed.replace(/\s+/g, ' ').trim();

  // Finally put the bracketed placeholders back
  return processed.replace(/\u0000(\d)\u0000/g, (token, index) => placeholders[index][1]);
}
// Extract a plain-text excerpt from HTML content.
//
// content:   HTML string; null/undefined/empty yields ''.
// maxLength: maximum number of UTF-16 code units kept before '...' is appended.
// Returns the tag-stripped text, truncated to maxLength with a trailing '...'
// when it was longer.
function generateExcerpt(content, maxLength = 200) {
  if (!content) return '';
  const text = String(content).replace(/<[^>]*>/g, '');
  if (text.length <= maxLength) return text;

  let cut = text.substring(0, maxLength);
  // Do not leave half of a surrogate pair (e.g. an emoji) at the cut point
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut = cut.slice(0, -1);
  }
  return cut + '...';
}
// Process pages.
// Produces one record per page: all English pages first, then all German
// pages. The page's own slug is used both as translationKey and as the lookup
// key into translationMapping.pages; a hit links the record to its
// counterpart id in the other locale, a miss leaves `translation` null.
function processPages(pagesEN, pagesDE, translationMapping) {
  const stripTags = (markup) => markup.replace(/<[^>]*>/g, '');

  const toRecord = (page, locale, pathPrefix, otherLocale) => {
    const slug = page.slug;
    const counterpart = translationMapping.pages[slug];
    return {
      id: page.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      path: `${pathPrefix}/${slug}`,
      title: stripTags(page.titleHtml),
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      // Fall back to an excerpt generated from the content when none is left
      excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = pagesEN.map((page) => toRecord(page, 'en', '', 'de'));
  const german = pagesDE.map((page) => toRecord(page, 'de', '/de', 'en'));
  return english.concat(german);
}
// Process posts.
// Same record layout as pages plus `datePublished`; English posts live under
// /blog/<slug>, German posts under /de/blog/<slug>. English records come
// first in the returned array.
function processPosts(postsEN, postsDE, translationMapping) {
  const stripTags = (markup) => markup.replace(/<[^>]*>/g, '');

  const toRecord = (post, locale, pathPrefix, otherLocale) => {
    const slug = post.slug;
    const counterpart = translationMapping.posts[slug];
    return {
      id: post.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      path: `${pathPrefix}/blog/${slug}`,
      title: stripTags(post.titleHtml),
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = postsEN.map((post) => toRecord(post, 'en', '', 'de'));
  const german = postsDE.map((post) => toRecord(post, 'de', '/de', 'en'));
  return english.concat(german);
}
// Process products.
// Copies the exported product fields into the output record, sanitises the
// long description, and links each product to its counterpart in the other
// locale via translationMapping.products[<own slug>]. English records first.
function processProducts(productsEN, productsDE, translationMapping) {
  const toRecord = (product, locale, pathPrefix, otherLocale) => {
    const slug = product.slug;
    const counterpart = translationMapping.products[slug];
    return {
      id: product.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      path: `${pathPrefix}/product/${slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = productsEN.map((product) => toRecord(product, 'en', '', 'de'));
  const german = productsDE.map((product) => toRecord(product, 'de', '/de', 'en'));
  return english.concat(german);
}
// Process product categories.
// Emits one record per category (English first, then German) and links it to
// the other locale through translationMapping.productCategories[<own slug>].
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  const toRecord = (category, locale, pathPrefix, otherLocale) => {
    const slug = category.slug;
    const counterpart = translationMapping.productCategories[slug];
    return {
      id: category.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      name: category.name,
      path: `${pathPrefix}/product-category/${slug}`,
      description: category.description,
      count: category.count,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = categoriesEN.map((category) => toRecord(category, 'en', '', 'de'));
  const german = categoriesDE.map((category) => toRecord(category, 'de', '/de', 'en'));
  return english.concat(german);
}
// Process media manifest.
// Maps every raw media item to the trimmed-down manifest entry, adding the
// site-local path (/media/<filename>) and renaming mime_type to mimeType.
function processMedia(media) {
  return media.map(({ id, filename, url, alt, width, height, mime_type: mimeType }) => ({
    id,
    filename,
    url,
    localPath: `/media/${filename}`,
    alt,
    width,
    height,
    mimeType
  }));
}
// Generate asset map for URL replacement.
// Returns an object keyed by the original media URL whose value is the local
// /media/<filename> path; items without a (truthy) url are left out.
function generateAssetMap(media) {
  return media.reduce((assetMap, item) => {
    if (item.url) {
      assetMap[item.url] = `/media/${item.filename}`;
    }
    return assetMap;
  }, {});
}
// Main processing function.
// Loads the raw JSON files of the latest export, transforms every content
// type, writes the combined and the per-type JSON files into PROCESSED_DIR
// and prints a summary.
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load raw data. A missing/unparsable file is reported and replaced by
  // `fallback`, which must have the shape the consumer expects (array for
  // lists, object for keyed data) so later property access cannot crash.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // Guarantee every section the process* functions index into exists, even
  // when the mapping file is missing or lacks a section.
  const translationMapping = Object.assign(
    { pages: {}, posts: {}, products: {}, productCategories: {} },
    loadJSON('translation-mapping-improved.json', {})
  );
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Save processed data
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));

  // Save individual files for easier access
  const partFiles = [
    ['pages.json', pages],
    ['posts.json', posts],
    ['products.json', products],
    ['categories.json', categories],
    ['media.json', processedMedia],
    ['asset-map.json', assetMap]
  ];
  const partPaths = partFiles.map(([name, data]) => {
    const target = path.join(PROCESSED_DIR, name);
    fs.writeFileSync(target, JSON.stringify(data, null, 2));
    return target;
  });

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);
  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  partPaths.forEach((target, index) => {
    // The last entry is followed by a blank line
    const suffix = index === partPaths.length - 1 ? '\n' : '';
    console.log(` ${target}${suffix}`);
  });

  // Sample data
  if (pages.length > 0) {
    console.log('📄 Sample Page:');
    console.log(` Title: ${pages[0].title}`);
    console.log(` Path: ${pages[0].path}`);
    console.log(` Locale: ${pages[0].locale}`);
    console.log(` Translation: ${pages[0].translation ? 'Yes' : 'No'}\n`);
  }
  if (posts.length > 0) {
    console.log('📝 Sample Post:');
    console.log(` Title: ${posts[0].title}`);
    console.log(` Path: ${posts[0].path}`);
    console.log(` Locale: ${posts[0].locale}`);
    console.log(` Date: ${posts[0].datePublished}\n`);
  }
  console.log('💡 Next: Ready for Next.js project setup!');
}
// Run the pipeline only when this file is the process entry point (executed
// directly with node), not when it is loaded by another module via require().
if (require.main === module) {
main();
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,842 +0,0 @@
#!/usr/bin/env node
/**
* WordPress → Next.js Data Processing Pipeline with WooCommerce API Integration
* Transforms raw WordPress data into Next.js compatible format with prices and variations
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
const dotenv = require('dotenv');
// Load environment variables from .env file
dotenv.config();
// Directory layout: <script dir>/../data/{raw,processed}
const DATA_DIR = path.join(__dirname, '..', 'data');
const [RAW_DIR, PROCESSED_DIR] = ['raw', 'processed'].map((name) => path.join(DATA_DIR, name));

// Make sure the output directory is present before anything writes to it
if (fs.existsSync(PROCESSED_DIR) === false) {
  fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
// WooCommerce API configuration, read from the environment (populated from
// .env by dotenv.config() above). Only the URL has a built-in default; the
// consumer key/secret must be provided.
const WOOCOMMERCE_CONFIG = {
url: process.env.WOOCOMMERCE_URL || 'https://klz-cables.com',
consumerKey: process.env.WOOCOMMERCE_CONSUMER_KEY,
consumerSecret: process.env.WOOCOMMERCE_CONSUMER_SECRET,
apiVersion: 'wc/v3'
};
// Fail fast at module load time: without both credentials the process exits
// with status 1. Only whether each value is present is printed, never the
// value itself.
if (!WOOCOMMERCE_CONFIG.consumerKey || !WOOCOMMERCE_CONFIG.consumerSecret) {
console.error('❌ Missing WooCommerce credentials in environment');
console.error('WOOCOMMERCE_CONSUMER_KEY:', WOOCOMMERCE_CONFIG.consumerKey ? '✓ Loaded' : '❌ Missing');
console.error('WOOCOMMERCE_CONSUMER_SECRET:', WOOCOMMERCE_CONFIG.consumerSecret ? '✓ Loaded' : '❌ Missing');
process.exit(1);
} else {
console.log('✅ WooCommerce credentials loaded successfully');
}
// Rate limiting configuration.
// NOTE(review): in the part of the file reviewed only delayBetweenCalls and
// timeout are read; maxConcurrent is not referenced there - confirm.
const RATE_LIMIT = {
maxConcurrent: 2, // Max concurrent API calls
delayBetweenCalls: 100, // ms between calls
timeout: 30000 // 30 second timeout
};
// API call queue and tracking.
// NOTE(review): apiQueue and activeRequests are not referenced in the part of
// the file reviewed - possibly unused, confirm before removing.
let apiQueue = [];
let activeRequests = 0;
// Module-wide request counters; updated by WooCommerceAPI and printed after
// the product run.
let apiStats = {
total: 0,
success: 0,
failed: 0,
retries: 0
};
/**
 * WooCommerce API Client
 *
 * Thin wrapper around `https.request` that talks to
 * `<config.url>/wp-json/<config.apiVersion>` using HTTP Basic authentication
 * built from `config.consumerKey` / `config.consumerSecret`.
 * Request outcomes are recorded in the module-level `apiStats` counters.
 */
class WooCommerceAPI {
  /**
   * @param {{url: string, apiVersion: string, consumerKey: string, consumerSecret: string}} config
   */
  constructor(config) {
    this.config = config;
    this.baseURL = `${config.url}/wp-json/${config.apiVersion}`;
  }

  /**
   * Make authenticated API request
   *
   * @param {string} endpoint - Path (and query) appended to the base URL.
   * @param {string} [method='GET'] - HTTP method.
   * @param {*} [data=null] - JSON-serialisable body; only sent for POST/PUT.
   * @returns {Promise<*>} Resolves with the parsed JSON body of a 2xx response.
   *   Rejects with an Error on a non-2xx status, an unparsable body, a network
   *   error or a timeout (`RATE_LIMIT.timeout`).
   */
  request(endpoint, method = 'GET', data = null) {
    return new Promise((resolve, reject) => {
      const url = new URL(`${this.baseURL}${endpoint}`);

      // Add authentication
      const auth = Buffer.from(`${this.config.consumerKey}:${this.config.consumerSecret}`).toString('base64');
      const options = {
        hostname: url.hostname,
        port: url.port || 443,
        path: url.pathname + url.search,
        method,
        headers: {
          'Authorization': `Basic ${auth}`,
          'Content-Type': 'application/json',
          'User-Agent': 'KLZ-Data-Processor/1.0'
        },
        timeout: RATE_LIMIT.timeout
      };

      // Log the request
      console.log(`🌐 API Request: ${method} ${url.pathname}`);

      // A request can report more than one terminal event (a timeout is
      // followed by the error emitted by destroy()); count and settle once.
      let settled = false;
      const succeed = (value) => {
        if (settled) return;
        settled = true;
        apiStats.total++;
        apiStats.success++;
        resolve(value);
      };
      const fail = (error) => {
        if (settled) return;
        settled = true;
        apiStats.total++;
        apiStats.failed++;
        reject(error);
      };

      const req = https.request(options, (res) => {
        // Decode as UTF-8 at stream level so a multi-byte character split
        // across two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('error', fail);
        res.on('end', () => {
          if (res.statusCode >= 200 && res.statusCode < 300) {
            let parsed;
            try {
              parsed = JSON.parse(body);
            } catch (e) {
              fail(new Error(`JSON parse error: ${e.message}`));
              return;
            }
            succeed(parsed);
          } else {
            fail(new Error(`HTTP ${res.statusCode}: ${body}`));
          }
        });
      });

      req.on('error', fail);
      req.on('timeout', () => {
        fail(new Error('Request timeout'));
        req.destroy();
      });

      // Add request body for POST/PUT
      if (data && (method === 'POST' || method === 'PUT')) {
        req.write(JSON.stringify(data));
      }
      req.end();
    });
  }

  /**
   * Get product by ID
   *
   * @param {number|string} productId
   * @returns {Promise<Object|null>} The product, or null when the request
   *   failed (the error is logged, not thrown).
   */
  async getProduct(productId) {
    try {
      return await this.request(`/products/${productId}`);
    } catch (error) {
      console.error(`❌ Failed to fetch product ${productId}:`, error.message);
      return null;
    }
  }

  /**
   * Get product variations
   *
   * Sends a single request with `per_page=100`; no further pages are fetched.
   *
   * @param {number|string} productId
   * @returns {Promise<Array>} The variations, or [] when the request failed
   *   (the error is logged, not thrown).
   */
  async getProductVariations(productId) {
    try {
      return await this.request(`/products/${productId}/variations?per_page=100`);
    } catch (error) {
      console.error(`❌ Failed to fetch variations for product ${productId}:`, error.message);
      return [];
    }
  }

  /**
   * Process API queue with rate limiting
   *
   * Runs the tasks strictly one after another, waiting
   * `RATE_LIMIT.delayBetweenCalls` ms between tasks. A task whose `fn` throws
   * or rejects is attempted up to 3 times with a linearly growing pause
   * (1s, 2s) between attempts.
   *
   * @param {Array<{label: string, fn: function(): Promise<*>}>} tasks
   * @param {function(number, number, string): void} [progressCallback] -
   *   Called before every attempt with (1-based index, total, label).
   * @returns {Promise<Array>} One entry per task, in task order; null for a
   *   task that failed on every attempt.
   */
  async processQueue(tasks, progressCallback) {
    const MAX_ATTEMPTS = 3;
    const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const results = [];

    for (let i = 0; i < tasks.length; i++) {
      // Wait for rate limit
      if (i > 0) {
        await wait(RATE_LIMIT.delayBetweenCalls);
      }

      const task = tasks[i];
      let attempt = 0;
      let success = false;
      let result = null;

      // Retry logic
      while (attempt < MAX_ATTEMPTS && !success) {
        try {
          if (progressCallback) {
            progressCallback(i + 1, tasks.length, task.label);
          }
          result = await task.fn();
          success = true;
          if (attempt > 0) {
            apiStats.retries++;
            console.log(`✅ Retry successful for: ${task.label}`);
          }
        } catch (error) {
          attempt++;
          if (attempt < MAX_ATTEMPTS) {
            console.log(`⚠️ Retry ${attempt}/${MAX_ATTEMPTS} for: ${task.label} - ${error.message}`);
            await wait(1000 * attempt); // Linear backoff: 1s, then 2s
          } else {
            console.log(`❌ Failed after ${MAX_ATTEMPTS} attempts: ${task.label} - ${error.message}`);
          }
        }
      }
      results.push(result);
    }
    return results;
  }
}
/**
 * Decode HTML entities in text - comprehensive handling
 *
 * Decodes decimal (`&#8211;`) and hexadecimal (`&#x2013;`) numeric entities
 * plus the named entities listed below, in a single pass so that already
 * decoded output is never decoded a second time. Unknown or out-of-range
 * entities are left untouched. `&lt;` / `&gt;` are deliberately not decoded so
 * the result cannot gain new markup.
 *
 * NOTE(review): the entity names were reconstructed from the replacement
 * characters of the previous revision (the keys themselves had been lost);
 * the `minus` entry in particular is a best guess - confirm.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Decoded text; '' for null/undefined/empty input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  // Typographic quotes and the en dash are normalised to ASCII, as before
  const namedEntities = {
    nbsp: ' ',
    lsquo: "'",
    rsquo: "'",
    ldquo: '"',
    rdquo: '"',
    Prime: '"',
    ndash: '-',
    mdash: '\u2014',
    hellip: '\u2026',
    bull: '\u2022',
    euro: '\u20AC',
    copy: '\u00A9',
    reg: '\u00AE',
    trade: '\u2122',
    deg: '\u00B0',
    plusmn: '\u00B1',
    times: '\u00D7',
    divide: '\u00F7',
    minus: '\u2212',
    cent: '\u00A2',
    pound: '\u00A3',
    yen: '\u00A5',
    sect: '\u00A7',
    para: '\u00B6',
    micro: '\u00B5',
    laquo: '\u00AB',
    raquo: '\u00BB',
    middot: '\u00B7',
    amp: '&',
    quot: '"',
    apos: "'"
  };

  // String.fromCodePoint handles code points above U+FFFF (emoji etc.), which
  // String.fromCharCode would truncate to 16 bits.
  const fromCodePoint = (codePoint, fallback) =>
    (Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : fallback);

  return text.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g,
    (entity, dec, hex, name) => {
      if (dec !== undefined) return fromCodePoint(parseInt(dec, 10), entity);
      if (hex !== undefined) return fromCodePoint(parseInt(hex, 16), entity);
      // If it's not in our map, leave it as is
      return Object.prototype.hasOwnProperty.call(namedEntities, name)
        ? namedEntities[name]
        : entity;
    }
  );
}
/**
 * HTML sanitization - preserve content but clean dangerous elements
 *
 * NOTE: this is a regex clean-up for trusted CMS exports, not a complete XSS
 * filter (e.g. `javascript:` URLs are not touched).
 *
 * @param {string} html - Raw WordPress/WPBakery markup (may be null/empty).
 * @returns {string} Cleaned single-line markup; '' for falsy input.
 */
function sanitizeHTML(html) {
  if (!html) return '';
  let sanitized = html;

  // Remove script tags and inline handlers (security).
  // Handlers may be double-quoted, single-quoted or unquoted.
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"']+)/gi, '');

  // Text blocks first: [vc_column_text] shares the "vc_column" prefix, so it
  // must not be picked up by the generic column rule below.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // Replace vc_row/vc_column (and their *_inner variants) with divs. Opening
  // AND closing tags of the inner variants are handled so every generated
  // <div> gets its matching </div>.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Handle Nectar shortcodes: keep the inner text of [nectar_cta], drop the rest
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcodes (any [...] on a single line)
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs (single pass)
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  sanitized = sanitized.replace(/\s+/g, ' ').trim();
  return sanitized;
}
/**
 * Process products with WooCommerce API integration
 *
 * For every exported product (English and German) two API tasks are queued:
 * one for price/stock data and one for the variations. The tasks call
 * `api.request()` directly so that a failing request rejects and the retry
 * logic of `processQueue` can act on it (the `getProduct*` helpers swallow
 * errors, which made retries impossible).
 *
 * When the API yields nothing for a product, the values already present in
 * the export (`regularPrice`, `salePrice`, `currency`, `stockStatus`,
 * `variations`) are kept, and the previous hard-coded defaults are used only
 * as a last resort.
 *
 * NOTE(review): `currency` is read from the product response; whether the
 * endpoint returns that field is not visible here - confirm.
 *
 * @param {Array<Object>} productsEN - Exported English products.
 * @param {Array<Object>} productsDE - Exported German products.
 * @param {{products: Object}} translationMapping - Mapping whose `products`
 *   section is indexed by product slug.
 * @returns {Promise<Array<Object>>} Product records, English first.
 */
async function processProductsWithWooCommerce(productsEN, productsDE, translationMapping) {
  const api = new WooCommerceAPI(WOOCOMMERCE_CONFIG);
  console.log(`\n🚀 Starting WooCommerce API integration for ${productsEN.length} products...`);

  const localeRuns = [
    { locale: 'en', tag: 'EN', pathPrefix: '', otherLocale: 'de', products: productsEN },
    { locale: 'de', tag: 'DE', pathPrefix: '/de', otherLocale: 'en', products: productsDE }
  ];

  // Create tasks for price and variation fetching
  const tasks = [];
  localeRuns.forEach(({ locale, tag, products }) => {
    products.forEach((product) => {
      tasks.push({
        label: `${product.name} (${tag}) - Prices`,
        fn: async () => {
          const wooProduct = await api.request(`/products/${product.id}`);
          if (!wooProduct) return null;
          return {
            productId: product.id,
            locale,
            regularPrice: wooProduct.regular_price || '',
            salePrice: wooProduct.sale_price || '',
            currency: wooProduct.currency || 'EUR',
            stockStatus: wooProduct.stock_status || 'instock'
          };
        }
      });
      tasks.push({
        label: `${product.name} (${tag}) - Variations`,
        fn: async () => {
          const variations = await api.request(`/products/${product.id}/variations?per_page=100`);
          return {
            productId: product.id,
            locale,
            variations: variations || []
          };
        }
      });
    });
  });

  // Progress callback
  const progressCallback = (current, total, label) => {
    const progress = Math.round((current / total) * 100);
    process.stdout.write(`\r📊 Progress: ${current}/${total} (${progress}%) - ${label}`);
  };

  // Process all tasks
  const results = await api.processQueue(tasks, progressCallback);

  // Clear progress line
  process.stdout.write('\n');

  // Organize results: variation results carry a `variations` array, price
  // results do not; failed tasks are null and skipped.
  const priceData = {};
  const variationData = {};
  results.forEach((result) => {
    if (!result) return;
    const key = `${result.productId}_${result.locale}`;
    if (result.variations) {
      variationData[key] = result.variations;
    } else {
      priceData[key] = {
        regularPrice: result.regularPrice,
        salePrice: result.salePrice,
        currency: result.currency,
        stockStatus: result.stockStatus
      };
    }
  });

  console.log(`\n📈 API Statistics:`);
  console.log(` Total requests: ${apiStats.total}`);
  console.log(` Successful: ${apiStats.success}`);
  console.log(` Failed: ${apiStats.failed}`);
  console.log(` Retries: ${apiStats.retries}`);

  // Build the output records: English products first, then German
  const processed = [];
  localeRuns.forEach(({ locale, pathPrefix, otherLocale, products }) => {
    products.forEach((product) => {
      const key = `${product.id}_${locale}`;
      const priceInfo = priceData[key] || {};
      const translationKey = product.slug;
      const counterpart = translationMapping.products[translationKey];
      processed.push({
        id: product.id,
        translationKey: translationKey,
        locale: locale,
        slug: product.slug,
        path: `${pathPrefix}/product/${product.slug}`,
        name: product.name,
        shortDescriptionHtml: product.shortDescriptionHtml,
        descriptionHtml: sanitizeHTML(product.descriptionHtml),
        images: product.images,
        featuredImage: product.featuredImage,
        sku: product.sku,
        // API value first, then the exported value, then the default
        regularPrice: priceInfo.regularPrice || product.regularPrice || '',
        salePrice: priceInfo.salePrice || product.salePrice || '',
        currency: priceInfo.currency || product.currency || 'EUR',
        stockStatus: priceInfo.stockStatus || product.stockStatus || 'instock',
        categories: product.categories,
        attributes: product.attributes,
        variations: variationData[key] || product.variations || [],
        updatedAt: product.updatedAt,
        translation: counterpart
          ? { locale: otherLocale, id: counterpart[otherLocale] }
          : null
      });
    });
  });
  return processed;
}
/**
 * Process pages
 *
 * Builds one record per page - all English pages first, then all German
 * pages. The plain-text title is the tag-stripped, entity-decoded titleHtml.
 * The page's own slug is the lookup key into `translationMapping.pages`; a hit
 * links the record to the counterpart id in the other locale.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  const toRecord = (page, locale, pathPrefix, otherLocale) => {
    const slug = page.slug;
    const counterpart = translationMapping.pages[slug];
    const plainTitle = decodeHTMLEntities(page.titleHtml.replace(/<[^>]*>/g, ''));
    return {
      id: page.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      path: `${pathPrefix}/${slug}`,
      title: plainTitle,
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      excerptHtml: page.excerptHtml || '',
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = pagesEN.map((page) => toRecord(page, 'en', '', 'de'));
  const german = pagesDE.map((page) => toRecord(page, 'de', '/de', 'en'));
  return english.concat(german);
}
/**
 * Process posts
 *
 * Same record layout as pages plus `datePublished`. English posts are placed
 * under /blog/<slug>, German posts under /de/blog/<slug>; English records
 * come first in the returned array.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  const toRecord = (post, locale, pathPrefix, otherLocale) => {
    const slug = post.slug;
    const counterpart = translationMapping.posts[slug];
    const plainTitle = decodeHTMLEntities(post.titleHtml.replace(/<[^>]*>/g, ''));
    return {
      id: post.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      path: `${pathPrefix}/blog/${slug}`,
      title: plainTitle,
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: post.excerptHtml || '',
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = postsEN.map((post) => toRecord(post, 'en', '', 'de'));
  const german = postsDE.map((post) => toRecord(post, 'de', '/de', 'en'));
  return english.concat(german);
}
/**
 * Process product categories
 *
 * Emits one record per category (English first, then German) and links it to
 * the other locale through `translationMapping.productCategories[<own slug>]`.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  const toRecord = (category, locale, pathPrefix, otherLocale) => {
    const slug = category.slug;
    const counterpart = translationMapping.productCategories[slug];
    return {
      id: category.id,
      translationKey: slug,
      locale: locale,
      slug: slug,
      name: category.name,
      path: `${pathPrefix}/product-category/${slug}`,
      description: category.description,
      count: category.count,
      translation: counterpart
        ? { locale: otherLocale, id: counterpart[otherLocale] }
        : null
    };
  };

  const english = categoriesEN.map((category) => toRecord(category, 'en', '', 'de'));
  const german = categoriesDE.map((category) => toRecord(category, 'de', '/de', 'en'));
  return english.concat(german);
}
/**
 * Process media manifest
 *
 * Converts each raw media record into the manifest shape: the original
 * fields plus a `localPath` under /media/, with `mime_type` exposed as
 * `mimeType`.
 *
 * @param {Array<Object>} media - Raw media records.
 * @returns {Array<Object>} One manifest entry per record, in input order.
 */
function processMedia(media) {
  const toManifestEntry = ({ id, filename, url, alt, width, height, mime_type: mimeType }) => {
    const localPath = `/media/${filename}`;
    return { id, filename, url, localPath, alt, width, height, mimeType };
  };
  return media.map((record) => toManifestEntry(record));
}
/**
 * Generate asset map for URL replacement
 *
 * Builds a lookup from each item's original URL to its local /media/ path.
 * Items without a truthy `url` are left out.
 *
 * @param {Array<Object>} media - Raw media records.
 * @returns {Object} Plain object keyed by original URL.
 */
function generateAssetMap(media) {
  return media.reduce((lookup, entry) => {
    if (!entry.url) {
      return lookup;
    }
    lookup[entry.url] = `/media/${entry.filename}`;
    return lookup;
  }, {});
}
/**
 * Main processing function
 *
 * Loads the raw JSON files of the latest export, processes every content
 * type (products go through processProductsWithWooCommerce), writes the
 * combined and the per-type JSON files into PROCESSED_DIR and prints a
 * summary to the console.
 *
 * @returns {Promise<void>} Resolves once all files are written and logged.
 */
async function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js with WooCommerce Integration');
  console.log('==========================================================\n');

  // Load raw data. A file that cannot be read or parsed is reported and
  // replaced by an empty array so the remaining content is still processed.
  const loadJSON = (file) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return [];
    }
  };
  const translationMapping = loadJSON('translation-mapping.json');
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json');

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Process products with WooCommerce API
  const products = await processProductsWithWooCommerce(productsEN, productsDE, translationMapping);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Save processed data
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));

  // Save individual files for easier access
  fs.writeFileSync(path.join(PROCESSED_DIR, 'pages.json'), JSON.stringify(pages, null, 2));
  fs.writeFileSync(path.join(PROCESSED_DIR, 'posts.json'), JSON.stringify(posts, null, 2));

  // Always write products.json with the processed data
  // Even if WooCommerce data is missing, we still want the base product structure
  fs.writeFileSync(path.join(PROCESSED_DIR, 'products.json'), JSON.stringify(products, null, 2));

  // Report on WooCommerce data quality
  const productsWithPrices = products.filter(p => p.regularPrice).length;
  const productsWithVariations = products.filter(p => p.variations && p.variations.length > 0).length;
  console.log('📊 WooCommerce Data Quality:');
  console.log(` Products with prices: ${productsWithPrices}/${products.length}`);
  console.log(` Products with variations: ${productsWithVariations}/${products.length}`);
  if (productsWithPrices === 0 && productsWithVariations === 0) {
    console.log('⚠️ Warning: No WooCommerce pricing or variation data was retrieved');
    console.log(' Products written with empty price fields\n');
  } else {
    console.log('✅ WooCommerce data integrated successfully\n');
  }

  fs.writeFileSync(path.join(PROCESSED_DIR, 'categories.json'), JSON.stringify(categories, null, 2));
  fs.writeFileSync(path.join(PROCESSED_DIR, 'media.json'), JSON.stringify(processedMedia, null, 2));
  fs.writeFileSync(path.join(PROCESSED_DIR, 'asset-map.json'), JSON.stringify(assetMap, null, 2));

  // Summary
  console.log('\n✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);
  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  console.log(` ${path.join(PROCESSED_DIR, 'pages.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'posts.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'products.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'categories.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'media.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'asset-map.json')}\n`);

  // Sample data
  if (products.length > 0) {
    console.log('📦 Sample Product with WooCommerce Data:');
    // Prefer a product that actually has a price; otherwise show the first one.
    const sampleProduct = products.find(p => p.regularPrice) || products[0];
    // The fallback sample may have no `variations` array at all (the quality
    // report above already allows for that), so never dereference it blindly.
    const variationCount = Array.isArray(sampleProduct.variations)
      ? sampleProduct.variations.length
      : 0;
    console.log(` Name: ${sampleProduct.name}`);
    console.log(` SKU: ${sampleProduct.sku}`);
    console.log(` Price: ${sampleProduct.regularPrice} ${sampleProduct.currency}`);
    console.log(` Sale Price: ${sampleProduct.salePrice || 'N/A'}`);
    console.log(` Variations: ${variationCount}`);
    console.log(` Locale: ${sampleProduct.locale}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup with complete product data!');
}
// Helper function to get latest export directory
/**
 * Return the path of the most recent export inside RAW_DIR.
 *
 * Sub-directory names are compared as strings and the greatest one wins.
 * NOTE(review): this presumably relies on the directories being named by
 * sortable timestamps — confirm against the export script.
 *
 * @returns {string} Absolute path of the latest export directory.
 * @throws {Error} When RAW_DIR does not exist or contains no directories.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`Raw export directory not found: ${RAW_DIR}`);
  }
  const exportNames = fs
    .readdirSync(RAW_DIR)
    .filter((name) => fs.statSync(path.join(RAW_DIR, name)).isDirectory())
    .sort();
  if (exportNames.length === 0) {
    // Without this check path.join() would receive `undefined` and fail
    // with an unrelated-looking argument-type error.
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }
  return path.join(RAW_DIR, exportNames[exportNames.length - 1]);
}
if (require.main === module) {
main().catch(console.error);
}
module.exports = {
processPages,
processPosts,
processProductCategories,
processProductsWithWooCommerce,
processMedia,
generateAssetMap,
decodeHTMLEntities,
sanitizeHTML
};

View File

@@ -1,660 +0,0 @@
#!/usr/bin/env node
/**
* WordPress → Next.js Data Processing Pipeline
* Transforms raw WordPress data into Next.js compatible format
*/
const fs = require('fs');
const path = require('path');
// Data locations, resolved relative to this script's parent directory:
// <parent>/data/raw holds the exports, <parent>/data/processed the output.
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
// Create processed directory
// NOTE: this runs at module load time, i.e. also when the file is merely required.
if (!fs.existsSync(PROCESSED_DIR)) {
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
// Find latest export
/**
 * Return the path of the most recent export inside RAW_DIR.
 *
 * Sub-directory names are compared as strings and the greatest one wins.
 * NOTE(review): this presumably relies on the directories being named by
 * sortable timestamps — confirm against the export script.
 *
 * @returns {string} Absolute path of the latest export directory.
 * @throws {Error} When RAW_DIR does not exist or contains no directories.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`Raw export directory not found: ${RAW_DIR}`);
  }
  const exportNames = fs
    .readdirSync(RAW_DIR)
    .filter((name) => fs.statSync(path.join(RAW_DIR, name)).isDirectory())
    .sort();
  if (exportNames.length === 0) {
    // Without this check path.join() would receive `undefined` and fail
    // with an unrelated-looking argument-type error.
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }
  return path.join(RAW_DIR, exportNames[exportNames.length - 1]);
}
// Decode HTML entities in text - comprehensive handling
/**
 * Decode numeric and common named HTML entities in a string.
 *
 * Numeric entities (decimal and hex) become the character for their code
 * point. Named entities are looked up in a fixed table; typographic quotes
 * and the en dash are deliberately flattened to their ASCII look-alikes,
 * and `&nbsp;` becomes a regular space. Unknown named entities are left
 * untouched.
 *
 * NOTE(review): the keys of this table were unreadable in the reviewed copy
 * (entity literals had been rendered away), so they are reconstructed from
 * the replacement values. One entry had neither a readable key nor value;
 * `&shy;` and `&minus;` are included as the likely candidates, decoded to
 * their standard characters. Please confirm against the original intent.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Decoded text, or '' for empty/missing input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  const namedEntities = {
    '&nbsp;': ' ',
    '&lsquo;': "'",
    '&rsquo;': "'",
    '&ldquo;': '"',
    '&rdquo;': '"',
    '&Prime;': '"', // Double prime (8243)
    '&ndash;': '-',
    '&mdash;': '\u2014',
    '&hellip;': '\u2026',
    '&bull;': '\u2022',
    '&euro;': '\u20AC',
    '&copy;': '\u00A9',
    '&reg;': '\u00AE',
    '&trade;': '\u2122',
    '&deg;': '\u00B0',
    '&plusmn;': '\u00B1',
    '&times;': '\u00D7',
    '&divide;': '\u00F7',
    '&shy;': '\u00AD',
    '&minus;': '\u2212',
    '&cent;': '\u00A2',
    '&pound;': '\u00A3',
    '&yen;': '\u00A5',
    '&sect;': '\u00A7',
    '&para;': '\u00B6',
    '&micro;': '\u00B5',
    '&laquo;': '\u00AB',
    '&raquo;': '\u00BB',
    '&middot;': '\u00B7'
  };

  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF,
  // e.g. emoji; out-of-range references are left as they were.
  const decodeCodePoint = (match, codePoint) =>
    codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;

  return text
    // Numeric entities first (decimal, then hex)
    .replace(/&#(\d+);/g, (match, dec) => decodeCodePoint(match, parseInt(dec, 10)))
    .replace(/&#x([0-9a-fA-F]+);/gi, (match, hex) => decodeCodePoint(match, parseInt(hex, 16)))
    // Named entities in a single pass; unknown names stay as they are
    .replace(/&[a-zA-Z]+;/g, (match) =>
      Object.prototype.hasOwnProperty.call(namedEntities, match) ? namedEntities[match] : match
    )
    // A literal double prime (e.g. produced from &#8243; above) is treated
    // as a straight double quote, matching the table entry.
    .replace(/\u2033/g, '"');
}
// HTML sanitization - preserve content but clean dangerous elements
// Also preserves bg_image attributes for later processing by fix-images.js
/**
 * Strip scripts, inline event handlers and page-builder shortcodes from HTML.
 *
 * WPBakery row/column shortcodes (including their `_inner` variants) become
 * `<div>` wrappers, `[nectar_cta]` blocks keep their inner text, every other
 * shortcode is removed. Runs of whitespace are collapsed to one space.
 *
 * @param {string} html - Raw HTML, possibly containing shortcodes.
 * @returns {string} Sanitized HTML, or '' for empty/missing input.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  // Temporarily preserve bg_image attributes by replacing them with placeholders.
  // Handles both regular quotes and the Unicode right double quote (U+201D).
  const bgImageAttributes = [];
  let sanitized = html.replace(/(bg_image=)(["\u201D])([^"\u201D]*?)["\u201D]/gi, (match) => {
    bgImageAttributes.push(match);
    return `__BG_IMAGE_${bgImageAttributes.length - 1}__`;
  });

  // Remove script tags and inline handlers (security).
  // Handlers may be double- or single-quoted.
  sanitized = sanitized.replace(/<script.*?>.*?<\/script>/gis, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*')/gi, '');

  // Replace vc_row/vc_column (and their _inner variants) with divs to preserve
  // structure. The (?!\w) guard keeps these patterns from swallowing longer
  // names such as vc_column_text, and handling the _inner closers here keeps
  // the generated <div>s balanced.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?(?!\w).*?\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?(?!\w).*?\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Text blocks keep their content inside a div
  sanitized = sanitized.replace(/\[vc_column_text.*?\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // Handle Nectar shortcodes - remove them but keep any text content
  // [nectar_cta] blocks often contain text we want to preserve
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcodes
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure
  sanitized = sanitized.replace(/\s+/g, ' ').trim();

  // Restore bg_image placeholders. A replacer function is used so that "$"
  // sequences inside the preserved attribute are inserted literally.
  bgImageAttributes.forEach((attribute, index) => {
    sanitized = sanitized.replace(`__BG_IMAGE_${index}__`, () => attribute);
  });

  return sanitized;
}
// Process excerpts specifically to handle shortcodes comprehensively
/**
 * Convert page-builder shortcodes in an excerpt into plain HTML.
 *
 * Entities are decoded first (via decodeHTMLEntities), then WPBakery and
 * Nectar shortcodes are mapped to HTML elements, including shortcodes that
 * were cut off at the end of the excerpt. Remaining shortcodes are removed,
 * empty <p>/<div> elements are dropped and whitespace is collapsed.
 * `bg_image="…"` attributes are protected with placeholders during
 * processing and restored at the end.
 *
 * @param {string} excerptHtml - Raw excerpt HTML.
 * @returns {string} Processed excerpt, or '' for empty/missing input.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // First, decode HTML entities to regular characters
  let processed = decodeHTMLEntities(excerptHtml);

  // Temporarily preserve bg_image attributes (regular quotes and U+201D)
  const bgImageAttributes = [];
  processed = processed.replace(/(bg_image=)(["\u201D])([^"\u201D]*?)["\u201D]/gi, (match) => {
    bgImageAttributes.push(match);
    return `__BG_IMAGE_${bgImageAttributes.length - 1}__`;
  });

  // [vc_row …] -> <div> with layout classes; bg_image placeholders found in
  // the attributes are carried over onto the div.
  const rowToDiv = (match, attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    const preservedAttrs = (attrs.match(/__BG_IMAGE_\d+__/g) || []).join(' ');
    return `<div class="${classes.join(' ')}" ${preservedAttrs}>`;
  };

  // [vc_column …] -> <div> with a class for each width fraction it mentions.
  const columnWidths = [
    ['1/1', 'col-1-1'],
    ['1/2', 'col-1-2'],
    ['1/3', 'col-1-3'],
    ['2/3', 'col-2-3'],
    ['1/4', 'col-1-4'],
    ['3/4', 'col-3-4'],
    ['5/12', 'col-5-12'],
    ['7/12', 'col-7-12']
  ];
  const columnToDiv = (match, attrs) => {
    const classes = ['vc-column'];
    columnWidths.forEach(([fraction, className]) => {
      if (attrs.includes(fraction)) classes.push(className);
    });
    return `<div class="${classes.join(' ')}">`;
  };

  // The (?!\w) guards stop a short shortcode name from also matching longer
  // ones that start with it (vc_row vs vc_row_inner, vc_column vs
  // vc_column_text / vc_column_inner, nectar_icon_list vs
  // nectar_icon_list_item); otherwise the dedicated rules below never fire.
  processed = processed
    // vc_row - complete, then truncated (no closing bracket)
    .replace(/\[vc_row(?!\w)([^\]]*)\]/gi, rowToDiv)
    .replace(/\[vc_row(?!\w)([^\]]*)$/gi, rowToDiv)
    .replace(/\[\/vc_row\]/gi, '</div>')
    // vc_column - complete, then truncated at end of excerpt
    .replace(/\[vc_column(?!\w)([^\]]*)\]/gi, columnToDiv)
    .replace(/\[vc_column(?!\w)([^\]]*)$/gi, columnToDiv)
    .replace(/\[\/vc_column\]/gi, '</div>')
    // vc_column_text - truncated, then complete
    .replace(/\[vc_column_text([^\]]*)$/gi, '<div class="vc-column-text">')
    .replace(/\[vc_column_text([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')
    // nectar_cta - convert to link
    .replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')
    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')
    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')
    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list(?!\w)([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')
    // nectar_icon_list_item - convert to li
    .replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')
    // nectar_btn - convert to link
    .replace(/\[nectar_btn([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')
    // split_line_heading - convert to heading
    .replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')
    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')
    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner([^\]]*)\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')
    // divider - convert to hr
    .replace(/\[divider([^\]]*)\]/gi, '<hr class="divider" />')
    // NOTE(review): the bracketed labels emitted by the next three rules
    // ([Gallery], [JavaScript], [Google Map]) are themselves removed by the
    // generic shortcode cleanup below, after which the empty div is dropped
    // too. Behaviour kept as is; confirm whether the labels should survive.
    .replace(/\[vc_gallery([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up any HTML that might be broken
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  processed = processed.replace(/\s+/g, ' ').trim();

  // Restore bg_image placeholders. A replacer function is used so that "$"
  // sequences inside the preserved attribute are inserted literally.
  bgImageAttributes.forEach((attribute, index) => {
    processed = processed.replace(`__BG_IMAGE_${index}__`, () => attribute);
  });

  return processed;
}
// Extract excerpt from content
/**
 * Build a plain-text excerpt by stripping tags and truncating.
 *
 * @param {string} content - HTML content.
 * @param {number} [maxLength=200] - Maximum number of characters kept before '...' is appended.
 * @returns {string} The tag-free text, truncated with '...' when longer than
 *   maxLength; '' when content is empty or missing.
 */
function generateExcerpt(content, maxLength = 200) {
  // Callers use this as a fallback for items without an excerpt; such items
  // may lack content as well, so return '' like the sibling helpers do.
  if (!content) return '';
  const text = content.replace(/<[^>]*>/g, '');
  return text.length <= maxLength ? text : `${text.substring(0, maxLength)}...`;
}
// Process pages
/**
 * Build the flat list of processed pages for both locales.
 *
 * English pages are emitted first, followed by German pages. The excerpt is
 * the processed `excerptHtml`, or an excerpt generated from the content when
 * that is empty.
 *
 * @param {Array<Object>} pagesEN - Raw English pages.
 * @param {Array<Object>} pagesDE - Raw German pages.
 * @param {Object} translationMapping - Mapping object; `pages` is keyed by slug.
 * @returns {Array<Object>} Processed pages (EN first, then DE).
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  // loadJSON() falls back to [] when the mapping file cannot be read, so the
  // `pages` section may be missing entirely; treat that as "no translations".
  const pageMapping = (translationMapping && translationMapping.pages) || {};

  const toEntry = (page, locale, otherLocale, pathPrefix) => {
    const match = pageMapping[page.slug];
    // Extract title and decode HTML entities
    const rawTitle = (page.titleHtml || '').replace(/<[^>]*>/g, '');
    return {
      id: page.id,
      translationKey: page.slug,
      locale: locale,
      slug: page.slug,
      path: `${pathPrefix}/${page.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml || ''),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...pagesEN.map((page) => toEntry(page, 'en', 'de', '')),
    ...pagesDE.map((page) => toEntry(page, 'de', 'en', '/de'))
  ];
}
// Process posts
/**
 * Build the flat list of processed blog posts for both locales.
 *
 * English posts are emitted first, followed by German posts. The excerpt is
 * the processed `excerptHtml`, or an excerpt generated from the content when
 * that is empty.
 *
 * @param {Array<Object>} postsEN - Raw English posts.
 * @param {Array<Object>} postsDE - Raw German posts.
 * @param {Object} translationMapping - Mapping object; `posts` is keyed by slug.
 * @returns {Array<Object>} Processed posts (EN first, then DE).
 */
function processPosts(postsEN, postsDE, translationMapping) {
  // loadJSON() falls back to [] when the mapping file cannot be read, so the
  // `posts` section may be missing entirely; treat that as "no translations".
  const postMapping = (translationMapping && translationMapping.posts) || {};

  const toEntry = (post, locale, otherLocale, pathPrefix) => {
    const match = postMapping[post.slug];
    // Extract title and decode HTML entities
    const rawTitle = (post.titleHtml || '').replace(/<[^>]*>/g, '');
    return {
      id: post.id,
      translationKey: post.slug,
      locale: locale,
      slug: post.slug,
      path: `${pathPrefix}/${post.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml || ''),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...postsEN.map((post) => toEntry(post, 'en', 'de', '/blog')),
    ...postsDE.map((post) => toEntry(post, 'de', 'en', '/de/blog'))
  ];
}
// Process products
/**
 * Build the flat list of processed products for both locales.
 *
 * English products are emitted first, followed by German products. All
 * product fields are copied through; only `descriptionHtml` is sanitized.
 *
 * @param {Array<Object>} productsEN - Raw English products.
 * @param {Array<Object>} productsDE - Raw German products.
 * @param {Object} translationMapping - Mapping object; `products` is keyed by slug.
 * @returns {Array<Object>} Processed products (EN first, then DE).
 */
function processProducts(productsEN, productsDE, translationMapping) {
  // loadJSON() falls back to [] when the mapping file cannot be read, so the
  // `products` section may be missing entirely; treat that as "no translations".
  const productMapping = (translationMapping && translationMapping.products) || {};

  const toEntry = (product, locale, otherLocale, pathPrefix) => {
    const match = productMapping[product.slug];
    return {
      id: product.id,
      translationKey: product.slug,
      locale: locale,
      slug: product.slug,
      path: `${pathPrefix}/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...productsEN.map((product) => toEntry(product, 'en', 'de', '')),
    ...productsDE.map((product) => toEntry(product, 'de', 'en', '/de'))
  ];
}
// Process product categories
/**
 * Build the flat list of processed product categories for both locales.
 *
 * English categories are emitted first, followed by German categories. An
 * entry links to its counterpart in the other locale when the translation
 * mapping has an entry for the category's slug.
 *
 * @param {Array<Object>} categoriesEN - Raw English categories.
 * @param {Array<Object>} categoriesDE - Raw German categories.
 * @param {Object} translationMapping - Mapping object; `productCategories` is keyed by slug.
 * @returns {Array<Object>} Processed categories (EN first, then DE).
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  // The mapping section may be absent (loadJSON() yields [] on a failed read).
  const categoryMapping =
    (translationMapping && translationMapping.productCategories) || {};

  const toEntry = (category, locale, otherLocale, pathPrefix) => {
    const match = categoryMapping[category.slug];
    return {
      id: category.id,
      translationKey: category.slug,
      locale: locale,
      slug: category.slug,
      name: category.name,
      path: `${pathPrefix}/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map((category) => toEntry(category, 'en', 'de', '')),
    ...categoriesDE.map((category) => toEntry(category, 'de', 'en', '/de'))
  ];
}
// Process media manifest
// Each raw record becomes a manifest entry: the original fields plus a
// localPath under /media/, with mime_type exposed as mimeType.
function processMedia(media) {
  const toManifestEntry = ({ id, filename, url, alt, width, height, mime_type: mimeType }) => {
    const localPath = `/media/${filename}`;
    return { id, filename, url, localPath, alt, width, height, mimeType };
  };
  return media.map((record) => toManifestEntry(record));
}
// Generate asset map for URL replacement
// Maps each item's original URL to its local /media/ path; items without a
// truthy url are left out.
function generateAssetMap(media) {
  return media.reduce((lookup, entry) => {
    if (!entry.url) {
      return lookup;
    }
    lookup[entry.url] = `/media/${entry.filename}`;
    return lookup;
  }, {});
}
// Main processing function
// Reads the latest raw export, runs every processor, writes the combined and
// the per-type JSON files into PROCESSED_DIR and prints a summary.
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Read one export file; failures are reported and yield an empty array.
  const readExport = (file) => {
    try {
      const raw = fs.readFileSync(path.join(exportDir, file), 'utf8');
      return JSON.parse(raw);
    } catch (err) {
      console.error(`❌ Failed to load ${file}:`, err.message);
      return [];
    }
  };

  const translationMapping = readExport('translation-mapping.json');
  const pagesEN = readExport('pages.en.json');
  const pagesDE = readExport('pages.de.json');
  const postsEN = readExport('posts.en.json');
  const postsDE = readExport('posts.de.json');
  const productsEN = readExport('products.en.json');
  const productsDE = readExport('products.de.json');
  const categoriesEN = readExport('product-categories.en.json');
  const categoriesDE = readExport('product-categories.de.json');
  const media = readExport('media.json');
  const redirects = readExport('redirects.json');
  const siteInfo = readExport('site-info.json');

  console.log('📊 Processing content types...\n');

  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Combined structure written to wordpress-data.json
  const site = {
    title: siteInfo.siteTitle,
    description: siteInfo.siteDescription,
    baseUrl: siteInfo.baseUrl,
    defaultLocale: siteInfo.defaultLocale || 'en',
    locales: ['en', 'de']
  };
  const processedData = {
    site,
    content: { pages, posts, products, categories },
    assets: { media: processedMedia, map: assetMap },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Serialize `data` into PROCESSED_DIR/<name> and return the full path.
  const writeOutput = (name, data) => {
    const target = path.join(PROCESSED_DIR, name);
    fs.writeFileSync(target, JSON.stringify(data, null, 2));
    return target;
  };

  // Combined file first, then one file per content type.
  const outputPath = writeOutput('wordpress-data.json', processedData);
  const partPaths = [
    ['pages.json', pages],
    ['posts.json', posts],
    ['products.json', products],
    ['categories.json', categories],
    ['media.json', processedMedia],
    ['asset-map.json', assetMap]
  ].map(([name, data]) => writeOutput(name, data));

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  partPaths.forEach((filePath, index) => {
    // The last entry carries the blank line that closes the listing.
    const suffix = index === partPaths.length - 1 ? '\n' : '';
    console.log(` ${filePath}${suffix}`);
  });

  // Sample data
  const [firstPage] = pages;
  if (pages.length > 0) {
    console.log('📄 Sample Page:');
    console.log(` Title: ${firstPage.title}`);
    console.log(` Path: ${firstPage.path}`);
    console.log(` Locale: ${firstPage.locale}`);
    console.log(` Translation: ${firstPage.translation ? 'Yes' : 'No'}\n`);
  }
  const [firstPost] = posts;
  if (posts.length > 0) {
    console.log('📝 Sample Post:');
    console.log(` Title: ${firstPost.title}`);
    console.log(` Path: ${firstPost.path}`);
    console.log(` Locale: ${firstPost.locale}`);
    console.log(` Date: ${firstPost.datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}
// Run the pipeline only when this file is executed directly (not when it is
// required from another script).
if (require.main === module) {
main();
}

View File

@@ -1,132 +0,0 @@
#!/usr/bin/env node
// Test script to verify HTML entity decoding works correctly
// Fixture: a WPBakery excerpt (vc_row > vc_column > vc_column_text) whose
// attribute values are wrapped in typographic quote characters rather than
// ASCII double quotes.
const testExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none” column_border_radius=”none” column_link_target=”_self” column_position=”default” gradient_direction=”left_to_right” overlay_strength=”0.3″ width=”1/1″ tablet_width_inherit=”default” animation_type=”default” bg_image_animation=”none” border_type=”simple” column_border_width=”none” column_border_style=”solid”][vc_column_text css=”” text_direction=”default”]\n<h1 class=\"p1\">Liefer- und Zahlungsbedingungen</h1>\n<p class=\"p1\">Stand November 2024</p>\n[/vc_column_text][/vc_column][/vc_row]</p>';
// Process excerpts specifically to handle shortcodes comprehensively
/**
 * Normalize typographic entities/characters in an excerpt and convert
 * WPBakery row/column shortcodes into <div> wrappers.
 *
 * NOTE(review): in the reviewed copy the entity patterns of this function
 * had been rendered away (several appeared as an empty `//g`, which is not
 * valid JavaScript). They are reconstructed here from the replacement
 * values and the inline descriptions: numeric entities, the corresponding
 * literal characters, and the common named entities. `&quot;` / `&apos;`
 * are assumed for the two patterns that rendered as plain ASCII quotes —
 * please confirm.
 *
 * @param {string} excerptHtml - Raw excerpt HTML.
 * @returns {string} Processed excerpt, or '' for empty/missing input.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Numeric HTML entities commonly found in WordPress raw data,
  // keyed by decimal code point.
  const numericEntities = new Map([
    [8221, '"'],      // Right double quote
    [8220, '"'],      // Left double quote
    [8222, ','],      // Low double quote
    [8223, '"'],      // High double quote
    [8216, "'"],      // Left single quote
    [8217, "'"],      // Right single quote
    [8211, '-'],      // En dash
    [8212, '\u2014'], // Em dash
    [8230, '\u2026'], // Ellipsis
    [8243, '"'],      // Inches/Prime
    [8242, "'"],      // Feet/Prime
    [8218, ','],      // Single low quote
    [8219, '`'],      // Single high reversed quote
    [8226, '\u2022'], // Bullet
    [8364, '\u20AC']  // Euro
  ]);

  // Unicode characters (from rendered content) that are flattened to ASCII.
  const literalCharacters = new Map([
    ['\u201D', '"'], // Right double quote
    ['\u201C', '"'], // Left double quote
    ['\u201E', ','], // Low double quote
    ['\u201F', '"'], // High double quote
    ['\u2018', "'"], // Left single quote
    ['\u2019', "'"], // Right single quote
    ['\u2013', '-'], // En dash
    ['\u2033', '"'], // Inches/Prime
    ['\u2032', "'"]  // Feet/Prime
  ]);

  // Named HTML entities
  const namedEntities = new Map([
    ['&quot;', '"'],
    ['&apos;', "'"],
    ['&lsquo;', "'"],
    ['&rsquo;', "'"],
    ['&ldquo;', '"'],
    ['&rdquo;', '"'],
    ['&ndash;', '-'],
    ['&mdash;', '\u2014'],
    ['&hellip;', '\u2026'],
    ['&bull;', '\u2022'],
    ['&euro;', '\u20AC']
  ]);

  // First, decode HTML entities to regular characters
  let processed = excerptHtml
    .replace(/&#(\d+);/g, (match, dec) => {
      const codePoint = parseInt(dec, 10);
      return numericEntities.has(codePoint) ? numericEntities.get(codePoint) : match;
    })
    .replace(/[\u2013\u2018\u2019\u201C-\u201F\u2032\u2033]/g, (ch) => literalCharacters.get(ch))
    .replace(/&[a-zA-Z]+;/g, (match) => (namedEntities.has(match) ? namedEntities.get(match) : match));

  // Width fractions recognised on vc_column and the class each one adds.
  const columnWidths = [
    ['1/1', 'col-1-1'],
    ['1/2', 'col-1-2'],
    ['1/3', 'col-1-3'],
    ['2/3', 'col-2-3'],
    ['1/4', 'col-1-4'],
    ['3/4', 'col-3-4'],
    ['5/12', 'col-5-12'],
    ['7/12', 'col-7-12']
  ];

  // Process WPBakery shortcodes. The (?!\w) guards keep vc_row / vc_column
  // from also matching longer names such as vc_column_text, which has its
  // own rule below.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row(?!\w)([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-row'];
      if (attrs.includes('full_width_background')) classes.push('full-width-bg');
      if (attrs.includes('in_container')) classes.push('in-container');
      if (attrs.includes('full_width_content')) classes.push('full-width-content');
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_row\]/gi, '</div>')
    // vc_column - convert to div with classes
    .replace(/\[vc_column(?!\w)([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-column'];
      columnWidths.forEach(([fraction, className]) => {
        if (attrs.includes(fraction)) classes.push(className);
      });
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_column\]/gi, '</div>')
    // vc_column_text - convert to div
    .replace(/\[vc_column_text([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up any HTML that might be broken
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  processed = processed.replace(/\s+/g, ' ').trim();

  return processed;
}
console.log('=== HTML Entity Decoding Test ===\n');
console.log('Original excerpt:');
console.log(testExcerpt);
console.log('\n--- After processing ---\n');
const result = processExcerptShortcodes(testExcerpt);
console.log(result);
// Test specific entity decoding
console.log('\n=== Specific Entity Tests ===');
const entityTests = [
{ input: '”', expected: '"', name: 'Right double quote' },
{ input: '“', expected: '"', name: 'Left double quote' },
{ input: '', expected: '-', name: 'En dash' },
{ input: '—', expected: '—', name: 'Em dash' },
{ input: '', expected: "'", name: 'Left single quote' },
{ input: '', expected: "'", name: 'Right single quote' },
{ input: 'type=”in_container”', expected: 'type="in_container"', name: 'Full attribute' }
];
entityTests.forEach(test => {
const processed = test.input.replace(/”/g, '"').replace(/“/g, '"').replace(//g, '-').replace(/—/g, '—').replace(//g, "'").replace(//g, "'");
const passed = processed === test.expected;
console.log(`${test.name}: ${passed ? '✅' : '❌'} "${test.input}" → "${processed}" (expected: "${test.expected}")`);
});

View File

@@ -1,125 +0,0 @@
#!/usr/bin/env node
// Test the final function with actual raw data
const fs = require('fs');
const path = require('path');
// Load the actual raw data
const rawData = JSON.parse(fs.readFileSync('data/raw/2025-12-27T21-26-12-521Z/pages.en.json', 'utf8'));
const testExcerpt = rawData[0].excerptHtml;
console.log('=== Testing Final Function ===');
console.log('Raw excerpt (first 200 chars):');
console.log(testExcerpt.substring(0, 200));
console.log('');
// The function from process-data.js
// Plain-ASCII stand-ins for typographic punctuation that WordPress (wptexturize)
// injects into shortcode attributes. Keys are written as \u escapes so the table
// survives any re-encoding of this source file.
const EXCERPT_TYPOGRAPHY = {
  '\u2013': '-', // en dash
  '\u2018': "'", // left single quote
  '\u2019': "'", // right single quote
  '\u201A': ',', // single low-9 quote
  '\u201B': '`', // single high-reversed-9 quote
  '\u201C': '"', // left double quote
  '\u201D': '"', // right double quote
  '\u201E': '"', // double low-9 quote (German opening quote) - is a quote, not a comma
  '\u201F': '"', // double high-reversed-9 quote
  '\u2032': "'", // prime (feet)
  '\u2033': '"', // double prime (inches)
};

// Named entities decoded by this function; &amp;/&lt;/&gt; are deliberately left
// alone because the result is still HTML.
const EXCERPT_NAMED_ENTITIES = {
  quot: '"',
  apos: "'",
  lsquo: "'",
  rsquo: "'",
  ldquo: '"',
  rdquo: '"',
  ndash: '-',
  mdash: '\u2014',
  hellip: '\u2026',
  bull: '\u2022',
  euro: '\u20AC',
};

// Column width fractions recognised inside a [vc_column] attribute string.
const EXCERPT_COLUMN_WIDTHS = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

// Decodes numeric (decimal and hex) and selected named entities, then flattens
// typographic quotes/dashes to ASCII so attribute parsing can rely on `"`.
function decodeExcerptEntities(html) {
  return html
    .replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (match, hex, dec) => {
      const codePoint = hex ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
      // fromCodePoint (unlike fromCharCode) handles astral characters; values
      // beyond the Unicode range are left untouched instead of throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    })
    .replace(/[\u2013\u2018-\u201F\u2032\u2033]/g, (ch) => EXCERPT_TYPOGRAPHY[ch])
    .replace(
      /&(quot|apos|lsquo|rsquo|ldquo|rdquo|ndash|mdash|hellip|bull|euro);/g,
      (match, name) => EXCERPT_NAMED_ENTITIES[name],
    );
}

// Builds the class attribute value for a [vc_column] from its raw attribute text.
function excerptColumnClasses(attrs) {
  const classes = ['vc-column'];
  for (const width of EXCERPT_COLUMN_WIDTHS) {
    if (attrs.includes(width)) classes.push(`col-${width.replace('/', '-')}`);
  }
  return classes.join(' ');
}

/**
 * Converts a WordPress excerpt containing WPBakery shortcodes into plain HTML.
 *
 * Steps: decode HTML entities, turn vc_row / vc_column / vc_column_text
 * shortcodes into <div> wrappers, drop every other bracketed shortcode, remove
 * empty <p>/<div> elements and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML; falsy values yield ''.
 * @returns {string} The cleaned-up HTML.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  let processed = decodeExcerptEntities(excerptHtml);

  // `\b` after the tag name keeps [vc_column ...] from also matching
  // [vc_column_text ...] (and [vc_row ...] from matching [vc_row_inner ...]).
  processed = processed
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-row'];
      if (attrs.includes('full_width_background')) classes.push('full-width-bg');
      if (attrs.includes('in_container')) classes.push('in-container');
      if (attrs.includes('full_width_content')) classes.push('full-width-content');
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) => `<div class="${excerptColumnClasses(attrs)}">`)
    .replace(/\[\/vc_column\]/gi, '</div>')
    .replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Drop wrappers that ended up empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
const result = processExcerptShortcodes(testExcerpt);
console.log('After processing:');
console.log(result);
console.log('');
// Check for entities
const hasEntities = /[”“‘’–—]/.test(result);
const hasNumericEntities = /&#\d+;/.test(result);
const hasShortcodes = /\[vc_row|\[vc_column/.test(result);
console.log('=== Verification ===');
console.log('Has Unicode entities:', hasEntities);
console.log('Has numeric entities:', hasNumericEntities);
console.log('Has shortcodes:', hasShortcodes);
console.log('Has proper HTML:', result.includes('<div class="vc-row"') || result.includes('<div class="vc-column"'));
console.log('');
if (!hasEntities && !hasNumericEntities && !hasShortcodes && result.includes('<div class="vc-row"')) {
console.log('✅ SUCCESS: Function works correctly!');
} else {
console.log('❌ Issues found');
}

View File

@@ -1,151 +0,0 @@
// Plain-ASCII stand-ins for typographic punctuation that WordPress (wptexturize)
// injects into shortcode attributes. Keys are written as \u escapes so the table
// survives any re-encoding of this source file.
const EXCERPT_TYPOGRAPHY = {
  '\u2013': '-', // en dash
  '\u2018': "'", // left single quote
  '\u2019': "'", // right single quote
  '\u201A': ',', // single low-9 quote
  '\u201B': '`', // single high-reversed-9 quote
  '\u201C': '"', // left double quote
  '\u201D': '"', // right double quote
  '\u201E': '"', // double low-9 quote (German opening quote) - is a quote, not a comma
  '\u201F': '"', // double high-reversed-9 quote
  '\u2032': "'", // prime (feet)
  '\u2033': '"', // double prime (inches)
};

// Named entities decoded by this module; &amp;/&lt;/&gt; are deliberately left
// alone because the result is still HTML.
const EXCERPT_NAMED_ENTITIES = {
  quot: '"',
  apos: "'",
  lsquo: "'",
  rsquo: "'",
  ldquo: '"',
  rdquo: '"',
  ndash: '-',
  mdash: '\u2014',
  hellip: '\u2026',
  bull: '\u2022',
  euro: '\u20AC',
};

// Column width fractions recognised inside a [vc_column] attribute string.
const EXCERPT_COLUMN_WIDTHS = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

// Bracketed placeholders this function emits itself; they must survive the
// "remove remaining shortcodes" pass.
const EXCERPT_PLACEHOLDERS = new Set(['[Gallery]', '[JavaScript]', '[Google Map]']);

// Decodes numeric (decimal and hex) and selected named entities, then flattens
// typographic quotes/dashes to ASCII so attribute regexes can rely on `"`.
function decodeExcerptEntities(html) {
  return html
    .replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (match, hex, dec) => {
      const codePoint = hex ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
      // fromCodePoint (unlike fromCharCode) handles astral characters; values
      // beyond the Unicode range are left untouched instead of throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    })
    .replace(/[\u2013\u2018-\u201F\u2032\u2033]/g, (ch) => EXCERPT_TYPOGRAPHY[ch])
    .replace(
      /&(quot|apos|lsquo|rsquo|ldquo|rdquo|ndash|mdash|hellip|bull|euro);/g,
      (match, name) => EXCERPT_NAMED_ENTITIES[name],
    );
}

// Builds the opening <div> for a [vc_column] from its raw attribute text.
function excerptColumnDiv(attrs) {
  const classes = ['vc-column'];
  for (const width of EXCERPT_COLUMN_WIDTHS) {
    if (attrs.includes(width)) classes.push(`col-${width.replace('/', '-')}`);
  }
  return `<div class="${classes.join(' ')}">`;
}

/**
 * Converts a WordPress excerpt containing WPBakery / Nectar shortcodes into
 * plain HTML.
 *
 * Steps: decode HTML entities, map the known shortcodes to HTML elements,
 * drop every other bracketed shortcode, remove empty <p>/<div> elements and
 * collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML; falsy values yield ''.
 * @returns {string} The cleaned-up HTML.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  let processed = decodeExcerptEntities(excerptHtml);

  // Every opening-tag pattern ends the tag name with `\b`: without it
  // [vc_row] also matches [vc_row_inner], [vc_column] matches
  // [vc_column_text]/[vc_column_inner] and [nectar_icon_list] matches
  // [nectar_icon_list_item], so the more specific rules never fired.
  // Attribute gaps use [^\]]*? so one match can never span two shortcodes.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-row'];
      if (attrs.includes('full_width_background')) classes.push('full-width-bg');
      if (attrs.includes('in_container')) classes.push('in-container');
      if (attrs.includes('full_width_content')) classes.push('full-width-content');
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_row\]/gi, '</div>')
    // vc_column - `(?:\]|$)` also accepts a shortcode cut off at the end of
    // a truncated excerpt
    .replace(/\[vc_column\b([^\]]*)(?:\]|$)/gi, (match, attrs) => excerptColumnDiv(attrs))
    .replace(/\[\/vc_column\]/gi, '</div>')
    // vc_column_text - convert to div
    .replace(/\[vc_column_text\b[^\]]*(?:\]|$)/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')
    // nectar_cta - convert to link
    .replace(/\[nectar_cta\b([^\]]*)link_text="([^"]*)"([^\]]*?)url="([^"]*)"([^\]]*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')
    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text\b([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')
    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text\b([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')
    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')
    // nectar_icon_list_item - convert to li
    .replace(/\[nectar_icon_list_item\b([^\]]*)header="([^"]*)"([^\]]*?)text="([^"]*)"([^\]]*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')
    // nectar_btn - convert to link
    .replace(/\[nectar_btn\b([^\]]*)text="([^"]*)"([^\]]*?)url="([^"]*)"([^\]]*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')
    // split_line_heading - convert to heading
    .replace(/\[split_line_heading\b([^\]]*)text_content="([^"]*)"([^\]]*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')
    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner\b([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')
    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner\b[^\]]*(?:\]|$)/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')
    // divider - convert to hr
    .replace(/\[divider\b([^\]]*)\]/gi, '<hr class="divider" />')
    // vc_gallery - convert to div (placeholder)
    .replace(/\[vc_gallery\b([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    // vc_raw_js - replace the script payload with a placeholder
    .replace(/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    // nectar_gmap - convert to div
    .replace(/\[nectar_gmap\b([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes, but keep the placeholders inserted above
  // (previously they were stripped here and their wrapper removed as "empty").
  processed = processed.replace(/\[.*?\]/g, (match) => (EXCERPT_PLACEHOLDERS.has(match) ? match : ''));

  // Drop wrappers that ended up empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
// Extract excerpt from content
module.exports = processExcerptShortcodes;

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env node
// Test numeric entity decoding
const testString = 'type=”in_container”';
console.log('Original:', testString);
// Method 1: Manual replacement
let method1 = testString
.replace(/”/g, '"')
.replace(/“/g, '"')
.replace(//g, "'")
.replace(//g, "'")
.replace(//g, '-')
.replace(/—/g, '—');
console.log('Method 1 (Unicode chars):', method1);
// Method 2: Numeric entity decoding
let method2 = testString
.replace(/”/g, '"')
.replace(/“/g, '"')
.replace(//g, "'")
.replace(//g, "'")
.replace(//g, '-')
.replace(/—/g, '—')
.replace(/…/g, '…')
.replace(/″/g, '"')
.replace(//g, "'");
console.log('Method 2 (Numeric entities):', method2);
// Method 3: Using a function to decode all numeric entities
/**
 * Decodes numeric HTML character references in a string.
 *
 * Handles decimal (`&#8221;`) and hexadecimal (`&#x201D;`) references.
 * Uses String.fromCodePoint so characters above U+FFFF (e.g. emoji) are not
 * truncated to 16 bits the way String.fromCharCode would. References outside
 * the Unicode range are left unchanged.
 *
 * @param {string} str - Text possibly containing numeric entities.
 * @returns {string} The text with numeric entities replaced by their characters.
 */
function decodeHTMLEntities(str) {
  return str.replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (match, hex, dec) => {
    const codePoint = hex ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}
let method3 = decodeHTMLEntities(testString);
console.log('Method 3 (All numeric):', method3);
// Method 4: Combined approach
/**
 * Decodes numeric HTML entities and then flattens typographic punctuation
 * (curly quotes, en dash, primes) to plain ASCII.
 *
 * The punctuation table is keyed by \u escapes so it cannot be corrupted when
 * this file is re-encoded. Em dash and ellipsis are intentionally kept as-is.
 *
 * @param {string} str - Text possibly containing entities / smart punctuation.
 * @returns {string} The decoded, ASCII-normalised text.
 */
function comprehensiveEntityDecode(str) {
  const asciiFor = {
    '\u201D': '"', // right double quote
    '\u201C': '"', // left double quote
    '\u2018': "'", // left single quote
    '\u2019': "'", // right single quote
    '\u2013': '-', // en dash
    '\u2033': '"', // double prime (inches)
    '\u2032': "'", // prime (feet)
  };

  return str
    // First decode numeric entities (decimal or hex); fromCodePoint keeps
    // characters above U+FFFF intact, out-of-range values are left untouched.
    .replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, (match, hex, dec) => {
      const codePoint = hex ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    })
    // Then handle any remaining Unicode punctuation
    .replace(/[\u2013\u2018\u2019\u201C\u201D\u2032\u2033]/g, (ch) => asciiFor[ch]);
}
let method4 = comprehensiveEntityDecode(testString);
console.log('Method 4 (Combined):', method4);
// Test with the actual excerpt
const actualExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default”]';
console.log('\n=== Real Test ===');
console.log('Original:', actualExcerpt);
console.log('Decoded:', comprehensiveEntityDecode(actualExcerpt));

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env node
/**
* Script to update asset-map.json with new media entries
*/
const fs = require('fs');
const path = require('path');
// Configuration
const RAW_DATA_DIR = path.join(__dirname, '..', 'data', 'raw', '2025-12-30T15-21-49-331Z');
const PROCESSED_DATA_DIR = path.join(__dirname, '..', 'data', 'processed');
// New media IDs to add
const NEW_MEDIA_IDS = [10432, 10440, 10382, 10616, 10615, 45569, 10638];
/**
 * Adds the media entries listed in NEW_MEDIA_IDS to asset-map.json.
 *
 * Reads media.json from RAW_DATA_DIR, loads the existing asset map from
 * PROCESSED_DATA_DIR (or starts with an empty one), maps each found entry's
 * remote URL to `/media/<filename>` and writes the map back to disk.
 *
 * @returns {Object} The updated URL -> local path map.
 */
function updateAssetMap() {
  console.log('🔄 Updating asset-map.json with new media entries');

  const readJson = (file) => JSON.parse(fs.readFileSync(file, 'utf8'));

  // Source of truth for media metadata
  const mediaData = readJson(path.join(RAW_DATA_DIR, 'media.json'));

  // Existing map is optional; start empty when the file is missing
  const assetMapPath = path.join(PROCESSED_DATA_DIR, 'asset-map.json');
  const assetMap = fs.existsSync(assetMapPath) ? readJson(assetMapPath) : {};

  let addedCount = 0;
  for (const id of NEW_MEDIA_IDS) {
    const mediaEntry = mediaData.find((candidate) => candidate.id === id);
    if (!mediaEntry) {
      console.warn(`⚠️ Media ID ${id} not found in media.json`);
      continue;
    }

    const localPath = `/media/${mediaEntry.filename}`;
    assetMap[mediaEntry.url] = localPath;
    console.log(`✅ Added: ${id}${localPath}`);
    addedCount += 1;
  }

  // Persist the merged map
  fs.writeFileSync(assetMapPath, JSON.stringify(assetMap, null, 2));

  console.log(`\n🎉 Asset map updated! Added ${addedCount} new entries`);
  console.log(`Total entries in asset-map.json: ${Object.keys(assetMap).length}`);
  return assetMap;
}
// Run if called directly
if (require.main === module) {
try {
updateAssetMap();
} catch (error) {
console.error('❌ Failed to update asset map:', error.message);
process.exit(1);
}
}
module.exports = { updateAssetMap };

View File

@@ -1,145 +0,0 @@
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
// One-off codemod: loads process-data-with-bg-images.js as text and rewrites it
// in memory; the result is written back to the same path at the end of the script.
//
// Every step below uses String.prototype.replace with a *string* pattern, so:
//   - only the first occurrence is replaced, and
//   - a step whose search text is not found leaves `content` unchanged without
//     any error or warning.
// NOTE(review): steps 1-3 are not idempotent - each search text is a substring of
// its own replacement, so running this script twice yields `async async function`.
const filePath = path.join(__dirname, 'process-data-with-bg-images.js');
let content = fs.readFileSync(filePath, 'utf8');
// 1. Update processPages to async
content = content.replace(
'function processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap) {',
'async function processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap) {'
);
// 2. Update processPosts to async
content = content.replace(
'function processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap) {',
'async function processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap) {'
);
// 3. Update main to async
content = content.replace(
'function main() {',
'async function main() {'
);
// 4. Update main() call
// main() becomes a promise after step 3, so its rejection is routed to console.error.
content = content.replace(
'if (require.main === module) {\n main();\n}',
'if (require.main === module) {\n main().catch(console.error);\n}'
);
// 5. Update processPages English loop
content = content.replace(
'pagesEN.forEach(page => {',
'for (const page of pagesEN) {'
);
// 6. Update processPages German loop
content = content.replace(
'pagesDE.forEach(page => {',
'for (const page of pagesDE) {'
);
// 7. Add video processing in processPages English
content = content.replace(
'contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);\n \n let excerptHtml = decodeContent(page.excerptHtml);',
'contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);\n \n // Process video attributes and download videos\n const videoResult = await processVideoAttributes(contentHtml);\n contentHtml = videoResult.html;\n \n let excerptHtml = decodeContent(page.excerptHtml);'
);
// 8. Add video processing in processPages German
const germanPattern = /contentHtml = replaceUrlsWithLocalPaths\(contentHtml, assetMap\);\n \n let excerptHtml = decodeContent\(page\.excerptHtml\);\n excerptHtml = replaceBgImageIds\(excerptHtml, mediaMapping\);\n excerptHtml = replaceUrlsWithLocalPaths\(excerptHtml, assetMap\);\n \n processed\.push\(\{/;
content = content.replace(
germanPattern,
`contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);
// Process video attributes and download videos
const videoResult = await processVideoAttributes(contentHtml);
contentHtml = videoResult.html;
let excerptHtml = decodeContent(page.excerptHtml);
excerptHtml = replaceBgImageIds(excerptHtml, mediaMapping);
excerptHtml = replaceUrlsWithLocalPaths(excerptHtml, assetMap);
processed.push({`
);
// 9. Update processPosts English loop
content = content.replace(
'postsEN.forEach(post => {',
'for (const post of postsEN) {'
);
// 10. Update processPosts German loop
content = content.replace(
'postsDE.forEach(post => {',
'for (const post of postsDE) {'
);
// 11. Add video processing in processPosts English
const postsEnglishPattern = /contentHtml = replaceUrlsWithLocalPaths\(contentHtml, assetMap\);\n \n let excerptHtml = decodeContent\(post\.excerptHtml\);\n excerptHtml = replaceBgImageIds\(excerptHtml, mediaMapping\);\n excerptHtml = replaceUrlsWithLocalPaths\(excerptHtml, assetMap\);\n \n processed\.push\(\{/;
content = content.replace(
postsEnglishPattern,
`contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);
// Process video attributes and download videos
const videoResult = await processVideoAttributes(contentHtml);
contentHtml = videoResult.html;
let excerptHtml = decodeContent(post.excerptHtml);
excerptHtml = replaceBgImageIds(excerptHtml, mediaMapping);
excerptHtml = replaceUrlsWithLocalPaths(excerptHtml, assetMap);
processed.push({`
);
// 12. Add video processing in processPosts German
const postsGermanPattern = /contentHtml = replaceUrlsWithLocalPaths\(contentHtml, assetMap\);\n \n let excerptHtml = decodeContent\(post\.excerptHtml\);\n excerptHtml = replaceBgImageIds\(excerptHtml, mediaMapping\);\n excerptHtml = replaceUrlsWithLocalPaths\(excerptHtml, assetMap\);\n \n processed\.push\(\{[\s\S]*?translation: enMatch \? \{ locale: 'en', id: enMatch\.en \} : null\n \}\);\n \}\n \n return processed;\n\}/;
content = content.replace(
postsGermanPattern,
`contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);
// Process video attributes and download videos
const videoResult = await processVideoAttributes(contentHtml);
contentHtml = videoResult.html;
let excerptHtml = decodeContent(post.excerptHtml);
excerptHtml = replaceBgImageIds(excerptHtml, mediaMapping);
excerptHtml = replaceUrlsWithLocalPaths(excerptHtml, assetMap);
processed.push({
id: post.id,
translationKey: translationKey,
locale: 'de',
slug: post.slug,
path: \`/de/blog/\${post.slug}\`,
title: post.titleHtml.replace(/<[^>]*>/g, ''),
titleHtml: post.titleHtml,
contentHtml: sanitizeHTML(contentHtml),
excerptHtml: processExcerptShortcodes(excerptHtml) || generateExcerpt(contentHtml),
featuredImage: post.featuredImage,
datePublished: post.datePublished,
updatedAt: post.updatedAt,
translation: enMatch ? { locale: 'en', id: enMatch.en } : null
});
}
return processed;
}`
);
// 13. Update main() to await processPages and processPosts
content = content.replace(
'const pages = processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap);\n const posts = processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap);',
'const pages = await processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap);\n const posts = await processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap);'
);
// 14. Update module.exports
content = content.replace(
'module.exports = {\n processPages,\n processPosts,\n processProducts,\n processProductCategories,\n processMedia,\n generateAssetMap,\n replaceBgImageIds,\n replaceUrlsWithLocalPaths\n};',
'module.exports = {\n processPages,\n processPosts,\n processProducts,\n processProductCategories,\n processMedia,\n generateAssetMap,\n replaceBgImageIds,\n replaceUrlsWithLocalPaths,\n processVideoAttributes\n};'
);
fs.writeFileSync(filePath, content);
console.log('✅ Updated process-data-with-bg-images.js to be async');

View File

@@ -1,88 +0,0 @@
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
// Load the processed data
const processedDir = path.join(__dirname, '..', 'data', 'processed');
const readProcessedJson = (fileName) =>
  JSON.parse(fs.readFileSync(path.join(processedDir, fileName), 'utf8'));
const pages = readProcessedJson('pages.json');
const posts = readProcessedJson('posts.json');

/**
 * Prints title, path and an excerpt preview for one item, followed by a
 * verdict on whether its excerpt still contains smart punctuation or
 * numeric HTML entities.
 *
 * @param {string} kindLabel - Heading prefix ("Page" or "Post").
 * @param {Object} entry - Processed page/post with title, path and excerptHtml.
 */
function reportEntityStatus(kindLabel, entry) {
  const { title, path: entryPath, excerptHtml } = entry;

  console.log(`\n${kindLabel}: ${title}`);
  console.log(`Path: ${entryPath}`);
  console.log(`Excerpt preview: ${excerptHtml.substring(0, 150)}...`);

  // Check for problematic entities
  const smartPunctuation = /[”“‘’–—]/.test(excerptHtml);
  const numericEntities = /&#\d+;/.test(excerptHtml);

  if (!smartPunctuation && !numericEntities) {
    console.log('✅ Clean - no HTML entities found');
    return;
  }

  console.log('❌ Still contains HTML entities!');
  if (smartPunctuation) console.log(' - Found smart quotes/dashes');
  if (numericEntities) console.log(' - Found numeric entities');
}

console.log('=== Verification of HTML Entity Decoding ===\n');

// Only the first three items of each collection are inspected in detail
console.log('📄 PAGES:');
for (const page of pages.slice(0, 3)) {
  reportEntityStatus('Page', page);
}

console.log('\n📝 POSTS:');
for (const post of posts.slice(0, 3)) {
  reportEntityStatus('Post', post);
}

// Check for shortcode patterns across everything
console.log('\n🔍 SHORTCODE CHECK:');
const allPages = [...pages, ...posts];
const leftoverShortcode = /\[vc_row|\[vc_column|\[nectar/;
const shortcodesFound = allPages.filter((item) => leftoverShortcode.test(item.excerptHtml));
console.log(`Pages/posts with shortcodes in excerpt: ${shortcodesFound.length}`);
if (shortcodesFound.length === 0) {
  console.log('✅ No shortcodes found in excerpts');
} else {
  console.log('\nSample of items with shortcodes:');
  for (const item of shortcodesFound.slice(0, 2)) {
    console.log(`- ${item.title}: ${item.excerptHtml.substring(0, 100)}...`);
  }
}

// Check for proper HTML structure
console.log('\n📊 HTML STRUCTURE CHECK:');
const convertedMarkers = ['<div class="vc-row"', '<div class="vc-column"', '<div class="nectar'];
const withProperHTML = allPages.filter((item) =>
  convertedMarkers.some((marker) => item.excerptHtml.includes(marker))
);
console.log(`Items with converted shortcode HTML: ${withProperHTML.length}`);

console.log('\n=== Summary ===');
console.log(`Total items checked: ${allPages.length}`);
console.log(`Items with proper HTML structure: ${withProperHTML.length}`);
console.log(`Items with remaining shortcodes: ${shortcodesFound.length}`);

// Sample the actual content to show it works
console.log('\n=== SAMPLE PROCESSED EXCERPTS ===');
const sample = pages.find((candidate) => candidate.excerptHtml.includes('vc-row'));
if (sample) {
  console.log(`\nTitle: ${sample.title}`);
  console.log(`Excerpt: ${sample.excerptHtml}`);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,996 +0,0 @@
#!/usr/bin/env node
/**
* WordPress to Next.js Data Export Script
* Gathers all required data from WordPress/WooCommerce for static site generation
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
// Load environment variables
require('dotenv').config();
const BASE_URL = process.env.WOOCOMMERCE_URL;
const CONSUMER_KEY = process.env.WOOCOMMERCE_CONSUMER_KEY;
const CONSUMER_SECRET = process.env.WOOCOMMERCE_CONSUMER_SECRET;
const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD;
// Validate environment
if (!BASE_URL || !CONSUMER_KEY || !CONSUMER_SECRET) {
console.error('❌ Missing required environment variables');
console.error('Please check .env file for:');
console.error(' - WOOCOMMERCE_URL');
console.error(' - WOOCOMMERCE_CONSUMER_KEY');
console.error(' - WOOCOMMERCE_CONSUMER_SECRET');
process.exit(1);
}
// Configuration
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PUBLIC_DIR = path.join(__dirname, '..', 'public');
// Create output directories
if (!fs.existsSync(OUTPUT_DIR)) {
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}
if (!fs.existsSync(MEDIA_DIR)) {
fs.mkdirSync(MEDIA_DIR, { recursive: true });
}
// API Helper Functions
/**
 * Builds the HTTP Basic `Authorization` header value for the WooCommerce API
 * from CONSUMER_KEY and CONSUMER_SECRET.
 *
 * @returns {string} `Basic <base64(key:secret)>`.
 */
function buildAuthHeader() {
  const rawCredentials = `${CONSUMER_KEY}:${CONSUMER_SECRET}`;
  const encoded = Buffer.from(rawCredentials).toString('base64');
  return `Basic ${encoded}`;
}
/**
 * Builds the request headers for the WordPress REST API.
 *
 * Authenticates as the fixed user `admin` with the application password from
 * APP_PASSWORD, using HTTP Basic auth.
 *
 * @returns {{Authorization: string, 'Content-Type': string}} Header object.
 */
function buildWordPressAuth() {
  const basicToken = Buffer.from(`admin:${APP_PASSWORD}`).toString('base64');
  const headers = {};
  headers['Authorization'] = `Basic ${basicToken}`;
  headers['Content-Type'] = 'application/json';
  return headers;
}
/**
 * Performs an HTTPS GET request and resolves with the response body.
 *
 * The body is parsed as JSON when possible; if parsing fails the raw text is
 * resolved instead. Non-2xx responses reject with `HTTP <status>: <body>`.
 *
 * @param {string} url - Absolute https:// URL to fetch.
 * @param {Object} [headers={}] - Extra request headers (merged over the default User-Agent).
 * @returns {Promise<*>} Parsed JSON value, or the raw body string when it is not valid JSON.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-NextJS-Migration/1.0',
        ...headers
      }
    };

    https.get(url, options, (res) => {
      // Collect raw bytes and decode once at the end: decoding chunk-by-chunk
      // (`data += chunk`) corrupts multi-byte UTF-8 characters that happen to
      // be split across two network chunks.
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      });
      // A connection dropped mid-body must reject instead of leaving the promise pending.
      res.on('error', reject);
      res.on('end', () => {
        const data = Buffer.concat(chunks).toString('utf8');
        if (res.statusCode >= 200 && res.statusCode < 300) {
          try {
            resolve(JSON.parse(data));
          } catch (e) {
            // Not JSON - hand back the raw body (deliberate fallback)
            resolve(data);
          }
        } else {
          reject(new Error(`HTTP ${res.statusCode}: ${data}`));
        }
      });
    }).on('error', reject);
  });
}
/**
 * Fetches every page of a WordPress REST collection (`/wp-json/wp/v2/<endpoint>`).
 *
 * Requests pages of 100 items until an empty, non-array or short page is
 * returned. If a request fails, the error is logged and the items gathered
 * so far are returned.
 *
 * @param {string} endpoint - Collection name, e.g. `pages` or `posts`.
 * @param {Object} [params={}] - Extra query parameters.
 * @param {?string} [locale=null] - Language code sent as `lang` when set.
 * @returns {Promise<Array>} All items collected.
 */
async function fetchWithPagination(endpoint, params = {}, locale = null) {
  const PAGE_SIZE = 100;
  const collected = [];
  const localeSuffix = locale ? ` (${locale})` : '';

  for (let pageNo = 1; ; pageNo += 1) {
    const query = new URLSearchParams({
      ...params,
      page: String(pageNo),
      per_page: String(PAGE_SIZE),
      ...(locale ? { lang: locale } : {})
    });
    const requestUrl = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${query.toString()}`;
    console.log(`📥 Fetching ${endpoint} page ${pageNo}${localeSuffix}...`);

    let batch;
    try {
      batch = await makeRequest(requestUrl, buildWordPressAuth());
    } catch (error) {
      console.error(`❌ Error fetching ${endpoint} page ${pageNo}:`, error.message);
      return collected;
    }

    // Anything that is not a non-empty array means there is nothing more to read
    if (!Array.isArray(batch) || batch.length === 0) {
      return collected;
    }
    collected.push(...batch);

    // A short page is the last page
    if (batch.length < PAGE_SIZE) {
      return collected;
    }
  }
}
/**
 * Fetches a WooCommerce REST resource (`/wp-json/wc/v3/<endpoint>`).
 *
 * Collections are read page by page (100 items per page) until a short page
 * is returned; previously only the first 100 items were ever fetched. The
 * first request is unchanged (no `page` parameter); follow-up requests add
 * `page=N`. If the caller supplies its own `page` in `params`, exactly that
 * one page is fetched. A non-array response (single resource) is wrapped in
 * an array.
 *
 * On a request error the error is logged and whatever was collected so far
 * is returned (an empty array when the first request fails).
 *
 * @param {string} endpoint - Resource path, e.g. `products`.
 * @param {Object} [params={}] - Extra query parameters.
 * @param {?string} [locale=null] - Language code sent as `lang` when set.
 * @returns {Promise<Array>} The fetched items.
 */
async function fetchWooCommerce(endpoint, params = {}, locale = null) {
  const PER_PAGE = 100;
  const callerPinnedPage = params?.page !== undefined;
  const collected = [];

  console.log(`📥 Fetching WooCommerce ${endpoint}${locale ? ` (${locale})` : ''}...`);

  try {
    const headers = {
      'Authorization': buildAuthHeader(),
      'Content-Type': 'application/json'
    };

    for (let page = 1; ; page += 1) {
      const queryString = new URLSearchParams({
        ...params,
        per_page: String(PER_PAGE),
        ...(page > 1 ? { page: String(page) } : {}),
        ...(locale ? { lang: locale } : {})
      }).toString();
      const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`;

      if (page > 1) {
        console.log(`📥 Fetching WooCommerce ${endpoint} page ${page}${locale ? ` (${locale})` : ''}...`);
      }
      const response = await makeRequest(url, headers);

      if (!Array.isArray(response)) {
        // Single-resource endpoints return an object rather than a list
        if (page === 1) collected.push(response);
        break;
      }

      collected.push(...response);
      // A short page is the last one; an explicit caller page is never followed
      if (callerPinnedPage || response.length < PER_PAGE) break;
    }
  } catch (error) {
    console.error(`❌ Error fetching WooCommerce ${endpoint}:`, error.message);
  }

  return collected;
}
/**
 * Fetches a single media item from the WordPress REST API.
 *
 * @param {number|string} mediaId - WordPress media ID.
 * @returns {Promise<?Object>} The media object, or null when the request fails
 *   (the error is logged).
 */
async function fetchMedia(mediaId) {
  const endpoint = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;

  try {
    return await makeRequest(endpoint, buildWordPressAuth());
  } catch (error) {
    console.error(`❌ Error fetching media ${mediaId}:`, error.message);
    return null;
  }
}
/**
 * Downloads a media file into MEDIA_DIR unless it is already present.
 *
 * The body is streamed to `<filename>.part` and renamed to its final name
 * only after the download completed. Nothing is written for a non-200
 * response. Previously the target file was created before the status check,
 * so a failed download left an empty file that later runs treated as
 * "already downloaded".
 *
 * @param {string} url - https:// URL of the media file.
 * @param {string} filename - Target file name inside MEDIA_DIR.
 * @returns {Promise<string>} Absolute path of the local file.
 * @throws {Error} (rejection) on network errors, non-200 status or write failures.
 */
async function downloadMedia(url, filename) {
  const filePath = path.join(MEDIA_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Media already downloaded: ${filename}`);
    return filePath;
  }

  const partialPath = `${filePath}.part`;

  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // drain so the socket is released
        reject(new Error(`Failed to download: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(partialPath);
      let settled = false;

      // Abort both streams, remove the partial file, then reject once.
      const fail = (err) => {
        if (settled) return;
        settled = true;
        res.destroy();
        file.destroy();
        fs.unlink(partialPath, () => reject(err));
      };

      res.on('error', fail);
      // A connection closed before the full body arrived never ends the pipe.
      res.on('close', () => {
        if (!res.complete) fail(new Error(`Download aborted: ${filename}`));
      });
      file.on('error', fail);
      file.on('finish', () => {
        file.close((closeErr) => {
          if (closeErr) {
            fail(closeErr);
            return;
          }
          fs.rename(partialPath, filePath, (renameErr) => {
            if (renameErr) {
              fail(renameErr);
              return;
            }
            settled = true;
            console.log(`✅ Downloaded: ${filename}`);
            resolve(filePath);
          });
        });
      });

      res.pipe(file);
    }).on('error', (err) => {
      // Best-effort cleanup; the partial file may not exist yet
      fs.unlink(partialPath, () => reject(err));
    });
  });
}
/**
 * Downloads a favicon into PUBLIC_DIR unless it is already present.
 *
 * The body is streamed to `<filename>.part` and renamed to its final name
 * only after the download completed. Nothing is written for a non-200
 * response. Previously the target file was created before the status check,
 * so a failed download left an empty file that later runs treated as
 * "already exists".
 *
 * @param {string} url - https:// URL of the favicon.
 * @param {string} filename - Target file name inside PUBLIC_DIR.
 * @returns {Promise<string>} Absolute path of the local file.
 * @throws {Error} (rejection) on network errors, non-200 status or write failures.
 */
async function downloadFavicon(url, filename) {
  const filePath = path.join(PUBLIC_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Favicon already exists: ${filename}`);
    return filePath;
  }

  const partialPath = `${filePath}.part`;

  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // drain so the socket is released
        reject(new Error(`Failed to download favicon: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(partialPath);
      let settled = false;

      // Abort both streams, remove the partial file, then reject once.
      const fail = (err) => {
        if (settled) return;
        settled = true;
        res.destroy();
        file.destroy();
        fs.unlink(partialPath, () => reject(err));
      };

      res.on('error', fail);
      // A connection closed before the full body arrived never ends the pipe.
      res.on('close', () => {
        if (!res.complete) fail(new Error(`Favicon download aborted: ${filename}`));
      });
      file.on('error', fail);
      file.on('finish', () => {
        file.close((closeErr) => {
          if (closeErr) {
            fail(closeErr);
            return;
          }
          fs.rename(partialPath, filePath, (renameErr) => {
            if (renameErr) {
              fail(renameErr);
              return;
            }
            settled = true;
            console.log(`✅ Downloaded favicon: ${filename}`);
            resolve(filePath);
          });
        });
      });

      res.pipe(file);
    }).on('error', (err) => {
      // Best-effort cleanup; the partial file may not exist yet
      fs.unlink(partialPath, () => reject(err));
    });
  });
}
// Data Processing Functions
/**
 * Picks the featured image reference of a WordPress item.
 *
 * Prefers the truthy `featured_media` value; otherwise falls back to the first
 * entry of the embedded `wp:featuredmedia` list.
 *
 * @param {Object} item - WordPress REST item.
 * @returns {*} `featured_media`, the first embedded media entry, or null.
 */
function extractFeaturedImage(item) {
  const { featured_media: mediaId, _embedded: embedded } = item;
  if (mediaId) {
    return mediaId;
  }

  const embeddedMedia = embedded && embedded['wp:featuredmedia'];
  return embeddedMedia ? embeddedMedia[0] : null;
}
/**
 * Maps a WordPress REST page to the export record format.
 *
 * English pages live at `/<slug>`, every other locale at `/<locale>/<slug>`.
 * Missing rendered fields become empty strings; a falsy `featured_media`
 * becomes null.
 *
 * @param {Object} page - Page object from the WordPress REST API.
 * @param {string} locale - Locale code of the page.
 * @returns {Object} The export record.
 */
function processPage(page, locale) {
  const { id, slug } = page;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;
  const renderedOrEmpty = (field) => field?.rendered || '';

  return {
    id,
    translationKey: `page-${slug}`, // Will be refined with Polylang data
    locale,
    slug,
    path: `${localePrefix}/${slug}`,
    titleHtml: renderedOrEmpty(page.title),
    contentHtml: renderedOrEmpty(page.content),
    excerptHtml: renderedOrEmpty(page.excerpt),
    featuredImage: page.featured_media || null,
    updatedAt: page.modified || page.date
  };
}
/**
 * Maps a WordPress REST post to the export record format.
 *
 * English posts live at `/blog/<slug>`, every other locale at
 * `/<locale>/blog/<slug>`. Missing rendered fields become empty strings;
 * a falsy `featured_media` becomes null.
 *
 * @param {Object} post - Post object from the WordPress REST API.
 * @param {string} locale - Locale code of the post.
 * @returns {Object} The export record.
 */
function processPost(post, locale) {
  const { id, slug, date } = post;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;
  const renderedOrEmpty = (field) => field?.rendered || '';

  return {
    id,
    translationKey: `post-${slug}`,
    locale,
    slug,
    path: `${localePrefix}/blog/${slug}`,
    titleHtml: renderedOrEmpty(post.title),
    contentHtml: renderedOrEmpty(post.content),
    excerptHtml: renderedOrEmpty(post.excerpt),
    featuredImage: post.featured_media || null,
    datePublished: date,
    updatedAt: post.modified || date
  };
}
/**
 * Maps a WooCommerce product to the export record format.
 *
 * English products live at `/product/<slug>`, every other locale at
 * `/<locale>/product/<slug>`. Images are reduced to their `src` URLs (the
 * first one doubles as featured image), categories to `{ id, name, slug }`,
 * and a missing currency defaults to `EUR`.
 *
 * @param {Object} product - Product object from the WooCommerce REST API.
 * @param {string} locale - Locale code of the product.
 * @returns {Object} The export record.
 */
function processProduct(product, locale) {
  const localePrefix = locale === 'en' ? '' : `/${locale}`;
  const imageUrls = product.images ? product.images.map((image) => image.src) : [];
  const categories = product.categories
    ? product.categories.map(({ id, name, slug }) => ({ id, name, slug }))
    : [];

  return {
    id: product.id,
    translationKey: `product-${product.slug}`,
    locale,
    slug: product.slug,
    path: `${localePrefix}/product/${product.slug}`,
    name: product.name,
    shortDescriptionHtml: product.short_description || '',
    descriptionHtml: product.description || '',
    images: imageUrls,
    featuredImage: imageUrls.length > 0 ? imageUrls[0] : null,
    sku: product.sku,
    regularPrice: product.regular_price,
    salePrice: product.sale_price,
    currency: product.currency || 'EUR',
    stockStatus: product.stock_status,
    categories,
    attributes: product.attributes || [],
    variations: product.variations || [],
    updatedAt: product.date_modified
  };
}
/**
 * Maps a WooCommerce product category to the export record format.
 *
 * English categories live at `/product-category/<slug>`, every other locale
 * at `/<locale>/product-category/<slug>`.
 *
 * @param {Object} category - Category object from the WooCommerce REST API.
 * @param {string} locale - Locale code of the category.
 * @returns {Object} The export record.
 */
function processProductCategory(category, locale) {
  const { id, slug, name } = category;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;

  return {
    id,
    translationKey: `product-category-${slug}`,
    locale,
    slug,
    name,
    path: `${localePrefix}/product-category/${slug}`,
    description: category.description || '',
    count: category.count || 0
  };
}
/**
 * Reduces a WordPress menu to its basic structure.
 *
 * The id is taken from `term_id` when truthy, otherwise from `id`; a missing
 * `items` list becomes an empty array.
 *
 * @param {Object} menu - Menu object as returned by WordPress.
 * @param {string} locale - Locale code of the menu.
 * @returns {{id: *, slug: *, name: *, locale: string, items: Array}} The export record.
 */
function processMenu(menu, locale) {
  const { term_id: termId, slug, name, items } = menu;

  return {
    id: termId || menu.id,
    slug,
    name,
    locale,
    items: items || []
  };
}
// Main Export Functions
/**
 * Exports all published pages for both locales.
 *
 * Fetches EN then DE pages, converts them with processPage and writes
 * `pages.en.json` / `pages.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array, de: Array}>} The processed pages per locale.
 */
async function exportPages() {
  console.log('\n📊 EXPORTING PAGES');

  const rawEN = await fetchWithPagination('pages', { status: 'publish' }, 'en');
  const rawDE = await fetchWithPagination('pages', { status: 'publish' }, 'de');

  // Convert everything first so nothing is written if a record fails to convert
  const byLocale = {
    en: rawEN.map((page) => processPage(page, 'en')),
    de: rawDE.map((page) => processPage(page, 'de'))
  };

  for (const [locale, records] of Object.entries(byLocale)) {
    const target = path.join(OUTPUT_DIR, `pages.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }

  console.log(`✅ Pages: ${byLocale.en.length} EN, ${byLocale.de.length} DE`);
  return byLocale;
}
/**
 * Fetch all published posts for English and German, normalise them with
 * `processPost` and write `posts.en.json` / `posts.de.json` to OUTPUT_DIR.
 *
 * @returns {Promise<{en: object[], de: object[]}>} Processed posts per locale.
 */
async function exportPosts() {
  console.log('\n📊 EXPORTING POSTS');

  // Each request gets its own params object; the two fetches run one after the other.
  const rawEnglish = await fetchWithPagination('posts', { status: 'publish' }, 'en');
  const rawGerman = await fetchWithPagination('posts', { status: 'publish' }, 'de');

  const result = {
    en: rawEnglish.map((post) => processPost(post, 'en')),
    de: rawGerman.map((post) => processPost(post, 'de'))
  };

  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `posts.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }

  console.log(`✅ Posts: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Fetch WooCommerce products for English and German, normalise them with
 * `processProduct` and write `products.en.json` / `products.de.json`.
 *
 * @returns {Promise<{en: object[], de: object[]}>} Processed products per locale.
 */
async function exportProducts() {
  console.log('\n📊 EXPORTING PRODUCTS');

  // Each request gets its own (empty) params object; fetches run sequentially.
  const rawEnglish = await fetchWooCommerce('products', {}, 'en');
  const rawGerman = await fetchWooCommerce('products', {}, 'de');

  const result = {
    en: rawEnglish.map((product) => processProduct(product, 'en')),
    de: rawGerman.map((product) => processProduct(product, 'de'))
  };

  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `products.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }

  console.log(`✅ Products: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Fetch WooCommerce product categories for English and German, normalise them
 * with `processProductCategory` and write `product-categories.<locale>.json`.
 *
 * @returns {Promise<{en: object[], de: object[]}>} Processed categories per locale.
 */
async function exportProductCategories() {
  console.log('\n📊 EXPORTING PRODUCT CATEGORIES');

  // Each request gets its own (empty) params object; fetches run sequentially.
  const rawEnglish = await fetchWooCommerce('products/categories', {}, 'en');
  const rawGerman = await fetchWooCommerce('products/categories', {}, 'de');

  const result = {
    en: rawEnglish.map((category) => processProductCategory(category, 'en')),
    de: rawGerman.map((category) => processProductCategory(category, 'de'))
  };

  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `product-categories.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }

  console.log(`✅ Product Categories: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Export navigation menus for English and German.
 *
 * The `menus` endpoint is treated as optional: a rejected fetch yields an
 * empty list for that locale. Menu locations are requested separately and
 * default to `{}` when that request fails. Both locale files embed the same
 * locations object.
 *
 * @returns {Promise<{en: object[], de: object[], locations: object}>}
 */
async function exportMenus() {
  console.log('\n📊 EXPORTING MENUS');

  // The menus endpoint may not exist on this site; fall back to no menus.
  const noMenus = () => [];
  const rawEnglish = await fetchWithPagination('menus', {}, 'en').catch(noMenus);
  const rawGerman = await fetchWithPagination('menus', {}, 'de').catch(noMenus);

  let menuLocations = {};
  try {
    menuLocations = await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth());
  } catch (e) {
    console.log('⚠️ Menu locations endpoint not available');
  }

  const english = rawEnglish.map((menu) => processMenu(menu, 'en'));
  const german = rawGerman.map((menu) => processMenu(menu, 'de'));

  const outputs = [
    ['menus.en.json', english],
    ['menus.de.json', german]
  ];
  for (const [filename, menus] of outputs) {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, filename),
      JSON.stringify({ menus, locations: menuLocations }, null, 2)
    );
  }

  console.log(`✅ Menus: ${english.length} EN, ${german.length} DE`);
  return { en: english, de: german, locations: menuLocations };
}
/**
 * Download every media item referenced by the already-exported JSON files and
 * write a manifest (`media.json`) describing them.
 *
 * References are collected from each `*.json` file in OUTPUT_DIR: numeric
 * `featuredImage` values are treated as media IDs and resolved via
 * `fetchMedia`; string references are downloaded directly when they start
 * with `http`. Failures to resolve or download a single item are logged as
 * warnings and do not abort the export.
 *
 * Filenames are guaranteed unique within one run: URL-based names embed
 * `Date.now()`, which is identical for references processed in the same
 * millisecond, so two URLs sharing a basename (e.g. the same file name in
 * different upload folders) would otherwise overwrite each other.
 *
 * @returns {Promise<object[]>} The media manifest that was written.
 */
async function exportMedia() {
  console.log('\n📊 EXPORTING MEDIA');

  // Collect all unique media references (numeric IDs or URLs) from exported data.
  const mediaIds = new Set();
  const jsonFiles = fs.readdirSync(OUTPUT_DIR).filter((f) => f.endsWith('.json'));
  for (const file of jsonFiles) {
    const content = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, file), 'utf8'));
    const items = Array.isArray(content) ? content : ((content && content.menus) || []);
    for (const item of items) {
      // Skip null/primitive entries instead of crashing on property access.
      if (!item || typeof item !== 'object') continue;
      if (item.featuredImage) mediaIds.add(item.featuredImage);
      if (item.images) {
        item.images.forEach((img) => {
          // Only URLs pointing into the WordPress uploads tree are collected.
          if (typeof img === 'string' && img.includes('/wp-content/')) {
            mediaIds.add(img);
          }
        });
      }
    }
  }

  // Hand out each filename once; a clash gets a numeric prefix so the
  // extension stays intact.
  const usedFilenames = new Set();
  const reserveFilename = (candidate) => {
    let name = candidate;
    for (let attempt = 1; usedFilenames.has(name); attempt += 1) {
      name = `${attempt}-${candidate}`;
    }
    usedFilenames.add(name);
    return name;
  };

  const mediaManifest = [];
  const downloadPromises = [];

  for (const mediaRef of mediaIds) {
    if (typeof mediaRef === 'number') {
      // Resolve the media ID; one unresolvable ID must not abort the export.
      let media;
      try {
        media = await fetchMedia(mediaRef);
      } catch (err) {
        console.warn(`⚠️ Failed to fetch media info ${mediaRef}:`, err.message);
        continue;
      }
      if (media && media.source_url) {
        const filename = reserveFilename(`${mediaRef}-${path.basename(media.source_url)}`);
        mediaManifest.push({
          id: mediaRef,
          url: media.source_url,
          filename: filename,
          alt: media.alt_text || '',
          width: media.media_details?.width,
          height: media.media_details?.height,
          mime_type: media.mime_type
        });
        downloadPromises.push(
          downloadMedia(media.source_url, filename).catch((err) => {
            console.warn(`⚠️ Failed to download media ${mediaRef}:`, err.message);
          })
        );
      }
    } else if (typeof mediaRef === 'string' && mediaRef.startsWith('http')) {
      // Direct URL: no metadata is available, only the file itself.
      const filename = reserveFilename(`media-${Date.now()}-${path.basename(mediaRef)}`);
      mediaManifest.push({
        id: null,
        url: mediaRef,
        filename: filename,
        alt: '',
        width: null,
        height: null,
        mime_type: null
      });
      downloadPromises.push(
        downloadMedia(mediaRef, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media from URL:`, err.message);
        })
      );
    }
  }

  // Downloads were started as they were discovered; wait for all of them.
  await Promise.all(downloadPromises);

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'media.json'),
    JSON.stringify(mediaManifest, null, 2)
  );
  console.log(`✅ Media: ${mediaManifest.length} items`);
  return mediaManifest;
}
/**
 * Collect general site information and write it to `site-info.json`.
 *
 * Starts from static defaults (base URL, export timestamps, the two exported
 * languages) and enriches them best-effort:
 *  - Polylang is detected from the plugins endpoint. The match is
 *    case-insensitive and looks at both the plugin identifier (`plugin`) and
 *    the display name (`name`), since the display name is typically
 *    capitalised ("Polylang") and a lowercase-only check would miss it.
 *  - Title, description, default language and permalink structure are read
 *    from a single settings request.
 * A failing request only logs a warning; the file is always written.
 *
 * @returns {Promise<object>} The site info object that was written.
 */
async function exportSiteInfo() {
  console.log('\n📊 EXPORTING SITE INFORMATION');

  const siteInfo = {
    baseUrl: BASE_URL,
    exportDate: new Date().toISOString(),
    timestamp: TIMESTAMP,
    polylang: false,
    languages: ['en', 'de'],
    defaultLocale: 'en' // Will need to confirm
  };

  // Check for Polylang
  try {
    const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth());
    const mentionsPolylang = (field) =>
      typeof field === 'string' && field.toLowerCase().includes('polylang');
    const polylangPlugin = plugins.find((p) => mentionsPolylang(p.plugin) || mentionsPolylang(p.name));
    if (polylangPlugin) {
      siteInfo.polylang = true;
      siteInfo.polylangVersion = polylangPlugin.version;
    }
  } catch (e) {
    console.log('⚠️ Could not check plugins');
  }

  // Site settings and permalink structure come from the same endpoint, so
  // one request serves both.
  try {
    const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    siteInfo.siteTitle = settings.title;
    siteInfo.siteDescription = settings.description;
    siteInfo.defaultLanguage = settings.default_language || 'en';
    siteInfo.permalinkStructure = settings.permalink_structure;
  } catch (e) {
    console.log('⚠️ Could not fetch settings');
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'site-info.json'),
    JSON.stringify(siteInfo, null, 2)
  );
  console.log('✅ Site info exported');
  return siteInfo;
}
/**
 * Run one stage of the logo/favicon export. A failure is reported with the
 * stage label and turned into `undefined` so later stages still run.
 */
async function attemptAssetStage(label, stage) {
  try {
    return await stage();
  } catch (error) {
    console.error('❌ Error exporting logo/favicon:', `${label}: ${error.message}`);
    return undefined;
  }
}

/**
 * Download `media` as `logo<ext>` and record it on `assets` (also as
 * `logoSvg` when the file is an SVG by MIME type or extension).
 */
async function storeLogoAsset(media, assets) {
  const ext = path.extname(media.source_url);
  const logoFilename = `logo${ext}`;
  await downloadMedia(media.source_url, logoFilename);
  assets.logo = `/media/${logoFilename}`;
  const isSvg = media.mime_type === 'image/svg+xml' || ext === '.svg';
  if (isSvg) {
    assets.logoSvg = `/media/${logoFilename}`;
  }
  return { logoFilename, isSvg };
}

/** Export the logo referenced by the `custom_logo` site setting, if any. */
async function exportLogoFromSettings(settings, assets) {
  if (!settings.custom_logo) return;
  console.log(`📥 Found custom_logo ID: ${settings.custom_logo}`);
  const logoMedia = await fetchMedia(settings.custom_logo);
  if (!logoMedia || !logoMedia.source_url) return;
  const { logoFilename, isSvg } = await storeLogoAsset(logoMedia, assets);
  console.log(`✅ Logo downloaded: ${logoFilename}`);
  if (isSvg) {
    console.log(`✅ SVG logo detected: ${logoFilename}`);
  }
}

/** Export the icon referenced by the `site_icon` setting as favicon and apple-touch-icon. */
async function exportSiteIconFromSettings(settings, assets) {
  if (!settings.site_icon) return;
  console.log(`📥 Found site_icon ID: ${settings.site_icon}`);
  assets.siteIconId = settings.site_icon;
  const iconMedia = await fetchMedia(settings.site_icon);
  if (!iconMedia || !iconMedia.source_url) return;

  const faviconFilename = 'favicon.ico';
  await downloadFavicon(iconMedia.source_url, faviconFilename);
  assets.favicon = `/favicon.ico`;
  console.log(`✅ Favicon downloaded: ${faviconFilename}`);

  // Same source file, stored a second time under the apple-touch-icon name.
  const appleTouchFilename = 'apple-touch-icon.png';
  await downloadFavicon(iconMedia.source_url, appleTouchFilename);
  assets.appleTouchIcon = `/apple-touch-icon.png`;
  console.log(`✅ Apple touch icon downloaded: ${appleTouchFilename}`);
}

/**
 * Fallback logo lookup: pick the first library item that has a source URL and
 * mentions "logo" in its title, slug or URL.
 */
async function exportLogoFromLibrary(library, assets) {
  const mentionsLogo = (text) => (text?.toLowerCase() || '').includes('logo');
  const logoMedia = library.find((m) =>
    m.source_url &&
    (mentionsLogo(m.title?.rendered) || mentionsLogo(m.slug) || mentionsLogo(m.source_url))
  );
  if (!logoMedia) return;
  const { logoFilename, isSvg } = await storeLogoAsset(logoMedia, assets);
  if (isSvg) {
    console.log(`✅ SVG logo found and downloaded: ${logoFilename}`);
  } else {
    console.log(`✅ Logo found and downloaded: ${logoFilename}`);
  }
}

/** Download every SVG in the media library; a failing file only logs a warning. */
async function exportSvgImages(library) {
  const svgImages = library.filter((m) => m.mime_type === 'image/svg+xml');
  if (svgImages.length === 0) return;
  console.log(`📥 Found ${svgImages.length} SVG images`);
  for (const svg of svgImages) {
    try {
      const filename = `svg-${svg.id}-${path.basename(svg.source_url)}`;
      await downloadMedia(svg.source_url, filename);
      console.log(`✅ SVG downloaded: ${filename}`);
    } catch (err) {
      console.warn(`⚠️ Failed to download SVG ${svg.source_url}:`, err.message);
    }
  }
}

/**
 * Scan pages and posts for image URLs (in Visual Composer / Salient meta and
 * in rendered content), download them and write `salient-images.json`.
 */
async function exportSalientImages() {
  const salientImages = new Set();
  for (const endpoint of ['pages', 'posts']) {
    const items = await fetchWithPagination(endpoint, { per_page: 100 });
    items.forEach((item) => {
      // Look for VC-related meta
      if (item.meta) {
        Object.keys(item.meta).forEach((key) => {
          if (key.includes('_vc') || key.includes('vc_') || key.includes('salient')) {
            const metaValue = item.meta[key];
            if (typeof metaValue === 'string') {
              const urlMatches = metaValue.match(/https?:\/\/[^\s"']+/g);
              if (urlMatches) {
                urlMatches.forEach((url) => salientImages.add(url));
              }
            }
          }
        });
      }
      // Also check rendered content for image URLs
      const content = item.content?.rendered || '';
      const contentUrls = content.match(/https?:\/\/[^\s"']+\.(jpg|jpeg|png|webp|svg)/gi);
      if (contentUrls) {
        contentUrls.forEach((url) => salientImages.add(url));
      }
    });
  }

  if (salientImages.size === 0) return;
  console.log(`📥 Found ${salientImages.size} Salient/VC images`);
  const salientManifest = [];
  for (const url of salientImages) {
    try {
      const filename = `salient-${Date.now()}-${path.basename(url)}`;
      await downloadMedia(url, filename);
      salientManifest.push({
        originalUrl: url,
        localPath: `/media/${filename}`,
        filename: filename
      });
      console.log(`✅ Salient image downloaded: ${filename}`);
    } catch (err) {
      console.warn(`⚠️ Failed to download Salient image ${url}:`, err.message);
    }
  }
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'salient-images.json'),
    JSON.stringify(salientManifest, null, 2)
  );
}

/** Try well-known favicon locations; the first one that downloads wins. */
async function exportFallbackFavicon(assets) {
  console.log('⚠️ No favicon found in settings, trying common locations...');
  const faviconUrls = [
    `${BASE_URL}/favicon.ico`,
    `${BASE_URL}/wp-content/uploads/favicon.ico`
  ];
  for (const url of faviconUrls) {
    try {
      await downloadFavicon(url, 'favicon.ico');
      assets.favicon = '/favicon.ico';
      console.log(`✅ Favicon downloaded from: ${url}`);
      // Also create apple-touch-icon
      await downloadFavicon(url, 'apple-touch-icon.png');
      assets.appleTouchIcon = '/apple-touch-icon.png';
      break;
    } catch (e) {
      // Deliberate best-effort: this location is unavailable, try the next one.
    }
  }
}

/**
 * Export the site logo, favicon/apple-touch-icon, all SVG media and all
 * Salient/Visual Composer images, then write the asset manifest
 * (`assets.json`).
 *
 * Each stage is isolated: a failing stage is logged and the remaining stages
 * still run. In particular the favicon fallback (which does not need the
 * settings endpoint) and the manifest write happen even when the settings
 * request is rejected. The media library is fetched once, with a page size of
 * 100 (the largest the WordPress REST API accepts), and shared by the logo
 * search and the SVG export.
 *
 * @returns {Promise<{logo: ?string, logoSvg: ?string, favicon: ?string,
 *   appleTouchIcon: ?string, siteIconId: *}>} Paths of the exported assets;
 *   entries stay `null` when the asset could not be obtained.
 */
async function exportLogoAndFavicon() {
  console.log('\n📊 EXPORTING LOGO AND FAVICON');

  const assets = {
    logo: null,
    logoSvg: null,
    favicon: null,
    appleTouchIcon: null,
    siteIconId: null
  };

  // Site settings may include logo and icon IDs; without them we fall back
  // to the media-library search and the common favicon locations.
  const settings = (await attemptAssetStage('site settings', () =>
    makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth())
  )) || {};

  await attemptAssetStage('custom logo', () => exportLogoFromSettings(settings, assets));
  await attemptAssetStage('site icon', () => exportSiteIconFromSettings(settings, assets));

  // WP CLI Equivalent: wp media list --search=logo --format=json
  console.log('🔍 WP CLI Equivalent: Searching for logo media...');
  const library = (await attemptAssetStage('media library', () =>
    fetchWithPagination('media', { per_page: 100 })
  )) || [];
  if (!assets.logo) {
    await attemptAssetStage('logo search', () => exportLogoFromLibrary(library, assets));
  }

  // WP CLI Equivalent: wp media list --mime=image/svg+xml --format=json
  console.log('🔍 WP CLI Equivalent: Searching for SVG images...');
  await attemptAssetStage('SVG images', () => exportSvgImages(library));

  // WP CLI Equivalent: wp postmeta list --post_type=any --meta_key~=_vc --format=json
  console.log('🔍 WP CLI Equivalent: Searching for Salient/VC images...');
  await attemptAssetStage('Salient/VC images', () => exportSalientImages());

  if (!assets.favicon) {
    await attemptAssetStage('favicon fallback', () => exportFallbackFavicon(assets));
  }

  await attemptAssetStage('asset manifest', () => {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, 'assets.json'),
      JSON.stringify(assets, null, 2)
    );
    console.log('✅ Logo and favicon export complete');
  });

  return assets;
}
/**
 * Build `translation-mapping.json`, pairing English and German records that
 * share the same slug.
 *
 * This is slug-based matching only; it should eventually be replaced by real
 * translation links (e.g. from Polylang). Input files that are missing,
 * unreadable or do not contain a JSON array are treated as empty, so the
 * mapping can be generated from a partial export.
 *
 * @returns {Promise<{pages: object, posts: object, products: object,
 *   productCategories: object}>} For each content type, an object keyed by
 *   slug with `{ en: <id>, de: <id> }` values.
 */
async function generateTranslationMapping() {
  console.log('\n📊 GENERATING TRANSLATION MAPPING');

  // Load one exported file; anything that is not a readable JSON array counts as empty.
  const loadFile = (filename) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
      return Array.isArray(parsed) ? parsed : [];
    } catch (e) {
      return [];
    }
  };

  const pagesEN = loadFile('pages.en.json');
  const pagesDE = loadFile('pages.de.json');
  const postsEN = loadFile('posts.en.json');
  const postsDE = loadFile('posts.de.json');
  const productsEN = loadFile('products.en.json');
  const productsDE = loadFile('products.de.json');
  const categoriesEN = loadFile('product-categories.en.json');
  const categoriesDE = loadFile('product-categories.de.json');

  // Pair items by slug. The German side is indexed once so each English item
  // is a constant-time lookup; the first German item per slug wins.
  function findTranslationPairs(enItems, deItems) {
    const germanIdBySlug = new Map();
    for (const deItem of deItems) {
      if (!germanIdBySlug.has(deItem.slug)) {
        germanIdBySlug.set(deItem.slug, deItem.id);
      }
    }

    const pairs = {};
    for (const enItem of enItems) {
      if (germanIdBySlug.has(enItem.slug)) {
        pairs[`${enItem.slug}`] = {
          en: enItem.id,
          de: germanIdBySlug.get(enItem.slug)
        };
      }
    }
    return pairs;
  }

  const mapping = {
    pages: findTranslationPairs(pagesEN, pagesDE),
    posts: findTranslationPairs(postsEN, postsDE),
    products: findTranslationPairs(productsEN, productsDE),
    productCategories: findTranslationPairs(categoriesEN, categoriesDE)
  };

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'translation-mapping.json'),
    JSON.stringify(mapping, null, 2)
  );

  const totalPairs = Object.values(mapping).reduce((sum, obj) => sum + Object.keys(obj).length, 0);
  console.log(`✅ Translation mapping: ${totalPairs} pairs found`);
  return mapping;
}
/**
 * Extract page-builder post meta (Visual Composer / WPBakery / Salient) from
 * all published pages and posts and write it to `vc-postmeta.json`.
 *
 * A meta key qualifies when it contains `_vc`, `vc_`, `salient` or `wpb_`.
 * Each entry carries a preview (`meta_value`: first 200 characters of a
 * string, or the JSON of a non-string) next to the untouched `full_value`.
 * Errors are logged; whatever was collected so far is still returned.
 *
 * @returns {Promise<object[]>} The collected meta entries.
 */
async function exportWPCliPostmeta() {
  console.log('\n📊 EXPORTING WP CLI POSTMETA (VC/Salient)');

  const vcMeta = [];
  const builderMarkers = ['_vc', 'vc_', 'salient', 'wpb_'];
  const isBuilderKey = (key) => builderMarkers.some((marker) => key.includes(marker));

  try {
    const pages = await fetchWithPagination('pages', { status: 'publish', per_page: 100 });
    const posts = await fetchWithPagination('posts', { status: 'publish', per_page: 100 });
    const allItems = [...pages, ...posts];
    console.log(`🔍 Scanning ${allItems.length} items for VC/Salient meta...`);

    for (const item of allItems) {
      if (!item.meta) continue;

      const matchingKeys = Object.keys(item.meta).filter(isBuilderKey);
      for (const key of matchingKeys) {
        const value = item.meta[key];
        const preview = typeof value === 'string' ? value.substring(0, 200) : JSON.stringify(value);
        vcMeta.push({
          post_id: item.id,
          post_type: item.type || 'page',
          post_slug: item.slug,
          meta_key: key,
          meta_value: preview,
          full_value: value
        });
      }
    }

    fs.writeFileSync(
      path.join(OUTPUT_DIR, 'vc-postmeta.json'),
      JSON.stringify(vcMeta, null, 2)
    );
    console.log(`✅ VC/Salient postmeta: ${vcMeta.length} entries found`);
  } catch (error) {
    console.error('❌ Error exporting postmeta:', error.message);
  }

  return vcMeta;
}
/**
 * Generate permanent redirect rules that move posts from their root-level
 * URL to the `/blog` section and write them to `redirects.json`.
 *
 *   English: /{slug}    → /blog/{slug}
 *   German:  /de/{slug} → /de/blog/{slug}
 *
 * Reads `posts.en.json` and `posts.de.json` from OUTPUT_DIR and throws if
 * either file cannot be read or parsed.
 *
 * @returns {Promise<object[]>} Redirect rules, English first, then German.
 */
async function generateRedirects() {
  console.log('\n📊 GENERATING REDIRECT RULES');

  const readPosts = (filename) =>
    JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
  const englishPosts = readPosts('posts.en.json');
  const germanPosts = readPosts('posts.de.json');

  // `prefix` is the locale segment in front of both source and destination.
  const toRule = (post, locale, prefix) => ({
    source: `${prefix}/${post.slug}`,
    destination: `${prefix}/blog/${post.slug}`,
    permanent: true,
    locale
  });

  const redirects = [
    ...englishPosts.map((post) => toRule(post, 'en', '')),
    ...germanPosts.map((post) => toRule(post, 'de', '/de'))
  ];

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'redirects.json'),
    JSON.stringify(redirects, null, 2)
  );
  console.log(`✅ Redirects: ${redirects.length} rules generated`);
  return redirects;
}
// Main Execution
/**
 * Command-line entry point: runs every export step in order, then prints a
 * summary. Any error thrown by a step is reported and terminates the process
 * with exit code 1.
 */
async function main() {
  const say = (line) => console.log(line);

  say('🚀 WordPress → Next.js Data Export (WP CLI Enhanced)');
  say('=====================================');
  say(`Target: ${BASE_URL}`);
  say(`Output: ${OUTPUT_DIR}`);
  say('');

  try {
    const pipeline = [
      // Step 1: Export all content
      exportSiteInfo,
      exportPages,
      exportPosts,
      exportProducts,
      exportProductCategories,
      exportMenus,
      // Step 2: WP CLI Enhanced exports
      exportWPCliPostmeta,
      exportMedia,
      exportLogoAndFavicon,
      // Step 3: Generate mappings and redirects
      generateTranslationMapping,
      generateRedirects
    ];
    // Steps run strictly one after another; later steps read files written by earlier ones.
    for (const step of pipeline) {
      await step();
    }

    say('\n🎉 Export Complete!');
    say('=====================================');
    say(`📁 Data directory: data/raw/${TIMESTAMP}`);
    say(`🖼️ Media directory: public/media/`);
    say(`🎨 Logo/Favicon: public/`);
    [
      '',
      'WP CLI Features:',
      '✓ SVG logo detection and download',
      '✓ All SVG images exported',
      '✓ Salient/VC postmeta extracted',
      '✓ All media downloaded locally',
      '',
      'Next steps:',
      '1. Review exported data for completeness',
      '2. Check for any missing translations',
      '3. Verify media downloads',
      '4. Proceed with Next.js data processing'
    ].forEach(say);
  } catch (error) {
    console.error('\n❌ Export failed:', error.message);
    process.exit(1);
  }
}
// Run if called directly (i.e. this file is the process entry point rather
// than being loaded via require() from another module).
if (require.main === module) {
// main() reports its own errors and exits with code 1 on failure, so the
// returned promise is intentionally not awaited here.
main();
}
// Public API: the individual export/generation steps, so other scripts can
// run a subset of the pipeline. main() itself is not exported.
module.exports = {
exportPages,
exportPosts,
exportProducts,
exportProductCategories,
exportMenus,
exportMedia,
exportSiteInfo,
exportLogoAndFavicon,
exportWPCliPostmeta,
generateTranslationMapping,
generateRedirects
};