cleanup
This commit is contained in:
@@ -1,240 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress Export Analysis Script
|
||||
* Quickly analyzes exported data without loading large files entirely
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
|
||||
|
||||
// Find the latest export directory
|
||||
/**
 * Locate the most recent export directory inside DATA_DIR.
 *
 * Sub-directory names are compared as plain strings and the one that sorts
 * last is treated as the latest export. Prints an error and terminates the
 * process with exit code 1 when DATA_DIR does not exist or holds no
 * sub-directories.
 *
 * @returns {string} Path of the sub-directory whose name sorts last.
 */
function getLatestExportDir() {
  const abort = (message) => {
    console.error(message);
    process.exit(1);
  };

  if (!fs.existsSync(DATA_DIR)) {
    abort('❌ No data directory found');
  }

  // Keep only entries that are directories (statSync follows symlinks).
  const exportNames = [];
  for (const entry of fs.readdirSync(DATA_DIR)) {
    if (fs.statSync(path.join(DATA_DIR, entry)).isDirectory()) {
      exportNames.push(entry);
    }
  }

  if (exportNames.length === 0) {
    abort('❌ No export directories found');
  }

  // Ascending sort: the last element is the newest name.
  exportNames.sort();
  return path.join(DATA_DIR, exportNames[exportNames.length - 1]);
}
|
||||
|
||||
// Quick file analysis
|
||||
/**
 * Summarize one exported JSON file.
 *
 * The file is read and parsed completely; the summary then reports its size,
 * line count, number of items and a small sample.
 *
 * Item detection:
 *  - a top-level array: its elements are the items;
 *  - an object with a `menus` array: the menus are the items;
 *  - any other object (e.g. site info, translation mapping): there are no
 *    items (`count` is 0) and `sample` is the parsed object itself, so callers
 *    can inspect its fields;
 *  - `null` or a primitive: `count` is 0 and `sample` is an empty array.
 *
 * @param {string} filePath - Path of the JSON file to analyze.
 * @param {number} [sampleSize=3] - Maximum number of items copied into `sample`.
 * @returns {null | {error: string} | {size: number, sizeHuman: string,
 *   lines: number, count: number, sample: (Array|Object), isArray: boolean}}
 *   `null` when the file does not exist, `{ error: 'Invalid JSON' }` when it
 *   cannot be parsed, otherwise the summary.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const content = fs.readFileSync(filePath, 'utf8');

  let data;
  try {
    data = JSON.parse(content);
  } catch (e) {
    return { error: 'Invalid JSON' };
  }

  const isArray = Array.isArray(data);
  // JSON.parse('null') is valid and returns null, so guard before touching properties.
  const isPlainObject = data !== null && typeof data === 'object' && !isArray;

  let items = null;
  if (isArray) {
    items = data;
  } else if (isPlainObject && Array.isArray(data.menus)) {
    items = data.menus;
  }

  let sample;
  if (items) {
    sample = items.slice(0, sampleSize);
  } else if (isPlainObject) {
    // Single-object exports have no list to sample from; expose the object.
    sample = data;
  } else {
    sample = [];
  }

  return {
    size: stats.size,
    sizeHuman: `${(stats.size / 1024).toFixed(1)} KB`,
    lines: content.split('\n').length,
    count: items ? items.length : 0,
    sample: sample,
    isArray: isArray
  };
}
|
||||
|
||||
// Main analysis
|
||||
/**
 * Print a console report for the latest export directory: item counts per
 * content type and language, translation pair counts, file sizes, one sample
 * item per main content type, a set of pass/fail quality checks, and a short
 * list of recommendations and next steps.
 *
 * Files that analyzeFile() reports as missing (null) are left out of the report.
 */
function main() {
  const exportDir = getLatestExportDir();

  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const files = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json'
  ];

  // Analyze every known file, keeping two sample items each.
  const results = {};
  for (const file of files) {
    const analysis = analyzeFile(path.join(exportDir, file), 2);
    if (analysis) {
      results[file] = analysis;
    }
  }

  const countOf = (file) => results[file]?.count || 0;
  const firstSampleOf = (file) => results[file]?.sample?.[0];

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  const bilingualRows = [
    ['Pages', pagesEN, pagesDE],
    ['Posts', postsEN, postsDE],
    ['Products', productsEN, productsDE],
    ['Categories', categoriesEN, categoriesDE]
  ];
  for (const [label, en, de] of bilingualRows) {
    console.log(` ${label}: EN: ${en} | DE: ${de} | Total: ${en + de}`);
  }
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping: use the first sample item when there is one,
  // otherwise the sample value itself.
  const mappingSample = results['translation-mapping.json']?.sample;
  const translation = mappingSample?.[0] || mappingSample;
  if (translation) {
    const pairsIn = (section) => Object.keys(translation[section] || {}).length;
    const pagePairs = pairsIn('pages');
    const postPairs = pairsIn('posts');
    const productPairs = pairsIn('products');
    const categoryPairs = pairsIn('productCategories');

    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes
  console.log('💾 File Sizes:');
  for (const [file, data] of Object.entries(results)) {
    console.log(` ${file.padEnd(30)} ${data.sizeHuman}`);
  }

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');

  const page = firstSampleOf('pages.en.json');
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }

  const post = firstSampleOf('posts.en.json');
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }

  const product = firstSampleOf('products.en.json');
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }

  const menu = firstSampleOf('menus.en.json');
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks, as [name, passed] pairs
  console.log('\n✅ Data Quality Checks:');

  const checks = [
    ['Both languages present', pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0],
    ['Translation pairs exist', mappingSample && Object.keys(mappingSample.pages || {}).length > 0],
    ['Media files downloaded', media > 0],
    ['Redirects generated', redirects > 0],
    ['Site info complete', results['site-info.json']?.sample?.siteTitle !== undefined]
  ];
  for (const [name, pass] of checks) {
    console.log(` ${pass ? '✅' : '❌'} ${name}`);
  }

  // Recommendations
  console.log('\n💡 Recommendations:');

  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }

  if (mappingSample && Object.keys(mappingSample.posts || {}).length === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }

  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  const nextSteps = [
    ' 1. Review sample data above for accuracy',
    ' 2. Check translation mapping for completeness',
    ' 3. Verify media files are properly named',
    ' 4. Proceed to Next.js data processing'
  ];
  for (const step of nextSteps) {
    console.log(step);
  }
}
|
||||
|
||||
// Run the analysis only when this script is the program entry point,
// not when it is loaded through require().
if (module === require.main) {
  main();
}
|
||||
@@ -1,75 +0,0 @@
|
||||
const pages = require('../data/processed/pages.json');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
// Get home page (corporate-3-landing-2)
|
||||
/**
 * Look up a page by slug and parse its HTML with cheerio.
 *
 * @param {string} slug - Slug to search for in `pages`.
 * @returns {Function|null} A cheerio root for the page content, or null (after
 *   printing a warning) when no page has that slug. A page without
 *   `contentHtml` is parsed as empty markup.
 */
function loadPageContent(slug) {
  const page = pages.find(p => p.slug === slug);
  if (!page) {
    console.warn(`⚠️ Page "${slug}" not found in processed pages - skipping`);
    return null;
  }
  return cheerio.load(page.contentHtml || '');
}

/**
 * Print, for every `.vc-row` of the home page, its column count, classes,
 * which heading/paragraph/image elements it contains, and a short summary of
 * each direct child column.
 */
function analyzeHomePage() {
  console.log('=== HOME PAGE (corporate-3-landing-2) DETAILED ANALYSIS ===\n');

  // Get home page (corporate-3-landing-2)
  const $ = loadPageContent('corporate-3-landing-2');
  if (!$) return;

  $('.vc-row').each((i, row) => {
    const $row = $(row);
    const $cols = $row.find('> .vc-column');
    const colCount = $cols.length;

    console.log(`Row ${i + 1}:`);
    console.log(` Columns: ${colCount}`);
    console.log(` Classes: ${$row.attr('class')}`);

    // Headings h1-h4 are previewed (first 50 characters); h6 is printed in full.
    for (const tag of ['h1', 'h2', 'h3', 'h4']) {
      const $headings = $row.find(tag);
      if ($headings.length > 0) {
        console.log(` Has ${tag.toUpperCase()}: ${$headings.text().substring(0, 50)}...`);
      }
    }

    const $h6 = $row.find('h6');
    if ($h6.length > 0) console.log(` Has H6: ${$h6.text()}`);

    const paragraphCount = $row.find('p').length;
    if (paragraphCount > 0) console.log(` Has P: ${paragraphCount} paragraphs`);

    // Report the number of images, like the paragraph and nested-row lines do.
    const imageCount = $row.find('img').length;
    if (imageCount > 0) console.log(` Has Images: ${imageCount}`);

    const nestedRowCount = $row.find('.vc-row').length;
    if (nestedRowCount) console.log(` Has Nested Rows: ${nestedRowCount}`);

    // Check column structure
    $cols.each((j, col) => {
      const $col = $(col);
      const colClasses = $col.attr('class') || '';
      const colH3 = $col.find('h3').text().trim();
      const colH4 = $col.find('h4').text().trim();
      const colH6 = $col.find('h6').text().trim();
      const colP = $col.find('p').text().trim().substring(0, 30);

      console.log(` Column ${j + 1}: ${colClasses}`);
      if (colH3) console.log(` H3: ${colH3}`);
      if (colH4) console.log(` H4: ${colH4}`);
      if (colH6) console.log(` H6: ${colH6}`);
      if (colP) console.log(` P: ${colP}...`);
    });

    console.log('');
  });
}

/**
 * Print every `.vc-row` of the team page whose text contains a typographic
 * quotation mark or the words "Expertise" / "Experience".
 */
function analyzeTeamPage() {
  console.log('\n=== TEAM PAGE TESTIMONIALS ANALYSIS ===\n');

  const $team = loadPageContent('team');
  if (!$team) return;

  $team('.vc-row').each((i, row) => {
    const text = $team(row).text();
    const hasQuotes = text.includes('„') || text.includes('“');

    if (hasQuotes || text.includes('Expertise') || text.includes('Experience')) {
      console.log(`Row ${i + 1}:`);
      console.log(` Content: ${text.substring(0, 100)}...`);
      console.log(` Has quotes: ${hasQuotes}`);
      console.log('');
    }
  });
}

analyzeHomePage();

// Also check team page for testimonials
analyzeTeamPage();
|
||||
@@ -1,122 +0,0 @@
|
||||
const pages = require('../data/processed/pages.json');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
// Analyze each page
|
||||
// True when `haystack` contains at least one of the `needles`.
const containsAny = (haystack, needles) => needles.some(needle => haystack.includes(needle));

// Render the truthy/positive pattern values as "key=value, key=value".
const describePatterns = (patterns) =>
  Object.entries(patterns)
    .filter(([, value]) => value > 0)
    .map(([key, value]) => `${key}=${value}`)
    .join(', ');

// Build one analysis record per page: identifying fields plus a `patterns`
// object of counts and booleans derived from the page's HTML.
const analysis = pages.map(page => {
  const html = page.contentHtml || '';
  const $ = cheerio.load(html);

  const attrOf = (el, name) => $(el).attr(name) || '';
  const textOf = (el) => $(el).text();
  // Number of elements matching `selector` for which `predicate(el)` holds.
  const countWhere = (selector, predicate) =>
    $(selector).filter((i, el) => predicate(el)).length;

  const patterns = {
    vcRows: $('.vc-row').length,
    vcColumns: $('.vc-column').length,
    hasHero: $('.vc-row h1, .vc-row h2').length > 0,
    hasCards: $('.vc-row .vc-column h3, .vc-row .vc-column h4').length >= 2,
    hasNumberedFeatures: $('.vc-row h6').length > 0,
    hasForm: $('.frm_forms').length > 0 || $('form').length > 0,
    hasGrid: $('.vc-row > .vc-column').length >= 2,
    hasImages: $('img').length,
    hasLinks: $('a').length,
    hasTables: $('table').length,
    hasLists: $('ul, ol').length,
    hasTestimonials: countWhere('.vc-row', el =>
      containsAny(textOf(el), ['„', '“', 'Meet the team'])),
    hasAnimations: countWhere('.vc-row', el =>
      containsAny(attrOf(el, 'class'), ['nectar', 'animation', 'fade'])),
    hasSpecialColumns: countWhere('.vc-row > .vc-column', el =>
      containsAny(attrOf(el, 'class'), ['vc_col-md-', 'vc_col-lg-'])),
    hasNestedRows: $('.vc-row .vc-row').length,
    hasBackgrounds: countWhere('.vc-row', el =>
      attrOf(el, 'style').includes('background') ||
      containsAny(attrOf(el, 'class'), ['bg-', 'full-width'])),
    hasQuotes: countWhere('blockquote, h2', el =>
      containsAny(textOf(el), ['„', '“', 'Expertise', 'Experience'])),
    hasPDFs: $('a[href$=".pdf"]').length,
    hasContactInfo: countWhere('.vc-row', el =>
      containsAny(textOf(el), ['@', 'Raiffeisenstraße', 'KLZ Cables']))
  };

  return {
    slug: page.slug,
    locale: page.locale,
    translationKey: page.translationKey,
    title: page.title,
    patterns: patterns,
    rawHtml: html.substring(0, 200) + '...'
  };
});

// Print detailed analysis
console.log('=== DETAILED PAGE ANALYSIS ===\n');

for (const entry of analysis) {
  console.log(`📄 ${entry.locale.toUpperCase()}: ${entry.slug} (${entry.title})`);
  console.log(` Translation Key: ${entry.translationKey}`);
  console.log(' Patterns Found:');

  for (const [key, value] of Object.entries(entry.patterns)) {
    if (value > 0) {
      console.log(` - ${key}: ${value}`);
    }
  }

  console.log('');
}

// Summary by translation key
console.log('=== SUMMARY BY TRANSLATION KEY ===\n');

const byKey = {};
for (const entry of analysis) {
  const group = byKey[entry.translationKey] || (byKey[entry.translationKey] = []);
  group.push(entry);
}

for (const key of Object.keys(byKey).sort()) {
  console.log(`${key}:`);
  for (const member of byKey[key]) {
    console.log(` ${member.locale}: ${member.slug} [${describePatterns(member.patterns)}]`);
  }
  console.log('');
}

// Priority analysis
console.log('=== PRIORITY PAGES ANALYSIS ===\n');

const priority = {
  'Home': ['corporate-3-landing-2', 'start'],
  'Contact': ['contact', 'kontakt'],
  'About/Legal/Privacy': ['legal-notice', 'impressum', 'privacy-policy', 'datenschutz', 'terms', 'agbs'],
  'Team': ['team'],
  'Products': ['products', 'produkte'],
  'Blog': ['blog'],
  'Thanks': ['thanks', 'danke']
};

for (const [category, slugs] of Object.entries(priority)) {
  console.log(`${category}:`);
  for (const slug of slugs) {
    // Only the first analysis entry with this slug is reported.
    const match = analysis.find(p => p.slug === slug);
    if (match) {
      console.log(` ${match.locale}/${match.slug}: ${describePatterns(match.patterns) || 'No patterns'}`);
    }
  }
  console.log('');
}
|
||||
@@ -1,194 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to check WooCommerce product attributes for high-voltage cables
|
||||
* This will query the API directly to see if attributes exist but weren't captured
|
||||
*/
|
||||
|
||||
const https = require('https');
|
||||
const path = require('path');
|
||||
require('dotenv').config();
|
||||
|
||||
const CONFIG = {
|
||||
url: process.env.WOOCOMMERCE_URL,
|
||||
key: process.env.WOOCOMMERCE_CONSUMER_KEY,
|
||||
secret: process.env.WOOCOMMERCE_CONSUMER_SECRET
|
||||
};
|
||||
|
||||
// High-voltage product IDs that are missing attributes
|
||||
const HIGH_VOLTAGE_IDS = [46773, 46771, 46769, 46767, 46765, 46763, 46761];
|
||||
|
||||
/**
 * Build the HTTP Basic `Authorization` header value from the WooCommerce
 * consumer key and secret held in CONFIG.
 *
 * @returns {string} `Basic <base64(key:secret)>`
 */
function buildAuthHeader() {
  const { key, secret } = CONFIG;
  const encoded = Buffer.from(`${key}:${secret}`).toString('base64');
  return 'Basic ' + encoded;
}
|
||||
|
||||
/**
 * GET a WooCommerce REST API (wc/v3) endpoint with Basic authentication.
 *
 * @param {string} endpoint - Path below `/wp-json/wc/v3`, e.g. `/products/42`.
 * @returns {Promise<*>} Resolves with the parsed JSON body for a 2xx response,
 *   or with the raw body text when it is not valid JSON. Rejects with
 *   `Error("HTTP <status>: <body>")` for any other status, and with the
 *   underlying error when the request itself fails.
 */
function makeRequest(endpoint) {
  return new Promise((resolve, reject) => {
    const url = `${CONFIG.url}/wp-json/wc/v3${endpoint}`;

    const options = {
      headers: {
        'Authorization': buildAuthHeader(),
        'Content-Type': 'application/json',
        'User-Agent': 'KLZ-Attribute-Checker/1.0'
      }
    };

    console.log(`🌐 Fetching: ${endpoint}`);

    https.get(url, options, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character that is
      // split across two chunks (e.g. "ß", "ü") is not corrupted.
      res.setEncoding('utf8');

      let data = '';

      res.on('data', (chunk) => {
        data += chunk;
      });

      res.on('end', () => {
        if (res.statusCode >= 200 && res.statusCode < 300) {
          try {
            resolve(JSON.parse(data));
          } catch (e) {
            // Best effort: hand back the raw text when the body is not JSON.
            resolve(data);
          }
        } else {
          reject(new Error(`HTTP ${res.statusCode}: ${data}`));
        }
      });
    }).on('error', reject);
  });
}
|
||||
|
||||
/**
 * Query each product in HIGH_VOLTAGE_IDS, report whether it has attributes,
 * print a summary and write the collected results to
 * `data/attribute-check-results.json` (relative to this script's parent
 * directory).
 *
 * Products are fetched one after another with a 200 ms pause between them.
 * A failed request is recorded in the results with an `error` message instead
 * of aborting the run.
 *
 * @returns {Promise<Array<Object>>} One result record per product id.
 */
async function checkProductAttributes() {
  console.log('🔍 Checking WooCommerce Product Attributes\n');
  console.log('Target URL:', CONFIG.url);
  console.log('Products to check:', HIGH_VOLTAGE_IDS.length);
  console.log('');

  const PREVIEW_LIMIT = 5;
  const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
  const results = [];

  for (const productId of HIGH_VOLTAGE_IDS) {
    try {
      const product = await makeRequest(`/products/${productId}`);

      console.log(`\n📦 Product ID: ${productId}`);
      console.log(`Name: ${product.name}`);
      console.log(`SKU: ${product.sku}`);
      console.log(`Type: ${product.type}`);

      const hasAttributes = Boolean(product.attributes && product.attributes.length > 0);

      if (hasAttributes) {
        const total = product.attributes.length;
        console.log(`✅ Attributes found: ${total}`);

        // Show sample attributes
        for (const attr of product.attributes.slice(0, PREVIEW_LIMIT)) {
          console.log(` - ${attr.name}: ${attr.options?.length || 0} options`);
        }
        if (total > PREVIEW_LIMIT) {
          console.log(` ... and ${total - PREVIEW_LIMIT} more`);
        }

        results.push({
          id: productId,
          name: product.name,
          hasAttributes: true,
          count: total,
          attributes: product.attributes
        });
      } else {
        console.log(`❌ No attributes found`);

        // A variable product may carry its attributes on the variations instead.
        const isVariableWithVariations =
          product.type === 'variable' && product.variations && product.variations.length > 0;

        if (isVariableWithVariations) {
          console.log(`ℹ️ Variable product with ${product.variations.length} variations`);

          // Only the first variation is inspected.
          const firstVar = await makeRequest(`/products/${productId}/variations/${product.variations[0]}`);
          if (firstVar.attributes && firstVar.attributes.length > 0) {
            console.log(`⚠️ Variations have attributes, but parent product doesn't`);
          }
        }

        results.push({
          id: productId,
          name: product.name,
          hasAttributes: false,
          count: 0,
          attributes: []
        });
      }

      // Also check product categories
      if (product.categories && product.categories.length > 0) {
        const categoryNames = product.categories.map(c => c.name);
        console.log(`Categories: ${categoryNames.join(', ')}`);
      }
    } catch (error) {
      console.log(`❌ Error fetching product ${productId}: ${error.message}`);
      results.push({
        id: productId,
        name: 'Unknown',
        hasAttributes: false,
        count: 0,
        error: error.message
      });
    }

    // Rate limiting
    await pause(200);
  }

  // Summary
  const rule = '='.repeat(60);
  console.log('\n' + rule);
  console.log('📊 SUMMARY');
  console.log(rule);

  const withAttrs = [];
  const withoutAttrs = [];
  for (const record of results) {
    (record.hasAttributes ? withAttrs : withoutAttrs).push(record);
  }

  console.log(`Products checked: ${results.length}`);
  console.log(`✅ With attributes: ${withAttrs.length}`);
  console.log(`❌ Without attributes: ${withoutAttrs.length}`);

  if (withAttrs.length > 0) {
    console.log('\nProducts WITH attributes:');
    for (const p of withAttrs) {
      console.log(` - ${p.name} (${p.count} attributes)`);
    }
  }

  if (withoutAttrs.length > 0) {
    console.log('\nProducts WITHOUT attributes:');
    for (const p of withoutAttrs) {
      console.log(` - ${p.name}${p.error ? ' (Error: ' + p.error + ')' : ''}`);
    }
  }

  // Save detailed results
  const fs = require('fs');
  const outputPath = path.join(__dirname, '..', 'data', 'attribute-check-results.json');
  fs.writeFileSync(outputPath, JSON.stringify(results, null, 2));
  console.log(`\n💾 Detailed results saved to: ${outputPath}`);

  return results;
}
|
||||
|
||||
// Run if called directly
|
||||
// When executed directly: require all three WooCommerce settings, run the
// check, and turn the outcome into the process exit code (0 ok, 1 failure).
if (require.main === module) {
  const credentialsComplete = Boolean(CONFIG.url && CONFIG.key && CONFIG.secret);
  if (!credentialsComplete) {
    console.error('❌ Missing WooCommerce credentials in environment variables');
    process.exit(1);
  }

  (async () => {
    try {
      await checkProductAttributes();
      console.log('\n✅ Attribute check complete');
      process.exit(0);
    } catch (error) {
      console.error('\n❌ Attribute check failed:', error.message);
      process.exit(1);
    }
  })();
}
|
||||
|
||||
module.exports = { checkProductAttributes };
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Debug what entities are actually in the raw data
|
||||
|
||||
const rawExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none”…</p>';

/**
 * Decode a decimal numeric character reference such as `&#8221;`.
 *
 * The digits are parsed as base 10 and converted with String.fromCodePoint so
 * that code points above U+FFFF (e.g. emoji) decode to the right character.
 * Values that are not valid Unicode code points yield U+FFFD.
 *
 * @param {string} entity - Text of the form `&#<digits>;`.
 * @returns {{code: number, char: string}} The numeric value and its character.
 */
function decodeNumericEntity(entity) {
  const code = Number.parseInt(entity.replace(/[&#;]/g, ''), 10);
  const isValidCodePoint = Number.isInteger(code) && code >= 0 && code <= 0x10FFFF;
  return { code, char: isValidCodePoint ? String.fromCodePoint(code) : '\uFFFD' };
}

console.log('=== Raw Data Analysis ===');
console.log('Original excerpt:');
console.log(rawExcerpt);
console.log('\n=== Entity Analysis ===');

// Check for numeric entities
const numericEntities = rawExcerpt.match(/&#\d+;/g);
console.log('Numeric entities found:', numericEntities);

// Check for Unicode characters
const unicodeChars = rawExcerpt.match(/[”“‘’–—″′]/g);
console.log('Unicode characters found:', unicodeChars);

// Test what each numeric entity represents
if (numericEntities) {
  console.log('\n=== Numeric Entity Decoding ===');
  const uniqueEntities = [...new Set(numericEntities)];
  uniqueEntities.forEach(entity => {
    const { code, char } = decodeNumericEntity(entity);
    console.log(`${entity} (code ${code}) → "${char}"`);
  });
}

// Test manual decoding
console.log('\n=== Manual Decoding Test ===');
const decoded = rawExcerpt
  .replace(/”/g, '"')
  .replace(/“/g, '"')
  .replace(/–/g, '-')
  .replace(/—/g, '—')
  .replace(/‘/g, "'")
  .replace(/’/g, "'")
  .replace(/″/g, '"')
  .replace(/′/g, "'")
  .replace(/…/g, '…');

console.log('After manual decoding:');
console.log(decoded);

// Test the current function approach
console.log('\n=== Current Function Test ===');
const processed = rawExcerpt
  .replace(/”/g, '"') // This won't work because raw has ”
  .replace(/“/g, '"')
  .replace(/–/g, '-')
  .replace(/—/g, '—')
  .replace(/‘/g, "'")
  .replace(/’/g, "'");

console.log('After current function (which won\'t work):');
console.log(processed);
|
||||
@@ -1,9 +0,0 @@
|
||||
const { processShortcodes } = require('../lib/html-compat.ts');
|
||||
|
||||
// Smoke test: run one [vc_row] shortcode whose attribute values are wrapped in
// typographic quotes through processShortcodes() and inspect the output.
const input = '[vc_row bg_image="”10440″" color_overlay="“#000000”"]content[/vc_row]';
console.log('Input:', input);

const result = processShortcodes(input);
console.log('Result:', result);

const mentionsBackgroundImage = result.includes('background-image');
console.log('Contains bg image?', mentionsBackgroundImage);

// First style="..." attribute in the output, or undefined when there is none.
const styleMatch = result.match(/style="([^"]*)"/);
console.log('Style attribute:', styleMatch?.[1]);
|
||||
@@ -1,153 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to download missing videos and PDFs
|
||||
* Downloads videos referenced in processed data and PDFs linked in pages
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const https = require('https');
|
||||
const http = require('http');
|
||||
|
||||
// Configuration
|
||||
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
|
||||
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
|
||||
|
||||
// Videos to download (from home pages)
|
||||
const VIDEOS_TO_DOWNLOAD = [
|
||||
{
|
||||
url: 'https://klz-cables.com/wp-content/uploads/2025/02/header.mp4',
|
||||
filename: 'header.mp4'
|
||||
},
|
||||
{
|
||||
url: 'https://klz-cables.com/wp-content/uploads/2025/02/header.webm',
|
||||
filename: 'header.webm'
|
||||
}
|
||||
];
|
||||
|
||||
// PDFs to download (from terms pages)
|
||||
const PDFS_TO_DOWNLOAD = [
|
||||
{
|
||||
url: 'https://klz-cables.com/wp-content/uploads/2025/01/agbs.pdf',
|
||||
filename: 'agbs.pdf'
|
||||
}
|
||||
];
|
||||
|
||||
// Create media directory if it doesn't exist
|
||||
if (!fs.existsSync(MEDIA_DIR)) {
|
||||
fs.mkdirSync(MEDIA_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
// Download file function
|
||||
/**
 * Download `url` into MEDIA_DIR under `filename`, unless that file already
 * exists.
 *
 * The target file is only created once a 200 response has arrived, so a failed
 * request or a redirect never leaves an empty file behind that a later call
 * would mistake for a finished download. Redirects (301/302/303/307/308) are
 * followed, with a relative `Location` resolved against the current URL.
 * If writing fails part-way, the partial file is removed.
 *
 * @param {string} url - http(s) URL to download.
 * @param {string} filename - File name to create inside MEDIA_DIR.
 * @param {number} [redirectsLeft=5] - How many more redirects may be followed.
 * @returns {Promise<string>} Resolves with the path of the local file.
 *   Rejects on a non-200 final status, a redirect without `Location`, too
 *   many redirects, an invalid URL, or a network/file-system error.
 */
function downloadFile(url, filename, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const filePath = path.join(MEDIA_DIR, filename);

    // Check if file already exists
    if (fs.existsSync(filePath)) {
      console.log(`✅ Already exists: ${filename}`);
      resolve(filePath);
      return;
    }

    console.log(`📥 Downloading: ${filename} from ${url}`);

    const protocol = url.startsWith('https') ? https : http;

    const request = protocol.get(url, (res) => {
      const status = res.statusCode;

      // Handle redirects
      if ([301, 302, 303, 307, 308].includes(status)) {
        res.resume(); // discard the body so the socket is released
        const location = res.headers.location;
        if (!location) {
          reject(new Error(`Redirect without location: ${status}`));
          return;
        }
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while downloading ${filename}`));
          return;
        }
        console.log(`🔄 Redirected to: ${location}`);
        let nextUrl;
        try {
          nextUrl = new URL(location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        downloadFile(nextUrl, filename, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (status !== 200) {
        res.resume();
        reject(new Error(`Failed to download: HTTP ${status}`));
        return;
      }

      // Only now is it known that there is content worth writing.
      const file = fs.createWriteStream(filePath);

      const fail = (err) => {
        res.unpipe(file);
        file.destroy();
        // Remove the partial file so a retry does not report "Already exists".
        fs.unlink(filePath, () => reject(err));
      };

      res.on('error', fail);
      file.on('error', fail);
      file.on('finish', () => {
        console.log(`✅ Downloaded: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    });

    request.on('error', reject);
  });
}
|
||||
|
||||
// Main function
|
||||
/**
 * Download every entry of VIDEOS_TO_DOWNLOAD and PDFS_TO_DOWNLOAD (one at a
 * time), then merge the resulting URL → `/media/<filename>` entries into
 * `asset-map.json` in PROCESSED_DIR when that file exists, and print a summary.
 *
 * A failed download is reported as a warning and does not stop the run.
 */
async function main() {
  console.log('🔍 Downloading Missing Assets');
  console.log('==============================');
  console.log(`Output: ${MEDIA_DIR}`);
  console.log('');

  const assetMap = {};
  const downloaded = [];

  // Download one group sequentially, recording each success.
  const downloadGroup = async (assets, kind) => {
    for (const asset of assets) {
      try {
        await downloadFile(asset.url, asset.filename);
        assetMap[asset.url] = `/media/${asset.filename}`;
        downloaded.push(asset.filename);
      } catch (error) {
        console.warn(`⚠️ Failed to download ${kind} ${asset.filename}:`, error.message);
      }
    }
  };

  // Download videos
  console.log('🎬 Videos:');
  await downloadGroup(VIDEOS_TO_DOWNLOAD, 'video');

  console.log('');

  // Download PDFs
  console.log('📄 PDFs:');
  await downloadGroup(PDFS_TO_DOWNLOAD, 'PDF');

  // Update asset-map.json with new entries (new entries win over existing ones)
  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  if (fs.existsSync(assetMapPath)) {
    const existingMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
    const serialized = JSON.stringify({ ...existingMap, ...assetMap }, null, 2);

    fs.writeFileSync(assetMapPath, serialized);
    console.log(`\n✅ Updated asset-map.json with ${Object.keys(assetMap).length} new entries`);
  }

  console.log('\n🎉 Asset Download Complete!');
  console.log('==============================');
  console.log(`📥 Downloaded: ${downloaded.length} files`);
  console.log(`📁 Directory: public/media/`);
  console.log('');
  console.log('Files downloaded:');
  for (const file of downloaded) {
    console.log(` - ${file}`);
  }
}
|
||||
|
||||
// Run if called directly
|
||||
// Run the download only when this script is the program entry point; any
// rejection from main() is reported and mapped to exit code 1.
if (module === require.main) {
  const reportFailure = (error) => {
    console.error('\n❌ Script failed:', error.message);
    process.exit(1);
  };

  main().catch(reportFailure);
}
|
||||
|
||||
module.exports = {
|
||||
downloadFile,
|
||||
main
|
||||
};
|
||||
@@ -1,216 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to fetch specific missing media IDs from WordPress
|
||||
* Uses the WordPress REST API to get media URLs and download them
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const https = require('https');
|
||||
|
||||
// Load environment variables
|
||||
require('dotenv').config();
|
||||
|
||||
// Connection settings are read from the environment.
const BASE_URL = process.env.WOOCOMMERCE_URL;
const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD;

// Abort early when either setting is missing: every request below needs both.
if (!(BASE_URL && APP_PASSWORD)) {
  [
    '❌ Missing required environment variables',
    'Please check .env file for:',
    ' - WOOCOMMERCE_URL',
    ' - WORDPRESS_APP_PASSWORD',
  ].forEach((line) => console.error(line));
  process.exit(1);
}

// IDs of the media items to fetch, and where downloads / raw export data live.
const MISSING_MEDIA_IDS = [10432, 10440, 10382, 10616, 10615, 45569, 10638, 5767];
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const RAW_DATA_DIR = path.join(__dirname, '..', 'data', 'raw', '2025-12-30T15-21-49-331Z');

// Make sure the download target exists before anything is written to it.
const mediaDirMissing = !fs.existsSync(MEDIA_DIR);
if (mediaDirMissing) {
  fs.mkdirSync(MEDIA_DIR, { recursive: true });
}
|
||||
|
||||
/**
 * Build the request headers for HTTP Basic authentication against WordPress.
 *
 * The credentials are `<username>:<APP_PASSWORD>`, base64-encoded.
 *
 * @param {string} [username='admin'] - Account the application password belongs to.
 *   Defaults to 'admin', the value that used to be hard-coded here.
 * @returns {{Authorization: string, 'Content-Type': string}} Headers to merge into a request.
 */
function buildWordPressAuth(username = 'admin') {
  const credentials = Buffer.from(`${username}:${APP_PASSWORD}`).toString('base64');

  return {
    'Authorization': `Basic ${credentials}`,
    'Content-Type': 'application/json'
  };
}
|
||||
|
||||
/**
 * Perform an HTTPS GET and resolve with the response body.
 *
 * The body is parsed as JSON when possible; if it is not valid JSON the raw
 * text is resolved instead. Any status outside 200-299 rejects with an Error
 * whose message contains the status code and the body.
 *
 * @param {string} url - Absolute https URL to request.
 * @param {Object<string, string>} [headers={}] - Extra request headers; they
 *   are spread after the default User-Agent and therefore override it.
 * @returns {Promise<*>} Parsed JSON, or the body string when it is not JSON.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-Missing-Media-Fetcher/1.0',
        ...headers
      }
    };

    https.get(url, options, (res) => {
      // Decode at the stream level. Appending raw Buffer chunks to a string
      // decodes each chunk on its own, which corrupts any multi-byte UTF-8
      // character that happens to be split across two chunks.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });

      // A connection dropped mid-body would otherwise leave the promise pending.
      res.on('error', reject);

      res.on('end', () => {
        const succeeded = res.statusCode >= 200 && res.statusCode < 300;
        if (!succeeded) {
          reject(new Error(`HTTP ${res.statusCode}: ${body}`));
          return;
        }

        try {
          resolve(JSON.parse(body));
        } catch (e) {
          // Not JSON: hand back the text as-is.
          resolve(body);
        }
      });
    }).on('error', reject);
  });
}
|
||||
|
||||
/**
 * Retrieve a single media record from the WordPress REST API.
 *
 * @param {number|string} mediaId - ID of the media item.
 * @returns {Promise<*|null>} Whatever makeRequest resolves with, or null when
 *   the request fails (the error message is logged, not rethrown).
 */
async function fetchMedia(mediaId) {
  const endpoint = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;

  try {
    console.log(`📥 Fetching media ${mediaId}...`);
    return await makeRequest(endpoint, buildWordPressAuth());
  } catch (err) {
    console.error(`❌ Error fetching media ${mediaId}:`, err.message);
    return null;
  }
}
|
||||
|
||||
/**
 * Download `url` into MEDIA_DIR under `filename`.
 *
 * If the target file already exists the download is skipped. The file is only
 * created once the server has answered with status 200, and it is removed
 * again if the transfer fails, so a failed attempt never leaves behind an
 * empty or partial file that a later run would treat as already downloaded.
 *
 * @param {string} url - https URL of the file.
 * @param {string} filename - File name (not a path) to store it under.
 * @returns {Promise<string>} Resolves with the absolute path of the file.
 *   Rejects on a non-200 status, a network error, or a write error.
 */
function downloadMedia(url, filename) {
  return new Promise((resolve, reject) => {
    const filePath = path.join(MEDIA_DIR, filename);

    // Check if file already exists
    if (fs.existsSync(filePath)) {
      console.log(`✅ Media already exists: ${filename}`);
      resolve(filePath);
      return;
    }

    // Remove whatever has been written so far, then reject. unlink errors
    // (e.g. the file was never created) are deliberately ignored.
    const fail = (err) => {
      fs.unlink(filePath, () => reject(err));
    };

    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // discard the body so the socket is released
        reject(new Error(`Failed to download: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(filePath);

      file.on('error', fail);
      res.on('error', (err) => {
        file.destroy();
        fail(err);
      });
      file.on('finish', () => {
        console.log(`✅ Downloaded: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    }).on('error', fail);
  });
}
|
||||
|
||||
/**
 * Fetch the metadata of every ID in MISSING_MEDIA_IDS, download each file
 * into MEDIA_DIR and record the items in RAW_DATA_DIR/media.json.
 *
 * Metadata requests run one after another; the file downloads are started as
 * soon as their metadata is known and awaited together. A failed fetch or
 * download is logged and does not abort the run.
 *
 * media.json is merged by `id`: items whose id is already present are not
 * appended again, so the script can be re-run without creating duplicates.
 *
 * @returns {Promise<void>}
 * @throws {Error} If media.json exists but is not valid JSON or not an array.
 */
async function main() {
  console.log('🔍 Fetching Missing Media IDs');
  console.log('==============================');
  console.log(`Target: ${BASE_URL}`);
  console.log(`Output: ${MEDIA_DIR}`);
  console.log(`Missing IDs: ${MISSING_MEDIA_IDS.join(', ')}`);
  console.log('');

  const mediaManifest = [];
  const downloadPromises = [];

  for (const mediaId of MISSING_MEDIA_IDS) {
    const media = await fetchMedia(mediaId);

    if (!media || !media.source_url) {
      console.warn(`⚠️ Could not fetch media ${mediaId}`);
      continue;
    }

    const originalFilename = media.source_url.split('/').pop();
    const filename = `${mediaId}-${originalFilename}`;

    // Add to manifest
    mediaManifest.push({
      id: mediaId,
      url: media.source_url,
      filename: filename,
      alt: media.alt_text || '',
      width: media.media_details?.width,
      height: media.media_details?.height,
      mime_type: media.mime_type
    });

    // Start the download now; failures are logged so Promise.all never rejects.
    downloadPromises.push(
      downloadMedia(media.source_url, filename).catch(err => {
        console.warn(`⚠️ Failed to download media ${mediaId}:`, err.message);
      })
    );
  }

  // Wait for all downloads
  await Promise.all(downloadPromises);

  // Update media.json
  const mediaJsonPath = path.join(RAW_DATA_DIR, 'media.json');
  if (fs.existsSync(mediaJsonPath)) {
    const existingMedia = JSON.parse(fs.readFileSync(mediaJsonPath, 'utf8'));
    if (!Array.isArray(existingMedia)) {
      throw new Error(`Expected an array in ${mediaJsonPath}`);
    }

    // Only append items that are not recorded yet (matched by id).
    const knownIds = new Set(existingMedia.map(entry => entry && entry.id));
    const newItems = mediaManifest.filter(item => !knownIds.has(item.id));

    fs.writeFileSync(
      mediaJsonPath,
      JSON.stringify([...existingMedia, ...newItems], null, 2)
    );
    console.log(`✅ Updated media.json with ${newItems.length} new items`);
  } else {
    console.warn('⚠️ media.json not found, creating new file');
    // The export directory itself may be missing as well.
    fs.mkdirSync(RAW_DATA_DIR, { recursive: true });
    fs.writeFileSync(
      mediaJsonPath,
      JSON.stringify(mediaManifest, null, 2)
    );
  }

  // Show the current assets.json, if there is one (it is not modified).
  const assetsJsonPath = path.join(RAW_DATA_DIR, 'assets.json');
  if (fs.existsSync(assetsJsonPath)) {
    const assets = JSON.parse(fs.readFileSync(assetsJsonPath, 'utf8'));
    console.log('✅ Current assets.json:', assets);
  }

  console.log('\n🎉 Missing Media Fetch Complete!');
  console.log('==============================');
  console.log(`📥 Fetched: ${mediaManifest.length} items`);
  console.log(`📁 Directory: public/media/`);
  // Derived from RAW_DATA_DIR so the message cannot drift from the real path.
  console.log(`📄 Updated: ${path.relative(path.join(__dirname, '..'), mediaJsonPath)}`);
  console.log('');
  console.log('Media items fetched:');
  mediaManifest.forEach(item => {
    console.log(` - ${item.id}: ${item.filename}`);
  });
}
|
||||
|
||||
// Kick off main() only when this file is the process entry point; when it is
// required from another script nothing runs and only the exports are exposed.
if (require.main === module) {
  const reportFailure = (err) => {
    console.error('\n❌ Script failed:', err.message);
    process.exit(1);
  };

  main().catch(reportFailure);
}

// Public API for other scripts.
module.exports = { fetchMedia, downloadMedia, main };
|
||||
@@ -1,230 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const ASSET_MAP_PATH = path.join(PROCESSED_DIR, 'asset-map.json');

// Asset map: WordPress URL -> local path.
const assetMap = JSON.parse(fs.readFileSync(ASSET_MAP_PATH, 'utf8'));

// Ways a numeric media ID can appear in a WordPress URL, tried in this order.
const MEDIA_ID_PATTERNS = [/\/(\d+)-/, /\/(\d+)\./, /id=(\d+)/];

// Media ID -> local path, derived from the first pattern that matches each URL.
const idToPath = {};
Object.entries(assetMap).forEach(([wpUrl, localPath]) => {
  const firstHit = MEDIA_ID_PATTERNS.reduce((hit, pattern) => hit ?? wpUrl.match(pattern), null);
  if (firstHit) {
    idToPath[firstHit[1]] = localPath;
  }
});

// Manual mappings; these override anything derived from the asset map.
Object.assign(idToPath, {
  '45569': '/media/45569-Still-2025-02-10-104337_1.1.1.webp',
  '10648': '/media/10648-low-voltage-scaled.webp',
  '6486': '/media/6486-Low-Voltage.svg',
  '10649': '/media/10649-medium-voltage-scaled.webp',
  '6487': '/media/6487-Medium-Voltage.svg',
  '46786': '/media/46786-na2xsfl2y-rendered.webp',
  '6485': '/media/6485-High-Voltage.svg',
  '46359': '/media/46359-3.webp',
  '6484': '/media/6484-Solar.svg',
  '6527': '/media/6527-high-voltage-category.webp',
  '6519': '/media/6519-solar-category.webp',
  '6521': '/media/6521-low-voltage-category.webp',
  '6517': '/media/6517-medium-voltage-category.webp',
});

console.log('Found', Object.keys(idToPath).length, 'media ID mappings');
|
||||
|
||||
// Named entities that are decoded, keyed by the name between '&' and ';'.
// A Map is used so names such as "constructor" cannot hit Object.prototype.
const HTML_NAMED_ENTITIES = new Map([
  ['nbsp', ' '],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['Prime', '"'], // Double prime (8243)
  ['ndash', '-'],
  ['mdash', '—'],
  ['hellip', '…'],
  ['bull', '•'],
  ['euro', '€'],
  ['copy', '©'],
  ['reg', '®'],
  ['trade', '™'],
  ['deg', '°'],
  ['plusmn', '±'],
  ['times', '×'],
  ['divide', '÷'],
  ['minus', '−'],
  ['cent', '¢'],
  ['pound', '£'],
  ['yen', '¥'],
  ['sect', '§'],
  ['para', '¶'],
  ['micro', 'µ'],
  ['laquo', '«'],
  ['raquo', '»'],
  ['middot', '·'],
]);

// Literal typographic characters that are normalised to plain ASCII so that
// attribute values end up delimited by ordinary quotes.
const TYPOGRAPHIC_REPLACEMENTS = new Map([
  ['\u00A0', ' '], // no-break space
  ['\u2018', "'"], // left single quote
  ['\u2019', "'"], // right single quote
  ['\u201C', '"'], // left double quote
  ['\u201D', '"'], // right double quote
  ['\u2033', '"'], // double prime
  ['\u2013', '-'], // en dash
]);

/**
 * Decode HTML entities (decimal, hex and the named ones listed above) and
 * normalise typographic quotes, the en dash and no-break spaces to ASCII.
 *
 * Entities that are not recognised, including `&amp;`, `&lt;`, `&gt;` and
 * `&quot;`, are left untouched, as are numeric references that are not valid
 * Unicode code points.
 *
 * @param {string} text - Text or HTML that may contain entities.
 * @returns {string} The decoded text; '' for any falsy input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF;
  // out-of-range values would throw, so those references are kept verbatim.
  const decodeNumeric = (match, digits, radix) => {
    const codePoint = Number.parseInt(digits, radix);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  };

  return text
    // Numeric entities first (decimal, then hex)
    .replace(/&#(\d+);/g, (match, dec) => decodeNumeric(match, dec, 10))
    .replace(/&#x([0-9a-fA-F]+);/g, (match, hex) => decodeNumeric(match, hex, 16))
    // Named entities; unknown names stay as they are
    .replace(/&([a-zA-Z]+);/g, (match, name) =>
      HTML_NAMED_ENTITIES.has(name) ? HTML_NAMED_ENTITIES.get(name) : match
    )
    // Finally normalise literal typographic characters, including those
    // produced by the numeric step (e.g. &#8221; -> ” -> ")
    .replace(/[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013]/g, (char) =>
      TYPOGRAPHIC_REPLACEMENTS.get(char)
    );
}
|
||||
|
||||
// Process files
const files = ['pages.json', 'posts.json', 'products.json'];

// Attributes whose value may be a numeric media ID, per HTML field.
const CONTENT_IMAGE_ATTRIBUTES = [
  'bg_image',
  'background_image',
  'image_url',
  'custom_icon_image',
  'poster',
  'column_background_image',
];
const EXCERPT_IMAGE_ATTRIBUTES = ['bg_image'];

/**
 * Replace `attr="<media id>"` with `attr="<local path>"` for every ID that has
 * an entry in idToPath. The whole document is scanned once per call instead
 * of once per known ID and pattern.
 *
 * Typographic quote characters are accepted as delimiters as well, in case
 * the text was not normalised beforehand; the output always uses plain
 * double quotes. IDs without a mapping are left untouched.
 *
 * @param {string} html - HTML to rewrite.
 * @param {string[]} attributeNames - Attribute names to look at.
 * @returns {{html: string, replaced: boolean}} Rewritten HTML and whether at
 *   least one ID was replaced.
 */
function replaceMediaIds(html, attributeNames) {
  const pattern = new RegExp(`(${attributeNames.join('|')})=["“”″](\\d+)["“”″]`, 'g');
  let replaced = false;

  const rewritten = html.replace(pattern, (match, attribute, id) => {
    if (!Object.prototype.hasOwnProperty.call(idToPath, id)) return match;
    replaced = true;
    return `${attribute}="${idToPath[id]}"`;
  });

  return { html: rewritten, replaced };
}

files.forEach(file => {
  const filePath = path.join(PROCESSED_DIR, file);
  if (!fs.existsSync(filePath)) return;

  const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  let updateCount = 0; // items in which at least one media ID was replaced
  let decodeCount = 0; // items in which entity decoding changed something

  const fields = [
    ['contentHtml', CONTENT_IMAGE_ATTRIBUTES],
    ['excerptHtml', EXCERPT_IMAGE_ATTRIBUTES],
  ];

  items.forEach(item => {
    let replaced = false;
    let decoded = false;

    for (const [field, attributeNames] of fields) {
      const original = item[field];
      if (!original) continue;

      // Decode entities first so attribute values use plain quotes,
      // then swap the IDs for local paths.
      const decodedHtml = decodeHTMLEntities(original);
      const result = replaceMediaIds(decodedHtml, attributeNames);

      item[field] = result.html;
      if (decodedHtml !== original) decoded = true;
      if (result.replaced) replaced = true;
    }

    if (replaced) updateCount++;
    if (decoded) decodeCount++;
  });

  // Only rewrite the file when something actually changed.
  if (updateCount > 0 || decodeCount > 0) {
    fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
    console.log('✅ Updated ' + file + ' (' + updateCount + ' replacements, ' + decodeCount + ' decoded)');
  } else {
    console.log('ℹ️ No changes for ' + file);
  }
});
|
||||
|
||||
// Verify: re-read pages.json and report on the two home pages.
const pages = JSON.parse(fs.readFileSync(path.join(PROCESSED_DIR, 'pages.json'), 'utf8'));

const findPage = (slug, locale) => pages.find(p => p.slug === slug && p.locale === locale);
const homeEn = findPage('corporate-3-landing-2', 'en');
const homeDe = findPage('start', 'de');

// Number of "/media/" references in a page's content (0 when the page is missing).
const countMediaRefs = (page) => (page?.contentHtml?.match(/\/media\//g) || []).length;

console.log('\n✅ Verification:');
console.log('EN home images:', countMediaRefs(homeEn));
console.log('DE home images:', countMediaRefs(homeDe));

// bg_image attributes that still hold a bare numeric ID
const remainingIds = homeEn?.contentHtml?.match(/bg_image="\d+"/g) || [];
console.log('Remaining IDs in EN:', remainingIds.length > 0 ? remainingIds : 'None');

// Show up to three bg_image attributes as examples
const bgImageExamples = homeEn?.contentHtml ? homeEn.contentHtml.match(/bg_image="[^"]+"/g) : null;
if (bgImageExamples) {
  console.log('\nEN bg_image examples:', bgImageExamples.slice(0, 3));
}
|
||||
@@ -1,353 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to fix missing attributes for high-voltage cables
|
||||
* Creates a manual attribute mapping based on product specifications
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Processed data lives in ../data/processed, backups go to ../data/backup.
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const BACKUP_DIR = path.join(__dirname, '..', 'data', 'backup');

// Create backup directory
const backupDirMissing = !fs.existsSync(BACKUP_DIR);
if (backupDirMissing) {
  fs.mkdirSync(BACKUP_DIR, { recursive: true });
}
|
||||
|
||||
// Conductor labels per locale. Among the slugs below, the "na…" entries are
// given an aluminium conductor and the "n…" entries a copper one, matching
// how 'na2xsfl2y-3' and 'n2xsfl2y' were already distinguished.
// NOTE(review): 'na2xfk2y' and 'na2xfkld2y' previously listed copper; confirm
// against the product data sheets.
const CONDUCTOR_LABELS = {
  aluminum: { en: 'Aluminum', de: 'Aluminium' },
  copper: { en: 'Copper', de: 'Kupfer' },
};

// Attribute set shared by NA2XS(FL)2Y and N2XS(FL)2Y.
function xsfl2yAttributes(conductor, conductorTypeEn) {
  const label = CONDUCTOR_LABELS[conductor];
  return {
    en: [
      { name: 'Conductor', options: [label.en] },
      { name: 'Insulation', options: ['XLPE'] },
      { name: 'Sheath', options: ['PE'] },
      { name: 'Screen', options: ['Copper wire + tape'] },
      { name: 'Water blocking', options: ['Yes'] },
      { name: 'Voltage rating', options: ['6/10 kV', '12/20 kV', '18/30 kV'] },
      { name: 'Installation', options: ['Underground', 'Cable ducts', 'Outdoor'] },
      { name: 'Standard', options: ['IEC 60840', 'DIN VDE 0276-620'] },
      { name: 'Conductor material', options: [label.en] },
      { name: 'Conductor type', options: [conductorTypeEn] },
      { name: 'Insulation material', options: ['XLPE'] },
      { name: 'Sheath material', options: ['PE'] },
      { name: 'Armour', options: ['None'] },
      { name: 'Max operating temperature', options: ['+90 °C'] },
      { name: 'Short circuit temperature', options: ['+250 °C'] },
      { name: 'Bending radius', options: ['Min. 15x diameter'] }
    ],
    de: [
      { name: 'Leiter', options: [label.de] },
      { name: 'Isolation', options: ['XLPE'] },
      { name: 'Mantel', options: ['PE'] },
      { name: 'Abschirmung', options: ['Kupferdraht + Band'] },
      { name: 'Wassersperre', options: ['Ja'] },
      { name: 'Spannungsbereich', options: ['6/10 kV', '12/20 kV', '18/30 kV'] },
      { name: 'Installation', options: ['Unterirdisch', 'Kabelrohre', 'Außen'] },
      { name: 'Norm', options: ['IEC 60840', 'DIN VDE 0276-620'] },
      { name: 'Leitermaterial', options: [label.de] },
      { name: 'Leitertyp', options: ['Verdrillt'] },
      { name: 'Isolationsmaterial', options: ['XLPE'] },
      { name: 'Mantelmaterial', options: ['PE'] },
      { name: 'Bewehrung', options: ['Keine'] },
      { name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
      { name: 'Kurzschlusstemperatur', options: ['+250 °C'] },
      { name: 'Biegeradius', options: ['Min. 15x Durchmesser'] }
    ]
  };
}

// Attribute set shared by NA2X(F)K2Y and N2X(F)K2Y.
function xfk2yAttributes(conductor) {
  const label = CONDUCTOR_LABELS[conductor];
  return {
    en: [
      { name: 'Conductor', options: [label.en] },
      { name: 'Insulation', options: ['XLPE'] },
      { name: 'Sheath', options: ['PVC'] },
      { name: 'Screen', options: ['Copper wire'] },
      { name: 'Voltage rating', options: ['64/110 kV'] },
      { name: 'Installation', options: ['Underground', 'Cable ducts'] },
      { name: 'Standard', options: ['IEC 60502-2'] },
      { name: 'Conductor material', options: [label.en] },
      { name: 'Insulation material', options: ['XLPE'] },
      { name: 'Sheath material', options: ['PVC'] },
      { name: 'Max operating temperature', options: ['+90 °C'] },
      { name: 'Short circuit temperature', options: ['+250 °C'] }
    ],
    de: [
      { name: 'Leiter', options: [label.de] },
      { name: 'Isolation', options: ['XLPE'] },
      { name: 'Mantel', options: ['PVC'] },
      { name: 'Abschirmung', options: ['Kupferdraht'] },
      { name: 'Spannungsbereich', options: ['64/110 kV'] },
      { name: 'Installation', options: ['Unterirdisch', 'Kabelrohre'] },
      { name: 'Norm', options: ['IEC 60502-2'] },
      { name: 'Leitermaterial', options: [label.de] },
      { name: 'Isolationsmaterial', options: ['XLPE'] },
      { name: 'Mantelmaterial', options: ['PVC'] },
      { name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
      { name: 'Kurzschlusstemperatur', options: ['+250 °C'] }
    ]
  };
}

// Attribute set shared by NA2X(F)KLD2Y and N2X(F)KLD2Y.
function xfkld2yAttributes(conductor) {
  const label = CONDUCTOR_LABELS[conductor];
  return {
    en: [
      { name: 'Conductor', options: [label.en] },
      { name: 'Insulation', options: ['XLPE'] },
      { name: 'Sheath', options: ['PE'] },
      { name: 'Screen', options: ['Copper wire + tape'] },
      { name: 'Voltage rating', options: ['64/110 kV'] },
      { name: 'Installation', options: ['Direct burial', 'Cable tray'] },
      { name: 'Standard', options: ['IEC 60502-2'] },
      { name: 'Conductor material', options: [label.en] },
      { name: 'Insulation material', options: ['XLPE'] },
      { name: 'Sheath material', options: ['PE'] },
      { name: 'Armour', options: ['Aluminum tape'] },
      { name: 'Max operating temperature', options: ['+90 °C'] },
      { name: 'Short circuit temperature', options: ['+250 °C'] }
    ],
    de: [
      { name: 'Leiter', options: [label.de] },
      { name: 'Isolation', options: ['XLPE'] },
      { name: 'Mantel', options: ['PE'] },
      { name: 'Abschirmung', options: ['Kupferdraht + Band'] },
      { name: 'Spannungsbereich', options: ['64/110 kV'] },
      // 'Kabelpritsche' replaces the misspelt 'Kabeltragg' (EN: 'Cable tray')
      { name: 'Installation', options: ['Direktverlegung', 'Kabelpritsche'] },
      { name: 'Norm', options: ['IEC 60502-2'] },
      { name: 'Leitermaterial', options: [label.de] },
      { name: 'Isolationsmaterial', options: ['XLPE'] },
      { name: 'Mantelmaterial', options: ['PE'] },
      { name: 'Bewehrung', options: ['Aluminiumband'] },
      { name: 'Max. Betriebstemperatur', options: ['+90 °C'] },
      { name: 'Kurzschlusstemperatur', options: ['+250 °C'] }
    ]
  };
}

/**
 * Manual attribute mappings for high-voltage cables
 * Based on typical specifications for these cable types
 *
 * Keyed by product slug; each entry holds the English (`en`) and German
 * (`de`) attribute lists as `{ name, options }` objects.
 */
const MANUAL_ATTRIBUTES = {
  'na2xsfl2y-3': xsfl2yAttributes('aluminum', 'Compacted stranded'), // NA2XS(FL)2Y high voltage

  'n2xsfl2y': xsfl2yAttributes('copper', 'Stranded'), // N2XS(FL)2Y high voltage

  'h1z2z2-k': { // H1Z2Z2-K solar cable
    en: [
      { name: 'Conductor', options: ['Tinned copper'] },
      { name: 'Insulation', options: ['XLPE'] },
      { name: 'Sheath', options: ['XLPE'] },
      { name: 'Voltage rating', options: ['1.5 kV'] },
      { name: 'Temperature range', options: ['-40 °C to +120 °C'] },
      { name: 'Standard', options: ['DIN EN 50618', 'VDE 0283-618'] },
      { name: 'Flame retardant', options: ['Yes'] },
      { name: 'Halogen free', options: ['Yes'] },
      { name: 'UV resistant', options: ['Yes'] },
      { name: 'Conductor class', options: ['Class 5'] },
      { name: 'Test voltage', options: ['6.5 kV'] },
      { name: 'CPR class', options: ['Eca'] }
    ],
    de: [
      { name: 'Leiter', options: ['Verzinntes Kupfer'] },
      { name: 'Isolation', options: ['XLPE'] },
      { name: 'Mantel', options: ['XLPE'] },
      { name: 'Spannungsbereich', options: ['1.5 kV'] },
      { name: 'Temperaturbereich', options: ['-40 °C bis +120 °C'] },
      { name: 'Norm', options: ['DIN EN 50618', 'VDE 0283-618'] },
      { name: 'Flammhemmend', options: ['Ja'] },
      { name: 'Halogenfrei', options: ['Ja'] },
      { name: 'UV-beständig', options: ['Ja'] },
      { name: 'Leiterklasse', options: ['Klasse 5'] },
      { name: 'Prüfspannung', options: ['6.5 kV'] },
      { name: 'CPR-Klasse', options: ['Eca'] }
    ]
  },

  'na2xfk2y': xfk2yAttributes('aluminum'), // NA2X(F)K2Y high voltage

  'n2xfk2y': xfk2yAttributes('copper'), // N2X(F)K2Y high voltage

  'na2xfkld2y': xfkld2yAttributes('aluminum'), // NA2X(F)KLD2Y high voltage

  'n2xfkld2y': xfkld2yAttributes('copper') // N2X(F)KLD2Y high voltage
};
|
||||
|
||||
/**
 * Add attributes from MANUAL_ATTRIBUTES to every product in
 * PROCESSED_DIR/products.json that has none and whose slug has a manual
 * mapping. Products with locale 'en' get the English list, all others the
 * German one.
 *
 * products.json is only rewritten when at least one product was fixed. In
 * that case the untouched original file is first copied to
 * BACKUP_DIR/products-<timestamp>.json.
 *
 * Logs progress and a summary to the console; returns nothing. If
 * products.json does not exist an error is logged and nothing is changed.
 */
function addMissingAttributes() {
  console.log('🔧 Fixing missing product attributes\n');

  const productsPath = path.join(PROCESSED_DIR, 'products.json');

  if (!fs.existsSync(productsPath)) {
    console.error('❌ products.json not found');
    return;
  }

  // Load current products
  const products = JSON.parse(fs.readFileSync(productsPath, 'utf8'));
  console.log(`📊 Loaded ${products.length} products`);

  let fixedCount = 0;
  let alreadyFixedCount = 0;

  // Process each product
  const updatedProducts = products.map(product => {
    // Skip if already has attributes
    if (product.attributes && product.attributes.length > 0) {
      alreadyFixedCount++;
      return product;
    }

    // Own-property check so a slug such as "constructor" cannot pick up an
    // inherited Object.prototype member as if it were a mapping.
    const hasMapping = Object.prototype.hasOwnProperty.call(MANUAL_ATTRIBUTES, product.slug);
    if (!hasMapping) {
      // No manual mapping found
      return product;
    }

    const manualSet = MANUAL_ATTRIBUTES[product.slug];
    const attributes = product.locale === 'en' ? manualSet.en : manualSet.de;

    console.log(`✅ Fixed: ${product.name} (${product.locale})`);
    console.log(`   Added ${attributes.length} attributes`);

    fixedCount++;

    return {
      ...product,
      attributes: attributes.map((attr, index) => ({
        id: index,
        name: attr.name,
        slug: attr.name.toLowerCase().replace(/\s+/g, '-'),
        position: index,
        visible: true,
        variation: true,
        options: attr.options
      }))
    };
  });

  if (fixedCount > 0) {
    // Back up the original bytes before overwriting anything.
    fs.mkdirSync(BACKUP_DIR, { recursive: true });
    const backupPath = path.join(BACKUP_DIR, `products-${Date.now()}.json`);
    fs.copyFileSync(productsPath, backupPath);
    console.log(`💾 Backup created: ${backupPath}`);

    // Save updated products
    fs.writeFileSync(productsPath, JSON.stringify(updatedProducts, null, 2));
  }

  // Summary
  console.log('\n' + '='.repeat(60));
  console.log('📊 SUMMARY');
  console.log('='.repeat(60));
  console.log(`Total products: ${products.length}`);
  console.log(`Already had attributes: ${alreadyFixedCount}`);
  console.log(`Fixed with manual mapping: ${fixedCount}`);
  console.log(`Still missing: ${products.length - alreadyFixedCount - fixedCount}`);

  // Show which products still need work
  const stillMissing = updatedProducts.filter(p => !p.attributes || p.attributes.length === 0);
  if (stillMissing.length > 0) {
    console.log('\n⚠️ Products still missing attributes:');
    stillMissing.forEach(p => {
      console.log(` - ${p.name} (${p.slug}) [ID: ${p.id}, Locale: ${p.locale}]`);
    });
  }

  console.log(`\n✅ Attribute fix complete!`);
  if (fixedCount > 0) {
    console.log(`💾 Updated file: ${productsPath}`);
  } else {
    console.log(`ℹ️ Nothing to fix; ${productsPath} left untouched`);
  }
}
|
||||
|
||||
// Run if called directly
|
||||
if (require.main === module) {
|
||||
addMissingAttributes();
|
||||
}
|
||||
|
||||
module.exports = { addMissingAttributes, MANUAL_ATTRIBUTES };
|
||||
@@ -1,144 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to move video attributes from excerptHtml to contentHtml
|
||||
* This fixes the issue where video background attributes are in excerptHtml
|
||||
* but ContentRenderer never sees them because it processes contentHtml
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
|
||||
|
||||
/**
 * Pull the video background attributes out of an excerpt.
 *
 * For each of mp4 / webm / bg the shortcode-style attribute (`video_mp4="…"`)
 * is preferred; the `data-video-*` variant is used when the former is absent
 * or empty. Matching is case-insensitive and only double-quoted values are
 * recognised.
 *
 * @param {string} excerptHtml - Excerpt HTML to inspect.
 * @returns {{videoMp4: string, videoWebm: string, videoBg: string}|null}
 *   The values found ('' for missing ones), or null when the input is falsy
 *   or none of the three has a non-empty value.
 */
function extractVideoAttributes(excerptHtml) {
  if (!excerptHtml) return null;

  // First non-empty value among the given attribute names, else ''.
  const firstValue = (...attributeNames) => {
    for (const attributeName of attributeNames) {
      const found = excerptHtml.match(new RegExp(`${attributeName}="([^"]*)"`, 'i'));
      if (found && found[1]) return found[1];
    }
    return '';
  };

  const videoMp4 = firstValue('video_mp4', 'data-video-mp4');
  const videoWebm = firstValue('video_webm', 'data-video-webm');
  const videoBg = firstValue('video_bg', 'data-video-bg');

  const nothingFound = !videoMp4 && !videoWebm && !videoBg;
  return nothingFound ? null : { videoMp4, videoWebm, videoBg };
}
|
||||
|
||||
/**
 * Add video attributes to the first `<div class="vc-row…">` opening tag in
 * `contentHtml`.
 *
 * For each non-empty value in `videoAttrs`:
 *  - the shortcode-style attribute (`video_mp4`, `video_webm`, `video_bg`) is
 *    added unless the tag already carries it or its `data-video-*` variant;
 *  - the `data-video-*` attribute is added unless the tag already carries it.
 * Shortcode-style attributes are appended first, then the data attributes.
 *
 * The new attributes are spliced in by position rather than through
 * `String.prototype.replace`, so `$`-sequences such as `$&` in a URL are
 * inserted literally and a `>` inside a value cannot misplace later ones.
 *
 * @param {string} contentHtml - HTML to modify.
 * @param {{videoMp4: string, videoWebm: string, videoBg: string}|null} videoAttrs
 * @returns {string} The updated HTML, or `contentHtml` unchanged when either
 *   argument is falsy, no vc-row div exists, or nothing needs to be added.
 */
function mergeVideoAttributes(contentHtml, videoAttrs) {
  if (!contentHtml || !videoAttrs) return contentHtml;

  // Find the first vc-row element in contentHtml
  const match = /<div class="vc-row[^"]*"[^>]*>/i.exec(contentHtml);
  if (!match) return contentHtml;

  const openingTag = match[0];
  const alreadyHas = (attributeName) => openingTag.includes(`${attributeName}=`);

  const candidates = [
    { shortcodeName: 'video_mp4', dataName: 'data-video-mp4', value: videoAttrs.videoMp4 },
    { shortcodeName: 'video_webm', dataName: 'data-video-webm', value: videoAttrs.videoWebm },
    { shortcodeName: 'video_bg', dataName: 'data-video-bg', value: videoAttrs.videoBg },
  ];

  const additions = [];

  // Add video attributes if they don't already exist
  for (const { shortcodeName, dataName, value } of candidates) {
    if (value && !alreadyHas(shortcodeName) && !alreadyHas(dataName)) {
      additions.push(` ${shortcodeName}="${value}"`);
    }
  }

  // Also add data attributes for better compatibility
  for (const { dataName, value } of candidates) {
    if (value && !alreadyHas(dataName)) {
      additions.push(` ${dataName}="${value}"`);
    }
  }

  if (additions.length === 0) return contentHtml;

  // Insert right before the '>' that closes the opening tag.
  const tagEnd = match.index + openingTag.length - 1;
  return contentHtml.slice(0, tagEnd) + additions.join('') + contentHtml.slice(tagEnd);
}
|
||||
|
||||
// Main function
|
||||
/**
 * Entry point: merge video attributes found in each page's excerpt into that
 * page's content, then persist the result.
 *
 * Reads `pages.json` from PROCESSED_DIR (exits with status 1 when it is
 * missing), rewrites it, and also refreshes `content.pages` inside
 * `wordpress-data.json` when that file exists and has such a section.
 * The closing summary lists only the files that were actually written.
 */
function main() {
  console.log('🎬 Fixing video attributes in processed data...\n');

  const pagesPath = path.join(PROCESSED_DIR, 'pages.json');
  if (!fs.existsSync(pagesPath)) {
    console.error('❌ pages.json not found');
    process.exit(1);
  }

  const pages = JSON.parse(fs.readFileSync(pagesPath, 'utf8'));
  let fixedCount = 0;

  // Pages are patched in place; pages without video attributes are untouched.
  for (const page of pages) {
    const videoAttrs = extractVideoAttributes(page.excerptHtml);
    if (!videoAttrs) continue;

    console.log(`📄 Page: ${page.slug} (${page.locale})`);
    console.log(`   Found video attrs in excerpt: mp4="${videoAttrs.videoMp4}" webm="${videoAttrs.videoWebm}"`);

    const originalContent = page.contentHtml;
    page.contentHtml = mergeVideoAttributes(page.contentHtml, videoAttrs);

    if (page.contentHtml !== originalContent) {
      console.log(`   ✅ Merged into contentHtml`);
      fixedCount++;
    } else {
      console.log(`   ⚠️  Already present or no vc-row found`);
    }
    console.log('');
  }

  const writtenFiles = [];

  fs.writeFileSync(pagesPath, JSON.stringify(pages, null, 2));
  writtenFiles.push(pagesPath);

  // The combined data file is optional; update it only when it has a pages section.
  const wordpressDataPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  if (fs.existsSync(wordpressDataPath)) {
    const wordpressData = JSON.parse(fs.readFileSync(wordpressDataPath, 'utf8'));
    if (wordpressData.content && wordpressData.content.pages) {
      wordpressData.content.pages = pages;
      fs.writeFileSync(wordpressDataPath, JSON.stringify(wordpressData, null, 2));
      writtenFiles.push(wordpressDataPath);
    }
  }

  console.log(`✅ Fixed ${fixedCount} pages with video attributes`);
  console.log('📁 Files updated:');
  writtenFiles.forEach((file) => console.log(`   ${file}`));
}
|
||||
|
||||
// Run the fix only when this file is executed directly, not when it is required.
if (require.main === module) main();
|
||||
@@ -1,246 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Improved Translation Mapping Script
|
||||
* Creates translation pairs by analyzing content similarity and patterns
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
|
||||
|
||||
// Find the latest export directory
|
||||
/**
 * Locate the newest export directory under DATA_DIR.
 *
 * Sub-directories are ordered by name and the last one wins, so this presumably
 * relies on the directories being named with sortable timestamps — confirm
 * against the export script.
 *
 * @returns {string} Absolute path of the chosen export directory.
 * @throws {Error} When DATA_DIR does not exist or contains no sub-directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(DATA_DIR)) {
    throw new Error(`No data directory found at ${DATA_DIR}`);
  }

  const dirs = fs
    .readdirSync(DATA_DIR)
    .filter((name) => fs.statSync(path.join(DATA_DIR, name)).isDirectory());

  // Without this guard path.join() would be handed `undefined` and fail
  // with an error that says nothing about the real cause.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${DATA_DIR}`);
  }

  dirs.sort();
  return path.join(DATA_DIR, dirs[dirs.length - 1]);
}
|
||||
|
||||
// Simple text similarity (Levenshtein-like)
|
||||
/**
 * Score how alike two strings are, based on their Levenshtein edit distance.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} `(L - distance) / L`, where `L` is the length of the longer
 *   string: 1 for identical strings (including two empty ones), 0 when every
 *   position has to change. The comparison is case-sensitive.
 */
function similarity(str1, str2) {
  const [longer, shorter] = str1.length > str2.length ? [str1, str2] : [str2, str1];
  const span = longer.length;

  if (span === 0) return 1.0;

  // Row-by-row dynamic programme: `previous` holds the distances between the
  // first (row - 1) characters of `longer` and every prefix of `shorter`.
  let previous = Array.from({ length: shorter.length + 1 }, (_, col) => col);

  for (let row = 1; row <= span; row += 1) {
    const current = [row];
    for (let col = 1; col <= shorter.length; col += 1) {
      const sameChar = longer.charAt(row - 1) === shorter.charAt(col - 1);
      const substitution = previous[col - 1] + (sameChar ? 0 : 1);
      const deletion = previous[col] + 1;
      const insertion = current[col - 1] + 1;
      current[col] = Math.min(substitution, deletion, insertion);
    }
    previous = current;
  }

  const distance = previous[shorter.length];
  return (span - distance) / span;
}
|
||||
|
||||
// Extract keywords from slug
|
||||
/**
 * Reduce a slug to an order-independent keyword signature.
 *
 * @param {string} slug - Hyphen-separated slug.
 * @returns {string} The slug's words longer than three characters, sorted and
 *   re-joined with hyphens; an empty string when no word qualifies.
 */
function extractKeywords(slug) {
  const longWords = [];
  for (const word of slug.split('-')) {
    if (word.length > 3) {
      longWords.push(word);
    }
  }
  longWords.sort();
  return longWords.join('-');
}
|
||||
|
||||
// Find translation pairs using multiple strategies
|
||||
/**
 * Pair English items with their most similar German counterparts.
 *
 * Every EN item is compared with each DE item not yet claimed by an earlier EN
 * item. The score is a weighted blend of slug-keyword similarity (0.4), title
 * similarity (0.4) and similarity of the first 200 characters of content (0.2);
 * title and content contribute 0 when either side lacks the field. The best
 * candidate scoring above `threshold` is taken, so matching is greedy in EN
 * input order.
 *
 * When both slugs yield an empty keyword string (all words are three
 * characters or shorter) the raw slugs are compared instead; otherwise two
 * unrelated short slugs would count as a perfect keyword match.
 *
 * @param {Array<object>} itemsEN - English items (`id`, `slug`, optional
 *   `titleHtml` / `contentHtml`).
 * @param {Array<object>} itemsDE - German items with the same fields.
 * @param {number} [threshold=0.6] - Minimum combined score (exclusive).
 * @returns {Array<{translationKey: string, en: *, de: *, score: number,
 *   enSlug: string, deSlug: string}>} One entry per matched EN item, keyed by
 *   the EN slug.
 */
function findTranslationPairs(itemsEN, itemsDE, threshold = 0.6) {
  const stripTags = (html) => html.replace(/<[^>]*>/g, '').toLowerCase();

  // Comparison features depend on one item only, so compute them once per
  // item instead of once per EN/DE combination.
  const describe = (item) => ({
    item,
    keywords: extractKeywords(item.slug),
    title: item.titleHtml ? stripTags(item.titleHtml) : null,
    preview: item.contentHtml ? stripTags(item.contentHtml.substring(0, 200)) : null,
  });

  const candidates = itemsDE.map(describe);
  const usedDE = new Set();
  const pairs = [];

  itemsEN.forEach((enItem) => {
    const en = describe(enItem);
    let bestMatch = null;
    let bestScore = 0;

    for (const de of candidates) {
      if (usedDE.has(de.item.id)) continue;

      // Strategy 1: keyword similarity, falling back to the raw slugs when
      // neither side has any keyword to compare.
      const keywordScore =
        en.keywords || de.keywords
          ? similarity(en.keywords, de.keywords)
          : similarity(enItem.slug, de.item.slug);

      // Strategy 2: title similarity (if available on both sides).
      const titleScore =
        en.title !== null && de.title !== null ? similarity(en.title, de.title) : 0;

      // Strategy 3: content preview similarity (if available on both sides).
      const contentScore =
        en.preview !== null && de.preview !== null ? similarity(en.preview, de.preview) : 0;

      const combinedScore = (keywordScore * 0.4) + (titleScore * 0.4) + (contentScore * 0.2);

      if (combinedScore > bestScore && combinedScore > threshold) {
        bestScore = combinedScore;
        bestMatch = de.item;
      }
    }

    if (bestMatch) {
      usedDE.add(bestMatch.id);
      pairs.push({
        translationKey: `${enItem.slug}`,
        en: enItem.id,
        de: bestMatch.id,
        score: bestScore,
        enSlug: enItem.slug,
        deSlug: bestMatch.slug,
      });
    }
  });

  return pairs;
}
|
||||
|
||||
// Main function
|
||||
/**
 * Entry point: build an EN↔DE translation mapping for pages, posts, products
 * and product categories from the latest export, write it to
 * `translation-mapping-improved.json` in that export directory, and print a
 * summary with sample pairs and per-type unmatched counts.
 *
 * A content file that cannot be read or parsed is reported with a warning and
 * treated as an empty list, so the remaining types are still processed.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔧 Improving Translation Mapping');
  console.log('================================\n');

  const loadJSON = (file) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      // Keep going with an empty list, but say so instead of hiding the problem.
      console.warn(`⚠️  Could not load ${file}: ${e.message}`);
      return [];
    }
  };

  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');

  console.log('📊 Content loaded:');
  console.log(`   Pages: ${pagesEN.length} EN, ${pagesDE.length} DE`);
  console.log(`   Posts: ${postsEN.length} EN, ${postsDE.length} DE`);
  console.log(`   Products: ${productsEN.length} EN, ${productsDE.length} DE`);
  console.log(`   Categories: ${categoriesEN.length} EN, ${categoriesDE.length} DE\n`);

  console.log('🔍 Finding translation pairs...\n');

  const pagePairs = findTranslationPairs(pagesEN, pagesDE, 0.5);
  const postPairs = findTranslationPairs(postsEN, postsDE, 0.5);
  const productPairs = findTranslationPairs(productsEN, productsDE, 0.6);
  const categoryPairs = findTranslationPairs(categoriesEN, categoriesDE, 0.5);

  // Mapping sections are keyed by translationKey (the EN slug).
  const toMappingSection = (pairs) =>
    Object.fromEntries(
      pairs.map((pair) => [pair.translationKey, { en: pair.en, de: pair.de, score: pair.score }])
    );

  const mapping = {
    pages: toMappingSection(pagePairs),
    posts: toMappingSection(postPairs),
    products: toMappingSection(productPairs),
    productCategories: toMappingSection(categoryPairs),
  };

  const outputPath = path.join(exportDir, 'translation-mapping-improved.json');
  fs.writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  console.log('✅ Translation Mapping Complete\n');
  console.log('Pairs found:');
  console.log(`   Pages: ${pagePairs.length}`);
  console.log(`   Posts: ${postPairs.length}`);
  console.log(`   Products: ${productPairs.length}`);
  console.log(`   Categories: ${categoryPairs.length}`);
  console.log(`   Total: ${pagePairs.length + postPairs.length + productPairs.length + categoryPairs.length}\n`);

  const printSamples = (heading, pairs) => {
    if (pairs.length === 0) return;
    console.log(heading);
    pairs.slice(0, 3).forEach((pair) => {
      console.log(`   ${pair.enSlug} (${pair.score.toFixed(2)})`);
      console.log(`   ↔ ${pair.deSlug}`);
      console.log('');
    });
  };

  printSamples('📝 Sample Post Pairs:', postPairs);
  printSamples('📦 Sample Product Pairs:', productPairs);

  // Matched ids are tracked per content type: ids from different types can
  // coincide, and a shared set would then hide genuinely unmatched items.
  const countUnmatched = (items, pairs, side) => {
    const matchedIds = new Set(pairs.map((pair) => pair[side]));
    return items.filter((item) => !matchedIds.has(item.id)).length;
  };

  const unmatchedEN = {
    pages: countUnmatched(pagesEN, pagePairs, 'en'),
    posts: countUnmatched(postsEN, postPairs, 'en'),
    products: countUnmatched(productsEN, productPairs, 'en'),
    categories: countUnmatched(categoriesEN, categoryPairs, 'en'),
  };

  const unmatchedDE = {
    pages: countUnmatched(pagesDE, pagePairs, 'de'),
    posts: countUnmatched(postsDE, postPairs, 'de'),
    products: countUnmatched(productsDE, productPairs, 'de'),
    categories: countUnmatched(categoriesDE, categoryPairs, 'de'),
  };

  console.log('🔍 Unmatched Items (may need manual review):');
  console.log(`   EN: ${unmatchedEN.pages} pages, ${unmatchedEN.posts} posts, ${unmatchedEN.products} products, ${unmatchedEN.categories} categories`);
  console.log(`   DE: ${unmatchedDE.pages} pages, ${unmatchedDE.posts} posts, ${unmatchedDE.products} products, ${unmatchedDE.categories} categories`);

  console.log('\n💾 File saved:', outputPath);
  console.log('\n💡 Next steps:');
  console.log('   1. Review the improved mapping for accuracy');
  console.log('   2. Manually add any missing pairs');
  console.log('   3. Use this mapping for Next.js i18n implementation');
}
|
||||
|
||||
// Build the mapping only when this file is executed directly, not when it is required.
if (require.main === module) main();
|
||||
@@ -1,563 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress → Next.js Data Processing Pipeline
|
||||
* Transforms raw WordPress data into Next.js compatible format
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const DATA_DIR = path.join(__dirname, '..', 'data');
|
||||
const RAW_DIR = path.join(DATA_DIR, 'raw');
|
||||
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
|
||||
|
||||
// Create processed directory
|
||||
// Make sure the output directory is in place before anything is written to it.
if (fs.existsSync(PROCESSED_DIR) === false) {
  fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
|
||||
|
||||
// Find latest export
|
||||
/**
 * Locate the newest raw export directory under RAW_DIR.
 *
 * Sub-directories are ordered by name and the last one wins, so this presumably
 * relies on the directories being named with sortable timestamps — confirm
 * against the export script.
 *
 * @returns {string} Absolute path of the chosen export directory.
 * @throws {Error} When RAW_DIR does not exist or contains no sub-directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`No raw data directory found at ${RAW_DIR}`);
  }

  const dirs = fs
    .readdirSync(RAW_DIR)
    .filter((name) => fs.statSync(path.join(RAW_DIR, name)).isDirectory());

  // Without this guard path.join() would be handed `undefined` and fail
  // with an error that says nothing about the real cause.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }

  dirs.sort();
  return path.join(RAW_DIR, dirs[dirs.length - 1]);
}
|
||||
|
||||
// HTML sanitization - preserve content but clean dangerous elements
|
||||
/**
 * Clean raw WordPress/WPBakery markup while keeping its text and structure.
 *
 * Steps, in order:
 *  1. drop `<script>` elements and quoted inline `on*="…"` / `on*='…'` handlers;
 *  2. turn `[vc_column_text]`, `[vc_row]` / `[vc_row_inner]` and
 *     `[vc_column]` / `[vc_column_inner]` (opening AND closing tags) into divs,
 *     so every generated `<div>` gets its `</div>`;
 *  3. unwrap `[nectar_cta]…[/nectar_cta]`, drop other `[nectar…]` tags;
 *  4. strip every remaining `[…]` sequence;
 *  5. remove empty `<p>` / `<div>` elements and collapse whitespace.
 *
 * This is a best-effort regex clean-up, not a full HTML sanitizer: unquoted
 * event handlers and `javascript:` URLs are not touched, and step 4 also
 * removes bracketed plain text such as "[1]".
 *
 * @param {string} html - Raw markup; falsy input yields `''`.
 * @returns {string} Cleaned, single-line markup.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  return html
    // Security: scripts and inline handlers.
    .replace(/<script.*?>.*?<\/script>/gis, '')
    .replace(/\son\w+=(?:"[^"]*"|'[^']*')/gi, '')

    // vc_column_text must run before the vc_column rule, which would
    // otherwise claim its opening tag.
    .replace(/\[vc_column_text\b.*?\]/gi, '<div class="vc-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // Rows and columns, including their *_inner variants. Matching the
    // closing *_inner tags here keeps the generated divs balanced.
    .replace(/\[vc_row(?:_inner)?\b.*?\]/gi, '<div class="vc-row">')
    .replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>')
    .replace(/\[vc_column(?:_inner)?\b.*?\]/gi, '<div class="vc-column">')
    .replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>')

    // Nectar shortcodes: keep the text wrapped by nectar_cta, drop the rest.
    .replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1')
    .replace(/\[nectar.*?\]/gi, '')

    // Any shortcode still left.
    .replace(/\[.*?\]/g, '')

    // Empty wrappers and whitespace.
    .replace(/<p[^>]*>\s*<\/p>/gi, '')
    .replace(/<div[^>]*>\s*<\/div>/gi, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
// Process excerpts specifically to handle shortcodes comprehensively
|
||||
/**
 * Convert an excerpt containing entity-encoded WPBakery/Nectar shortcodes
 * into plain HTML.
 *
 * Steps, in order:
 *  1. decode a fixed set of numeric character references (typographic quotes,
 *     dashes, ellipsis, primes, bullet, euro), normalise the corresponding
 *     Unicode quote/dash/prime characters to ASCII, and decode a fixed set of
 *     named entities — this makes shortcode attributes use plain `"` quotes;
 *  2. translate known shortcodes to HTML (rows/columns to divs, CTA and button
 *     shortcodes to links, icon lists to `<ul>/<li>`, and so on);
 *  3. strip every remaining `[…]` sequence, remove empty `<p>` / `<div>`
 *     elements and collapse whitespace.
 *
 * Entities outside the fixed sets (for example the ampersand or angle-bracket
 * entities) are left untouched so that decoding cannot create new markup.
 *
 * @param {string} excerptHtml - Raw excerpt; falsy input yields `''`.
 * @returns {string} Converted, single-line HTML.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Numeric character references, keyed by code point.
  const numericEntities = new Map([
    [8221, '"'], // right double quote
    [8220, '"'], // left double quote
    [8222, ','], // low double quote (mapping kept as-is; NOTE(review): '"' may be intended)
    [8223, '"'], // high reversed double quote
    [8216, "'"], // left single quote
    [8217, "'"], // right single quote
    [8211, '-'], // en dash
    [8212, '\u2014'], // em dash
    [8230, '\u2026'], // ellipsis
    [8243, '"'], // double prime (inches)
    [8242, "'"], // prime (feet)
    [8218, ','], // single low quote
    [8219, '`'], // single high reversed quote
    [8226, '\u2022'], // bullet
    [8364, '\u20AC'], // euro sign
  ]);

  // Literal typographic characters (as found in rendered content).
  const typographicChars = new Map([
    ['\u201D', '"'],
    ['\u201C', '"'],
    ['\u201E', ','],
    ['\u201F', '"'],
    ['\u2018', "'"],
    ['\u2019', "'"],
    ['\u2013', '-'],
    ['\u2033', '"'],
    ['\u2032', "'"],
    ['\u201A', ','],
    ['\u201B', '`'],
  ]);

  // Named entities, keyed by the name between the ampersand and the semicolon.
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC'],
  ]);

  let processed = excerptHtml
    .replace(/&#(\d+);/g, (entity, code) => numericEntities.get(Number(code)) ?? entity)
    .replace(/[\u2013\u2018-\u201F\u2032\u2033]/g, (char) => typographicChars.get(char))
    .replace(/&([a-z]+);/g, (entity, name) => namedEntities.get(name) ?? entity);

  // The \b after vc_row, vc_column and nectar_icon_list stops those rules
  // from claiming the longer *_inner, *_text and *_item shortcodes, which
  // have their own rules further down.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-row'];
      if (attrs.includes('full_width_background')) classes.push('full-width-bg');
      if (attrs.includes('in_container')) classes.push('in-container');
      if (attrs.includes('full_width_content')) classes.push('full-width-content');
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with width classes
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-column'];
      if (attrs.includes('1/1')) classes.push('col-1-1');
      if (attrs.includes('1/2')) classes.push('col-1-2');
      if (attrs.includes('1/3')) classes.push('col-1-3');
      if (attrs.includes('2/3')) classes.push('col-2-3');
      if (attrs.includes('1/4')) classes.push('col-1-4');
      if (attrs.includes('3/4')) classes.push('col-3-4');
      if (attrs.includes('5/12')) classes.push('col-5-12');
      if (attrs.includes('7/12')) classes.push('col-7-12');
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // nectar_cta - convert to link
    .replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')

    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')

    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')

    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')

    // nectar_icon_list_item - convert to li
    .replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')

    // nectar_btn - convert to link
    .replace(/\[nectar_btn([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')

    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')

    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner([^\]]*)\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider([^\]]*)\]/gi, '<hr class="divider" />')

    // NOTE(review): the bracketed placeholder texts in the next three rules
    // are themselves removed by the catch-all shortcode strip below, and the
    // then-empty divs by the empty-div clean-up, so these shortcodes
    // (including the body of vc_raw_js) effectively vanish from the output.
    .replace(/\[vc_gallery([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  return processed
    // Remove any remaining shortcodes
    .replace(/\[.*?\]/g, '')
    // Drop wrappers left empty by the steps above
    .replace(/<p[^>]*>\s*<\/p>/gi, '')
    .replace(/<div[^>]*>\s*<\/div>/gi, '')
    // Normalize whitespace
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
// Extract excerpt from content
|
||||
/**
 * Build a plain-text excerpt from HTML content.
 *
 * @param {string} content - HTML to summarise; falsy input yields `''`
 *   instead of throwing, matching how the sibling sanitizers treat missing
 *   fields.
 * @param {number} [maxLength=200] - Maximum number of characters kept before
 *   the `...` suffix is appended.
 * @returns {string} The tag-stripped text, cut to `maxLength` characters and
 *   suffixed with `...` when it was longer.
 */
function generateExcerpt(content, maxLength = 200) {
  if (!content) return '';

  const text = content.replace(/<[^>]*>/g, '');
  return text.length <= maxLength ? text : `${text.substring(0, maxLength)}...`;
}
|
||||
|
||||
// Process pages
|
||||
/**
 * Convert raw EN and DE pages into the processed page records.
 *
 * The translation mapping is keyed by the EN slug and stores `{ en, de }` ids.
 * EN pages are therefore looked up by slug, while DE pages are looked up by
 * their own id through a reverse index; a matched DE page adopts the EN
 * page's `translationKey` so both sides share it. Unmatched pages keep their
 * own slug as key and get `translation: null`.
 *
 * @param {Array<object>} pagesEN - Raw English pages.
 * @param {Array<object>} pagesDE - Raw German pages.
 * @param {object} translationMapping - Mapping object; a missing `pages`
 *   section is treated as empty.
 * @returns {Array<object>} Processed pages, all EN entries first, then DE.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  const pairs = translationMapping?.pages ?? {};

  // Reverse index: DE id -> { key (EN slug), en (EN id) }.
  const pairByDeId = new Map();
  for (const [key, pair] of Object.entries(pairs)) {
    if (pair) pairByDeId.set(pair.de, { key, en: pair.en });
  }

  const toRecord = (page, locale, translationKey, translation) => {
    const contentHtml = sanitizeHTML(page.contentHtml);
    return {
      id: page.id,
      translationKey,
      locale,
      slug: page.slug,
      path: locale === 'en' ? `/${page.slug}` : `/${locale}/${page.slug}`,
      title: (page.titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml: page.titleHtml,
      contentHtml,
      // Fall back to the cleaned content so the excerpt holds no raw shortcodes.
      excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(contentHtml),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation,
    };
  };

  const english = pagesEN.map((page) => {
    const deMatch = pairs[page.slug];
    return toRecord(page, 'en', page.slug, deMatch ? { locale: 'de', id: deMatch.de } : null);
  });

  const german = pagesDE.map((page) => {
    const enMatch = pairByDeId.get(page.id);
    return toRecord(
      page,
      'de',
      enMatch ? enMatch.key : page.slug,
      enMatch ? { locale: 'en', id: enMatch.en } : null
    );
  });

  return [...english, ...german];
}
|
||||
|
||||
// Process posts
|
||||
/**
 * Convert raw EN and DE posts into the processed post records.
 *
 * The translation mapping is keyed by the EN slug and stores `{ en, de }` ids.
 * EN posts are looked up by slug, DE posts by their own id through a reverse
 * index; a matched DE post adopts the EN post's `translationKey`. Unmatched
 * posts keep their own slug as key and get `translation: null`.
 *
 * @param {Array<object>} postsEN - Raw English posts.
 * @param {Array<object>} postsDE - Raw German posts.
 * @param {object} translationMapping - Mapping object; a missing `posts`
 *   section is treated as empty.
 * @returns {Array<object>} Processed posts, all EN entries first, then DE.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  const pairs = translationMapping?.posts ?? {};

  // Reverse index: DE id -> { key (EN slug), en (EN id) }.
  const pairByDeId = new Map();
  for (const [key, pair] of Object.entries(pairs)) {
    if (pair) pairByDeId.set(pair.de, { key, en: pair.en });
  }

  const toRecord = (post, locale, translationKey, translation) => {
    const contentHtml = sanitizeHTML(post.contentHtml);
    return {
      id: post.id,
      translationKey,
      locale,
      slug: post.slug,
      path: locale === 'en' ? `/blog/${post.slug}` : `/${locale}/blog/${post.slug}`,
      title: (post.titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml: post.titleHtml,
      contentHtml,
      // Fall back to the cleaned content so the excerpt holds no raw shortcodes.
      excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(contentHtml),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation,
    };
  };

  const english = postsEN.map((post) => {
    const deMatch = pairs[post.slug];
    return toRecord(post, 'en', post.slug, deMatch ? { locale: 'de', id: deMatch.de } : null);
  });

  const german = postsDE.map((post) => {
    const enMatch = pairByDeId.get(post.id);
    return toRecord(
      post,
      'de',
      enMatch ? enMatch.key : post.slug,
      enMatch ? { locale: 'en', id: enMatch.en } : null
    );
  });

  return [...english, ...german];
}
|
||||
|
||||
// Process products
|
||||
/**
 * Convert raw EN and DE products into the processed product records.
 *
 * The translation mapping is keyed by the EN slug and stores `{ en, de }` ids.
 * EN products are looked up by slug, DE products by their own id through a
 * reverse index; a matched DE product adopts the EN product's
 * `translationKey`. Unmatched products keep their own slug as key and get
 * `translation: null`.
 *
 * @param {Array<object>} productsEN - Raw English products.
 * @param {Array<object>} productsDE - Raw German products.
 * @param {object} translationMapping - Mapping object; a missing `products`
 *   section is treated as empty.
 * @returns {Array<object>} Processed products, all EN entries first, then DE.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  const pairs = translationMapping?.products ?? {};

  // Reverse index: DE id -> { key (EN slug), en (EN id) }.
  const pairByDeId = new Map();
  for (const [key, pair] of Object.entries(pairs)) {
    if (pair) pairByDeId.set(pair.de, { key, en: pair.en });
  }

  const toRecord = (product, locale, translationKey, translation) => ({
    id: product.id,
    translationKey,
    locale,
    slug: product.slug,
    path: locale === 'en' ? `/product/${product.slug}` : `/${locale}/product/${product.slug}`,
    name: product.name,
    shortDescriptionHtml: product.shortDescriptionHtml,
    descriptionHtml: sanitizeHTML(product.descriptionHtml),
    images: product.images,
    featuredImage: product.featuredImage,
    sku: product.sku,
    regularPrice: product.regularPrice,
    salePrice: product.salePrice,
    currency: product.currency,
    stockStatus: product.stockStatus,
    categories: product.categories,
    attributes: product.attributes,
    variations: product.variations,
    updatedAt: product.updatedAt,
    translation,
  });

  const english = productsEN.map((product) => {
    const deMatch = pairs[product.slug];
    return toRecord(product, 'en', product.slug, deMatch ? { locale: 'de', id: deMatch.de } : null);
  });

  const german = productsDE.map((product) => {
    const enMatch = pairByDeId.get(product.id);
    return toRecord(
      product,
      'de',
      enMatch ? enMatch.key : product.slug,
      enMatch ? { locale: 'en', id: enMatch.en } : null
    );
  });

  return [...english, ...german];
}
|
||||
|
||||
// Process product categories
|
||||
/**
 * Convert raw EN and DE product categories into the processed records.
 *
 * The translation mapping is keyed by the EN slug and stores `{ en, de }` ids.
 * EN categories are looked up by slug, DE categories by their own id through
 * a reverse index; a matched DE category adopts the EN category's
 * `translationKey`. Unmatched categories keep their own slug as key and get
 * `translation: null`.
 *
 * @param {Array<object>} categoriesEN - Raw English categories.
 * @param {Array<object>} categoriesDE - Raw German categories.
 * @param {object} translationMapping - Mapping object; a missing
 *   `productCategories` section is treated as empty.
 * @returns {Array<object>} Processed categories, all EN entries first, then DE.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  const pairs = translationMapping?.productCategories ?? {};

  // Reverse index: DE id -> { key (EN slug), en (EN id) }.
  const pairByDeId = new Map();
  for (const [key, pair] of Object.entries(pairs)) {
    if (pair) pairByDeId.set(pair.de, { key, en: pair.en });
  }

  const toRecord = (category, locale, translationKey, translation) => ({
    id: category.id,
    translationKey,
    locale,
    slug: category.slug,
    name: category.name,
    path:
      locale === 'en'
        ? `/product-category/${category.slug}`
        : `/${locale}/product-category/${category.slug}`,
    description: category.description,
    count: category.count,
    translation,
  });

  const english = categoriesEN.map((category) => {
    const deMatch = pairs[category.slug];
    return toRecord(category, 'en', category.slug, deMatch ? { locale: 'de', id: deMatch.de } : null);
  });

  const german = categoriesDE.map((category) => {
    const enMatch = pairByDeId.get(category.id);
    return toRecord(
      category,
      'de',
      enMatch ? enMatch.key : category.slug,
      enMatch ? { locale: 'en', id: enMatch.en } : null
    );
  });

  return [...english, ...german];
}
|
||||
|
||||
// Process media manifest
|
||||
/**
 * Build the media manifest consumed by the site.
 *
 * @param {Array<object>} media - Raw media entries (`id`, `filename`, `url`,
 *   `alt`, `width`, `height`, `mime_type`).
 * @returns {Array<object>} One record per entry with the same fields, the
 *   MIME type exposed as `mimeType`, and `localPath` set to
 *   `/media/<filename>`.
 */
function processMedia(media) {
  const toManifestEntry = ({ id, filename, url, alt, width, height, mime_type: mimeType }) => {
    const localPath = `/media/${filename}`;
    return { id, filename, url, localPath, alt, width, height, mimeType };
  };

  return media.map((item) => toManifestEntry(item));
}
|
||||
|
||||
// Generate asset map for URL replacement
|
||||
/**
 * Build the lookup used to rewrite remote media URLs to local paths.
 *
 * @param {Array<object>} media - Raw media entries (`url`, `filename`).
 * @returns {Object<string, string>} Original URL -> `/media/<filename>`;
 *   entries without a URL are left out, and a later entry with the same URL
 *   overrides an earlier one.
 */
function generateAssetMap(media) {
  return media.reduce((assetPaths, item) => {
    if (item.url) {
      assetPaths[item.url] = `/media/${item.filename}`;
    }
    return assetPaths;
  }, {});
}
|
||||
|
||||
// Main processing function
|
||||
function main() {
|
||||
const exportDir = getLatestExportDir();
|
||||
console.log('🔄 Processing WordPress Data for Next.js');
|
||||
console.log('========================================\n');
|
||||
|
||||
// Load raw data
|
||||
const loadJSON = (file) => {
|
||||
try {
|
||||
return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
|
||||
} catch (e) {
|
||||
console.error(`❌ Failed to load ${file}:`, e.message);
|
||||
return [];
|
||||
}
|
||||
};
|
||||
|
||||
const translationMapping = loadJSON('translation-mapping-improved.json');
|
||||
const pagesEN = loadJSON('pages.en.json');
|
||||
const pagesDE = loadJSON('pages.de.json');
|
||||
const postsEN = loadJSON('posts.en.json');
|
||||
const postsDE = loadJSON('posts.de.json');
|
||||
const productsEN = loadJSON('products.en.json');
|
||||
const productsDE = loadJSON('products.de.json');
|
||||
const categoriesEN = loadJSON('product-categories.en.json');
|
||||
const categoriesDE = loadJSON('product-categories.de.json');
|
||||
const media = loadJSON('media.json');
|
||||
const redirects = loadJSON('redirects.json');
|
||||
const siteInfo = loadJSON('site-info.json');
|
||||
|
||||
console.log('📊 Processing content types...\n');
|
||||
|
||||
// Process each content type
|
||||
const pages = processPages(pagesEN, pagesDE, translationMapping);
|
||||
const posts = processPosts(postsEN, postsDE, translationMapping);
|
||||
const products = processProducts(productsEN, productsDE, translationMapping);
|
||||
const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
|
||||
const processedMedia = processMedia(media);
|
||||
const assetMap = generateAssetMap(media);
|
||||
|
||||
// Create processed data structure
|
||||
const processedData = {
|
||||
site: {
|
||||
title: siteInfo.siteTitle,
|
||||
description: siteInfo.siteDescription,
|
||||
baseUrl: siteInfo.baseUrl,
|
||||
defaultLocale: siteInfo.defaultLocale || 'en',
|
||||
locales: ['en', 'de']
|
||||
},
|
||||
content: {
|
||||
pages,
|
||||
posts,
|
||||
products,
|
||||
categories
|
||||
},
|
||||
assets: {
|
||||
media: processedMedia,
|
||||
map: assetMap
|
||||
},
|
||||
redirects,
|
||||
exportDate: new Date().toISOString()
|
||||
};
|
||||
|
||||
// Save processed data
|
||||
const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
|
||||
fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));
|
||||
|
||||
// Save individual files for easier access
|
||||
fs.writeFileSync(path.join(PROCESSED_DIR, 'pages.json'), JSON.stringify(pages, null, 2));
|
||||
fs.writeFileSync(path.join(PROCESSED_DIR, 'posts.json'), JSON.stringify(posts, null, 2));
|
||||
fs.writeFileSync(path.join(PROCESSED_DIR, 'products.json'), JSON.stringify(products, null, 2));
|
||||
fs.writeFileSync(path.join(PROCESSED_DIR, 'categories.json'), JSON.stringify(categories, null, 2));
|
||||
fs.writeFileSync(path.join(PROCESSED_DIR, 'media.json'), JSON.stringify(processedMedia, null, 2));
|
||||
fs.writeFileSync(path.join(PROCESSED_DIR, 'asset-map.json'), JSON.stringify(assetMap, null, 2));
|
||||
|
||||
// Summary
|
||||
console.log('✅ Data Processing Complete\n');
|
||||
console.log('📦 Processed Content:');
|
||||
console.log(` Pages: ${pages.length} (with translations)`);
|
||||
console.log(` Posts: ${posts.length} (with translations)`);
|
||||
console.log(` Products: ${products.length} (with translations)`);
|
||||
console.log(` Categories: ${categories.length} (with translations)`);
|
||||
console.log(` Media: ${processedMedia.length} files`);
|
||||
console.log(` Redirects: ${redirects.length} rules\n`);
|
||||
|
||||
console.log('📁 Output Files:');
|
||||
console.log(` ${outputPath}`);
|
||||
console.log(` ${path.join(PROCESSED_DIR, 'pages.json')}`);
|
||||
console.log(` ${path.join(PROCESSED_DIR, 'posts.json')}`);
|
||||
console.log(` ${path.join(PROCESSED_DIR, 'products.json')}`);
|
||||
console.log(` ${path.join(PROCESSED_DIR, 'categories.json')}`);
|
||||
console.log(` ${path.join(PROCESSED_DIR, 'media.json')}`);
|
||||
console.log(` ${path.join(PROCESSED_DIR, 'asset-map.json')}\n`);
|
||||
|
||||
// Sample data
|
||||
if (pages.length > 0) {
|
||||
console.log('📄 Sample Page:');
|
||||
console.log(` Title: ${pages[0].title}`);
|
||||
console.log(` Path: ${pages[0].path}`);
|
||||
console.log(` Locale: ${pages[0].locale}`);
|
||||
console.log(` Translation: ${pages[0].translation ? 'Yes' : 'No'}\n`);
|
||||
}
|
||||
|
||||
if (posts.length > 0) {
|
||||
console.log('📝 Sample Post:');
|
||||
console.log(` Title: ${posts[0].title}`);
|
||||
console.log(` Path: ${posts[0].path}`);
|
||||
console.log(` Locale: ${posts[0].locale}`);
|
||||
console.log(` Date: ${posts[0].datePublished}\n`);
|
||||
}
|
||||
|
||||
console.log('💡 Next: Ready for Next.js project setup!');
|
||||
}
|
||||
|
||||
// Run the pipeline only when this file is executed directly (node <script>),
// not when it is loaded through require().
if (require.main === module) {
|
||||
main();
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,842 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress → Next.js Data Processing Pipeline with WooCommerce API Integration
|
||||
* Transforms raw WordPress data into Next.js compatible format with prices and variations
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const https = require('https');
|
||||
const dotenv = require('dotenv');
|
||||
|
||||
// Load environment variables from .env file
|
||||
dotenv.config();
|
||||
|
||||
// Data root, resolved relative to this script: <script dir>/../data
const DATA_DIR = path.join(__dirname, '..', 'data');
|
||||
// Raw export directory (input); getLatestExportDir() scans it for sub-directories.
const RAW_DIR = path.join(DATA_DIR, 'raw');
|
||||
// Output directory for the processed JSON files.
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
|
||||
|
||||
// Create the processed directory (and any missing parents) at module load.
|
||||
if (!fs.existsSync(PROCESSED_DIR)) {
|
||||
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
// WooCommerce API configuration, read from process.env (dotenv.config() above loads .env).
|
||||
const WOOCOMMERCE_CONFIG = {
|
||||
// Store base URL; falls back to https://klz-cables.com when WOOCOMMERCE_URL is unset.
url: process.env.WOOCOMMERCE_URL || 'https://klz-cables.com',
|
||||
// Credentials used to build the HTTP Basic Authorization header in WooCommerceAPI.request().
consumerKey: process.env.WOOCOMMERCE_CONSUMER_KEY,
|
||||
consumerSecret: process.env.WOOCOMMERCE_CONSUMER_SECRET,
|
||||
// REST namespace appended after /wp-json/ when the client builds its base URL.
apiVersion: 'wc/v3'
|
||||
};
|
||||
|
||||
// Fail fast when either credential is missing. Only presence is printed, never the values.
// NOTE(review): this runs at module load, so require()-ing this file without
// credentials terminates the calling process via process.exit(1).
|
||||
if (!WOOCOMMERCE_CONFIG.consumerKey || !WOOCOMMERCE_CONFIG.consumerSecret) {
|
||||
console.error('❌ Missing WooCommerce credentials in environment');
|
||||
console.error('WOOCOMMERCE_CONSUMER_KEY:', WOOCOMMERCE_CONFIG.consumerKey ? '✓ Loaded' : '❌ Missing');
|
||||
console.error('WOOCOMMERCE_CONSUMER_SECRET:', WOOCOMMERCE_CONFIG.consumerSecret ? '✓ Loaded' : '❌ Missing');
|
||||
process.exit(1);
|
||||
} else {
|
||||
console.log('✅ WooCommerce credentials loaded successfully');
|
||||
}
|
||||
|
||||
// Rate limiting configuration shared by the WooCommerce client.
|
||||
const RATE_LIMIT = {
|
||||
// NOTE(review): maxConcurrent is not read anywhere in the visible code;
// WooCommerceAPI.processQueue() runs tasks one at a time.
maxConcurrent: 2, // Max concurrent API calls
|
||||
// Pause inserted by processQueue() before every task except the first.
delayBetweenCalls: 100, // ms between calls
|
||||
// Passed as the `timeout` option of https.request().
timeout: 30000 // 30 second timeout
|
||||
};
|
||||
|
||||
// API call queue and tracking
// NOTE(review): apiQueue and activeRequests are declared here but not referenced
// elsewhere in the visible code.
|
||||
let apiQueue = [];
|
||||
let activeRequests = 0;
|
||||
// Module-wide request counters, updated by WooCommerceAPI and printed after product processing.
let apiStats = {
|
||||
total: 0,
|
||||
success: 0,
|
||||
failed: 0,
|
||||
// Incremented by processQueue() when a task succeeds after at least one failed attempt.
retries: 0
|
||||
};
|
||||
|
||||
/**
 * WooCommerce REST API client.
 *
 * Sends HTTP Basic authenticated requests over HTTPS and records the outcome
 * of every request in the module-level `apiStats` counters.
 */
class WooCommerceAPI {
  /**
   * @param {{url: string, apiVersion: string, consumerKey: string, consumerSecret: string}} config
   */
  constructor(config) {
    this.config = config;
    this.baseURL = `${config.url}/wp-json/${config.apiVersion}`;
  }

  /**
   * Make an authenticated API request.
   *
   * Each request is settled exactly once and counted exactly once in
   * `apiStats` (total plus either success or failed), whichever of
   * response / error / timeout happens first.
   *
   * @param {string} endpoint - Path (and optional query string) appended to the base URL.
   * @param {string} [method='GET'] - HTTP method.
   * @param {*} [data=null] - JSON-serialisable body, sent for POST and PUT only.
   * @returns {Promise<*>} Parsed JSON response body.
   * @throws {Error} Rejects on network error, timeout, non-2xx status or invalid JSON.
   */
  async request(endpoint, method = 'GET', data = null) {
    return new Promise((resolve, reject) => {
      const url = new URL(`${this.baseURL}${endpoint}`);

      // HTTP Basic credentials: base64("key:secret")
      const auth = Buffer.from(`${this.config.consumerKey}:${this.config.consumerSecret}`).toString('base64');

      const options = {
        hostname: url.hostname,
        port: url.port || 443,
        path: url.pathname + url.search,
        method,
        headers: {
          'Authorization': `Basic ${auth}`,
          'Content-Type': 'application/json',
          'User-Agent': 'KLZ-Data-Processor/1.0'
        },
        timeout: RATE_LIMIT.timeout
      };

      // Log the request
      console.log(`🌐 API Request: ${method} ${url.pathname}`);

      // Guard against double settlement: destroying the request after a
      // timeout also emits 'error', which used to count the failure twice.
      let settled = false;
      const settle = (error, value) => {
        if (settled) return;
        settled = true;
        apiStats.total++;
        if (error) {
          apiStats.failed++;
          reject(error);
        } else {
          apiStats.success++;
          resolve(value);
        }
      };

      const req = https.request(options, (res) => {
        // Collect raw buffers and decode once, so multi-byte UTF-8 sequences
        // split across chunks are not corrupted.
        const chunks = [];

        res.on('data', (chunk) => {
          chunks.push(chunk);
        });

        res.on('error', (err) => settle(err));

        res.on('end', () => {
          const responseText = Buffer.concat(chunks).toString('utf8');

          if (res.statusCode < 200 || res.statusCode >= 300) {
            settle(new Error(`HTTP ${res.statusCode}: ${responseText}`));
            return;
          }

          let parsed;
          try {
            parsed = JSON.parse(responseText);
          } catch (e) {
            settle(new Error(`JSON parse error: ${e.message}`));
            return;
          }
          settle(null, parsed);
        });
      });

      req.on('error', (err) => settle(err));

      req.on('timeout', () => {
        settle(new Error('Request timeout'));
        req.destroy();
      });

      // Add request body for POST/PUT
      if (data && (method === 'POST' || method === 'PUT')) {
        req.write(JSON.stringify(data));
      }

      req.end();
    });
  }

  /**
   * Get product by ID.
   *
   * Errors are logged and swallowed, so this method never rejects.
   * NOTE(review): because of that, the retry loop in processQueue() never
   * triggers for tasks that only call this method.
   *
   * @param {number|string} productId
   * @returns {Promise<Object|null>} The product, or null when the request failed.
   */
  async getProduct(productId) {
    try {
      return await this.request(`/products/${productId}`);
    } catch (error) {
      console.error(`❌ Failed to fetch product ${productId}:`, error.message);
      return null;
    }
  }

  /**
   * Get product variations.
   *
   * Requests a single page of up to 100 variations; further pages are not fetched.
   * Errors are logged and swallowed, so this method never rejects.
   *
   * @param {number|string} productId
   * @returns {Promise<Array>} The variations, or [] when the request failed.
   */
  async getProductVariations(productId) {
    try {
      return await this.request(`/products/${productId}/variations?per_page=100`);
    } catch (error) {
      console.error(`❌ Failed to fetch variations for product ${productId}:`, error.message);
      return [];
    }
  }

  /**
   * Process API tasks sequentially with rate limiting and retries.
   *
   * Tasks run one at a time, separated by RATE_LIMIT.delayBetweenCalls ms.
   * A task that throws is retried with a linearly growing delay
   * (1s, 2s, ...) until it succeeds or maxAttempts is reached.
   *
   * @param {Array<{label: string, fn: function(): Promise<*>}>} tasks
   * @param {function(number, number, string): void} [progressCallback]
   *   Called before every attempt with (1-based index, total, label).
   * @param {number} [maxAttempts=3] - Attempts per task before giving up.
   * @returns {Promise<Array>} One entry per task, in order; null for tasks that never succeeded.
   */
  async processQueue(tasks, progressCallback, maxAttempts = 3) {
    const results = [];

    for (let i = 0; i < tasks.length; i++) {
      // Wait for rate limit
      if (i > 0) {
        await new Promise(resolve => setTimeout(resolve, RATE_LIMIT.delayBetweenCalls));
      }

      const task = tasks[i];
      let attempt = 0;
      let success = false;
      let result = null;

      while (attempt < maxAttempts && !success) {
        try {
          if (progressCallback) {
            progressCallback(i + 1, tasks.length, task.label);
          }

          result = await task.fn();
          success = true;

          if (attempt > 0) {
            apiStats.retries++;
            console.log(`✅ Retry successful for: ${task.label}`);
          }
        } catch (error) {
          attempt++;
          if (attempt < maxAttempts) {
            console.log(`⚠️ Retry ${attempt}/${maxAttempts} for: ${task.label} - ${error.message}`);
            // Linear backoff: wait 1s after the first failure, 2s after the second, ...
            await new Promise(resolve => setTimeout(resolve, 1000 * attempt));
          } else {
            console.log(`❌ Failed after ${maxAttempts} attempts: ${task.label} - ${error.message}`);
          }
        }
      }

      results.push(result);
    }

    return results;
  }
}
|
||||
|
||||
/**
 * Decode HTML entities in text and normalise typographic punctuation.
 *
 * Numeric (decimal and hex) and named entities are decoded in a single pass,
 * so already-decoded output is never decoded a second time
 * (e.g. "&amp;lt;" becomes "&lt;", not "<"). Unknown named entities and
 * out-of-range numeric entities are left untouched.
 *
 * After decoding, curly quotes and the double prime become straight quotes,
 * the en dash becomes "-", and the non-breaking space becomes a regular space.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Decoded text; '' for empty/null/undefined input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  // Named entities this pipeline understands (name without "&" and ";").
  const namedEntities = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: '\u00A0',
    lsquo: '\u2018',
    rsquo: '\u2019',
    ldquo: '\u201C',
    rdquo: '\u201D',
    Prime: '\u2033',
    ndash: '\u2013',
    mdash: '\u2014',
    hellip: '\u2026',
    bull: '\u2022',
    euro: '\u20AC',
    copy: '\u00A9',
    reg: '\u00AE',
    trade: '\u2122',
    deg: '\u00B0',
    plusmn: '\u00B1',
    times: '\u00D7',
    divide: '\u00F7',
    minus: '\u2212',
    cent: '\u00A2',
    pound: '\u00A3',
    yen: '\u00A5',
    sect: '\u00A7',
    para: '\u00B6',
    micro: '\u00B5',
    laquo: '\u00AB',
    raquo: '\u00BB',
    middot: '\u00B7'
  };

  // Typographic characters replaced with plain ASCII equivalents.
  const asciiEquivalents = {
    '\u00A0': ' ',
    '\u2018': "'",
    '\u2019': "'",
    '\u201C': '"',
    '\u201D': '"',
    '\u2033': '"',
    '\u2013': '-'
  };

  const decoded = String(text).replace(
    /&(#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (match, body) => {
      if (body[0] !== '#') {
        return Object.prototype.hasOwnProperty.call(namedEntities, body)
          ? namedEntities[body]
          : match;
      }

      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);

      // fromCodePoint (unlike fromCharCode) handles characters above U+FFFF;
      // it throws past U+10FFFF, so such entities are left as written.
      return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
    }
  );

  return decoded.replace(
    /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013]/g,
    (char) => asciiEquivalents[char]
  );
}
|
||||
|
||||
/**
 * HTML sanitization - preserve content but clean dangerous elements.
 *
 * Steps, in order:
 *  1. Remove <script> elements and inline on* event-handler attributes
 *     (double-quoted, single-quoted and unquoted values).
 *  2. Convert WPBakery layout shortcodes into <div> wrappers:
 *     vc_column_text -> .vc-text, vc_row / vc_row_inner -> .vc-row,
 *     vc_column / vc_column_inner -> .vc-column.
 *  3. Unwrap [nectar_cta] (keeping its inner content) and drop every other shortcode.
 *  4. Remove empty <p>/<div> elements and collapse whitespace.
 *
 * NOTE: this is a regex-based clean-up for exported CMS content, not a
 * general-purpose XSS filter.
 *
 * @param {string} html - Raw HTML, possibly containing shortcodes.
 * @returns {string} Cleaned HTML; '' for empty/null/undefined input.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  let sanitized = String(html);

  // Remove script tags and inline handlers (security)
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // vc_column_text must be handled before vc_column: the shorter name is a
  // prefix of the longer one and would otherwise swallow it.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // Replace row/column wrappers (including their *_inner variants) with divs.
  // Openers and closers are matched by the same names so the divs stay balanced.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Handle Nectar shortcodes: keep the CTA's inner content, drop the rest
  sanitized = sanitized.replace(/\[nectar_cta[^\]]*\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar[^\]]*\]/gi, '');

  // Remove all remaining shortcodes
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  sanitized = sanitized.replace(/\s+/g, ' ').trim();

  return sanitized;
}
|
||||
|
||||
/**
 * Process products with WooCommerce API integration.
 *
 * For every product in both locales, queues one request for the product
 * (prices / stock) and one for its variations, runs the queue through
 * WooCommerceAPI.processQueue(), and merges the results into the exported
 * product data. Products whose API calls failed keep empty price fields and
 * an empty variations list.
 *
 * @param {Array<Object>} productsEN - Exported English products.
 * @param {Array<Object>} productsDE - Exported German products.
 * @param {Object} translationMapping - Mapping with a `products` section keyed
 *   by slug; a missing section is treated as "no translations".
 * @returns {Promise<Array<Object>>} English records followed by German records.
 */
async function processProductsWithWooCommerce(productsEN, productsDE, translationMapping) {
  const api = new WooCommerceAPI(WOOCOMMERCE_CONFIG);

  // Tolerate a missing/invalid mapping (e.g. the [] fallback of a failed file load).
  const productMapping = (translationMapping && translationMapping.products) || {};

  const localeSets = [
    { locale: 'en', otherLocale: 'de', pathPrefix: '', products: productsEN },
    { locale: 'de', otherLocale: 'en', pathPrefix: '/de', products: productsDE }
  ];

  console.log(`\n🚀 Starting WooCommerce API integration for ${productsEN.length} products...`);

  // Builds the "prices" task for one product.
  const createPriceTask = (product, locale) => ({
    label: `${product.name} (${locale.toUpperCase()}) - Prices`,
    fn: async () => {
      const wooProduct = await api.getProduct(product.id);
      if (!wooProduct) return null;
      return {
        productId: product.id,
        locale,
        regularPrice: wooProduct.regular_price || '',
        salePrice: wooProduct.sale_price || '',
        currency: wooProduct.currency || 'EUR',
        stockStatus: wooProduct.stock_status || 'instock'
      };
    }
  });

  // Builds the "variations" task for one product.
  const createVariationTask = (product, locale) => ({
    label: `${product.name} (${locale.toUpperCase()}) - Variations`,
    fn: async () => {
      const variations = await api.getProductVariations(product.id);
      return {
        productId: product.id,
        locale,
        variations: variations || []
      };
    }
  });

  // Create tasks for price and variation fetching (all EN products, then all DE)
  const tasks = [];
  localeSets.forEach(({ locale, products }) => {
    products.forEach(product => {
      tasks.push(createPriceTask(product, locale));
      tasks.push(createVariationTask(product, locale));
    });
  });

  // Progress callback: rewrites a single console line
  const progressCallback = (current, total, label) => {
    const progress = Math.round((current / total) * 100);
    process.stdout.write(`\r📊 Progress: ${current}/${total} (${progress}%) - ${label}`);
  };

  // Process all tasks
  const results = await api.processQueue(tasks, progressCallback);

  // Clear progress line
  process.stdout.write('\n');

  // Organize results by "<productId>_<locale>"
  const priceData = {};
  const variationData = {};

  results.forEach(result => {
    if (!result) return;

    const key = `${result.productId}_${result.locale}`;

    // Variation results carry a `variations` array; everything else is a price result.
    if (result.variations) {
      variationData[key] = result.variations;
    } else {
      priceData[key] = {
        regularPrice: result.regularPrice,
        salePrice: result.salePrice,
        currency: result.currency,
        stockStatus: result.stockStatus
      };
    }
  });

  console.log(`\n📈 API Statistics:`);
  console.log(` Total requests: ${apiStats.total}`);
  console.log(` Successful: ${apiStats.success}`);
  console.log(` Failed: ${apiStats.failed}`);
  console.log(` Retries: ${apiStats.retries}`);

  // Merges exported product data with the fetched price/variation data.
  const buildRecord = (product, { locale, otherLocale, pathPrefix }) => {
    const key = `${product.id}_${locale}`;
    const priceInfo = priceData[key] || {};
    const translationKey = product.slug;
    const match = productMapping[translationKey];

    return {
      id: product.id,
      translationKey: translationKey,
      locale,
      slug: product.slug,
      path: `${pathPrefix}/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: priceInfo.regularPrice || '',
      salePrice: priceInfo.salePrice || '',
      currency: priceInfo.currency || 'EUR',
      stockStatus: priceInfo.stockStatus || 'instock',
      categories: product.categories,
      attributes: product.attributes,
      variations: variationData[key] || [],
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  const processed = [];
  localeSets.forEach(localeSet => {
    localeSet.products.forEach(product => {
      processed.push(buildRecord(product, localeSet));
    });
  });

  return processed;
}
|
||||
|
||||
/**
 * Process pages.
 *
 * Builds one record per exported page: English pages first (path "/<slug>"),
 * then German pages (path "/de/<slug>"). The plain-text title is derived
 * from titleHtml by stripping tags and decoding entities; content HTML is
 * sanitized.
 *
 * @param {Array<Object>} pagesEN - Exported English pages.
 * @param {Array<Object>} pagesDE - Exported German pages.
 * @param {Object} translationMapping - Mapping with a `pages` section keyed by
 *   slug; a missing section is treated as "no translations".
 * @returns {Array<Object>} Processed page records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  // Tolerate a missing/invalid mapping (e.g. the [] fallback of a failed file load).
  const pageMapping = (translationMapping && translationMapping.pages) || {};

  // Converts one exported page into its processed form for the given locale.
  const buildRecord = (page, locale) => {
    const isGerman = locale === 'de';
    const otherLocale = isGerman ? 'en' : 'de';
    const translationKey = page.slug;
    const match = pageMapping[translationKey];

    // A page without titleHtml gets an empty title instead of throwing.
    const rawTitle = (page.titleHtml || '').replace(/<[^>]*>/g, '');

    return {
      id: page.id,
      translationKey: translationKey,
      locale,
      slug: page.slug,
      path: isGerman ? `/de/${page.slug}` : `/${page.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      excerptHtml: page.excerptHtml || '',
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...pagesEN.map(page => buildRecord(page, 'en')),
    ...pagesDE.map(page => buildRecord(page, 'de'))
  ];
}
|
||||
|
||||
/**
 * Process posts.
 *
 * Builds one record per exported post: English posts first
 * (path "/blog/<slug>"), then German posts (path "/de/blog/<slug>"). The
 * plain-text title is derived from titleHtml by stripping tags and decoding
 * entities; content HTML is sanitized.
 *
 * @param {Array<Object>} postsEN - Exported English posts.
 * @param {Array<Object>} postsDE - Exported German posts.
 * @param {Object} translationMapping - Mapping with a `posts` section keyed by
 *   slug; a missing section is treated as "no translations".
 * @returns {Array<Object>} Processed post records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  // Tolerate a missing/invalid mapping (e.g. the [] fallback of a failed file load).
  const postMapping = (translationMapping && translationMapping.posts) || {};

  // Converts one exported post into its processed form for the given locale.
  const buildRecord = (post, locale) => {
    const isGerman = locale === 'de';
    const otherLocale = isGerman ? 'en' : 'de';
    const translationKey = post.slug;
    const match = postMapping[translationKey];

    // A post without titleHtml gets an empty title instead of throwing.
    const rawTitle = (post.titleHtml || '').replace(/<[^>]*>/g, '');

    return {
      id: post.id,
      translationKey: translationKey,
      locale,
      slug: post.slug,
      path: isGerman ? `/de/blog/${post.slug}` : `/blog/${post.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: post.excerptHtml || '',
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...postsEN.map(post => buildRecord(post, 'en')),
    ...postsDE.map(post => buildRecord(post, 'de'))
  ];
}
|
||||
|
||||
/**
 * Process product categories.
 *
 * Builds one record per exported category: English categories first
 * (path "/product-category/<slug>"), then German ones
 * (path "/de/product-category/<slug>").
 *
 * @param {Array<Object>} categoriesEN - Exported English categories.
 * @param {Array<Object>} categoriesDE - Exported German categories.
 * @param {Object} translationMapping - Mapping with a `productCategories`
 *   section keyed by slug; a missing section is treated as "no translations".
 * @returns {Array<Object>} Processed category records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  // Tolerate a missing/invalid mapping (e.g. the [] fallback of a failed file load).
  const categoryMapping = (translationMapping && translationMapping.productCategories) || {};

  // Converts one exported category into its processed form for the given locale.
  const buildRecord = (category, locale) => {
    const isGerman = locale === 'de';
    const otherLocale = isGerman ? 'en' : 'de';
    const translationKey = category.slug;
    const match = categoryMapping[translationKey];

    return {
      id: category.id,
      translationKey: translationKey,
      locale,
      slug: category.slug,
      name: category.name,
      path: isGerman
        ? `/de/product-category/${category.slug}`
        : `/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map(category => buildRecord(category, 'en')),
    ...categoriesDE.map(category => buildRecord(category, 'de'))
  ];
}
|
||||
|
||||
/**
 * Process media manifest.
 *
 * Maps each manifest entry to the shape used by the processed data set,
 * adding the local path ("/media/<filename>") and renaming mime_type to
 * mimeType.
 *
 * @param {Array<Object>} media - Exported media manifest.
 * @returns {Array<Object>} Processed media entries; [] when the manifest is not an array.
 */
function processMedia(media) {
  // A missing or malformed manifest yields no media instead of a TypeError.
  if (!Array.isArray(media)) return [];

  return media.map(item => ({
    id: item.id,
    filename: item.filename,
    url: item.url,
    localPath: `/media/${item.filename}`,
    alt: item.alt,
    width: item.width,
    height: item.height,
    mimeType: item.mime_type
  }));
}
|
||||
|
||||
/**
 * Generate asset map for URL replacement.
 *
 * Maps each original media URL to its local path ("/media/<filename>").
 * Entries without a url or without a filename are skipped, so the map never
 * contains a "/media/undefined" target.
 *
 * @param {Array<Object>} media - Exported media manifest.
 * @returns {Object<string, string>} Original URL -> local path; {} when the
 *   manifest is not an array.
 */
function generateAssetMap(media) {
  const map = {};
  if (!Array.isArray(media)) return map;

  for (const item of media) {
    if (item.url && item.filename) {
      map[item.url] = `/media/${item.filename}`;
    }
  }
  return map;
}
|
||||
|
||||
/**
 * Main processing function.
 *
 * Loads the raw export files from the latest export directory, processes
 * every content type (products are enriched through the WooCommerce API),
 * writes the combined and per-type JSON files to PROCESSED_DIR and prints a
 * summary.
 *
 * A raw file that cannot be read or parsed is logged and replaced by an empty
 * value of the matching shape, so one missing file does not abort the run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js with WooCommerce Integration');
  console.log('==========================================================\n');

  // Load raw data. `fallback` is returned when the file is missing or invalid;
  // object-shaped files must pass {} so later property access stays safe.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // Writes one processed file and returns its full path.
  const writeJSON = (fileName, value) => {
    const target = path.join(PROCESSED_DIR, fileName);
    fs.writeFileSync(target, JSON.stringify(value, null, 2));
    return target;
  };

  // Guarantee every mapping section exists, even when the file is missing or partial.
  const translationMapping = Object.assign(
    { pages: {}, posts: {}, products: {}, productCategories: {} },
    loadJSON('translation-mapping.json', {})
  );
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {}) || {};

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Process products with WooCommerce API
  const products = await processProductsWithWooCommerce(productsEN, productsDE, translationMapping);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Save processed data
  const outputPath = writeJSON('wordpress-data.json', processedData);

  // Save individual files for easier access
  const pagesPath = writeJSON('pages.json', pages);
  const postsPath = writeJSON('posts.json', posts);

  // Always write products.json with the processed data
  // Even if WooCommerce data is missing, we still want the base product structure
  const productsPath = writeJSON('products.json', products);

  // Report on WooCommerce data quality
  const productsWithPrices = products.filter(p => p.regularPrice).length;
  const productsWithVariations = products.filter(p => p.variations && p.variations.length > 0).length;

  console.log('📊 WooCommerce Data Quality:');
  console.log(` Products with prices: ${productsWithPrices}/${products.length}`);
  console.log(` Products with variations: ${productsWithVariations}/${products.length}`);

  if (productsWithPrices === 0 && productsWithVariations === 0) {
    console.log('⚠️ Warning: No WooCommerce pricing or variation data was retrieved');
    console.log(' Products written with empty price fields\n');
  } else {
    console.log('✅ WooCommerce data integrated successfully\n');
  }

  const categoriesPath = writeJSON('categories.json', categories);
  const mediaPath = writeJSON('media.json', processedMedia);
  const assetMapPath = writeJSON('asset-map.json', assetMap);

  // Summary
  console.log('\n✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  console.log(` ${pagesPath}`);
  console.log(` ${postsPath}`);
  console.log(` ${productsPath}`);
  console.log(` ${categoriesPath}`);
  console.log(` ${mediaPath}`);
  console.log(` ${assetMapPath}\n`);

  // Sample data: prefer a product that actually received a price
  if (products.length > 0) {
    console.log('📦 Sample Product with WooCommerce Data:');
    const sampleProduct = products.find(p => p.regularPrice) || products[0];
    console.log(` Name: ${sampleProduct.name}`);
    console.log(` SKU: ${sampleProduct.sku}`);
    console.log(` Price: ${sampleProduct.regularPrice} ${sampleProduct.currency}`);
    console.log(` Sale Price: ${sampleProduct.salePrice || 'N/A'}`);
    console.log(` Variations: ${sampleProduct.variations.length}`);
    console.log(` Locale: ${sampleProduct.locale}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup with complete product data!');
}
|
||||
|
||||
/**
 * Get the latest export directory.
 *
 * Export directories live directly under RAW_DIR; the one whose name sorts
 * last is returned (names are expected to be sortable timestamps).
 *
 * @returns {string} Full path of the latest export directory.
 * @throws {Error} When RAW_DIR does not exist or contains no directories.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`No raw data directory found: ${RAW_DIR}`);
  }

  const dirs = fs.readdirSync(RAW_DIR).filter(entry => {
    return fs.statSync(path.join(RAW_DIR, entry)).isDirectory();
  });

  // Without this guard path.join() would fail on `undefined` with an unrelated-looking TypeError.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in: ${RAW_DIR}`);
  }

  // Sort by name (timestamp) and take the latest
  dirs.sort().reverse();
  return path.join(RAW_DIR, dirs[0]);
}
|
||||
|
||||
// Run the pipeline only when this file is executed directly, not when it is
// required as a module. A failed run is logged and reported through a
// non-zero exit code so shells and CI can detect it.
if (require.main === module) {
  main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
}
|
||||
|
||||
// Public API: the individual processing steps and the two HTML helpers,
// exported so they can be reused (main() itself is not exported).
module.exports = {
|
||||
processPages,
|
||||
processPosts,
|
||||
processProductCategories,
|
||||
processProductsWithWooCommerce,
|
||||
processMedia,
|
||||
generateAssetMap,
|
||||
decodeHTMLEntities,
|
||||
sanitizeHTML
|
||||
};
|
||||
@@ -1,660 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress → Next.js Data Processing Pipeline
|
||||
* Transforms raw WordPress data into Next.js compatible format
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Data root, resolved relative to this script: <script dir>/../data
const DATA_DIR = path.join(__dirname, '..', 'data');
|
||||
// Raw export directory (input); getLatestExportDir() scans it for sub-directories.
const RAW_DIR = path.join(DATA_DIR, 'raw');
|
||||
// Output directory for the processed JSON files.
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
|
||||
|
||||
// Create the processed directory (and any missing parents) at module load.
|
||||
if (!fs.existsSync(PROCESSED_DIR)) {
|
||||
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
|
||||
}
|
||||
|
||||
// Find latest export.
// Export directories live directly under RAW_DIR; the one whose name sorts
// last is returned (names are expected to be sortable timestamps).
// Throws an Error when RAW_DIR does not exist or contains no directories.
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`No raw data directory found: ${RAW_DIR}`);
  }

  const dirs = fs.readdirSync(RAW_DIR).filter(entry => {
    return fs.statSync(path.join(RAW_DIR, entry)).isDirectory();
  });

  // Without this guard path.join() would fail on `undefined` with an unrelated-looking TypeError.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in: ${RAW_DIR}`);
  }

  // Sort by name (timestamp) and take the latest
  dirs.sort().reverse();
  return path.join(RAW_DIR, dirs[0]);
}
|
||||
|
||||
// Decode HTML entities in text and normalise typographic punctuation.
//
// Numeric (decimal and hex) and named entities are decoded in a single pass,
// so already-decoded output is never decoded a second time
// (e.g. "&amp;lt;" becomes "&lt;", not "<"). Unknown named entities and
// out-of-range numeric entities are left untouched.
//
// After decoding, curly quotes and the double prime become straight quotes,
// the en dash becomes "-", and the non-breaking space becomes a regular space.
// Returns '' for empty/null/undefined input.
function decodeHTMLEntities(text) {
  if (!text) return '';

  // Named entities this pipeline understands (name without "&" and ";").
  const namedEntities = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: '\u00A0',
    lsquo: '\u2018',
    rsquo: '\u2019',
    ldquo: '\u201C',
    rdquo: '\u201D',
    Prime: '\u2033',
    ndash: '\u2013',
    mdash: '\u2014',
    hellip: '\u2026',
    bull: '\u2022',
    euro: '\u20AC',
    copy: '\u00A9',
    reg: '\u00AE',
    trade: '\u2122',
    deg: '\u00B0',
    plusmn: '\u00B1',
    times: '\u00D7',
    divide: '\u00F7',
    minus: '\u2212',
    cent: '\u00A2',
    pound: '\u00A3',
    yen: '\u00A5',
    sect: '\u00A7',
    para: '\u00B6',
    micro: '\u00B5',
    laquo: '\u00AB',
    raquo: '\u00BB',
    middot: '\u00B7'
  };

  // Typographic characters replaced with plain ASCII equivalents.
  const asciiEquivalents = {
    '\u00A0': ' ',
    '\u2018': "'",
    '\u2019': "'",
    '\u201C': '"',
    '\u201D': '"',
    '\u2033': '"', // Double prime (8243)
    '\u2013': '-'
  };

  const decoded = String(text).replace(
    /&(#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (match, body) => {
      if (body[0] !== '#') {
        return Object.prototype.hasOwnProperty.call(namedEntities, body)
          ? namedEntities[body]
          : match;
      }

      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);

      // fromCodePoint (unlike fromCharCode) handles characters above U+FFFF;
      // it throws past U+10FFFF, so such entities are left as written.
      return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
    }
  );

  return decoded.replace(
    /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013]/g,
    (char) => asciiEquivalents[char]
  );
}
|
||||
|
||||
/**
 * HTML sanitization: keep the content, drop dangerous or builder-specific markup.
 *
 * - removes <script> elements and inline `on*=` event-handler attributes
 *   (double-quoted, single-quoted or unquoted),
 * - turns the WPBakery wrappers vc_row, vc_row_inner, vc_column,
 *   vc_column_inner and vc_column_text into <div>s so nesting stays balanced,
 * - unwraps [nectar_cta]...[/nectar_cta] and removes every other shortcode,
 * - removes empty <p>/<div> elements and collapses whitespace.
 *
 * `bg_image=...` attributes (straight or curly quotes) found on a converted
 * wrapper are carried over verbatim onto the generated <div>, so a later
 * image-fixing step can still see them.
 *
 * @param {string} html - Raw content HTML.
 * @returns {string} Sanitized HTML; '' for empty, null or undefined input.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  // Park bg_image attributes behind placeholders so quote characters inside
  // them cannot interfere with the shortcode patterns below.
  const bgImageAttrs = [];
  let sanitized = html.replace(/bg_image=["\u201D][^"\u201D]*?["\u201D]/gi, (match) => {
    bgImageAttrs.push(match);
    return `__BG_IMAGE_${bgImageAttrs.length - 1}__`;
  });

  // Remove script tags and inline handlers (security)
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Shortcode tag -> CSS class of the replacement <div>. Each tag is matched
  // exactly (see the lookahead below) so `vc_row` no longer swallows
  // `vc_row_inner`, nor `vc_column` the `_inner`/`_text` variants.
  const wrappers = [
    ['vc_row_inner', 'vc-row-inner'],
    ['vc_row', 'vc-row'],
    ['vc_column_inner', 'vc-column-inner'],
    ['vc_column_text', 'vc-text'],
    ['vc_column', 'vc-column'],
  ];
  for (const [tag, className] of wrappers) {
    const opening = new RegExp(`\\[${tag}(?![\\w-])([^\\]]*)\\]`, 'gi');
    const closing = new RegExp(`\\[\\/${tag}\\]`, 'gi');
    sanitized = sanitized
      .replace(opening, (match, attrs) => {
        // Keep any bg_image placeholder that was among the shortcode attributes.
        const kept = attrs.match(/__BG_IMAGE_\d+__/g);
        return kept ? `<div class="${className}" ${kept.join(' ')}>` : `<div class="${className}">`;
      })
      .replace(closing, '</div>');
  }

  // Handle Nectar shortcodes - remove them but keep any text content
  // [nectar_cta] blocks often contain text we want to preserve
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcodes
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure
  sanitized = sanitized.replace(/\s+/g, ' ').trim();

  // Restore bg_image attributes. A replacer function is used so `$` sequences
  // inside the attribute value are inserted literally.
  return sanitized.replace(/__BG_IMAGE_(\d+)__/g, (token, index) => {
    const original = bgImageAttrs[Number(index)];
    return original === undefined ? token : original;
  });
}
|
||||
|
||||
/**
 * Convert the page-builder shortcodes found in an excerpt into plain HTML.
 *
 * Steps: decode entities (decodeHTMLEntities), park `bg_image=...` attributes
 * behind placeholders, convert the known WPBakery/Nectar shortcodes, remove
 * every remaining `[...]` shortcode, drop empty <p>/<div>, collapse whitespace
 * and finally restore the parked bg_image attributes.
 *
 * Excerpts may be cut off in the middle of a shortcode; an unterminated
 * trailing [vc_row / [vc_column / [vc_column_text is still converted to its
 * opening <div>.
 *
 * Each shortcode name is matched exactly, so `vc_row` does not capture
 * `vc_row_inner`, `vc_column` does not capture `vc_column_text` /
 * `vc_column_inner`, and `nectar_icon_list` does not capture
 * `nectar_icon_list_item`.
 *
 * @param {string} excerptHtml - Raw excerpt HTML.
 * @returns {string} Processed excerpt; '' for empty, null or undefined input.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // First decode HTML entities, then temporarily preserve bg_image attributes
  // (handles both regular and Unicode closing quotes).
  const bgImageAttrs = [];
  let processed = decodeHTMLEntities(excerptHtml).replace(
    /bg_image=["\u201D][^"\u201D]*?["\u201D]/gi,
    (match) => {
      bgImageAttrs.push(match);
      return `__BG_IMAGE_${bgImageAttrs.length - 1}__`;
    }
  );

  // bg_image placeholders present among a shortcode's attributes, formatted
  // for appending to the generated opening tag.
  const keptPlaceholders = (attrs) => {
    const found = attrs.match(/__BG_IMAGE_\d+__/g);
    return found ? ` ${found.join(' ')}` : '';
  };

  // [vc_row ...] -> <div> with layout classes (plus preserved placeholders).
  const rowToDiv = (match, attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    return `<div class="${classes.join(' ')}"${keptPlaceholders(attrs)}>`;
  };

  // [vc_column ...] -> <div> with a width class. The width is read from the
  // `width=` attribute itself, so "1/12" is no longer mistaken for "1/1".
  const knownWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];
  const columnToDiv = (match, attrs) => {
    const classes = ['vc-column'];
    const width = /\bwidth=[^\w\s]?(\d+\/\d+)/i.exec(attrs);
    if (width && knownWidths.includes(width[1])) {
      classes.push(`col-${width[1].replace('/', '-')}`);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  // In the patterns below `(?![\w-])` pins the exact shortcode name and
  // `(?:\]|$)` accepts either a complete shortcode or one truncated at the
  // end of the excerpt. Attribute gaps use [^\]] so a match cannot run past
  // the closing bracket into a following shortcode.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row(?![\w-])([^\]]*)(?:\]|$)/gi, rowToDiv)
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with classes
    .replace(/\[vc_column(?![\w-])([^\]]*)(?:\]|$)/gi, columnToDiv)
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text(?![\w-])[^\]]*(?:\]|$)/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // nectar_cta - convert to button
    .replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"([^\]]*?)url="([^"]*)"([^\]]*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')

    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')

    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')

    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list(?![\w-])[^\]]*\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')

    // nectar_icon_list_item - convert to li
    .replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"([^\]]*?)text="([^"]*)"([^\]]*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')

    // nectar_btn - convert to button
    .replace(/\[nectar_btn([^\]]*)text="([^"]*)"([^\]]*?)url="([^"]*)"([^\]]*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"([^\]]*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')

    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner(?![\w-])[^\]]*\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')

    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner(?![\w-])[^\]]*\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider(?![\w-])[^\]]*\]/gi, '<hr class="divider" />')

    // NOTE(review): the bracketed labels inserted by the next three rules are
    // themselves removed by the "remaining shortcodes" pass below, after which
    // the empty <div> is dropped too - so galleries, raw JS and maps currently
    // vanish from excerpts. Kept as is; confirm whether a visible label is wanted.
    .replace(/\[vc_gallery([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up any HTML that might be broken
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  processed = processed.replace(/\s+/g, ' ').trim();

  // Restore bg_image attributes; the replacer function keeps `$` literal.
  return processed.replace(/__BG_IMAGE_(\d+)__/g, (token, index) => {
    const original = bgImageAttrs[Number(index)];
    return original === undefined ? token : original;
  });
}
|
||||
|
||||
/**
 * Build a plain-text excerpt from HTML content.
 *
 * Tags are stripped; text longer than `maxLength` UTF-16 units is cut and
 * suffixed with '...'.
 *
 * @param {string} content - HTML content (null/undefined/'' yields '').
 * @param {number} [maxLength=200] - Maximum excerpt length before the ellipsis.
 * @returns {string} The excerpt text.
 */
function generateExcerpt(content, maxLength = 200) {
  // Callers use this as a fallback and may pass a missing contentHtml.
  if (!content) return '';

  const text = String(content).replace(/<[^>]*>/g, '');
  if (text.length <= maxLength) return text;

  // Do not cut between the two halves of a surrogate pair (e.g. an emoji):
  // if the last kept unit is a high surrogate, drop it as well.
  let end = maxLength;
  const lastUnit = text.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return `${text.substring(0, end)}...`;
}
|
||||
|
||||
/**
 * Process pages: build the processed page list, English entries first,
 * followed by the German ones.
 *
 * Each entry carries the plain-text title (tags stripped, entities decoded),
 * sanitized content, a processed excerpt (falling back to one generated from
 * the content) and a `translation` link when `translationMapping.pages` has
 * an entry keyed by the page slug.
 *
 * @param {Array<Object>} pagesEN - Raw English pages.
 * @param {Array<Object>} pagesDE - Raw German pages.
 * @param {Object} translationMapping - Mapping object; only `.pages` is read.
 *   A missing mapping or section is treated as "no translations".
 * @returns {Array<Object>} Processed page records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  // The loader may hand over a non-object fallback when the mapping file is
  // missing; never dereference an absent section.
  const mapping = (translationMapping && translationMapping.pages) || {};
  const asList = (value) => (Array.isArray(value) ? value : []);

  // Shared builder for both locales; `otherLocale` names the key holding the
  // counterpart id inside a mapping entry.
  const toEntry = (page, locale, pathPrefix, otherLocale) => {
    const translationKey = page.slug;
    const match = Object.prototype.hasOwnProperty.call(mapping, translationKey)
      ? mapping[translationKey]
      : undefined;

    // Extract title and decode HTML entities (tolerating a missing titleHtml)
    const rawTitle = (page.titleHtml || '').replace(/<[^>]*>/g, '');

    return {
      id: page.id,
      translationKey,
      locale,
      slug: page.slug,
      path: `${pathPrefix}/${page.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml || ''),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null,
    };
  };

  return [].concat(
    asList(pagesEN).map((page) => toEntry(page, 'en', '', 'de')),
    asList(pagesDE).map((page) => toEntry(page, 'de', '/de', 'en'))
  );
}
|
||||
|
||||
/**
 * Process posts: build the processed post list, English entries first,
 * followed by the German ones.
 *
 * Same shape as processed pages plus `datePublished`; paths are `/blog/<slug>`
 * and `/de/blog/<slug>`. A `translation` link is added when
 * `translationMapping.posts` has an entry keyed by the post slug.
 *
 * @param {Array<Object>} postsEN - Raw English posts.
 * @param {Array<Object>} postsDE - Raw German posts.
 * @param {Object} translationMapping - Mapping object; only `.posts` is read.
 *   A missing mapping or section is treated as "no translations".
 * @returns {Array<Object>} Processed post records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  // Never dereference an absent mapping section (the loader may fall back to
  // a non-object value when the mapping file is missing).
  const mapping = (translationMapping && translationMapping.posts) || {};
  const asList = (value) => (Array.isArray(value) ? value : []);

  // Shared builder for both locales; `otherLocale` names the key holding the
  // counterpart id inside a mapping entry.
  const toEntry = (post, locale, pathPrefix, otherLocale) => {
    const translationKey = post.slug;
    const match = Object.prototype.hasOwnProperty.call(mapping, translationKey)
      ? mapping[translationKey]
      : undefined;

    // Extract title and decode HTML entities (tolerating a missing titleHtml)
    const rawTitle = (post.titleHtml || '').replace(/<[^>]*>/g, '');

    return {
      id: post.id,
      translationKey,
      locale,
      slug: post.slug,
      path: `${pathPrefix}/blog/${post.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml || ''),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null,
    };
  };

  return [].concat(
    asList(postsEN).map((post) => toEntry(post, 'en', '', 'de')),
    asList(postsDE).map((post) => toEntry(post, 'de', '/de', 'en'))
  );
}
|
||||
|
||||
/**
 * Process products: build the processed product list, English entries first,
 * followed by the German ones.
 *
 * Product fields are copied through; `descriptionHtml` is sanitized, paths are
 * `/product/<slug>` and `/de/product/<slug>`, and a `translation` link is
 * added when `translationMapping.products` has an entry keyed by the slug.
 *
 * @param {Array<Object>} productsEN - Raw English products.
 * @param {Array<Object>} productsDE - Raw German products.
 * @param {Object} translationMapping - Mapping object; only `.products` is
 *   read. A missing mapping or section is treated as "no translations".
 * @returns {Array<Object>} Processed product records.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  // Never dereference an absent mapping section (the loader may fall back to
  // a non-object value when the mapping file is missing).
  const mapping = (translationMapping && translationMapping.products) || {};
  const asList = (value) => (Array.isArray(value) ? value : []);

  // Shared builder for both locales; `otherLocale` names the key holding the
  // counterpart id inside a mapping entry.
  const toEntry = (product, locale, pathPrefix, otherLocale) => {
    const translationKey = product.slug;
    const match = Object.prototype.hasOwnProperty.call(mapping, translationKey)
      ? mapping[translationKey]
      : undefined;

    return {
      id: product.id,
      translationKey,
      locale,
      slug: product.slug,
      path: `${pathPrefix}/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null,
    };
  };

  return [].concat(
    asList(productsEN).map((product) => toEntry(product, 'en', '', 'de')),
    asList(productsDE).map((product) => toEntry(product, 'de', '/de', 'en'))
  );
}
|
||||
|
||||
/**
 * Process product categories: build the processed category list, English
 * entries first, followed by the German ones.
 *
 * Paths are `/product-category/<slug>` and `/de/product-category/<slug>`; a
 * `translation` link is added when `translationMapping.productCategories` has
 * an entry keyed by the category slug.
 *
 * @param {Array<Object>} categoriesEN - Raw English categories.
 * @param {Array<Object>} categoriesDE - Raw German categories.
 * @param {Object} translationMapping - Mapping object; only
 *   `.productCategories` is read. A missing mapping or section is treated as
 *   "no translations".
 * @returns {Array<Object>} Processed category records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  // Never dereference an absent mapping section (the loader may fall back to
  // a non-object value when the mapping file is missing).
  const mapping = (translationMapping && translationMapping.productCategories) || {};
  const asList = (value) => (Array.isArray(value) ? value : []);

  // Shared builder for both locales; `otherLocale` names the key holding the
  // counterpart id inside a mapping entry.
  const toEntry = (category, locale, pathPrefix, otherLocale) => {
    const translationKey = category.slug;
    const match = Object.prototype.hasOwnProperty.call(mapping, translationKey)
      ? mapping[translationKey]
      : undefined;

    return {
      id: category.id,
      translationKey,
      locale,
      slug: category.slug,
      name: category.name,
      path: `${pathPrefix}/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null,
    };
  };

  return [].concat(
    asList(categoriesEN).map((category) => toEntry(category, 'en', '', 'de')),
    asList(categoriesDE).map((category) => toEntry(category, 'de', '/de', 'en'))
  );
}
|
||||
|
||||
/**
 * Process the media manifest into the shape used by the processed data set.
 *
 * Copies id/filename/url/alt/width/height, renames `mime_type` to `mimeType`
 * and derives `localPath` as `/media/<filename>`.
 *
 * @param {Array<Object>} media - Raw media items (non-array input yields []).
 * @returns {Array<Object>} Processed media records; `localPath` is null for
 *   items without a filename instead of the bogus '/media/undefined'.
 */
function processMedia(media) {
  if (!Array.isArray(media)) return [];

  return media.map((item) => ({
    id: item.id,
    filename: item.filename,
    url: item.url,
    localPath: item.filename ? `/media/${item.filename}` : null,
    alt: item.alt,
    width: item.width,
    height: item.height,
    mimeType: item.mime_type,
  }));
}
|
||||
|
||||
/**
 * Generate the asset map used for URL replacement: original media URL ->
 * local `/media/<filename>` path.
 *
 * Items lacking either a url or a filename are skipped, so the map never
 * points a URL at '/media/undefined'.
 *
 * @param {Array<Object>} media - Raw media items (non-array input yields {}).
 * @returns {Object<string, string>} URL-to-local-path map.
 */
function generateAssetMap(media) {
  const map = {};
  if (!Array.isArray(media)) return map;

  for (const item of media) {
    if (item.url && item.filename) {
      map[item.url] = `/media/${item.filename}`;
    }
  }
  return map;
}
|
||||
|
||||
/**
 * Main processing function.
 *
 * Loads the raw JSON files from the latest export directory, runs every
 * content processor, writes the combined `wordpress-data.json` plus one file
 * per content type into PROCESSED_DIR, and prints a summary.
 *
 * A file that cannot be read or parsed is reported and replaced by an empty
 * value of the expected kind (array for lists, object for the translation
 * mapping and site info), so one missing file does not abort the run.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load raw data; `fallback` is returned when the file is missing or invalid.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // The mapping is an object keyed by section; make sure every section the
  // processors index into exists even when the file is absent or partial.
  const translationMapping = Object.assign(
    { pages: {}, posts: {}, products: {}, productCategories: {} },
    loadJSON('translation-mapping.json', {})
  );
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Write one pretty-printed JSON file into PROCESSED_DIR and return its path,
  // so the same path is used for writing and for the summary below.
  const writeJSON = (name, data) => {
    const target = path.join(PROCESSED_DIR, name);
    fs.writeFileSync(target, JSON.stringify(data, null, 2));
    return target;
  };

  // Save processed data, then individual files for easier access
  const outputPath = writeJSON('wordpress-data.json', processedData);
  const individualFiles = [
    writeJSON('pages.json', pages),
    writeJSON('posts.json', posts),
    writeJSON('products.json', products),
    writeJSON('categories.json', categories),
    writeJSON('media.json', processedMedia),
    writeJSON('asset-map.json', assetMap),
  ];

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  individualFiles.forEach((file, index) => {
    // The last entry carries the blank line that closes the section.
    console.log(` ${file}${index === individualFiles.length - 1 ? '\n' : ''}`);
  });

  // Sample data
  if (pages.length > 0) {
    console.log('📄 Sample Page:');
    console.log(` Title: ${pages[0].title}`);
    console.log(` Path: ${pages[0].path}`);
    console.log(` Locale: ${pages[0].locale}`);
    console.log(` Translation: ${pages[0].translation ? 'Yes' : 'No'}\n`);
  }

  if (posts.length > 0) {
    console.log('📝 Sample Post:');
    console.log(` Title: ${posts[0].title}`);
    console.log(` Path: ${posts[0].path}`);
    console.log(` Locale: ${posts[0].locale}`);
    console.log(` Date: ${posts[0].datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}
|
||||
|
||||
// Entry point: process the data only when this script is run directly,
// not when it is loaded through require().
const invokedDirectly = require.main === module;
if (invokedDirectly) {
  main();
}
|
||||
@@ -1,132 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test script to verify HTML entity decoding works correctly

// Sample excerpt shaped like raw WordPress export data: shortcode attribute
// quotes arrive as numeric entities (&#8221;), and a closing quote that
// follows a digit arrives as a double prime (&#8243;).
const testExcerpt = (() => {
  const attr = ([name, value]) =>
    `${name}=&#8221;${value}${/\d$/.test(value) ? '&#8243;' : '&#8221;'}`;
  const shortcode = (tag, attrs) => `[${tag} ${attrs.map(attr).join(' ')}]`;

  const row = shortcode('vc_row', [
    ['type', 'in_container'],
    ['full_screen_row_position', 'middle'],
    ['column_margin', 'default'],
    ['column_direction', 'default'],
    ['column_direction_tablet', 'default'],
    ['column_direction_phone', 'default'],
    ['scene_position', 'center'],
    ['text_color', 'dark'],
    ['text_align', 'left'],
    ['row_border_radius', 'none'],
    ['row_border_radius_applies', 'bg'],
    ['overflow', 'visible'],
    ['overlay_strength', '0.3'],
    ['gradient_direction', 'left_to_right'],
    ['shape_divider_position', 'bottom'],
    ['bg_image_animation', 'none'],
  ]);
  const column = shortcode('vc_column', [
    ['column_padding', 'no-extra-padding'],
    ['column_padding_tablet', 'inherit'],
    ['column_padding_phone', 'inherit'],
    ['column_padding_position', 'all'],
    ['column_element_direction_desktop', 'default'],
    ['column_element_spacing', 'default'],
    ['desktop_text_alignment', 'default'],
    ['tablet_text_alignment', 'default'],
    ['phone_text_alignment', 'default'],
    ['background_color_opacity', '1'],
    ['background_hover_color_opacity', '1'],
    ['column_backdrop_filter', 'none'],
    ['column_shadow', 'none'],
    ['column_border_radius', 'none'],
    ['column_link_target', '_self'],
    ['column_position', 'default'],
    ['gradient_direction', 'left_to_right'],
    ['overlay_strength', '0.3'],
    ['width', '1/1'],
    ['tablet_width_inherit', 'default'],
    ['animation_type', 'default'],
    ['bg_image_animation', 'none'],
    ['border_type', 'simple'],
    ['column_border_width', 'none'],
    ['column_border_style', 'solid'],
  ]);
  const text = shortcode('vc_column_text', [
    ['css', ''],
    ['text_direction', 'default'],
  ]);

  return [
    '<p>', row, column, text,
    '\n<h1 class="p1">Liefer- und Zahlungsbedingungen</h1>',
    '\n<p class="p1">Stand November 2024</p>\n',
    '[/vc_column_text][/vc_column][/vc_row]</p>',
  ].join('');
})();

/**
 * Process excerpts specifically to handle shortcodes comprehensively.
 *
 * Decodes the typographic characters WordPress puts into raw excerpts - as
 * numeric entities, named entities or literal Unicode characters - and then
 * converts vc_row / vc_column / vc_column_text into <div>s, removes any other
 * shortcode, drops empty <p>/<div> and collapses whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML.
 * @returns {string} Processed excerpt; '' for empty, null or undefined input.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  const has = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  // Replacement per code point; shared by the numeric-entity pass and the
  // literal-character pass so the two cannot drift apart.
  const byCodePoint = {
    34: '"',        // Quotation mark
    39: "'",        // Apostrophe
    8211: '-',      // En dash
    8212: '\u2014', // Em dash (kept)
    8216: "'",      // Left single quote
    8217: "'",      // Right single quote
    8218: "'",      // Single low quote (opening quote in German text, not a comma)
    8219: '`',      // Single high reversed quote
    8220: '"',      // Left double quote
    8221: '"',      // Right double quote
    8222: '"',      // Low double quote (opening quote in German text, not a comma)
    8223: '"',      // High double quote
    8226: '\u2022', // Bullet (kept)
    8230: '\u2026', // Ellipsis (kept)
    8242: "'",      // Feet/Prime
    8243: '"',      // Inches/Double prime
    8364: '\u20AC', // Euro (kept)
  };

  // Named HTML entities
  const byName = {
    quot: '"',
    apos: "'",
    lsquo: "'",
    rsquo: "'",
    ldquo: '"',
    rdquo: '"',
    ndash: '-',
    mdash: '\u2014',
    hellip: '\u2026',
    bull: '\u2022',
    euro: '\u20AC',
  };

  // First, decode HTML entities to regular characters. Anything not listed
  // above is left untouched.
  let processed = excerptHtml
    .replace(/&#(\d+);/g, (match, dec) => {
      const key = String(Number(dec));
      return has(byCodePoint, key) ? byCodePoint[key] : match;
    })
    .replace(/&([a-zA-Z]+);/g, (match, name) => (has(byName, name) ? byName[name] : match))
    // Unicode characters (from rendered content)
    .replace(/[\u2013-\u2033]/g, (ch) => {
      const key = String(ch.charCodeAt(0));
      return has(byCodePoint, key) ? byCodePoint[key] : ch;
    });

  // Process WPBakery shortcodes. `(?![\w-])` pins the exact shortcode name so
  // `vc_column` does not also capture `vc_column_text`.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row(?![\w-])([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-row'];
      if (attrs.includes('full_width_background')) classes.push('full-width-bg');
      if (attrs.includes('in_container')) classes.push('in-container');
      if (attrs.includes('full_width_content')) classes.push('full-width-content');
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with a class for the declared width
    .replace(/\[vc_column(?![\w-])([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-column'];
      const knownWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];
      const width = /\bwidth=[^\w\s]?(\d+\/\d+)/i.exec(attrs);
      if (width && knownWidths.includes(width[1])) {
        classes.push(`col-${width[1].replace('/', '-')}`);
      }
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text(?![\w-])[^\]]*\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up any HTML that might be broken
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  processed = processed.replace(/\s+/g, ' ').trim();

  return processed;
}

console.log('=== HTML Entity Decoding Test ===\n');
console.log('Original excerpt:');
console.log(testExcerpt);
console.log('\n--- After processing ---\n');
const result = processExcerptShortcodes(testExcerpt);
console.log(result);

// Test specific entity decoding. Every case runs through
// processExcerptShortcodes itself, so the checks exercise the real function
// rather than a separate copy of its replace chain.
console.log('\n=== Specific Entity Tests ===');
const entityTests = [
  { input: '&#8221;', expected: '"', name: 'Right double quote' },
  { input: '&#8220;', expected: '"', name: 'Left double quote' },
  { input: '&#8211;', expected: '-', name: 'En dash' },
  { input: '&#8212;', expected: '\u2014', name: 'Em dash' },
  { input: '&#8216;', expected: "'", name: 'Left single quote' },
  { input: '&#8217;', expected: "'", name: 'Right single quote' },
  { input: '\u201D', expected: '"', name: 'Right double quote (literal character)' },
  { input: '&rdquo;', expected: '"', name: 'Right double quote (named entity)' },
  { input: 'type=&#8221;in_container&#8221;', expected: 'type="in_container"', name: 'Full attribute' }
];

entityTests.forEach(test => {
  const processed = processExcerptShortcodes(test.input);
  const passed = processed === test.expected;
  // Make a failing case visible to the shell, not only in the log output.
  if (!passed) process.exitCode = 1;
  console.log(`${test.name}: ${passed ? '✅' : '❌'} "${test.input}" → "${processed}" (expected: "${test.expected}")`);
});
|
||||
@@ -1,125 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test the final function with actual raw data
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Load the actual raw data. The path is relative to the current working
// directory, so the script has to be started from the directory that
// contains `data/`.
const rawData = JSON.parse(
  fs.readFileSync(path.join('data', 'raw', '2025-12-27T21-26-12-521Z', 'pages.en.json'), 'utf8')
);
// Fall back to '' so the preview below does not throw when the export holds
// no pages or the first page has no excerpt.
const testExcerpt = (Array.isArray(rawData) && rawData[0] && rawData[0].excerptHtml) || '';

console.log('=== Testing Final Function ===');
console.log('Raw excerpt (first 200 chars):');
console.log(testExcerpt.substring(0, 200));
console.log('');
|
||||
|
||||
// The function from process-data.js
|
||||
/**
 * Convert a raw WordPress excerpt that still contains WPBakery shortcodes
 * into plain HTML.
 *
 * Steps, in order:
 *  1. decode numeric (decimal and hex) and a fixed set of named HTML entities;
 *  2. normalise typographic quotes/dashes to ASCII so that shortcode
 *     attributes such as type=”in_container” become type="in_container";
 *  3. rewrite vc_row / vc_column / vc_column_text shortcodes to <div> markup;
 *  4. drop every remaining [shortcode], remove empty <p>/<div> pairs and
 *     collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML; any falsy value yields ''.
 * @returns {string} Processed HTML on a single line.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities, decoded to the same targets as their literal characters.
  const namedEntities = {
    quot: '"',
    apos: "'",
    lsquo: "'",
    rsquo: "'",
    ldquo: '"',
    rdquo: '"',
    ndash: '-',
    mdash: '—',
    hellip: '…',
    bull: '•',
    euro: '€'
  };

  // Typographic characters normalised to ASCII. Em dash, ellipsis, bullet and
  // euro are intentionally left untouched.
  // NOTE(review): the two low quotes („ and ‚) map to a comma, as in the
  // original table — confirm that a quote character was not intended.
  const typography = {
    '”': '"',
    '“': '"',
    '„': ',',
    '‟': '"',
    '‘': "'",
    '’': "'",
    '–': '-',
    '″': '"',
    '′': "'",
    '‚': ',',
    '‛': '`'
  };

  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF;
  // out-of-range references are left as written.
  const decodeCodePoint = (match, codePoint) =>
    (codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match);

  // Width fractions recognised in vc_column attributes, in class-emission order.
  const columnWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

  const rowDiv = (attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    return `<div class="${classes.join(' ')}">`;
  };

  const columnDiv = (attrs) => {
    const classes = ['vc-column'];
    for (const width of columnWidths) {
      if (attrs.includes(width)) classes.push(`col-${width.replace('/', '-')}`);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  let processed = excerptHtml
    .replace(/&#(\d+);/g, (match, dec) => decodeCodePoint(match, Number.parseInt(dec, 10)))
    .replace(/&#x([0-9a-f]+);/gi, (match, hex) => decodeCodePoint(match, Number.parseInt(hex, 16)))
    .replace(/&(quot|apos|lsquo|rsquo|ldquo|rdquo|ndash|mdash|hellip|bull|euro);/g,
      (match, name) => namedEntities[name])
    .replace(/[”“„‟‘’–″′‚‛]/g, (ch) => typography[ch]);

  // The \b after the tag name keeps [vc_row…] / [vc_column…] from also
  // matching [vc_row_inner…] / [vc_column_text…], which have their own rules.
  processed = processed
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) => rowDiv(attrs))
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) => columnDiv(attrs))
    .replace(/\[\/vc_column\]/gi, '</div>')
    .replace(/\[vc_column_text\b([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Drop elements the removals above left empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
const result = processExcerptShortcodes(testExcerpt);

console.log('After processing:');
console.log(result);
console.log('');

// Residual-markup checks on the processed excerpt.
// The em dash is not listed: processExcerptShortcodes keeps it on purpose, so
// flagging it would report a failure for correct output.
const hasEntities = /[”“‘’–]/.test(result);
const hasNumericEntities = /&#\d+;/.test(result);
const hasShortcodes = /\[vc_row|\[vc_column/.test(result);

// Match on the class prefix: converted rows/columns usually carry modifier
// classes (e.g. class="vc-row in-container"), which an exact `class="vc-row"`
// test would miss.
const hasRowHtml = result.includes('<div class="vc-row');
const hasColumnHtml = result.includes('<div class="vc-column');

console.log('=== Verification ===');
console.log('Has Unicode entities:', hasEntities);
console.log('Has numeric entities:', hasNumericEntities);
console.log('Has shortcodes:', hasShortcodes);
console.log('Has proper HTML:', hasRowHtml || hasColumnHtml);
console.log('');

if (!hasEntities && !hasNumericEntities && !hasShortcodes && hasRowHtml) {
  console.log('✅ SUCCESS: Function works correctly!');
} else {
  console.log('❌ Issues found');
}
|
||||
@@ -1,151 +0,0 @@
|
||||
/**
 * Convert a raw WordPress excerpt that still contains WPBakery / Nectar
 * shortcodes into plain HTML.
 *
 * Steps, in order:
 *  1. decode numeric (decimal and hex) and a fixed set of named HTML entities;
 *  2. normalise typographic quotes/dashes to ASCII so that shortcode
 *     attributes such as type=”in_container” become type="in_container";
 *  3. rewrite the known shortcodes to <div>/<a>/<span>/<ul>/<li>/<h2>/<hr>;
 *  4. drop every remaining [shortcode], remove empty <p>/<div> pairs and
 *     collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML; any falsy value yields ''.
 * @returns {string} Processed HTML on a single line.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities, decoded to the same targets as their literal characters.
  const namedEntities = {
    quot: '"',
    apos: "'",
    lsquo: "'",
    rsquo: "'",
    ldquo: '"',
    rdquo: '"',
    ndash: '-',
    mdash: '—',
    hellip: '…',
    bull: '•',
    euro: '€'
  };

  // Typographic characters normalised to ASCII. Em dash, ellipsis, bullet and
  // euro are intentionally left untouched.
  // NOTE(review): the two low quotes („ and ‚) map to a comma, as in the
  // original table — confirm that a quote character was not intended.
  const typography = {
    '”': '"',
    '“': '"',
    '„': ',',
    '‟': '"',
    '‘': "'",
    '’': "'",
    '–': '-',
    '″': '"',
    '′': "'",
    '‚': ',',
    '‛': '`'
  };

  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF;
  // out-of-range references are left as written.
  const decodeCodePoint = (match, codePoint) =>
    (codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match);

  // Width fractions recognised in vc_column attributes, in class-emission order.
  const columnWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

  const rowDiv = (attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    return `<div class="${classes.join(' ')}">`;
  };

  const columnDiv = (attrs) => {
    const classes = ['vc-column'];
    for (const width of columnWidths) {
      if (attrs.includes(width)) classes.push(`col-${width.replace('/', '-')}`);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  let processed = excerptHtml
    .replace(/&#(\d+);/g, (match, dec) => decodeCodePoint(match, Number.parseInt(dec, 10)))
    .replace(/&#x([0-9a-f]+);/gi, (match, hex) => decodeCodePoint(match, Number.parseInt(hex, 16)))
    .replace(/&(quot|apos|lsquo|rsquo|ldquo|rdquo|ndash|mdash|hellip|bull|euro);/g,
      (match, name) => namedEntities[name])
    .replace(/[”“„‟‘’–″′‚‛]/g, (ch) => typography[ch]);

  // Two conventions used by the patterns below:
  //  - \b after a tag name stops a short name (vc_row, vc_column,
  //    nectar_icon_list) from also matching its longer variants (_inner,
  //    _text, _item), which have dedicated rules;
  //  - attribute gaps use [^\]]* rather than .* so that a match can never run
  //    past the closing bracket into a following shortcode.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) => rowDiv(attrs))
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with classes
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) => columnDiv(attrs))
    // A dangling "[vc_column…" with no closing bracket (excerpt truncated
    // mid-shortcode) is still turned into a column div.
    .replace(/\[vc_column([^\]]*)$/gi, (match, attrs) => columnDiv(attrs))
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text\b([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // nectar_cta - convert to link (link_text must precede url)
    .replace(/\[nectar_cta\b([^\]]*)link_text="([^"]*)"([^\]]*?)url="([^"]*)"([^\]]*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')

    // nectar_highlighted_text - convert to span (content may span lines)
    .replace(/\[nectar_highlighted_text\b([^\]]*)\]([\s\S]*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')

    // nectar_responsive_text - convert to span (content may span lines)
    .replace(/\[nectar_responsive_text\b([^\]]*)\]([\s\S]*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')

    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')

    // nectar_icon_list_item - convert to li (header must precede text)
    .replace(/\[nectar_icon_list_item\b([^\]]*)header="([^"]*)"([^\]]*?)text="([^"]*)"([^\]]*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')

    // nectar_btn - convert to link (text must precede url)
    .replace(/\[nectar_btn\b([^\]]*)text="([^"]*)"([^\]]*?)url="([^"]*)"([^\]]*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading\b([^\]]*)text_content="([^"]*)"([^\]]*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')

    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner\b([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')

    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner\b([^\]]*)\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider\b([^\]]*)\]/gi, '<hr class="divider" />')

    // NOTE(review): the three bracketed placeholders below are themselves
    // stripped by the catch-all shortcode removal that follows, and the then
    // empty <div> is removed as well, so these shortcodes effectively vanish.
    // Kept as-is; confirm whether a visible placeholder was intended.
    .replace(/\[vc_gallery\b([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\]([\s\S]*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap\b([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Drop elements the removals above left empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Extract excerpt from content
|
||||
|
||||
|
||||
module.exports = processExcerptShortcodes;
|
||||
@@ -1,68 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test numeric entity decoding
|
||||
|
||||
const testString = 'type=”in_container”';

console.log('Original:', testString);

// Replace every occurrence of each [from, to] pair, applied in list order.
const substituteAll = (text, pairs) =>
  pairs.reduce((current, [from, to]) => current.split(from).join(to), text);

// Method 1: Manual replacement
const method1 = substituteAll(testString, [
  ['”', '"'],
  ['“', '"'],
  ['‘', "'"],
  ['’', "'"],
  ['–', '-'],
  ['—', '—']
]);

console.log('Method 1 (Unicode chars):', method1);

// Method 2: Numeric entity decoding (same pairs plus ellipsis and primes)
const method2 = substituteAll(testString, [
  ['”', '"'],
  ['“', '"'],
  ['‘', "'"],
  ['’', "'"],
  ['–', '-'],
  ['—', '—'],
  ['…', '…'],
  ['″', '"'],
  ['′', "'"]
]);

console.log('Method 2 (Numeric entities):', method2);
|
||||
|
||||
// Method 3: Using a function to decode all numeric entities
|
||||
/**
 * Decode decimal numeric character references (e.g. "&#8221;") in a string.
 *
 * Uses String.fromCodePoint so references above U+FFFF (emoji and other
 * astral characters) decode correctly; String.fromCharCode would truncate
 * them to 16 bits. References beyond U+10FFFF are left as written.
 *
 * @param {string} str - Text that may contain &#NNN; references.
 * @returns {string} Text with the references replaced by their characters.
 */
function decodeHTMLEntities(str) {
  return str.replace(/&#(\d+);/g, (match, dec) => {
    const codePoint = Number.parseInt(dec, 10);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}
|
||||
|
||||
// Method 3 applied to the sample attribute string.
const method3 = decodeHTMLEntities(testString);

console.log('Method 3 (All numeric):', method3);
|
||||
|
||||
// Method 4: Combined approach
|
||||
/**
 * Decode decimal numeric character references, then normalise typographic
 * quotes, primes and the en dash to their ASCII equivalents.
 *
 * Uses String.fromCodePoint so references above U+FFFF decode correctly;
 * references beyond U+10FFFF are left as written. Em dash and ellipsis are
 * kept unchanged.
 *
 * @param {string} str - Text that may contain &#NNN; references or smart quotes.
 * @returns {string} Decoded, ASCII-normalised text.
 */
function comprehensiveEntityDecode(str) {
  const asciiFor = {
    '”': '"',
    '“': '"',
    '‘': "'",
    '’': "'",
    '–': '-',
    '″': '"',
    '′': "'"
  };

  return str
    // First decode numeric entities
    .replace(/&#(\d+);/g, (match, dec) => {
      const codePoint = Number.parseInt(dec, 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    })
    // Then normalise the typographic characters that decoding produced (or
    // that were already present)
    .replace(/[”“‘’–″′]/g, (ch) => asciiFor[ch]);
}
|
||||
|
||||
// Method 4 applied to the sample attribute string.
const method4 = comprehensiveEntityDecode(testString);

console.log('Method 4 (Combined):', method4);

// Run the combined decoder over the opening of a real excerpt.
const actualExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default”]';

console.log('\n=== Real Test ===');
console.log('Original:', actualExcerpt);

const decodedExcerpt = comprehensiveEntityDecode(actualExcerpt);
console.log('Decoded:', decodedExcerpt);
|
||||
@@ -1,68 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to update asset-map.json with new media entries
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Configuration
|
||||
// Export snapshot that media.json is read from.
const RAW_DATA_DIR = path.join(
  __dirname,
  '..',
  'data',
  'raw',
  '2025-12-30T15-21-49-331Z'
);

// Directory holding asset-map.json.
const PROCESSED_DATA_DIR = path.join(
  __dirname,
  '..',
  'data',
  'processed'
);

// Media ids whose url → local-path mapping should be added to the asset map.
const NEW_MEDIA_IDS = [
  10432,
  10440,
  10382,
  10616,
  10615,
  45569,
  10638
];
|
||||
|
||||
/**
 * Add the media entries listed in NEW_MEDIA_IDS to asset-map.json.
 *
 * Reads media.json from RAW_DATA_DIR, loads asset-map.json from
 * PROCESSED_DATA_DIR if it exists (otherwise starts from an empty map), maps
 * each entry's `url` to `/media/<filename>`, and writes the map back.
 * Ids that are not in media.json, or whose entry lacks `url` or `filename`,
 * are reported with a warning and skipped instead of being written as
 * "undefined" keys/paths.
 *
 * @returns {Object<string, string>} The updated url → local path map.
 * @throws {Error} If media.json cannot be read/parsed or is not an array.
 */
function updateAssetMap() {
  console.log('🔄 Updating asset-map.json with new media entries');

  // Load current media.json
  const mediaJsonPath = path.join(RAW_DATA_DIR, 'media.json');
  const mediaData = JSON.parse(fs.readFileSync(mediaJsonPath, 'utf8'));
  if (!Array.isArray(mediaData)) {
    throw new Error(`Expected an array of media entries in ${mediaJsonPath}`);
  }

  // Index by id once; the first entry wins on duplicate ids, matching what a
  // linear find() would return.
  const mediaById = new Map();
  for (const entry of mediaData) {
    if (!mediaById.has(entry.id)) mediaById.set(entry.id, entry);
  }

  // Load current asset-map.json
  const assetMapPath = path.join(PROCESSED_DATA_DIR, 'asset-map.json');
  const assetMap = fs.existsSync(assetMapPath)
    ? JSON.parse(fs.readFileSync(assetMapPath, 'utf8'))
    : {};

  // Add new entries
  let addedCount = 0;
  for (const id of NEW_MEDIA_IDS) {
    const mediaEntry = mediaById.get(id);

    if (!mediaEntry) {
      console.warn(`⚠️ Media ID ${id} not found in media.json`);
      continue;
    }
    if (!mediaEntry.url || !mediaEntry.filename) {
      console.warn(`⚠️ Media ID ${id} has no url or filename in media.json; skipped`);
      continue;
    }

    const localPath = `/media/${mediaEntry.filename}`;
    assetMap[mediaEntry.url] = localPath;
    console.log(`✅ Added: ${id} → ${localPath}`);
    addedCount++;
  }

  // Save updated asset-map.json
  fs.writeFileSync(assetMapPath, JSON.stringify(assetMap, null, 2));

  console.log(`\n🎉 Asset map updated! Added ${addedCount} new entries`);
  console.log(`Total entries in asset-map.json: ${Object.keys(assetMap).length}`);

  return assetMap;
}
|
||||
|
||||
// Run if called directly
|
||||
// When executed as a script (not required as a module), run the update and
// turn any failure into a one-line message plus a non-zero exit status.
const invokedDirectly = require.main === module;

if (invokedDirectly) {
  try {
    updateAssetMap();
  } catch (err) {
    console.error('❌ Failed to update asset map:', err.message);
    process.exit(1);
  }
}
|
||||
|
||||
module.exports = { updateAssetMap };
|
||||
@@ -1,145 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// One-off codemod: rewrites process-data-with-bg-images.js (next to this
// script) so that processPages/processPosts/main are async and await
// processVideoAttributes for every page and post.
//
// NOTE(review): the search and replacement strings below are reproduced as
// they appear in this view, where leading indentation is not recoverable —
// confirm the whitespace against the target file before running.
const filePath = path.join(__dirname, 'process-data-with-bg-images.js');
let content = fs.readFileSync(filePath, 'utf8');

// Labels of steps whose search pattern was not found in the file.
const missedSteps = [];

/**
 * Apply one edit to `content`, replacing the first match of `search`.
 * The replacement is passed through a function so it is inserted literally
 * (no `$&`-style substitution). A step that matches nothing is recorded in
 * `missedSteps` instead of being skipped silently.
 *
 * @param {string} label - Human-readable step name used in the final report.
 * @param {string|RegExp} search - Text or pattern to locate.
 * @param {string} replacement - Text to insert in place of the match.
 */
function applyStep(label, search, replacement) {
  const updated = content.replace(search, () => replacement);
  if (updated === content) {
    missedSteps.push(label);
  }
  content = updated;
}

// Replacement snippet that adds video processing ahead of the excerpt
// handling for the given loop variable ("page" or "post").
const videoProcessingSnippet = (itemName) => [
  'contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);',
  '',
  '// Process video attributes and download videos',
  'const videoResult = await processVideoAttributes(contentHtml);',
  'contentHtml = videoResult.html;',
  '',
  `let excerptHtml = decodeContent(${itemName}.excerptHtml);`,
  'excerptHtml = replaceBgImageIds(excerptHtml, mediaMapping);',
  'excerptHtml = replaceUrlsWithLocalPaths(excerptHtml, assetMap);',
  '',
  'processed.push({'
].join('\n');

applyStep(
  '1. processPages -> async',
  'function processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap) {',
  'async function processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap) {'
);

applyStep(
  '2. processPosts -> async',
  'function processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap) {',
  'async function processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap) {'
);

applyStep('3. main -> async', 'function main() {', 'async function main() {');

applyStep(
  '4. main() call',
  'if (require.main === module) {\n main();\n}',
  'if (require.main === module) {\n main().catch(console.error);\n}'
);

// forEach callbacks cannot be awaited, so the loops become for...of.
applyStep('5. processPages English loop', 'pagesEN.forEach(page => {', 'for (const page of pagesEN) {');
applyStep('6. processPages German loop', 'pagesDE.forEach(page => {', 'for (const page of pagesDE) {');

applyStep(
  '7. video processing in processPages English',
  'contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);\n \n let excerptHtml = decodeContent(page.excerptHtml);',
  'contentHtml = replaceUrlsWithLocalPaths(contentHtml, assetMap);\n \n // Process video attributes and download videos\n const videoResult = await processVideoAttributes(contentHtml);\n contentHtml = videoResult.html;\n \n let excerptHtml = decodeContent(page.excerptHtml);'
);

const germanPattern = /contentHtml = replaceUrlsWithLocalPaths\(contentHtml, assetMap\);\n \n let excerptHtml = decodeContent\(page\.excerptHtml\);\n excerptHtml = replaceBgImageIds\(excerptHtml, mediaMapping\);\n excerptHtml = replaceUrlsWithLocalPaths\(excerptHtml, assetMap\);\n \n processed\.push\(\{/;
applyStep('8. video processing in processPages German', germanPattern, videoProcessingSnippet('page'));

applyStep('9. processPosts English loop', 'postsEN.forEach(post => {', 'for (const post of postsEN) {');
applyStep('10. processPosts German loop', 'postsDE.forEach(post => {', 'for (const post of postsDE) {');

const postsEnglishPattern = /contentHtml = replaceUrlsWithLocalPaths\(contentHtml, assetMap\);\n \n let excerptHtml = decodeContent\(post\.excerptHtml\);\n excerptHtml = replaceBgImageIds\(excerptHtml, mediaMapping\);\n excerptHtml = replaceUrlsWithLocalPaths\(excerptHtml, assetMap\);\n \n processed\.push\(\{/;
applyStep('11. video processing in processPosts English', postsEnglishPattern, videoProcessingSnippet('post'));

// The German posts edit replaces everything through the end of processPosts,
// so the replacement restates the pushed object and the function's tail.
const postsGermanPattern = /contentHtml = replaceUrlsWithLocalPaths\(contentHtml, assetMap\);\n \n let excerptHtml = decodeContent\(post\.excerptHtml\);\n excerptHtml = replaceBgImageIds\(excerptHtml, mediaMapping\);\n excerptHtml = replaceUrlsWithLocalPaths\(excerptHtml, assetMap\);\n \n processed\.push\(\{[\s\S]*?translation: enMatch \? \{ locale: 'en', id: enMatch\.en \} : null\n \}\);\n \}\n \n return processed;\n\}/;
applyStep(
  '12. video processing in processPosts German',
  postsGermanPattern,
  [
    videoProcessingSnippet('post'),
    'id: post.id,',
    'translationKey: translationKey,',
    "locale: 'de',",
    'slug: post.slug,',
    'path: `/de/blog/${post.slug}`,',
    "title: post.titleHtml.replace(/<[^>]*>/g, ''),",
    'titleHtml: post.titleHtml,',
    'contentHtml: sanitizeHTML(contentHtml),',
    'excerptHtml: processExcerptShortcodes(excerptHtml) || generateExcerpt(contentHtml),',
    'featuredImage: post.featuredImage,',
    'datePublished: post.datePublished,',
    'updatedAt: post.updatedAt,',
    "translation: enMatch ? { locale: 'en', id: enMatch.en } : null",
    '});',
    '}',
    '',
    'return processed;',
    '}'
  ].join('\n')
);

applyStep(
  '13. await processPages/processPosts in main',
  'const pages = processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap);\n const posts = processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap);',
  'const pages = await processPages(pagesEN, pagesDE, translationMapping, mediaMapping, assetMap);\n const posts = await processPosts(postsEN, postsDE, translationMapping, mediaMapping, assetMap);'
);

applyStep(
  '14. module.exports',
  'module.exports = {\n processPages,\n processPosts,\n processProducts,\n processProductCategories,\n processMedia,\n generateAssetMap,\n replaceBgImageIds,\n replaceUrlsWithLocalPaths\n};',
  'module.exports = {\n processPages,\n processPosts,\n processProducts,\n processProductCategories,\n processMedia,\n generateAssetMap,\n replaceBgImageIds,\n replaceUrlsWithLocalPaths,\n processVideoAttributes\n};'
);

fs.writeFileSync(filePath, content);

// Surface steps that matched nothing (pattern drift, or the file was already
// converted) so a partial conversion is not reported as a clean success.
if (missedSteps.length > 0) {
  console.warn(`⚠️ ${missedSteps.length} step(s) found no match and changed nothing:`);
  missedSteps.forEach((label) => console.warn(`  - ${label}`));
}
console.log('✅ Updated process-data-with-bg-images.js to be async');
|
||||
@@ -1,88 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Load the processed data
const processedDir = path.join(__dirname, '..', 'data', 'processed');
const pages = JSON.parse(fs.readFileSync(path.join(processedDir, 'pages.json'), 'utf8'));
const posts = JSON.parse(fs.readFileSync(path.join(processedDir, 'posts.json'), 'utf8'));

// Excerpt of an item as a string; an item without an excerpt counts as empty
// instead of crashing the report.
const excerptOf = (item) => item.excerptHtml || '';

/**
 * Print title, path and an excerpt preview for one item, followed by whether
 * its excerpt still contains smart quotes, en dashes or numeric entities.
 * The em dash is not flagged because the excerpt processor keeps it on purpose.
 *
 * @param {string} label - "Page" or "Post", used in the heading line.
 * @param {{title: string, path: string, excerptHtml?: string}} item
 */
function reportEntityStatus(label, item) {
  const excerpt = excerptOf(item);

  console.log(`\n${label}: ${item.title}`);
  console.log(`Path: ${item.path}`);
  console.log(`Excerpt preview: ${excerpt.substring(0, 150)}...`);

  const hasEntities = /[”“‘’–]/.test(excerpt);
  const hasNumericEntities = /&#\d+;/.test(excerpt);

  if (hasEntities || hasNumericEntities) {
    console.log('❌ Still contains HTML entities!');
    if (hasEntities) console.log(' - Found smart quotes/dashes');
    if (hasNumericEntities) console.log(' - Found numeric entities');
  } else {
    console.log('✅ Clean - no HTML entities found');
  }
}

console.log('=== Verification of HTML Entity Decoding ===\n');

// Check pages
console.log('📄 PAGES:');
pages.slice(0, 3).forEach((page) => reportEntityStatus('Page', page));

// Check posts
console.log('\n📝 POSTS:');
posts.slice(0, 3).forEach((post) => reportEntityStatus('Post', post));

// Check for shortcode patterns
console.log('\n🔍 SHORTCODE CHECK:');
const allPages = [...pages, ...posts];
const shortcodesFound = allPages.filter((item) => /\[vc_row|\[vc_column|\[nectar/.test(excerptOf(item)));
console.log(`Pages/posts with shortcodes in excerpt: ${shortcodesFound.length}`);

if (shortcodesFound.length > 0) {
  console.log('\nSample of items with shortcodes:');
  shortcodesFound.slice(0, 2).forEach((item) => {
    console.log(`- ${item.title}: ${excerptOf(item).substring(0, 100)}...`);
  });
} else {
  console.log('✅ No shortcodes found in excerpts');
}

// Check for proper HTML structure.
// Match on the class prefix: converted rows/columns usually carry modifier
// classes (e.g. class="vc-row in-container"), which an exact `class="vc-row"`
// test would miss.
console.log('\n📊 HTML STRUCTURE CHECK:');
const convertedMarkers = ['<div class="vc-row', '<div class="vc-column', '<div class="nectar'];
const withProperHTML = allPages.filter((item) => {
  const excerpt = excerptOf(item);
  return convertedMarkers.some((marker) => excerpt.includes(marker));
});
console.log(`Items with converted shortcode HTML: ${withProperHTML.length}`);

console.log('\n=== Summary ===');
console.log(`Total items checked: ${allPages.length}`);
console.log(`Items with proper HTML structure: ${withProperHTML.length}`);
console.log(`Items with remaining shortcodes: ${shortcodesFound.length}`);

// Sample the actual content to show it works
console.log('\n=== SAMPLE PROCESSED EXCERPTS ===');
const sample = pages.find((p) => excerptOf(p).includes('vc-row'));
if (sample) {
  console.log(`\nTitle: ${sample.title}`);
  console.log(`Excerpt: ${sample.excerptHtml}`);
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,996 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress to Next.js Data Export Script
|
||||
* Gathers all required data from WordPress/WooCommerce for static site generation
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const https = require('https');
|
||||
|
||||
// Load environment variables
|
||||
require('dotenv').config();
|
||||
|
||||
// Connection settings, taken from the environment.
const {
  WOOCOMMERCE_URL: BASE_URL,
  WOOCOMMERCE_CONSUMER_KEY: CONSUMER_KEY,
  WOOCOMMERCE_CONSUMER_SECRET: CONSUMER_SECRET,
  WORDPRESS_APP_PASSWORD: APP_PASSWORD
} = process.env;

// Abort early when any of the three WooCommerce settings is absent or empty.
if ([BASE_URL, CONSUMER_KEY, CONSUMER_SECRET].some((value) => !value)) {
  [
    '❌ Missing required environment variables',
    'Please check .env file for:',
    ' - WOOCOMMERCE_URL',
    ' - WOOCOMMERCE_CONSUMER_KEY',
    ' - WOOCOMMERCE_CONSUMER_SECRET'
  ].forEach((line) => console.error(line));
  process.exit(1);
}

// Output locations. The raw export directory is named after the current time,
// with ':' and '.' replaced by '-'.
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PUBLIC_DIR = path.join(__dirname, '..', 'public');

// Make sure the directories that are written to exist.
for (const dir of [OUTPUT_DIR, MEDIA_DIR]) {
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
}
|
||||
|
||||
// API Helper Functions
|
||||
/**
 * Build the HTTP Basic Authorization header value for the WooCommerce API
 * from CONSUMER_KEY and CONSUMER_SECRET.
 *
 * @returns {string} "Basic " followed by base64("key:secret").
 */
function buildAuthHeader() {
  const keyAndSecret = [CONSUMER_KEY, CONSUMER_SECRET].join(':');
  const encoded = Buffer.from(keyAndSecret).toString('base64');
  return 'Basic ' + encoded;
}
|
||||
|
||||
/**
 * Build the request headers for the WordPress REST API: HTTP Basic auth for
 * the user "admin" with APP_PASSWORD, plus a JSON content type.
 *
 * @returns {{Authorization: string, 'Content-Type': string}} Header map.
 */
function buildWordPressAuth() {
  const token = Buffer.from(`admin:${APP_PASSWORD}`).toString('base64');
  const headers = {};
  headers['Authorization'] = `Basic ${token}`;
  headers['Content-Type'] = 'application/json';
  return headers;
}
|
||||
|
||||
/**
 * Perform an HTTPS GET and resolve with the response body.
 *
 * The body is collected as raw chunks and decoded as UTF-8 once complete, so
 * a multi-byte character split across two chunks is not corrupted (appending
 * each Buffer to a string decodes the chunks independently).
 *
 * @param {string} url - Absolute https:// URL.
 * @param {Object<string, string>} [headers={}] - Extra request headers; they
 *   override the default User-Agent when they set the same key.
 * @returns {Promise<*>} Parsed JSON for a 2xx response, or the raw text when
 *   the body is not valid JSON. Rejects with `HTTP <status>: <body>` for any
 *   other status, and with the underlying error on connection or stream
 *   failure.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-NextJS-Migration/1.0',
        ...headers
      }
    };

    https.get(url, options, (res) => {
      const chunks = [];

      res.on('data', (chunk) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      });

      res.on('error', reject);

      res.on('end', () => {
        const data = Buffer.concat(chunks).toString('utf8');

        if (res.statusCode < 200 || res.statusCode >= 300) {
          reject(new Error(`HTTP ${res.statusCode}: ${data}`));
          return;
        }

        try {
          resolve(JSON.parse(data));
        } catch (e) {
          // Not JSON: hand back the raw text, as some endpoints return plain bodies.
          resolve(data);
        }
      });
    }).on('error', reject);
  });
}
|
||||
|
||||
/**
 * Fetch every item of a WordPress REST collection, 100 per request.
 *
 * Requests page 1, 2, … of `${BASE_URL}/wp-json/wp/v2/<endpoint>` until a
 * page comes back empty, non-array, or shorter than a full page. If a request
 * fails, the error is logged and whatever was collected so far is returned.
 *
 * @param {string} endpoint - Collection name appended to /wp-json/wp/v2/.
 * @param {Object<string, string>} [params={}] - Extra query parameters.
 * @param {?string} [locale=null] - When set, sent as the `lang` parameter.
 * @returns {Promise<Array>} All items gathered across pages.
 */
async function fetchWithPagination(endpoint, params = {}, locale = null) {
  const PAGE_SIZE = 100;
  const collected = [];
  const localeQuery = locale ? { lang: locale } : {};
  const localeLabel = locale ? ` (${locale})` : '';

  for (let pageNumber = 1; ; pageNumber += 1) {
    const query = new URLSearchParams({
      ...params,
      page: String(pageNumber),
      per_page: String(PAGE_SIZE),
      ...localeQuery
    });
    const requestUrl = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${query.toString()}`;

    console.log(`📥 Fetching ${endpoint} page ${pageNumber}${localeLabel}...`);

    let batch;
    try {
      batch = await makeRequest(requestUrl, buildWordPressAuth());
    } catch (error) {
      console.error(`❌ Error fetching ${endpoint} page ${pageNumber}:`, error.message);
      break;
    }

    const gotItems = Array.isArray(batch) && batch.length > 0;
    if (!gotItems) {
      break;
    }

    collected.push(...batch);

    // A short page means there is nothing after it.
    if (batch.length < PAGE_SIZE) {
      break;
    }
  }

  return collected;
}
|
||||
|
||||
/**
 * Fetch a WooCommerce REST collection, following pagination.
 *
 * Requests `${BASE_URL}/wp-json/wc/v3/<endpoint>` 100 records at a time until
 * a page comes back shorter than 100, so collections larger than one page are
 * no longer cut off. If the caller supplies its own `page` in `params`, only
 * that single page is requested.
 *
 * @param {string} endpoint - Resource path appended to /wp-json/wc/v3/.
 * @param {Object<string, string>} [params={}] - Extra query parameters
 *   (`per_page` is always forced to 100).
 * @param {?string} [locale=null] - When set, sent as the `lang` parameter.
 * @returns {Promise<Array>} The collected records. A non-array response to
 *   the first request is wrapped in a one-element array. When a request fails
 *   the error is logged and the records gathered so far are returned (an
 *   empty array if the first request failed).
 */
async function fetchWooCommerce(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const callerChosePage = params.page !== undefined;
  const collected = [];

  console.log(`📥 Fetching WooCommerce ${endpoint}${locale ? ` (${locale})` : ''}...`);

  try {
    for (let page = 1; ; page++) {
      const queryString = new URLSearchParams({
        ...params,
        ...(callerChosePage ? {} : { page: String(page) }),
        per_page: String(perPage),
        ...(locale ? { lang: locale } : {})
      }).toString();

      const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`;

      const response = await makeRequest(url, {
        'Authorization': buildAuthHeader(),
        'Content-Type': 'application/json'
      });

      if (!Array.isArray(response)) {
        // Single-object (or plain-text) endpoints: keep the wrap-in-array contract.
        if (page === 1) collected.push(response);
        break;
      }

      collected.push(...response);

      if (callerChosePage || response.length < perPage) {
        break;
      }
    }
  } catch (error) {
    console.error(`❌ Error fetching WooCommerce ${endpoint}:`, error.message);
  }

  return collected;
}
|
||||
|
||||
/**
 * Fetch one media record from the WordPress REST API.
 *
 * @param {number|string} mediaId - Id appended to /wp-json/wp/v2/media/.
 * @returns {Promise<*>} The response from makeRequest, or null when the
 *   request fails (the error message is logged).
 */
async function fetchMedia(mediaId) {
  const mediaEndpoint = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;

  let record = null;
  try {
    record = await makeRequest(mediaEndpoint, buildWordPressAuth());
  } catch (err) {
    console.error(`❌ Error fetching media ${mediaId}:`, err.message);
    return null;
  }
  return record;
}
|
||||
|
||||
/**
 * Download a media file into MEDIA_DIR unless it is already there.
 *
 * The destination file is only created once the server has answered 200, so
 * a failed request no longer leaves an empty file behind that a later run
 * would mistake for a finished download. A file that fails part-way through
 * is removed before the promise rejects.
 *
 * @param {string} url - Absolute https:// URL of the file.
 * @param {string} filename - File name to store under MEDIA_DIR.
 * @returns {Promise<string>} Absolute path of the (existing or new) file.
 *   Rejects with `Failed to download: <status>` for a non-200 response, or
 *   with the underlying error on connection or stream failure.
 */
async function downloadMedia(url, filename) {
  const filePath = path.join(MEDIA_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Media already downloaded: ${filename}`);
    return filePath;
  }

  return new Promise((resolve, reject) => {
    // Remove a partially written file, then report the failure.
    const discardAndReject = (err) => {
      fs.unlink(filePath, () => reject(err));
    };

    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // discard the body so the connection can be released
        reject(new Error(`Failed to download: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(filePath);

      file.on('error', discardAndReject);
      res.on('error', (err) => {
        file.destroy();
        discardAndReject(err);
      });
      file.on('finish', () => {
        console.log(`✅ Downloaded: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    }).on('error', reject);
  });
}
|
||||
|
||||
/**
 * Download a favicon-type asset over HTTPS into PUBLIC_DIR.
 *
 * An existing file with the same name is reused without a request. The
 * target file is only created once a 200 response has arrived, so a failed
 * request no longer leaves an empty file behind that a later call would
 * report as an existing favicon. A partially written file is removed when
 * the response or the write stream fails.
 *
 * @param {string} url - Source URL; fetched with `https.get`.
 * @param {string} filename - File name inside PUBLIC_DIR.
 * @returns {Promise<string>} Path of the local file.
 * @throws {Error} Rejects on a non-200 status or on a network/stream error.
 */
async function downloadFavicon(url, filename) {
  const filePath = path.join(PUBLIC_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Favicon already exists: ${filename}`);
    return filePath;
  }

  return new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the body so the socket is released.
        res.resume();
        reject(new Error(`Failed to download favicon: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(filePath);

      // Drop the partial file before reporting the failure.
      const abort = (err) => {
        file.destroy();
        fs.unlink(filePath, () => reject(err));
      };

      res.on('error', abort);
      file.on('error', abort);
      file.on('finish', () => {
        console.log(`✅ Downloaded favicon: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    });

    request.on('error', reject);
  });
}
|
||||
|
||||
// Data Processing Functions
|
||||
/**
 * Pick the featured-image reference of a WordPress item.
 *
 * @param {Object} item - Item with an optional `featured_media` value and an
 *   optional `_embedded['wp:featuredmedia']` list.
 * @returns {*} `item.featured_media` when truthy, otherwise the first
 *   embedded featured-media entry, otherwise `null`. An empty or non-array
 *   embed now yields `null` instead of `undefined`.
 */
function extractFeaturedImage(item) {
  if (item.featured_media) {
    return item.featured_media;
  }

  const embedded = item._embedded?.['wp:featuredmedia'];
  if (Array.isArray(embedded)) {
    return embedded[0] ?? null;
  }

  return null;
}
|
||||
|
||||
/**
 * Reduce a WordPress page record to the flat shape written to the export.
 *
 * @param {Object} page - Page record; `title`, `content` and `excerpt` are
 *   read through their `rendered` property.
 * @param {string} locale - Locale code; `'en'` paths carry no locale prefix.
 * @returns {Object} Normalised page entry.
 */
function processPage(page, locale) {
  const { id, slug } = page;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;
  const rendered = (field) => field?.rendered || '';

  return {
    id,
    translationKey: `page-${slug}`, // Will be refined with Polylang data
    locale,
    slug,
    path: `${localePrefix}/${slug}`,
    titleHtml: rendered(page.title),
    contentHtml: rendered(page.content),
    excerptHtml: rendered(page.excerpt),
    featuredImage: page.featured_media || null,
    updatedAt: page.modified || page.date
  };
}
|
||||
|
||||
/**
 * Reduce a WordPress post record to the flat shape written to the export.
 *
 * @param {Object} post - Post record; `title`, `content` and `excerpt` are
 *   read through their `rendered` property.
 * @param {string} locale - Locale code; `'en'` paths carry no locale prefix.
 * @returns {Object} Normalised post entry whose path lives under `/blog/`.
 */
function processPost(post, locale) {
  const { id, slug } = post;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;
  const rendered = (field) => field?.rendered || '';

  return {
    id,
    translationKey: `post-${slug}`,
    locale,
    slug,
    path: `${localePrefix}/blog/${slug}`,
    titleHtml: rendered(post.title),
    contentHtml: rendered(post.content),
    excerptHtml: rendered(post.excerpt),
    featuredImage: post.featured_media || null,
    datePublished: post.date,
    updatedAt: post.modified || post.date
  };
}
|
||||
|
||||
/**
 * Reduce a WooCommerce product record to the flat shape written to the export.
 *
 * @param {Object} product - Product record. `images[].src` becomes the image
 *   list (first entry doubles as featured image); `categories` are trimmed
 *   to `id`, `name` and `slug`.
 * @param {string} locale - Locale code; `'en'` paths carry no locale prefix.
 * @returns {Object} Normalised product entry; `currency` falls back to `'EUR'`.
 */
function processProduct(product, locale) {
  const { id, slug } = product;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;

  const imageUrls = product.images ? product.images.map((image) => image.src) : [];
  const primaryImage = imageUrls.length > 0 ? imageUrls[0] : null;

  const categories = product.categories
    ? product.categories.map(({ id: categoryId, name, slug: categorySlug }) => ({
      id: categoryId,
      name,
      slug: categorySlug
    }))
    : [];

  return {
    id,
    translationKey: `product-${slug}`,
    locale,
    slug,
    path: `${localePrefix}/product/${slug}`,
    name: product.name,
    shortDescriptionHtml: product.short_description || '',
    descriptionHtml: product.description || '',
    images: imageUrls,
    featuredImage: primaryImage,
    sku: product.sku,
    regularPrice: product.regular_price,
    salePrice: product.sale_price,
    currency: product.currency || 'EUR',
    stockStatus: product.stock_status,
    categories,
    attributes: product.attributes || [],
    variations: product.variations || [],
    updatedAt: product.date_modified
  };
}
|
||||
|
||||
/**
 * Reduce a WooCommerce product-category record to the exported shape.
 *
 * @param {Object} category - Category record (`id`, `slug`, `name`,
 *   optional `description` and `count`).
 * @param {string} locale - Locale code; `'en'` paths carry no locale prefix.
 * @returns {Object} Normalised category entry.
 */
function processProductCategory(category, locale) {
  const { id, slug, name } = category;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;

  return {
    id,
    translationKey: `product-category-${slug}`,
    locale,
    slug,
    name,
    path: `${localePrefix}/product-category/${slug}`,
    description: category.description || '',
    count: category.count || 0
  };
}
|
||||
|
||||
/**
 * Extract the basic structure of a WordPress menu record.
 *
 * @param {Object} menu - Menu record; `term_id` is preferred over `id`.
 * @param {string} locale - Locale code stored on the result.
 * @returns {Object} `{ id, slug, name, locale, items }`, with `items`
 *   defaulting to an empty array.
 */
function processMenu(menu, locale) {
  const identifier = menu.term_id || menu.id;
  const { slug, name } = menu;
  const items = menu.items || [];

  return { id: identifier, slug, name, locale, items };
}
|
||||
|
||||
// Main Export Functions
|
||||
/**
 * Export published pages for English and German.
 *
 * Both locales are fetched first (EN, then DE), then normalised with
 * `processPage`, then written to `pages.en.json` / `pages.de.json` in
 * OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed pages.
 */
async function exportPages() {
  console.log('\n📊 EXPORTING PAGES');

  const locales = ['en', 'de'];

  const fetched = {};
  for (const locale of locales) {
    fetched[locale] = await fetchWithPagination('pages', { status: 'publish' }, locale);
  }

  const processed = {};
  for (const locale of locales) {
    processed[locale] = fetched[locale].map((page) => processPage(page, locale));
  }

  for (const locale of locales) {
    const target = path.join(OUTPUT_DIR, `pages.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(processed[locale], null, 2));
  }

  console.log(`✅ Pages: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Export published posts for English and German.
 *
 * Both locales are fetched first (EN, then DE), then normalised with
 * `processPost`, then written to `posts.en.json` / `posts.de.json` in
 * OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed posts.
 */
async function exportPosts() {
  console.log('\n📊 EXPORTING POSTS');

  const locales = ['en', 'de'];

  const fetched = {};
  for (const locale of locales) {
    fetched[locale] = await fetchWithPagination('posts', { status: 'publish' }, locale);
  }

  const processed = {};
  for (const locale of locales) {
    processed[locale] = fetched[locale].map((post) => processPost(post, locale));
  }

  for (const locale of locales) {
    const target = path.join(OUTPUT_DIR, `posts.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(processed[locale], null, 2));
  }

  console.log(`✅ Posts: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Export WooCommerce products for English and German.
 *
 * Both locales are fetched first (EN, then DE) through `fetchWooCommerce`,
 * then normalised with `processProduct`, then written to
 * `products.en.json` / `products.de.json` in OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed products.
 */
async function exportProducts() {
  console.log('\n📊 EXPORTING PRODUCTS');

  const locales = ['en', 'de'];

  const fetched = {};
  for (const locale of locales) {
    fetched[locale] = await fetchWooCommerce('products', {}, locale);
  }

  const processed = {};
  for (const locale of locales) {
    processed[locale] = fetched[locale].map((product) => processProduct(product, locale));
  }

  for (const locale of locales) {
    const target = path.join(OUTPUT_DIR, `products.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(processed[locale], null, 2));
  }

  console.log(`✅ Products: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Export WooCommerce product categories for English and German.
 *
 * Both locales are fetched first (EN, then DE) through `fetchWooCommerce`,
 * then normalised with `processProductCategory`, then written to
 * `product-categories.en.json` / `product-categories.de.json` in OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed categories.
 */
async function exportProductCategories() {
  console.log('\n📊 EXPORTING PRODUCT CATEGORIES');

  const locales = ['en', 'de'];

  const fetched = {};
  for (const locale of locales) {
    fetched[locale] = await fetchWooCommerce('products/categories', {}, locale);
  }

  const processed = {};
  for (const locale of locales) {
    processed[locale] = fetched[locale].map((category) => processProductCategory(category, locale));
  }

  for (const locale of locales) {
    const target = path.join(OUTPUT_DIR, `product-categories.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(processed[locale], null, 2));
  }

  console.log(`✅ Product Categories: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Export menus and menu locations for English and German.
 *
 * The `menus` collection is requested per locale; a rejected request is
 * treated as "no menus". Menu locations are requested once and stored in
 * both output files (`menus.en.json`, `menus.de.json`); when that request
 * fails an empty object is stored and a warning is logged.
 *
 * @returns {Promise<{en: Object[], de: Object[], locations: *}>}
 */
async function exportMenus() {
  console.log('\n📊 EXPORTING MENUS');

  // The menus endpoint might require additional plugins or direct DB access,
  // so a rejection simply means there is nothing to export.
  const emptyOnFailure = () => [];
  const rawEN = await fetchWithPagination('menus', {}, 'en').catch(emptyOnFailure);
  const rawDE = await fetchWithPagination('menus', {}, 'de').catch(emptyOnFailure);

  // Menu locations are collected in addition to the menus themselves.
  let menuLocations = {};
  try {
    menuLocations = await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth());
  } catch (e) {
    console.log('⚠️ Menu locations endpoint not available');
  }

  const processedEN = rawEN.map((menu) => processMenu(menu, 'en'));
  const processedDE = rawDE.map((menu) => processMenu(menu, 'de'));

  const outputs = [
    ['menus.en.json', processedEN],
    ['menus.de.json', processedDE]
  ];
  for (const [fileName, menus] of outputs) {
    const payload = { menus, locations: menuLocations };
    fs.writeFileSync(path.join(OUTPUT_DIR, fileName), JSON.stringify(payload, null, 2));
  }

  console.log(`✅ Menus: ${processedEN.length} EN, ${processedDE.length} DE`);

  return { en: processedEN, de: processedDE, locations: menuLocations };
}
|
||||
|
||||
/**
 * Download every media file referenced by the already exported JSON files
 * and write a manifest to `media.json` in OUTPUT_DIR.
 *
 * References are gathered from each `*.json` file in OUTPUT_DIR: the
 * `featuredImage` of every item and every `images` entry that is a string
 * containing `/wp-content/`. Numeric references are resolved through
 * `fetchMedia`; string references starting with `http` are downloaded
 * directly. Download failures are logged and do not abort the export.
 *
 * @returns {Promise<Object[]>} The manifest entries.
 */
async function exportMedia() {
  console.log('\n📊 EXPORTING MEDIA');

  // Unique media references (numeric IDs or URLs) found in the exported data.
  const references = new Set();

  const exportedFiles = fs.readdirSync(OUTPUT_DIR).filter((name) => name.endsWith('.json'));

  for (const name of exportedFiles) {
    const parsed = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, name), 'utf8'));
    const entries = Array.isArray(parsed) ? parsed : (parsed.menus || []);

    for (const entry of entries) {
      if (entry.featuredImage) {
        references.add(entry.featuredImage);
      }
      if (!entry.images) {
        continue;
      }
      entry.images.forEach((image) => {
        // Only upload URLs are kept; they are stored as URLs, not IDs.
        const isUploadUrl = typeof image === 'string' && image.includes('/wp-content/');
        if (isUploadUrl) {
          references.add(image);
        }
      });
    }
  }

  const manifest = [];
  const pendingDownloads = [];

  for (const reference of references) {
    if (typeof reference === 'number') {
      // Numeric reference: look up the media record first.
      const info = await fetchMedia(reference);
      if (!info || !info.source_url) {
        continue;
      }

      const localName = `${reference}-${path.basename(info.source_url)}`;
      manifest.push({
        id: reference,
        url: info.source_url,
        filename: localName,
        alt: info.alt_text || '',
        width: info.media_details?.width,
        height: info.media_details?.height,
        mime_type: info.mime_type
      });

      const download = downloadMedia(info.source_url, localName).catch((err) => {
        console.warn(`⚠️ Failed to download media ${reference}:`, err.message);
      });
      pendingDownloads.push(download);
      continue;
    }

    if (typeof reference !== 'string' || !reference.startsWith('http')) {
      continue;
    }

    // Direct URL: no media record is available, so metadata stays empty.
    const localName = `media-${Date.now()}-${path.basename(reference)}`;
    manifest.push({
      id: null,
      url: reference,
      filename: localName,
      alt: '',
      width: null,
      height: null,
      mime_type: null
    });

    const download = downloadMedia(reference, localName).catch((err) => {
      console.warn(`⚠️ Failed to download media from URL:`, err.message);
    });
    pendingDownloads.push(download);
  }

  // Wait for all downloads
  await Promise.all(pendingDownloads);

  fs.writeFileSync(path.join(OUTPUT_DIR, 'media.json'), JSON.stringify(manifest, null, 2));

  console.log(`✅ Media: ${manifest.length} items`);

  return manifest;
}
|
||||
|
||||
/**
 * Collect general site information and write it to `site-info.json` in
 * OUTPUT_DIR.
 *
 * Polylang is detected case-insensitively in the plugin list, looking at
 * each entry's `plugin`, `name` and `textdomain` values (the previous
 * case-sensitive check on `name` alone could not match a display name such
 * as "Polylang"). The settings endpoint is requested once and supplies the
 * title, description, default language and permalink structure.
 *
 * Each lookup is best-effort: a failure is logged and the remaining fields
 * are still exported.
 *
 * @returns {Promise<Object>} The exported site information.
 */
async function exportSiteInfo() {
  console.log('\n📊 EXPORTING SITE INFORMATION');

  const siteInfo = {
    baseUrl: BASE_URL,
    exportDate: new Date().toISOString(),
    timestamp: TIMESTAMP,
    polylang: false,
    languages: ['en', 'de'],
    defaultLocale: 'en' // Will need to confirm
  };

  const mentionsPolylang = (value) =>
    typeof value === 'string' && value.toLowerCase().includes('polylang');

  // Check for Polylang
  try {
    const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth());
    const polylangPlugin = plugins.find((p) =>
      [p.plugin, p.name, p.textdomain].some(mentionsPolylang)
    );
    if (polylangPlugin) {
      siteInfo.polylang = true;
      siteInfo.polylangVersion = polylangPlugin.version;
    }
  } catch (e) {
    console.log('⚠️ Could not check plugins');
  }

  // Get site settings (one request serves both the general fields and the
  // permalink structure)
  let settings = null;
  try {
    settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    siteInfo.siteTitle = settings.title;
    siteInfo.siteDescription = settings.description;
    siteInfo.defaultLanguage = settings.default_language || 'en';
  } catch (e) {
    settings = null;
    console.log('⚠️ Could not fetch settings');
  }

  // Get permalink structure
  if (settings) {
    siteInfo.permalinkStructure = settings.permalink_structure;
  } else {
    console.log('⚠️ Could not fetch permalink structure');
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'site-info.json'),
    JSON.stringify(siteInfo, null, 2)
  );

  console.log('✅ Site info exported');

  return siteInfo;
}
|
||||
|
||||
/**
 * Add every image URL found on one page/post to `found`.
 *
 * String meta values whose key contains `_vc`, `vc_` or `salient` are
 * scanned for any http(s) URL; the rendered content is scanned for URLs
 * ending in jpg, jpeg, png, webp or svg.
 */
function collectSalientImageUrls(item, found) {
  if (item.meta) {
    Object.keys(item.meta).forEach((key) => {
      if (key.includes('_vc') || key.includes('vc_') || key.includes('salient')) {
        const metaValue = item.meta[key];
        if (typeof metaValue === 'string') {
          const urlMatches = metaValue.match(/https?:\/\/[^\s"']+/g);
          if (urlMatches) {
            urlMatches.forEach((url) => found.add(url));
          }
        }
      }
    });
  }

  const content = item.content?.rendered || '';
  const contentUrls = content.match(/https?:\/\/[^\s"']+\.(jpg|jpeg|png|webp|svg)/gi);
  if (contentUrls) {
    contentUrls.forEach((url) => found.add(url));
  }
}

/**
 * Export the site logo, favicon, SVG media and page-builder images.
 *
 * The work is split into independent stages (settings lookup, logo from
 * settings, site icon, logo search in the media library, SVG export,
 * Salient/VC image export, favicon fallback, manifest write). A failing
 * stage is logged and the remaining stages still run, so e.g. an
 * inaccessible settings endpoint no longer prevents the favicon fallback or
 * the writing of `assets.json`.
 *
 * The media library is requested at most once and with `per_page: 100`,
 * the largest page size the WordPress REST API accepts.
 *
 * @returns {Promise<{logo: ?string, logoSvg: ?string, favicon: ?string,
 *   appleTouchIcon: ?string, siteIconId: *}>} The asset manifest that is
 *   also written to `assets.json` in OUTPUT_DIR.
 */
async function exportLogoAndFavicon() {
  console.log('\n📊 EXPORTING LOGO AND FAVICON');

  const assets = {
    logo: null,
    logoSvg: null,
    favicon: null,
    appleTouchIcon: null,
    siteIconId: null
  };

  // Run one stage; report a failure and carry on with the next stage.
  const runStage = async (stage) => {
    try {
      await stage();
    } catch (error) {
      console.error('❌ Error exporting logo/favicon:', error.message);
    }
  };

  // The media library is shared by the logo search and the SVG export.
  let mediaLibrary = null;
  const loadMediaLibrary = async () => {
    if (mediaLibrary === null) {
      mediaLibrary = await fetchWithPagination('media', { per_page: 100 });
    }
    return mediaLibrary;
  };

  // Site settings may include logo and icon IDs.
  let settings = {};
  await runStage(async () => {
    settings = (await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth())) || {};
  });

  // Try to get custom_logo
  await runStage(async () => {
    if (!settings.custom_logo) {
      return;
    }
    console.log(`📥 Found custom_logo ID: ${settings.custom_logo}`);
    const logoMedia = await fetchMedia(settings.custom_logo);
    if (!logoMedia || !logoMedia.source_url) {
      return;
    }

    const ext = path.extname(logoMedia.source_url);
    const logoFilename = `logo${ext}`;
    await downloadMedia(logoMedia.source_url, logoFilename);
    assets.logo = `/media/${logoFilename}`;
    console.log(`✅ Logo downloaded: ${logoFilename}`);

    // Check if it's SVG
    if (logoMedia.mime_type === 'image/svg+xml' || ext === '.svg') {
      assets.logoSvg = `/media/${logoFilename}`;
      console.log(`✅ SVG logo detected: ${logoFilename}`);
    }
  });

  // Try to get site_icon
  await runStage(async () => {
    if (!settings.site_icon) {
      return;
    }
    console.log(`📥 Found site_icon ID: ${settings.site_icon}`);
    assets.siteIconId = settings.site_icon;
    const iconMedia = await fetchMedia(settings.site_icon);
    if (!iconMedia || !iconMedia.source_url) {
      return;
    }

    // Download as favicon.ico
    const faviconFilename = 'favicon.ico';
    await downloadFavicon(iconMedia.source_url, faviconFilename);
    assets.favicon = `/favicon.ico`;
    console.log(`✅ Favicon downloaded: ${faviconFilename}`);

    // Also create apple-touch-icon.png (same file, different name)
    const appleTouchFilename = 'apple-touch-icon.png';
    await downloadFavicon(iconMedia.source_url, appleTouchFilename);
    assets.appleTouchIcon = `/apple-touch-icon.png`;
    console.log(`✅ Apple touch icon downloaded: ${appleTouchFilename}`);
  });

  // WP CLI Equivalent: wp media list --search=logo --format=json
  await runStage(async () => {
    console.log('🔍 WP CLI Equivalent: Searching for logo media...');
    if (assets.logo) {
      return;
    }

    const allMedia = await loadMediaLibrary();
    // First media item with a download URL whose title, slug or URL mentions "logo".
    const logoMedia = allMedia.find((m) => {
      if (!m.source_url) {
        return false;
      }
      const title = m.title?.rendered?.toLowerCase() || '';
      const slug = m.slug?.toLowerCase() || '';
      const url = m.source_url.toLowerCase();
      return title.includes('logo') || slug.includes('logo') || url.includes('logo');
    });
    if (!logoMedia) {
      return;
    }

    const ext = path.extname(logoMedia.source_url);
    const logoFilename = `logo${ext}`;
    await downloadMedia(logoMedia.source_url, logoFilename);
    assets.logo = `/media/${logoFilename}`;

    if (logoMedia.mime_type === 'image/svg+xml' || ext === '.svg') {
      assets.logoSvg = `/media/${logoFilename}`;
      console.log(`✅ SVG logo found and downloaded: ${logoFilename}`);
    } else {
      console.log(`✅ Logo found and downloaded: ${logoFilename}`);
    }
  });

  // WP CLI Equivalent: wp media list --mime=image/svg+xml --format=json
  await runStage(async () => {
    console.log('🔍 WP CLI Equivalent: Searching for SVG images...');
    const allMedia = await loadMediaLibrary();
    const svgImages = allMedia.filter((m) => m.mime_type === 'image/svg+xml');
    if (svgImages.length === 0) {
      return;
    }

    console.log(`📥 Found ${svgImages.length} SVG images`);
    for (const svg of svgImages) {
      // One broken SVG must not stop the remaining downloads.
      try {
        const filename = `svg-${svg.id}-${path.basename(svg.source_url)}`;
        await downloadMedia(svg.source_url, filename);
        console.log(`✅ SVG downloaded: ${filename}`);
      } catch (err) {
        console.warn(`⚠️ Failed to download SVG ${svg.source_url}:`, err.message);
      }
    }
  });

  // WP CLI Equivalent: wp postmeta list --post_type=any --meta_key~=_vc --format=json
  await runStage(async () => {
    console.log('🔍 WP CLI Equivalent: Searching for Salient/VC images...');
    const salientImages = new Set();

    // Search pages and posts for Visual Composer meta and content images
    for (const endpoint of ['pages', 'posts']) {
      const items = await fetchWithPagination(endpoint, { per_page: 100 });
      items.forEach((item) => collectSalientImageUrls(item, salientImages));
    }

    if (salientImages.size === 0) {
      return;
    }

    console.log(`📥 Found ${salientImages.size} Salient/VC images`);
    const salientManifest = [];

    for (const url of salientImages) {
      try {
        const filename = `salient-${Date.now()}-${path.basename(url)}`;
        await downloadMedia(url, filename);
        salientManifest.push({
          originalUrl: url,
          localPath: `/media/${filename}`,
          filename: filename
        });
        console.log(`✅ Salient image downloaded: ${filename}`);
      } catch (err) {
        console.warn(`⚠️ Failed to download Salient image ${url}:`, err.message);
      }
    }

    // Save Salient images manifest
    fs.writeFileSync(
      path.join(OUTPUT_DIR, 'salient-images.json'),
      JSON.stringify(salientManifest, null, 2)
    );
  });

  // If no favicon found, try to download from common locations
  await runStage(async () => {
    if (assets.favicon) {
      return;
    }
    console.log('⚠️ No favicon found in settings, trying common locations...');
    const faviconUrls = [
      `${BASE_URL}/favicon.ico`,
      `${BASE_URL}/wp-content/uploads/favicon.ico`
    ];

    for (const url of faviconUrls) {
      try {
        await downloadFavicon(url, 'favicon.ico');
        assets.favicon = '/favicon.ico';
        console.log(`✅ Favicon downloaded from: ${url}`);

        // Also create apple-touch-icon
        await downloadFavicon(url, 'apple-touch-icon.png');
        assets.appleTouchIcon = '/apple-touch-icon.png';
        break;
      } catch (e) {
        // Best effort: report and continue with the next candidate URL.
        console.warn(`⚠️ Favicon not available at ${url}:`, e.message);
      }
    }
  });

  // Save asset manifest (always, so partial results are not lost)
  await runStage(async () => {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, 'assets.json'),
      JSON.stringify(assets, null, 2)
    );
    console.log('✅ Logo and favicon export complete');
  });

  return assets;
}
|
||||
|
||||
/**
 * Build the EN↔DE translation mapping and write it to
 * `translation-mapping.json` in OUTPUT_DIR.
 *
 * Items are paired by identical slug (this should be enhanced with Polylang
 * data). For every content type the exported `<type>.en.json` and
 * `<type>.de.json` files are read; a missing, unreadable or non-array file
 * counts as an empty list. German items are indexed by slug once, so pairing
 * is linear in the number of items; when several German items share a slug
 * the first one is used.
 *
 * @returns {Promise<{pages: Object, posts: Object, products: Object,
 *   productCategories: Object}>} Per type, an object keyed by slug whose
 *   values are `{ en: <id>, de: <id> }`.
 */
async function generateTranslationMapping() {
  console.log('\n📊 GENERATING TRANSLATION MAPPING');

  // Mapping key → file name stem of the exported data
  const sources = [
    ['pages', 'pages'],
    ['posts', 'posts'],
    ['products', 'products'],
    ['productCategories', 'product-categories']
  ];

  const loadItems = (filename) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
      return Array.isArray(parsed) ? parsed : [];
    } catch (e) {
      // An export step that produced no file simply contributes no pairs.
      return [];
    }
  };

  // Helper to find translation pairs by slug
  const findTranslationPairs = (enItems, deItems) => {
    const deBySlug = new Map();
    for (const deItem of deItems) {
      if (!deBySlug.has(deItem.slug)) {
        deBySlug.set(deItem.slug, deItem);
      }
    }

    const pairs = {};
    for (const enItem of enItems) {
      const deMatch = deBySlug.get(enItem.slug);
      if (deMatch) {
        pairs[`${enItem.slug}`] = {
          en: enItem.id,
          de: deMatch.id
        };
      }
    }
    return pairs;
  };

  const mapping = {};
  for (const [key, stem] of sources) {
    mapping[key] = findTranslationPairs(
      loadItems(`${stem}.en.json`),
      loadItems(`${stem}.de.json`)
    );
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'translation-mapping.json'),
    JSON.stringify(mapping, null, 2)
  );

  const totalPairs = Object.values(mapping).reduce((sum, obj) => sum + Object.keys(obj).length, 0);
  console.log(`✅ Translation mapping: ${totalPairs} pairs found`);

  return mapping;
}
|
||||
|
||||
/**
 * Export page-builder related post meta of all published pages and posts to
 * `vc-postmeta.json` in OUTPUT_DIR.
 *
 * A meta key is exported when it contains `_vc`, `vc_`, `salient` or `wpb_`.
 * Each entry carries a preview (`meta_value`: first 200 characters of a
 * string, otherwise the JSON form) and the untouched `full_value`.
 * Errors are logged; whatever was collected up to that point is returned.
 *
 * @returns {Promise<Object[]>} The collected meta entries.
 */
async function exportWPCliPostmeta() {
  console.log('\n📊 EXPORTING WP CLI POSTMETA (VC/Salient)');

  const vcMeta = [];
  const builderMarkers = ['_vc', 'vc_', 'salient', 'wpb_'];
  const isBuilderKey = (key) => builderMarkers.some((marker) => key.includes(marker));

  try {
    // Get all pages and posts
    const pages = await fetchWithPagination('pages', { status: 'publish', per_page: 100 });
    const posts = await fetchWithPagination('posts', { status: 'publish', per_page: 100 });
    const scanned = [...pages, ...posts];

    console.log(`🔍 Scanning ${scanned.length} items for VC/Salient meta...`);

    for (const item of scanned) {
      if (!item.meta) {
        continue;
      }

      const builderKeys = Object.keys(item.meta).filter((key) => isBuilderKey(key));
      for (const key of builderKeys) {
        const value = item.meta[key];
        const preview = typeof value === 'string' ? value.substring(0, 200) : JSON.stringify(value);

        vcMeta.push({
          post_id: item.id,
          post_type: item.type || 'page',
          post_slug: item.slug,
          meta_key: key,
          meta_value: preview,
          full_value: value
        });
      }
    }

    // Save VC postmeta
    const target = path.join(OUTPUT_DIR, 'vc-postmeta.json');
    fs.writeFileSync(target, JSON.stringify(vcMeta, null, 2));

    console.log(`✅ VC/Salient postmeta: ${vcMeta.length} entries found`);
  } catch (error) {
    console.error('❌ Error exporting postmeta:', error.message);
  }

  return vcMeta;
}
|
||||
|
||||
/**
 * Generate permanent redirect rules that move posts under `/blog/` and
 * write them to `redirects.json` in OUTPUT_DIR.
 *
 * English: `/{slug}` → `/blog/{slug}`; German: `/de/{slug}` →
 * `/de/blog/{slug}`. The posts are read from the exported `posts.en.json`
 * and `posts.de.json`; a missing or invalid file makes the call reject.
 *
 * @returns {Promise<Object[]>} English rules followed by German rules.
 */
async function generateRedirects() {
  console.log('\n📊 GENERATING REDIRECT RULES');

  const readPosts = (fileName) =>
    JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, fileName), 'utf8'));

  // Load posts
  const englishPosts = readPosts('posts.en.json');
  const germanPosts = readPosts('posts.de.json');

  const redirects = [];

  // Base redirect: /{postSlug} → /blog/{postSlug} (English)
  for (const { slug } of englishPosts) {
    redirects.push({
      source: `/${slug}`,
      destination: `/blog/${slug}`,
      permanent: true,
      locale: 'en'
    });
  }

  // German redirects: /de/{postSlug} → /de/blog/{postSlug}
  for (const { slug } of germanPosts) {
    redirects.push({
      source: `/de/${slug}`,
      destination: `/de/blog/${slug}`,
      permanent: true,
      locale: 'de'
    });
  }

  const target = path.join(OUTPUT_DIR, 'redirects.json');
  fs.writeFileSync(target, JSON.stringify(redirects, null, 2));

  console.log(`✅ Redirects: ${redirects.length} rules generated`);

  return redirects;
}
|
||||
|
||||
// Main Execution
|
||||
/**
 * Run the complete export: content, enhanced exports (postmeta, media,
 * logo/favicon), then the generated mapping and redirect files. The steps
 * run strictly one after another. If a step rejects, the error is logged
 * and the process exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const printLines = (lines) => {
    for (const line of lines) {
      console.log(line);
    }
  };

  printLines([
    '🚀 WordPress → Next.js Data Export (WP CLI Enhanced)',
    '=====================================',
    `Target: ${BASE_URL}`,
    `Output: ${OUTPUT_DIR}`,
    ''
  ]);

  const steps = [
    // Step 1: Export all content
    exportSiteInfo,
    exportPages,
    exportPosts,
    exportProducts,
    exportProductCategories,
    exportMenus,
    // Step 2: WP CLI Enhanced exports
    exportWPCliPostmeta,
    exportMedia,
    exportLogoAndFavicon,
    // Step 3: Generate mappings and redirects
    generateTranslationMapping,
    generateRedirects
  ];

  try {
    for (const step of steps) {
      await step();
    }

    printLines([
      '\n🎉 Export Complete!',
      '=====================================',
      `📁 Data directory: data/raw/${TIMESTAMP}`,
      `🖼️ Media directory: public/media/`,
      `🎨 Logo/Favicon: public/`,
      '',
      'WP CLI Features:',
      '✓ SVG logo detection and download',
      '✓ All SVG images exported',
      '✓ Salient/VC postmeta extracted',
      '✓ All media downloaded locally',
      '',
      'Next steps:',
      '1. Review exported data for completeness',
      '2. Check for any missing translations',
      '3. Verify media downloads',
      '4. Proceed with Next.js data processing'
    ]);
  } catch (error) {
    console.error('\n❌ Export failed:', error.message);
    process.exit(1);
  }
}
|
||||
|
||||
// Run if called directly
|
||||
if (require.main === module) {
|
||||
main();
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
exportPages,
|
||||
exportPosts,
|
||||
exportProducts,
|
||||
exportProductCategories,
|
||||
exportMenus,
|
||||
exportMedia,
|
||||
exportSiteInfo,
|
||||
exportLogoAndFavicon,
|
||||
exportWPCliPostmeta,
|
||||
generateTranslationMapping,
|
||||
generateRedirects
|
||||
};
|
||||
Reference in New Issue
Block a user