This commit is contained in:
2026-01-06 16:52:59 +01:00
parent f991ea6b9b
commit bfdd69533d
5 changed files with 1190 additions and 371 deletions

View File

@@ -0,0 +1,842 @@
#!/usr/bin/env node
/**
* WordPress → Next.js Data Processing Pipeline with WooCommerce API Integration
* Transforms raw WordPress data into Next.js compatible format with prices and variations
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
const dotenv = require('dotenv');
// Load environment variables from a .env file into process.env.
dotenv.config();
// Data directories, resolved relative to this script's parent directory.
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
// Make sure the output directory exists before anything is written to it.
if (!fs.existsSync(PROCESSED_DIR)) {
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
// WooCommerce REST API settings. Credentials come from the environment only;
// the shop URL falls back to a hard-coded default when WOOCOMMERCE_URL is unset.
const WOOCOMMERCE_CONFIG = {
url: process.env.WOOCOMMERCE_URL || 'https://klz-cables.com',
consumerKey: process.env.WOOCOMMERCE_CONSUMER_KEY,
consumerSecret: process.env.WOOCOMMERCE_CONSUMER_SECRET,
apiVersion: 'wc/v3'
};
// Fail fast when credentials are missing. Only presence is reported, never the
// secret values themselves.
// NOTE(review): this check runs at module load, so merely requiring this file
// for its exported helpers also terminates the process when credentials are absent.
if (!WOOCOMMERCE_CONFIG.consumerKey || !WOOCOMMERCE_CONFIG.consumerSecret) {
console.error('❌ Missing WooCommerce credentials in environment');
console.error('WOOCOMMERCE_CONSUMER_KEY:', WOOCOMMERCE_CONFIG.consumerKey ? '✓ Loaded' : '❌ Missing');
console.error('WOOCOMMERCE_CONSUMER_SECRET:', WOOCOMMERCE_CONFIG.consumerSecret ? '✓ Loaded' : '❌ Missing');
process.exit(1);
} else {
console.log('✅ WooCommerce credentials loaded successfully');
}
// Rate limiting configuration shared by the API client.
const RATE_LIMIT = {
maxConcurrent: 2, // Intended concurrency cap; not read by the code shown in this file (the queue runs tasks one at a time)
delayBetweenCalls: 100, // Pause in ms inserted between consecutive queued tasks
timeout: 30000 // Per-request socket timeout in ms (30 seconds)
};
// Queue bookkeeping.
// NOTE(review): apiQueue and activeRequests are not referenced elsewhere in the
// code shown here; confirm whether they are still needed.
let apiQueue = [];
let activeRequests = 0;
// Running counters for API traffic, updated by the client and printed after
// the product fetch completes.
let apiStats = {
total: 0,
success: 0,
failed: 0,
retries: 0
};
/**
 * WooCommerce API Client.
 *
 * Thin wrapper around `https.request` that talks to `<url>/wp-json/<apiVersion>`
 * using HTTP Basic authentication built from the consumer key/secret.
 * Updates the module-level `apiStats` counters and reads the module-level
 * `RATE_LIMIT` settings.
 */
class WooCommerceAPI {
  /**
   * @param {{url: string, consumerKey: string, consumerSecret: string, apiVersion: string}} config
   *   Connection settings; stored as-is on the instance.
   */
  constructor(config) {
    this.config = config;
    this.baseURL = `${config.url}/wp-json/${config.apiVersion}`;
  }

  /**
   * Make an authenticated API request and parse the JSON response.
   *
   * Every request is counted exactly once in `apiStats` (total plus either
   * success or failed), regardless of whether it ends through a response, a
   * socket error or a timeout.
   *
   * @param {string} endpoint - Path (and optional query) appended to the base URL.
   * @param {string} [method='GET'] - HTTP method.
   * @param {*} [data=null] - JSON-serializable body, sent for POST/PUT only.
   * @returns {Promise<*>} Parsed JSON body of a 2xx response.
   * @throws {Error} Rejects on non-2xx status, invalid JSON, socket error or timeout.
   */
  request(endpoint, method = 'GET', data = null) {
    return new Promise((resolve, reject) => {
      const url = new URL(`${this.baseURL}${endpoint}`);
      // Add authentication
      const auth = Buffer.from(`${this.config.consumerKey}:${this.config.consumerSecret}`).toString('base64');
      const requestBody = data && (method === 'POST' || method === 'PUT') ? JSON.stringify(data) : null;
      const headers = {
        'Authorization': `Basic ${auth}`,
        'Content-Type': 'application/json',
        'User-Agent': 'KLZ-Data-Processor/1.0'
      };
      if (requestBody !== null) {
        headers['Content-Length'] = Buffer.byteLength(requestBody);
      }
      const options = {
        hostname: url.hostname,
        port: url.port || 443,
        path: url.pathname + url.search,
        method,
        headers,
        timeout: RATE_LIMIT.timeout
      };
      // Log the request (path only, never credentials)
      console.log(`🌐 API Request: ${method} ${url.pathname}`);

      // A request can signal failure more than once (e.g. 'timeout' followed by
      // the 'error' raised by destroy()); settle and count only the first outcome.
      let settled = false;
      const succeed = (value) => {
        if (settled) return;
        settled = true;
        apiStats.total++;
        apiStats.success++;
        resolve(value);
      };
      const fail = (error) => {
        if (settled) return;
        settled = true;
        apiStats.total++;
        apiStats.failed++;
        reject(error);
      };

      const req = https.request(options, (res) => {
        // Decode as UTF-8 at the stream level so multi-byte characters that
        // straddle chunk boundaries are not corrupted.
        res.setEncoding('utf8');
        let responseBody = '';
        res.on('data', (chunk) => {
          responseBody += chunk;
        });
        res.on('error', fail);
        res.on('end', () => {
          if (res.statusCode < 200 || res.statusCode >= 300) {
            fail(new Error(`HTTP ${res.statusCode}: ${responseBody}`));
            return;
          }
          let parsed;
          try {
            parsed = JSON.parse(responseBody);
          } catch (e) {
            fail(new Error(`JSON parse error: ${e.message}`));
            return;
          }
          succeed(parsed);
        });
      });
      req.on('error', fail);
      req.on('timeout', () => {
        const timeoutError = new Error('Request timeout');
        fail(timeoutError);
        // Tear down the socket; the resulting 'error' event is ignored by fail().
        req.destroy(timeoutError);
      });
      // Add request body for POST/PUT
      if (requestBody !== null) {
        req.write(requestBody);
      }
      req.end();
    });
  }

  /**
   * Get product by ID.
   *
   * @param {number|string} productId
   * @returns {Promise<Object|null>} The product, or null when the request failed
   *   (the error is logged, not thrown).
   */
  async getProduct(productId) {
    try {
      return await this.request(`/products/${encodeURIComponent(productId)}`);
    } catch (error) {
      console.error(`❌ Failed to fetch product ${productId}:`, error.message);
      return null;
    }
  }

  /**
   * Get product variations (first page of up to 100 entries).
   *
   * @param {number|string} productId
   * @returns {Promise<Array>} The variations, or an empty array when the request
   *   failed (the error is logged, not thrown).
   */
  async getProductVariations(productId) {
    try {
      return await this.request(`/products/${encodeURIComponent(productId)}/variations?per_page=100`);
    } catch (error) {
      console.error(`❌ Failed to fetch variations for product ${productId}:`, error.message);
      return [];
    }
  }

  /**
   * Run tasks one after another with a pause between them and per-task retries.
   *
   * A task is retried only when its `fn` throws/rejects. The wait before retry
   * number k is `retryDelay * k` (linear backoff). `apiStats.retries` counts
   * every retry attempt that is made.
   *
   * @param {Array<{label: string, fn: function(): Promise<*>}>} tasks
   * @param {function(number, number, string): void} [progressCallback]
   *   Called before every attempt with (1-based index, total, label).
   * @param {{maxAttempts?: number, retryDelay?: number}} [options]
   *   `maxAttempts` defaults to 3, `retryDelay` (ms) defaults to 1000.
   * @returns {Promise<Array>} One entry per task, in order; null for tasks that
   *   failed on every attempt.
   */
  async processQueue(tasks, progressCallback, options = {}) {
    const maxAttempts = Number.isInteger(options.maxAttempts) && options.maxAttempts > 0
      ? options.maxAttempts
      : 3;
    const retryDelay = typeof options.retryDelay === 'number' && options.retryDelay >= 0
      ? options.retryDelay
      : 1000;
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const results = [];
    for (let i = 0; i < tasks.length; i++) {
      // Wait for rate limit
      if (i > 0) {
        await sleep(RATE_LIMIT.delayBetweenCalls);
      }
      const task = tasks[i];
      let attempt = 0;
      let success = false;
      let result = null;
      // Retry logic
      while (attempt < maxAttempts && !success) {
        try {
          if (progressCallback) {
            progressCallback(i + 1, tasks.length, task.label);
          }
          result = await task.fn();
          success = true;
          if (attempt > 0) {
            console.log(`✅ Retry successful for: ${task.label}`);
          }
        } catch (error) {
          attempt++;
          if (attempt < maxAttempts) {
            apiStats.retries++;
            console.log(`⚠️ Retry ${attempt}/${maxAttempts} for: ${task.label} - ${error.message}`);
            await sleep(retryDelay * attempt); // Linear backoff
          } else {
            console.log(`❌ Failed after ${maxAttempts} attempts: ${task.label} - ${error.message}`);
          }
        }
      }
      results.push(result);
    }
    return results;
  }
}
/**
 * Decode HTML entities in text.
 *
 * Handles decimal (`&#8211;`) and hexadecimal (`&#x2013;`) numeric references
 * for the full Unicode range, plus a table of common named entities. Decoding
 * is done in a single pass so already-decoded output is never decoded again
 * (`&amp;lt;` becomes `&lt;`, not `<`). Unknown named entities and out-of-range
 * numeric references are left untouched.
 *
 * After decoding, typographic double quotes and the double-prime sign are
 * normalized to a plain `"`.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Decoded text; an empty string for empty/nullish input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  // Named entities keyed by name (without '&' and ';'). A Map is used so that
  // names such as "constructor" cannot resolve to inherited object properties.
  const namedEntities = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['Prime', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC'],
    ['copy', '\u00A9'],
    ['reg', '\u00AE'],
    ['trade', '\u2122'],
    ['deg', '\u00B0'],
    ['plusmn', '\u00B1'],
    ['times', '\u00D7'],
    ['divide', '\u00F7'],
    ['shy', ''],
    ['cent', '\u00A2'],
    ['pound', '\u00A3'],
    ['yen', '\u00A5'],
    ['sect', '\u00A7'],
    ['para', '\u00B6'],
    ['micro', '\u00B5'],
    ['laquo', '\u00AB'],
    ['raquo', '\u00BB'],
    ['middot', '\u00B7']
  ]);

  const decoded = String(text).replace(
    /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (match, body) => {
      if (body[0] === '#') {
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // fromCodePoint throws outside the Unicode range; keep such references as-is.
        if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) return match;
        return String.fromCodePoint(codePoint);
      }
      return namedEntities.has(body) ? namedEntities.get(body) : match;
    }
  );

  // Normalize “ ” and ″ to a straight double quote.
  return decoded.replace(/[\u201C\u201D\u2033]/g, '"');
}
/**
 * HTML sanitization - preserve content but clean dangerous elements.
 *
 * Steps, in order:
 *  1. Remove <script> elements (and stray script tags) and inline `on*=`
 *     handlers, whether double-quoted, single-quoted or unquoted.
 *  2. Convert WPBakery row/column/text shortcodes (including the `_inner`
 *     variants) into matching <div> wrappers so opening and closing tags stay
 *     balanced.
 *  3. Unwrap Nectar CTA shortcodes, then drop every remaining `[...]` token.
 *  4. Remove empty <p>/<div> elements (repeated until none are left, so nested
 *     empty wrappers disappear too) and collapse whitespace.
 *
 * This is a regex-based clean-up for trusted CMS exports, not a general
 * purpose XSS filter.
 *
 * @param {string} html - Raw HTML, possibly containing shortcodes.
 * @returns {string} Cleaned HTML; an empty string for empty/nullish input.
 */
function sanitizeHTML(html) {
  if (!html) return '';
  let sanitized = String(html);

  // Remove script tags and inline handlers (security)
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/<\/?script\b[^>]*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // WPBakery text blocks first: "[vc_column_text" must not be consumed by the
  // more general "[vc_column" rule below.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // WPBakery rows and columns, including nested *_inner variants
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Handle Nectar shortcodes
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcodes
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs; loop because removing an inner element
  // can leave its wrapper empty.
  let previous;
  do {
    previous = sanitized;
    sanitized = sanitized
      .replace(/<p\b[^>]*>\s*<\/p>/gi, '')
      .replace(/<div\b[^>]*>\s*<\/div>/gi, '');
  } while (sanitized !== previous);

  // Normalize whitespace
  return sanitized.replace(/\s+/g, ' ').trim();
}
/**
 * Process products with WooCommerce API integration.
 *
 * For every English and German product, fetches the product (for price and
 * stock data) and its variations through `WooCommerceAPI.processQueue`, then
 * merges that data into the exported product records. Products whose API
 * calls failed keep empty prices, currency 'EUR', stock status 'instock' and
 * no variations.
 *
 * @param {Array<Object>} productsEN - Exported English products.
 * @param {Array<Object>} productsDE - Exported German products.
 * @param {Object} translationMapping - Mapping file content; its `products`
 *   section is looked up by product slug. A missing section is treated as
 *   "no translations" instead of throwing.
 * @returns {Promise<Array<Object>>} English records followed by German records.
 */
async function processProductsWithWooCommerce(productsEN, productsDE, translationMapping) {
  const api = new WooCommerceAPI(WOOCOMMERCE_CONFIG);
  const productTranslations = (translationMapping && translationMapping.products) || {};

  console.log(`\n🚀 Starting WooCommerce API integration for ${productsEN.length} products...`);

  // Builds the two fetch tasks (prices, variations) for each product of one locale.
  const buildTasks = (products, locale) => {
    const localeTag = locale.toUpperCase();
    const localeTasks = [];
    products.forEach((product) => {
      localeTasks.push({
        label: `${product.name} (${localeTag}) - Prices`,
        fn: async () => {
          const wooProduct = await api.getProduct(product.id);
          if (!wooProduct) return null;
          return {
            productId: product.id,
            locale,
            regularPrice: wooProduct.regular_price || '',
            salePrice: wooProduct.sale_price || '',
            // NOTE(review): confirm that product responses actually carry `currency`;
            // when absent this always falls back to 'EUR'.
            currency: wooProduct.currency || 'EUR',
            stockStatus: wooProduct.stock_status || 'instock'
          };
        }
      });
      localeTasks.push({
        label: `${product.name} (${localeTag}) - Variations`,
        fn: async () => {
          const variations = await api.getProductVariations(product.id);
          return {
            productId: product.id,
            locale,
            variations: variations || []
          };
        }
      });
    });
    return localeTasks;
  };

  // Create tasks for price and variation fetching
  const tasks = [...buildTasks(productsEN, 'en'), ...buildTasks(productsDE, 'de')];

  // Progress callback
  const progressCallback = (current, total, label) => {
    const progress = Math.round((current / total) * 100);
    process.stdout.write(`\r📊 Progress: ${current}/${total} (${progress}%) - ${label}`);
  };

  // Process all tasks
  const results = await api.processQueue(tasks, progressCallback);
  // Clear progress line
  process.stdout.write('\n');

  // Organize results by "<productId>_<locale>"; variation results are the ones
  // carrying a `variations` array, everything else is price data.
  const priceData = new Map();
  const variationData = new Map();
  results.forEach((result) => {
    if (!result) return;
    const key = `${result.productId}_${result.locale}`;
    if (result.variations) {
      variationData.set(key, result.variations);
    } else {
      priceData.set(key, {
        regularPrice: result.regularPrice,
        salePrice: result.salePrice,
        currency: result.currency,
        stockStatus: result.stockStatus
      });
    }
  });

  console.log(`\n📈 API Statistics:`);
  console.log(` Total requests: ${apiStats.total}`);
  console.log(` Successful: ${apiStats.success}`);
  console.log(` Failed: ${apiStats.failed}`);
  console.log(` Retries: ${apiStats.retries}`);

  // Builds the output record for one product of the given locale.
  const toRecord = (product, locale, otherLocale, pathPrefix) => {
    const key = `${product.id}_${locale}`;
    const priceInfo = priceData.get(key) || {};
    const translationKey = product.slug;
    const match = productTranslations[translationKey];
    return {
      id: product.id,
      translationKey: translationKey,
      locale,
      slug: product.slug,
      path: `${pathPrefix}/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: priceInfo.regularPrice || '',
      salePrice: priceInfo.salePrice || '',
      currency: priceInfo.currency || 'EUR',
      stockStatus: priceInfo.stockStatus || 'instock',
      categories: product.categories,
      attributes: product.attributes,
      variations: variationData.get(key) || [],
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...productsEN.map((product) => toRecord(product, 'en', 'de', '')),
    ...productsDE.map((product) => toRecord(product, 'de', 'en', '/de'))
  ];
}
/**
 * Process pages.
 *
 * Converts exported English and German pages into the processed record shape:
 * a plain-text title (tags stripped, entities decoded), sanitized content, a
 * locale-specific path and a link to the translated counterpart when the
 * mapping has an entry for the page slug.
 *
 * @param {Array<Object>} pagesEN - Exported English pages.
 * @param {Array<Object>} pagesDE - Exported German pages.
 * @param {Object} translationMapping - Mapping file content; its `pages`
 *   section is looked up by slug. A missing section means "no translations".
 * @returns {Array<Object>} English records followed by German records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  const pageTranslations = (translationMapping && translationMapping.pages) || {};

  // Builds the output record for one page of the given locale.
  const toRecord = (page, locale, otherLocale, pathPrefix) => {
    const translationKey = page.slug;
    const match = pageTranslations[translationKey];
    // A page without a title yields an empty title instead of a TypeError.
    const rawTitle = String(page.titleHtml || '').replace(/<[^>]*>/g, '');
    return {
      id: page.id,
      translationKey: translationKey,
      locale,
      slug: page.slug,
      path: `${pathPrefix}/${page.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      excerptHtml: page.excerptHtml || '',
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...pagesEN.map((page) => toRecord(page, 'en', 'de', '')),
    ...pagesDE.map((page) => toRecord(page, 'de', 'en', '/de'))
  ];
}
/**
 * Process posts.
 *
 * Converts exported English and German blog posts into the processed record
 * shape: a plain-text title (tags stripped, entities decoded), sanitized
 * content, a locale-specific `/blog/` path and a link to the translated
 * counterpart when the mapping has an entry for the post slug.
 *
 * @param {Array<Object>} postsEN - Exported English posts.
 * @param {Array<Object>} postsDE - Exported German posts.
 * @param {Object} translationMapping - Mapping file content; its `posts`
 *   section is looked up by slug. A missing section means "no translations".
 * @returns {Array<Object>} English records followed by German records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  const postTranslations = (translationMapping && translationMapping.posts) || {};

  // Builds the output record for one post of the given locale.
  const toRecord = (post, locale, otherLocale, pathPrefix) => {
    const translationKey = post.slug;
    const match = postTranslations[translationKey];
    // A post without a title yields an empty title instead of a TypeError.
    const rawTitle = String(post.titleHtml || '').replace(/<[^>]*>/g, '');
    return {
      id: post.id,
      translationKey: translationKey,
      locale,
      slug: post.slug,
      path: `${pathPrefix}/blog/${post.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: post.excerptHtml || '',
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...postsEN.map((post) => toRecord(post, 'en', 'de', '')),
    ...postsDE.map((post) => toRecord(post, 'de', 'en', '/de'))
  ];
}
/**
 * Process product categories.
 *
 * Converts exported English and German product categories into the processed
 * record shape with a locale-specific `/product-category/` path and a link to
 * the translated counterpart when the mapping has an entry for the slug.
 *
 * @param {Array<Object>} categoriesEN - Exported English categories.
 * @param {Array<Object>} categoriesDE - Exported German categories.
 * @param {Object} translationMapping - Mapping file content; its
 *   `productCategories` section is looked up by slug. A missing section means
 *   "no translations".
 * @returns {Array<Object>} English records followed by German records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  const categoryTranslations = (translationMapping && translationMapping.productCategories) || {};

  // Builds the output record for one category of the given locale.
  const toRecord = (category, locale, otherLocale, pathPrefix) => {
    const translationKey = category.slug;
    const match = categoryTranslations[translationKey];
    return {
      id: category.id,
      translationKey: translationKey,
      locale,
      slug: category.slug,
      name: category.name,
      path: `${pathPrefix}/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map((category) => toRecord(category, 'en', 'de', '')),
    ...categoriesDE.map((category) => toRecord(category, 'de', 'en', '/de'))
  ];
}
/**
 * Build the media manifest.
 *
 * Each exported media item is reduced to the fields the site needs, with the
 * snake_case `mime_type` exposed as `mimeType` and a `localPath` pointing at
 * `/media/<filename>`.
 *
 * @param {Array<Object>} media - Exported media items.
 * @returns {Array<Object>} One manifest entry per input item, in order.
 */
function processMedia(media) {
  const toManifestEntry = ({ id, filename, url, alt, width, height, mime_type: mimeType }) => {
    const localPath = `/media/${filename}`;
    return { id, filename, url, localPath, alt, width, height, mimeType };
  };
  return media.map((entry) => toManifestEntry(entry));
}
/**
 * Build a lookup from original media URL to local `/media/<filename>` path,
 * for use in URL replacement.
 *
 * Items without a (truthy) `url` are skipped; when the same URL occurs more
 * than once, the last item wins.
 *
 * @param {Array<Object>} media - Exported media items.
 * @returns {Object<string, string>} Map of source URL to local path.
 */
function generateAssetMap(media) {
  return media.reduce((lookup, { url, filename }) => {
    if (!url) {
      return lookup;
    }
    lookup[url] = `/media/${filename}`;
    return lookup;
  }, {});
}
/**
 * Main processing function.
 *
 * Loads the newest raw export, processes pages, posts, categories, media and
 * (through the WooCommerce API) products, then writes the combined
 * `wordpress-data.json` plus one JSON file per content type into the
 * processed directory and prints a summary.
 *
 * A raw file that cannot be read or parsed is reported and replaced by an
 * empty value of the matching shape (array for lists, object for
 * `translation-mapping.json` and `site-info.json`), so one missing file does
 * not abort the whole run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js with WooCommerce Integration');
  console.log('==========================================================\n');

  // Load raw data
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // Guarantee every mapping section exists so slug lookups never hit undefined.
  const translationMapping = Object.assign(
    { pages: {}, posts: {}, products: {}, productCategories: {} },
    loadJSON('translation-mapping.json', {})
  );
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);
  // Process products with WooCommerce API
  const products = await processProductsWithWooCommerce(productsEN, productsDE, translationMapping);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Writes one pretty-printed JSON file into the processed directory.
  const writeJSON = (fileName, value) => {
    const target = path.join(PROCESSED_DIR, fileName);
    fs.writeFileSync(target, JSON.stringify(value, null, 2));
    return target;
  };

  // Save processed data
  const outputPath = writeJSON('wordpress-data.json', processedData);
  // Save individual files for easier access
  writeJSON('pages.json', pages);
  writeJSON('posts.json', posts);
  // Always write products.json with the processed data
  // Even if WooCommerce data is missing, we still want the base product structure
  writeJSON('products.json', products);

  // Report on WooCommerce data quality
  const productsWithPrices = products.filter((p) => p.regularPrice).length;
  const productsWithVariations = products.filter((p) => p.variations && p.variations.length > 0).length;
  console.log('📊 WooCommerce Data Quality:');
  console.log(` Products with prices: ${productsWithPrices}/${products.length}`);
  console.log(` Products with variations: ${productsWithVariations}/${products.length}`);
  if (productsWithPrices === 0 && productsWithVariations === 0) {
    console.log('⚠️ Warning: No WooCommerce pricing or variation data was retrieved');
    console.log(' Products written with empty price fields\n');
  } else {
    console.log('✅ WooCommerce data integrated successfully\n');
  }

  writeJSON('categories.json', categories);
  writeJSON('media.json', processedMedia);
  writeJSON('asset-map.json', assetMap);

  // Summary
  console.log('\n✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);
  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  console.log(` ${path.join(PROCESSED_DIR, 'pages.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'posts.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'products.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'categories.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'media.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'asset-map.json')}\n`);

  // Sample data: prefer a product that actually has a price
  if (products.length > 0) {
    console.log('📦 Sample Product with WooCommerce Data:');
    const sampleProduct = products.find((p) => p.regularPrice) || products[0];
    console.log(` Name: ${sampleProduct.name}`);
    console.log(` SKU: ${sampleProduct.sku}`);
    console.log(` Price: ${sampleProduct.regularPrice} ${sampleProduct.currency}`);
    console.log(` Sale Price: ${sampleProduct.salePrice || 'N/A'}`);
    console.log(` Variations: ${sampleProduct.variations.length}`);
    console.log(` Locale: ${sampleProduct.locale}\n`);
  }
  console.log('💡 Next: Ready for Next.js project setup with complete product data!');
}
/**
 * Helper function to get the latest export directory.
 *
 * "Latest" is the sub-directory of RAW_DIR whose name sorts last in plain
 * string order, so directory names are expected to sort chronologically
 * (e.g. ISO dates).
 *
 * @returns {string} Absolute path of the newest export directory.
 * @throws {Error} When RAW_DIR cannot be read or contains no sub-directory.
 */
function getLatestExportDir() {
  let entries;
  try {
    entries = fs.readdirSync(RAW_DIR);
  } catch (error) {
    const wrapped = new Error(`Cannot read raw export directory ${RAW_DIR}: ${error.message}`);
    wrapped.cause = error;
    throw wrapped;
  }
  const dirs = entries.filter((name) => fs.statSync(path.join(RAW_DIR, name)).isDirectory());
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }
  dirs.sort();
  return path.join(RAW_DIR, dirs[dirs.length - 1]);
}
if (require.main === module) {
main().catch(console.error);
}
module.exports = {
processPages,
processPosts,
processProductCategories,
processProductsWithWooCommerce,
processMedia,
generateAssetMap,
decodeHTMLEntities,
sanitizeHTML
};