#!/usr/bin/env node /** * WordPress → Next.js Data Processing Pipeline with WooCommerce API Integration * Transforms raw WordPress data into Next.js compatible format with prices and variations */ const fs = require('fs'); const path = require('path'); const https = require('https'); const dotenv = require('dotenv'); // Load environment variables from .env file dotenv.config(); const DATA_DIR = path.join(__dirname, '..', 'data'); const RAW_DIR = path.join(DATA_DIR, 'raw'); const PROCESSED_DIR = path.join(DATA_DIR, 'processed'); // Create processed directory if (!fs.existsSync(PROCESSED_DIR)) { fs.mkdirSync(PROCESSED_DIR, { recursive: true }); } // WooCommerce API Configuration from .env const WOOCOMMERCE_CONFIG = { url: process.env.WOOCOMMERCE_URL || 'https://klz-cables.com', consumerKey: process.env.WOOCOMMERCE_CONSUMER_KEY, consumerSecret: process.env.WOOCOMMERCE_CONSUMER_SECRET, apiVersion: 'wc/v3' }; // Debug: Check if credentials are loaded if (!WOOCOMMERCE_CONFIG.consumerKey || !WOOCOMMERCE_CONFIG.consumerSecret) { console.error('❌ Missing WooCommerce credentials in environment'); console.error('WOOCOMMERCE_CONSUMER_KEY:', WOOCOMMERCE_CONFIG.consumerKey ? '✓ Loaded' : '❌ Missing'); console.error('WOOCOMMERCE_CONSUMER_SECRET:', WOOCOMMERCE_CONFIG.consumerSecret ? '✓ Loaded' : '❌ Missing'); process.exit(1); } else { console.log('✅ WooCommerce credentials loaded successfully'); } // Rate limiting configuration const RATE_LIMIT = { maxConcurrent: 2, // Max concurrent API calls delayBetweenCalls: 100, // ms between calls timeout: 30000 // 30 second timeout }; // API call queue and tracking let apiQueue = []; let activeRequests = 0; let apiStats = { total: 0, success: 0, failed: 0, retries: 0 }; /** * WooCommerce API Client */ class WooCommerceAPI { constructor(config) { this.config = config; this.baseURL = `${config.url}/wp-json/${config.apiVersion}`; } /** * Make authenticated API request */ async request(endpoint, method = 'GET', data = null) { return new Promise((resolve, reject) => { const url = new URL(`${this.baseURL}${endpoint}`); // Add authentication const auth = Buffer.from(`${this.config.consumerKey}:${this.config.consumerSecret}`).toString('base64'); const options = { hostname: url.hostname, port: url.port || 443, path: url.pathname + url.search, method, headers: { 'Authorization': `Basic ${auth}`, 'Content-Type': 'application/json', 'User-Agent': 'KLZ-Data-Processor/1.0' }, timeout: RATE_LIMIT.timeout }; // Log the request console.log(`🌐 API Request: ${method} ${url.pathname}`); const req = https.request(options, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { apiStats.total++; if (res.statusCode >= 200 && res.statusCode < 300) { try { const parsed = JSON.parse(data); apiStats.success++; resolve(parsed); } catch (e) { apiStats.failed++; reject(new Error(`JSON parse error: ${e.message}`)); } } else { apiStats.failed++; reject(new Error(`HTTP ${res.statusCode}: ${data}`)); } }); }); req.on('error', (err) => { apiStats.failed++; reject(err); }); req.on('timeout', () => { apiStats.failed++; req.destroy(); reject(new Error('Request timeout')); }); // Add request body for POST/PUT if (data && (method === 'POST' || method === 'PUT')) { const body = JSON.stringify(data); req.write(body); } req.end(); }); } /** * Get product by ID */ async getProduct(productId) { try { return await this.request(`/products/${productId}`); } catch (error) { console.error(`❌ Failed to fetch product ${productId}:`, error.message); return null; } } /** * Get product variations */ async getProductVariations(productId) { try { return await this.request(`/products/${productId}/variations?per_page=100`); } catch (error) { console.error(`❌ Failed to fetch variations for product ${productId}:`, error.message); return []; } } /** * Process API queue with rate limiting */ async processQueue(tasks, progressCallback) { const results = []; for (let i = 0; i < tasks.length; i++) { // Wait for rate limit if (i > 0) { await new Promise(resolve => setTimeout(resolve, RATE_LIMIT.delayBetweenCalls)); } const task = tasks[i]; let attempt = 0; let success = false; let result = null; // Retry logic while (attempt < 3 && !success) { try { if (progressCallback) { progressCallback(i + 1, tasks.length, task.label); } result = await task.fn(); success = true; if (attempt > 0) { apiStats.retries++; console.log(`✅ Retry successful for: ${task.label}`); } } catch (error) { attempt++; if (attempt < 3) { console.log(`⚠️ Retry ${attempt}/3 for: ${task.label} - ${error.message}`); await new Promise(resolve => setTimeout(resolve, 1000 * attempt)); // Exponential backoff } else { console.log(`❌ Failed after 3 attempts: ${task.label} - ${error.message}`); } } } results.push(result); } return results; } } /** * Decode HTML entities in text - comprehensive handling */ function decodeHTMLEntities(text) { if (!text) return ''; // First, handle numeric entities (decimal and hex) let result = text .replace(/&#(\d+);/g, (match, dec) => { const char = String.fromCharCode(parseInt(dec, 10)); return char; }) .replace(/&#x([0-9a-fA-F]+);/g, (match, hex) => { const char = String.fromCharCode(parseInt(hex, 16)); return char; }); // Handle common named entities and Unicode characters const entityMap = { ' ': ' ', '‘': "'", '’': "'", '“': '"', '”': '"', '″': '"', '–': '-', '—': '—', '…': '…', '•': '•', '€': '€', '©': '©', '®': '®', '™': '™', '°': '°', '±': '±', '×': '×', '÷': '÷', '−': '−', '¢': '¢', '£': '£', '¥': '¥', '§': '§', '¶': '¶', 'µ': 'µ', '«': '«', '»': '»', '·': '·' }; // Replace all named entities for (const [entity, char] of Object.entries(entityMap)) { result = result.replace(new RegExp(entity, 'g'), char); } // Clean up any remaining ampersand patterns result = result.replace(/&([a-zA-Z]+);/g, (match, name) => { // If it's not in our map, try to decode it or leave as is return entityMap[`&${name};`] || match; }); return result; } /** * HTML sanitization - preserve content but clean dangerous elements */ function sanitizeHTML(html) { if (!html) return ''; let sanitized = html; // Remove script tags and inline handlers (security) sanitized = sanitized.replace(/.*?<\/script>/gis, ''); sanitized = sanitized.replace(/\son\w+=".*?"/gi, ''); // Remove WPBakery shortcode wrappers but keep their content sanitized = sanitized.replace(/\[vc_row.*?\]/gi, '
'); sanitized = sanitized.replace(/\[\/vc_row\]/gi, '
'); sanitized = sanitized.replace(/\[vc_column.*?\]/gi, '
'); sanitized = sanitized.replace(/\[\/vc_column\]/gi, '
'); // Remove other shortcodes but keep text content sanitized = sanitized.replace(/\[vc_column_text.*?\]/gi, '
'); sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '
'); // Handle Nectar shortcodes sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1'); sanitized = sanitized.replace(/\[nectar.*?\]/gi, ''); // Remove all remaining shortcodes sanitized = sanitized.replace(/\[.*?\]/g, ''); // Remove empty paragraphs and divs sanitized = sanitized.replace(/]*>\s*<\/p>/gi, ''); sanitized = sanitized.replace(/]*>\s*<\/div>/gi, ''); // Normalize whitespace sanitized = sanitized.replace(/\s+/g, ' ').trim(); return sanitized; } /** * Process products with WooCommerce API integration */ async function processProductsWithWooCommerce(productsEN, productsDE, translationMapping) { const api = new WooCommerceAPI(WOOCOMMERCE_CONFIG); const processed = []; console.log(`\n🚀 Starting WooCommerce API integration for ${productsEN.length} products...`); // Create tasks for price and variation fetching const tasks = []; productsEN.forEach(product => { tasks.push({ label: `${product.name} (EN) - Prices`, fn: async () => { const wooProduct = await api.getProduct(product.id); if (wooProduct) { return { productId: product.id, locale: 'en', regularPrice: wooProduct.regular_price || '', salePrice: wooProduct.sale_price || '', currency: wooProduct.currency || 'EUR', stockStatus: wooProduct.stock_status || 'instock' }; } return null; } }); tasks.push({ label: `${product.name} (EN) - Variations`, fn: async () => { const variations = await api.getProductVariations(product.id); return { productId: product.id, locale: 'en', variations: variations || [] }; } }); }); productsDE.forEach(product => { tasks.push({ label: `${product.name} (DE) - Prices`, fn: async () => { const wooProduct = await api.getProduct(product.id); if (wooProduct) { return { productId: product.id, locale: 'de', regularPrice: wooProduct.regular_price || '', salePrice: wooProduct.sale_price || '', currency: wooProduct.currency || 'EUR', stockStatus: wooProduct.stock_status || 'instock' }; } return null; } }); tasks.push({ label: `${product.name} (DE) - Variations`, fn: async () => { const variations = await api.getProductVariations(product.id); return { productId: product.id, locale: 'de', variations: variations || [] }; } }); }); // Progress callback const progressCallback = (current, total, label) => { const progress = Math.round((current / total) * 100); process.stdout.write(`\r📊 Progress: ${current}/${total} (${progress}%) - ${label}`); }; // Process all tasks const results = await api.processQueue(tasks, progressCallback); // Clear progress line process.stdout.write('\n'); // Organize results const priceData = {}; const variationData = {}; results.forEach(result => { if (!result) return; const key = `${result.productId}_${result.locale}`; if (result.variations) { variationData[key] = result.variations; } else { priceData[key] = { regularPrice: result.regularPrice, salePrice: result.salePrice, currency: result.currency, stockStatus: result.stockStatus }; } }); console.log(`\n📈 API Statistics:`); console.log(` Total requests: ${apiStats.total}`); console.log(` Successful: ${apiStats.success}`); console.log(` Failed: ${apiStats.failed}`); console.log(` Retries: ${apiStats.retries}`); // Process English products productsEN.forEach(product => { const key = `${product.id}_en`; const priceInfo = priceData[key] || {}; const variations = variationData[key] || []; const translationKey = product.slug; const deMatch = translationMapping.products[translationKey]; processed.push({ id: product.id, translationKey: translationKey, locale: 'en', slug: product.slug, path: `/product/${product.slug}`, name: product.name, shortDescriptionHtml: product.shortDescriptionHtml, descriptionHtml: sanitizeHTML(product.descriptionHtml), images: product.images, featuredImage: product.featuredImage, sku: product.sku, regularPrice: priceInfo.regularPrice || '', salePrice: priceInfo.salePrice || '', currency: priceInfo.currency || 'EUR', stockStatus: priceInfo.stockStatus || 'instock', categories: product.categories, attributes: product.attributes, variations: variations, updatedAt: product.updatedAt, translation: deMatch ? { locale: 'de', id: deMatch.de } : null }); }); // Process German products productsDE.forEach(product => { const key = `${product.id}_de`; const priceInfo = priceData[key] || {}; const variations = variationData[key] || []; const translationKey = product.slug; const enMatch = translationMapping.products[translationKey]; processed.push({ id: product.id, translationKey: translationKey, locale: 'de', slug: product.slug, path: `/de/product/${product.slug}`, name: product.name, shortDescriptionHtml: product.shortDescriptionHtml, descriptionHtml: sanitizeHTML(product.descriptionHtml), images: product.images, featuredImage: product.featuredImage, sku: product.sku, regularPrice: priceInfo.regularPrice || '', salePrice: priceInfo.salePrice || '', currency: priceInfo.currency || 'EUR', stockStatus: priceInfo.stockStatus || 'instock', categories: product.categories, attributes: product.attributes, variations: variations, updatedAt: product.updatedAt, translation: enMatch ? { locale: 'en', id: enMatch.en } : null }); }); return processed; } /** * Process pages */ function processPages(pagesEN, pagesDE, translationMapping) { const processed = []; // Process English pages pagesEN.forEach(page => { const translationKey = page.slug; const deMatch = translationMapping.pages[translationKey]; const rawTitle = page.titleHtml.replace(/<[^>]*>/g, ''); const decodedTitle = decodeHTMLEntities(rawTitle); processed.push({ id: page.id, translationKey: translationKey, locale: 'en', slug: page.slug, path: `/${page.slug}`, title: decodedTitle, titleHtml: page.titleHtml, contentHtml: sanitizeHTML(page.contentHtml), excerptHtml: page.excerptHtml || '', featuredImage: page.featuredImage, updatedAt: page.updatedAt, translation: deMatch ? { locale: 'de', id: deMatch.de } : null }); }); // Process German pages pagesDE.forEach(page => { const translationKey = page.slug; const enMatch = translationMapping.pages[translationKey]; const rawTitle = page.titleHtml.replace(/<[^>]*>/g, ''); const decodedTitle = decodeHTMLEntities(rawTitle); processed.push({ id: page.id, translationKey: translationKey, locale: 'de', slug: page.slug, path: `/de/${page.slug}`, title: decodedTitle, titleHtml: page.titleHtml, contentHtml: sanitizeHTML(page.contentHtml), excerptHtml: page.excerptHtml || '', featuredImage: page.featuredImage, updatedAt: page.updatedAt, translation: enMatch ? { locale: 'en', id: enMatch.en } : null }); }); return processed; } /** * Process posts */ function processPosts(postsEN, postsDE, translationMapping) { const processed = []; postsEN.forEach(post => { const translationKey = post.slug; const deMatch = translationMapping.posts[translationKey]; const rawTitle = post.titleHtml.replace(/<[^>]*>/g, ''); const decodedTitle = decodeHTMLEntities(rawTitle); processed.push({ id: post.id, translationKey: translationKey, locale: 'en', slug: post.slug, path: `/blog/${post.slug}`, title: decodedTitle, titleHtml: post.titleHtml, contentHtml: sanitizeHTML(post.contentHtml), excerptHtml: post.excerptHtml || '', featuredImage: post.featuredImage, datePublished: post.datePublished, updatedAt: post.updatedAt, translation: deMatch ? { locale: 'de', id: deMatch.de } : null }); }); postsDE.forEach(post => { const translationKey = post.slug; const enMatch = translationMapping.posts[translationKey]; const rawTitle = post.titleHtml.replace(/<[^>]*>/g, ''); const decodedTitle = decodeHTMLEntities(rawTitle); processed.push({ id: post.id, translationKey: translationKey, locale: 'de', slug: post.slug, path: `/de/blog/${post.slug}`, title: decodedTitle, titleHtml: post.titleHtml, contentHtml: sanitizeHTML(post.contentHtml), excerptHtml: post.excerptHtml || '', featuredImage: post.featuredImage, datePublished: post.datePublished, updatedAt: post.updatedAt, translation: enMatch ? { locale: 'en', id: enMatch.en } : null }); }); return processed; } /** * Process product categories */ function processProductCategories(categoriesEN, categoriesDE, translationMapping) { const processed = []; categoriesEN.forEach(category => { const translationKey = category.slug; const deMatch = translationMapping.productCategories[translationKey]; processed.push({ id: category.id, translationKey: translationKey, locale: 'en', slug: category.slug, name: category.name, path: `/product-category/${category.slug}`, description: category.description, count: category.count, translation: deMatch ? { locale: 'de', id: deMatch.de } : null }); }); categoriesDE.forEach(category => { const translationKey = category.slug; const enMatch = translationMapping.productCategories[translationKey]; processed.push({ id: category.id, translationKey: translationKey, locale: 'de', slug: category.slug, name: category.name, path: `/de/product-category/${category.slug}`, description: category.description, count: category.count, translation: enMatch ? { locale: 'en', id: enMatch.en } : null }); }); return processed; } /** * Process media manifest */ function processMedia(media) { return media.map(item => ({ id: item.id, filename: item.filename, url: item.url, localPath: `/media/${item.filename}`, alt: item.alt, width: item.width, height: item.height, mimeType: item.mime_type })); } /** * Generate asset map for URL replacement */ function generateAssetMap(media) { const map = {}; media.forEach(item => { if (item.url) { map[item.url] = `/media/${item.filename}`; } }); return map; } /** * Main processing function */ async function main() { const exportDir = getLatestExportDir(); console.log('🔄 Processing WordPress Data for Next.js with WooCommerce Integration'); console.log('==========================================================\n'); // Load raw data const loadJSON = (file) => { try { return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8')); } catch (e) { console.error(`❌ Failed to load ${file}:`, e.message); return []; } }; const translationMapping = loadJSON('translation-mapping.json'); const pagesEN = loadJSON('pages.en.json'); const pagesDE = loadJSON('pages.de.json'); const postsEN = loadJSON('posts.en.json'); const postsDE = loadJSON('posts.de.json'); const productsEN = loadJSON('products.en.json'); const productsDE = loadJSON('products.de.json'); const categoriesEN = loadJSON('product-categories.en.json'); const categoriesDE = loadJSON('product-categories.de.json'); const media = loadJSON('media.json'); const redirects = loadJSON('redirects.json'); const siteInfo = loadJSON('site-info.json'); console.log('📊 Processing content types...\n'); // Process each content type const pages = processPages(pagesEN, pagesDE, translationMapping); const posts = processPosts(postsEN, postsDE, translationMapping); const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping); const processedMedia = processMedia(media); const assetMap = generateAssetMap(media); // Process products with WooCommerce API const products = await processProductsWithWooCommerce(productsEN, productsDE, translationMapping); // Create processed data structure const processedData = { site: { title: siteInfo.siteTitle, description: siteInfo.siteDescription, baseUrl: siteInfo.baseUrl, defaultLocale: siteInfo.defaultLocale || 'en', locales: ['en', 'de'] }, content: { pages, posts, products, categories }, assets: { media: processedMedia, map: assetMap }, redirects, exportDate: new Date().toISOString() }; // Save processed data const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json'); fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2)); // Save individual files for easier access fs.writeFileSync(path.join(PROCESSED_DIR, 'pages.json'), JSON.stringify(pages, null, 2)); fs.writeFileSync(path.join(PROCESSED_DIR, 'posts.json'), JSON.stringify(posts, null, 2)); // Always write products.json with the processed data // Even if WooCommerce data is missing, we still want the base product structure fs.writeFileSync(path.join(PROCESSED_DIR, 'products.json'), JSON.stringify(products, null, 2)); // Report on WooCommerce data quality const productsWithPrices = products.filter(p => p.regularPrice).length; const productsWithVariations = products.filter(p => p.variations && p.variations.length > 0).length; console.log('📊 WooCommerce Data Quality:'); console.log(` Products with prices: ${productsWithPrices}/${products.length}`); console.log(` Products with variations: ${productsWithVariations}/${products.length}`); if (productsWithPrices === 0 && productsWithVariations === 0) { console.log('⚠️ Warning: No WooCommerce pricing or variation data was retrieved'); console.log(' Products written with empty price fields\n'); } else { console.log('✅ WooCommerce data integrated successfully\n'); } fs.writeFileSync(path.join(PROCESSED_DIR, 'categories.json'), JSON.stringify(categories, null, 2)); fs.writeFileSync(path.join(PROCESSED_DIR, 'media.json'), JSON.stringify(processedMedia, null, 2)); fs.writeFileSync(path.join(PROCESSED_DIR, 'asset-map.json'), JSON.stringify(assetMap, null, 2)); // Summary console.log('\n✅ Data Processing Complete\n'); console.log('📦 Processed Content:'); console.log(` Pages: ${pages.length} (with translations)`); console.log(` Posts: ${posts.length} (with translations)`); console.log(` Products: ${products.length} (with translations)`); console.log(` Categories: ${categories.length} (with translations)`); console.log(` Media: ${processedMedia.length} files`); console.log(` Redirects: ${redirects.length} rules\n`); console.log('📁 Output Files:'); console.log(` ${outputPath}`); console.log(` ${path.join(PROCESSED_DIR, 'pages.json')}`); console.log(` ${path.join(PROCESSED_DIR, 'posts.json')}`); console.log(` ${path.join(PROCESSED_DIR, 'products.json')}`); console.log(` ${path.join(PROCESSED_DIR, 'categories.json')}`); console.log(` ${path.join(PROCESSED_DIR, 'media.json')}`); console.log(` ${path.join(PROCESSED_DIR, 'asset-map.json')}\n`); // Sample data if (products.length > 0) { console.log('📦 Sample Product with WooCommerce Data:'); const sampleProduct = products.find(p => p.regularPrice) || products[0]; console.log(` Name: ${sampleProduct.name}`); console.log(` SKU: ${sampleProduct.sku}`); console.log(` Price: ${sampleProduct.regularPrice} ${sampleProduct.currency}`); console.log(` Sale Price: ${sampleProduct.salePrice || 'N/A'}`); console.log(` Variations: ${sampleProduct.variations.length}`); console.log(` Locale: ${sampleProduct.locale}\n`); } console.log('💡 Next: Ready for Next.js project setup with complete product data!'); } // Helper function to get latest export directory function getLatestExportDir() { const dirs = fs.readdirSync(RAW_DIR).filter(f => { const stat = fs.statSync(path.join(RAW_DIR, f)); return stat.isDirectory(); }); dirs.sort().reverse(); return path.join(RAW_DIR, dirs[0]); } if (require.main === module) { main().catch(console.error); } module.exports = { processPages, processPosts, processProductCategories, processProductsWithWooCommerce, processMedia, generateAssetMap, decodeHTMLEntities, sanitizeHTML };