Files
klz-cables.com/scripts/process-data-with-woocommerce.js
2026-01-06 16:52:59 +01:00

843 lines
25 KiB
JavaScript
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* WordPress → Next.js Data Processing Pipeline with WooCommerce API Integration
* Transforms raw WordPress data into Next.js compatible format with prices and variations
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
const dotenv = require('dotenv');
// Load environment variables from .env file
// (must run before WOOCOMMERCE_CONFIG below reads process.env).
dotenv.config();
// Directory layout, resolved relative to this script's parent directory:
//   data/raw        - input exports (read by getLatestExportDir)
//   data/processed  - output of this script
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
// Create processed directory
if (!fs.existsSync(PROCESSED_DIR)) {
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
// WooCommerce API Configuration from .env
// `url` falls back to the production site when WOOCOMMERCE_URL is unset;
// the key and secret have no fallback and are validated right below.
const WOOCOMMERCE_CONFIG = {
url: process.env.WOOCOMMERCE_URL || 'https://klz-cables.com',
consumerKey: process.env.WOOCOMMERCE_CONSUMER_KEY,
consumerSecret: process.env.WOOCOMMERCE_CONSUMER_SECRET,
apiVersion: 'wc/v3'
};
// Debug: Check if credentials are loaded
// NOTE: this check runs at module load time, so it also terminates the process
// when this file is merely require()d (it exports helpers at the bottom) and
// the credentials are absent. Only presence is reported, never the values.
if (!WOOCOMMERCE_CONFIG.consumerKey || !WOOCOMMERCE_CONFIG.consumerSecret) {
console.error('❌ Missing WooCommerce credentials in environment');
console.error('WOOCOMMERCE_CONSUMER_KEY:', WOOCOMMERCE_CONFIG.consumerKey ? '✓ Loaded' : '❌ Missing');
console.error('WOOCOMMERCE_CONSUMER_SECRET:', WOOCOMMERCE_CONFIG.consumerSecret ? '✓ Loaded' : '❌ Missing');
process.exit(1);
} else {
console.log('✅ WooCommerce credentials loaded successfully');
}
// Rate limiting configuration
// NOTE(review): `maxConcurrent` is not referenced anywhere in the visible part
// of this file; WooCommerceAPI.processQueue runs tasks strictly one at a time.
const RATE_LIMIT = {
maxConcurrent: 2, // Max concurrent API calls
delayBetweenCalls: 100, // ms between calls
timeout: 30000 // 30 second timeout
};
// API call queue and tracking
// NOTE(review): `apiQueue` and `activeRequests` are not referenced anywhere in
// the visible part of this file — confirm before removing.
let apiQueue = [];
let activeRequests = 0;
// Counters updated by WooCommerceAPI and printed by
// processProductsWithWooCommerce after the queue has been processed.
let apiStats = {
total: 0,
success: 0,
failed: 0,
retries: 0
};
/**
 * WooCommerce REST API client.
 *
 * Wraps `https.request` with HTTP Basic authentication (consumer key/secret)
 * and records request outcomes in the module-level `apiStats` counters.
 */
class WooCommerceAPI {
  /**
   * @param {{url: string, consumerKey: string, consumerSecret: string, apiVersion: string}} config
   *   Connection settings. The base URL is built as `${url}/wp-json/${apiVersion}`.
   */
  constructor(config) {
    this.config = config;
    this.baseURL = `${config.url}/wp-json/${config.apiVersion}`;
  }

  /**
   * Make an authenticated API request.
   *
   * Every issued request increments `apiStats.total` once and then exactly one
   * of `apiStats.success` / `apiStats.failed`, whichever way it ends.
   *
   * @param {string} endpoint - Path (and optional query) appended to the base URL.
   * @param {string} [method='GET'] - HTTP method.
   * @param {*} [data=null] - JSON-serialisable body; only sent for POST and PUT.
   * @returns {Promise<*>} Parsed JSON body of a 2xx response.
   * @throws {Error} (as rejection) on non-2xx status, invalid JSON, network
   *   error, timeout, or a connection that closes mid-response.
   */
  async request(endpoint, method = 'GET', data = null) {
    return new Promise((resolve, reject) => {
      const url = new URL(`${this.baseURL}${endpoint}`);
      // Add authentication
      const auth = Buffer.from(`${this.config.consumerKey}:${this.config.consumerSecret}`).toString('base64');
      const options = {
        hostname: url.hostname,
        port: url.port || 443,
        path: url.pathname + url.search,
        method,
        headers: {
          'Authorization': `Basic ${auth}`,
          'Content-Type': 'application/json',
          'User-Agent': 'KLZ-Data-Processor/1.0'
        },
        timeout: RATE_LIMIT.timeout
      };
      // Log the request
      console.log(`🌐 API Request: ${method} ${url.pathname}`);

      // Count the request when it is issued so that requests ending in an
      // error or timeout are part of the total as well.
      apiStats.total++;

      // Several events can report the same failure (e.g. 'timeout' followed by
      // the 'error' caused by destroying the request). Settle and count once.
      let settled = false;
      const succeed = (value) => {
        if (settled) return;
        settled = true;
        apiStats.success++;
        resolve(value);
      };
      const fail = (error) => {
        if (settled) return;
        settled = true;
        apiStats.failed++;
        reject(error);
      };

      const req = https.request(options, (res) => {
        // Decode as a UTF-8 stream; concatenating raw Buffer chunks would
        // corrupt multi-byte characters split across chunk boundaries.
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('end', () => {
          if (res.statusCode < 200 || res.statusCode >= 300) {
            fail(new Error(`HTTP ${res.statusCode}: ${body}`));
            return;
          }
          let parsed;
          try {
            parsed = JSON.parse(body);
          } catch (e) {
            fail(new Error(`JSON parse error: ${e.message}`));
            return;
          }
          succeed(parsed);
        });
        // A connection dropped mid-body never emits 'end'; without these two
        // handlers the promise would stay pending forever.
        res.on('error', fail);
        res.on('close', () => {
          if (!res.complete) {
            fail(new Error('Connection closed before the response completed'));
          }
        });
      });

      req.on('error', fail);
      req.on('timeout', () => {
        // Reject first: destroying the request emits an additional 'error'
        // event, which the settled guard then ignores.
        fail(new Error('Request timeout'));
        req.destroy();
      });

      // Add request body for POST/PUT
      if (data && (method === 'POST' || method === 'PUT')) {
        req.write(JSON.stringify(data));
      }
      req.end();
    });
  }

  /**
   * Get product by ID.
   *
   * Failures are logged and converted to `null`, so this method never rejects.
   * As a consequence, a `processQueue` task that only calls this method never
   * reaches the queue's retry path.
   *
   * @param {number|string} productId
   * @returns {Promise<object|null>} The product object, or `null` on failure.
   */
  async getProduct(productId) {
    try {
      return await this.request(`/products/${productId}`);
    } catch (error) {
      console.error(`❌ Failed to fetch product ${productId}:`, error.message);
      return null;
    }
  }

  /**
   * Get product variations (first page only, `per_page=100`).
   *
   * Failures are logged and converted to an empty array; this method never
   * rejects (see the note on `getProduct` about retries).
   *
   * @param {number|string} productId
   * @returns {Promise<Array>} The variations, or `[]` on failure.
   */
  async getProductVariations(productId) {
    try {
      return await this.request(`/products/${productId}/variations?per_page=100`);
    } catch (error) {
      console.error(`❌ Failed to fetch variations for product ${productId}:`, error.message);
      return [];
    }
  }

  /**
   * Run tasks sequentially with a fixed pause between them and up to three
   * attempts per task.
   *
   * @param {Array<{label: string, fn: function(): Promise<*>}>} tasks
   * @param {function(number, number, string): void} [progressCallback]
   *   Called before every attempt with (1-based index, total, label).
   * @returns {Promise<Array>} One entry per task, in order; `null` for a task
   *   whose three attempts all threw.
   */
  async processQueue(tasks, progressCallback) {
    const results = [];
    for (let i = 0; i < tasks.length; i++) {
      // Wait for rate limit
      if (i > 0) {
        await new Promise(resolve => setTimeout(resolve, RATE_LIMIT.delayBetweenCalls));
      }
      const task = tasks[i];
      let attempt = 0;
      let success = false;
      let result = null;
      // Retry logic
      while (attempt < 3 && !success) {
        try {
          if (progressCallback) {
            progressCallback(i + 1, tasks.length, task.label);
          }
          result = await task.fn();
          success = true;
          if (attempt > 0) {
            // Counts tasks that succeeded after at least one retry.
            apiStats.retries++;
            console.log(`✅ Retry successful for: ${task.label}`);
          }
        } catch (error) {
          attempt++;
          if (attempt < 3) {
            console.log(`⚠️ Retry ${attempt}/3 for: ${task.label} - ${error.message}`);
            // Linear backoff: 1s after the first failure, 2s after the second.
            await new Promise(resolve => setTimeout(resolve, 1000 * attempt));
          } else {
            console.log(`❌ Failed after 3 attempts: ${task.label} - ${error.message}`);
          }
        }
      }
      results.push(result);
    }
    return results;
  }
}
/**
 * Decode HTML entities in text and normalise a few typographic characters.
 *
 * Step 1 decodes numeric (`&#8217;`, `&#x2019;`) and common named (`&amp;`,
 * `&nbsp;`, ...) entities in a single pass, so already-decoded output is never
 * decoded a second time (`&amp;lt;` becomes `&lt;`, not `<`).
 * Step 2 maps typographic characters to plain equivalents: non-breaking space
 * to space, curly/prime quotes to straight quotes, en dash to hyphen, and soft
 * hyphens are removed.
 *
 * Unknown named entities and out-of-range numeric entities are left untouched.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Decoded text; `''` for empty/nullish input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  // Named entities -> characters. Written with escapes so the table survives
  // editors and viewers that hide or rewrite look-alike characters.
  const namedEntities = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: '\u00A0',
    lsquo: '\u2018',
    rsquo: '\u2019',
    ldquo: '\u201C',
    rdquo: '\u201D',
    Prime: '\u2033',
    ndash: '\u2013',
    mdash: '\u2014',
    hellip: '\u2026',
    bull: '\u2022',
    euro: '\u20AC',
    copy: '\u00A9',
    reg: '\u00AE',
    trade: '\u2122',
    deg: '\u00B0',
    plusmn: '\u00B1',
    times: '\u00D7',
    divide: '\u00F7',
    shy: '\u00AD',
    cent: '\u00A2',
    pound: '\u00A3',
    yen: '\u00A5',
    sect: '\u00A7',
    para: '\u00B6',
    micro: '\u00B5',
    laquo: '\u00AB',
    raquo: '\u00BB',
    middot: '\u00B7'
  };

  // Typographic characters that are flattened to plain text equivalents.
  const typographic = {
    '\u00A0': ' ',
    '\u2018': "'",
    '\u2019': "'",
    '\u201C': '"',
    '\u201D': '"',
    '\u2033': '"',
    '\u2013': '-',
    '\u00AD': ''
  };

  // fromCodePoint (unlike fromCharCode) handles astral code points such as
  // emoji; it throws on invalid values, so validate the range first.
  const fromCodePoint = (codePoint, fallback) =>
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
      ? String.fromCodePoint(codePoint)
      : fallback;

  const decoded = String(text).replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g,
    (match, dec, hex, name) => {
      if (dec !== undefined) return fromCodePoint(parseInt(dec, 10), match);
      if (hex !== undefined) return fromCodePoint(parseInt(hex, 16), match);
      // Own-property check so names like "constructor" are not "decoded".
      return Object.prototype.hasOwnProperty.call(namedEntities, name)
        ? namedEntities[name]
        : match;
    }
  );

  return decoded.replace(
    /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013\u00AD]/g,
    (char) => typographic[char]
  );
}
/**
 * HTML sanitization - preserve content but clean dangerous elements.
 *
 * - removes `<script>` elements and inline `on*=` handlers (double-quoted,
 *   single-quoted and unquoted values)
 * - converts WPBakery row/column/text shortcodes (including the `_inner`
 *   variants) into balanced `<div>` wrappers
 * - strips remaining shortcode tags while keeping the text between them
 * - removes empty `<p>`/`<div>` elements (repeated until none are left, so
 *   wrappers that only contained empty wrappers disappear too)
 * - collapses whitespace runs to a single space
 *
 * This is regex-based cleanup for content exported from the project's own CMS,
 * not a general-purpose defence against hostile markup.
 *
 * @param {string} html - Raw HTML, possibly containing shortcodes.
 * @returns {string} Cleaned HTML; `''` for empty/nullish input.
 */
function sanitizeHTML(html) {
  if (!html) return '';
  let sanitized = html;

  // Remove script tags and inline handlers (security)
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"']+)/gi, '');

  // WPBakery text blocks. Must run before the column rule: "vc_column_text"
  // starts with "vc_column" and would otherwise be consumed by it.
  sanitized = sanitized.replace(/\[vc_column_text(?:\s[^\]]*)?\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // WPBakery rows/columns. The "_inner" variants are matched on both the
  // opening and the closing tag so every opened <div> is closed again.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?(?:\s[^\]]*)?\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?(?:\s[^\]]*)?\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Handle Nectar shortcodes
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcode tags. Only shortcode-shaped tokens
  // ([name], [name attr="x"], [/name], [name /]) are removed, so ordinary
  // bracketed text such as "[1]" or "[3x1.5]" stays in the content.
  sanitized = sanitized.replace(/\[\/?[a-zA-Z][\w-]*(?:\s[^\]]*)?\/?\]/g, '');

  // Remove empty paragraphs and divs, repeating until the markup is stable.
  let previous;
  do {
    previous = sanitized;
    sanitized = sanitized
      .replace(/<p[^>]*>\s*<\/p>/gi, '')
      .replace(/<div[^>]*>\s*<\/div>/gi, '');
  } while (sanitized !== previous);

  // Normalize whitespace
  sanitized = sanitized.replace(/\s+/g, ' ').trim();
  return sanitized;
}
/**
 * Process products with WooCommerce API integration.
 *
 * For every product (English first, then German) two queue tasks are created:
 * one fetching price/stock data and one fetching variations. The results are
 * merged into the exported product records.
 *
 * NOTE(review): `api.getProduct` / `api.getProductVariations` swallow errors
 * (returning `null` / `[]`), so the tasks built here never throw and the
 * retry logic inside `api.processQueue` is not exercised by them.
 *
 * @param {Array<object>} productsEN - Exported English products.
 * @param {Array<object>} productsDE - Exported German products.
 * @param {object} translationMapping - Mapping whose optional `products`
 *   section is keyed by slug with `{ en, de }` ids. A missing section is
 *   treated as "no translations" instead of throwing.
 * @returns {Promise<Array<object>>} English records followed by German ones.
 */
async function processProductsWithWooCommerce(productsEN, productsDE, translationMapping) {
  const api = new WooCommerceAPI(WOOCOMMERCE_CONFIG);
  const productTranslations = (translationMapping && translationMapping.products) || {};
  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (slug) =>
    Object.prototype.hasOwnProperty.call(productTranslations, slug)
      ? productTranslations[slug]
      : undefined;

  console.log(`\n🚀 Starting WooCommerce API integration for ${productsEN.length} products...`);

  // Build the price task and the variation task for one product.
  const buildTasks = (product, locale) => {
    const tag = locale.toUpperCase();
    return [
      {
        label: `${product.name} (${tag}) - Prices`,
        fn: async () => {
          const wooProduct = await api.getProduct(product.id);
          if (!wooProduct) return null;
          return {
            productId: product.id,
            locale,
            regularPrice: wooProduct.regular_price || '',
            salePrice: wooProduct.sale_price || '',
            currency: wooProduct.currency || 'EUR',
            stockStatus: wooProduct.stock_status || 'instock'
          };
        }
      },
      {
        label: `${product.name} (${tag}) - Variations`,
        fn: async () => {
          const variations = await api.getProductVariations(product.id);
          return {
            productId: product.id,
            locale,
            variations: variations || []
          };
        }
      }
    ];
  };

  // Create tasks for price and variation fetching
  const tasks = [];
  for (const product of productsEN) tasks.push(...buildTasks(product, 'en'));
  for (const product of productsDE) tasks.push(...buildTasks(product, 'de'));

  // Progress callback
  const progressCallback = (current, total, label) => {
    const progress = Math.round((current / total) * 100);
    process.stdout.write(`\r📊 Progress: ${current}/${total} (${progress}%) - ${label}`);
  };

  // Process all tasks
  const results = await api.processQueue(tasks, progressCallback);
  // Clear progress line
  process.stdout.write('\n');

  // Organize results by "<productId>_<locale>"; a result carrying a
  // `variations` property is a variation result, anything else is price data.
  const priceData = {};
  const variationData = {};
  for (const result of results) {
    if (!result) continue;
    const key = `${result.productId}_${result.locale}`;
    if (result.variations) {
      variationData[key] = result.variations;
    } else {
      priceData[key] = {
        regularPrice: result.regularPrice,
        salePrice: result.salePrice,
        currency: result.currency,
        stockStatus: result.stockStatus
      };
    }
  }

  console.log(`\n📈 API Statistics:`);
  console.log(` Total requests: ${apiStats.total}`);
  console.log(` Successful: ${apiStats.success}`);
  console.log(` Failed: ${apiStats.failed}`);
  console.log(` Retries: ${apiStats.retries}`);

  // Build the exported record for one product in the given locale.
  const toRecord = (product, locale) => {
    const isEnglish = locale === 'en';
    const otherLocale = isEnglish ? 'de' : 'en';
    const key = `${product.id}_${locale}`;
    const priceInfo = priceData[key] || {};
    const counterpart = findTranslation(product.slug);
    return {
      id: product.id,
      translationKey: product.slug,
      locale,
      slug: product.slug,
      path: isEnglish ? `/product/${product.slug}` : `/de/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: priceInfo.regularPrice || '',
      salePrice: priceInfo.salePrice || '',
      currency: priceInfo.currency || 'EUR',
      stockStatus: priceInfo.stockStatus || 'instock',
      categories: product.categories,
      attributes: product.attributes,
      variations: variationData[key] || [],
      updatedAt: product.updatedAt,
      translation: counterpart ? { locale: otherLocale, id: counterpart[otherLocale] } : null
    };
  };

  return [
    ...productsEN.map((product) => toRecord(product, 'en')),
    ...productsDE.map((product) => toRecord(product, 'de'))
  ];
}
/**
 * Process pages.
 *
 * Produces one record per page, English pages first, then German pages.
 * `title` is `titleHtml` with tags stripped and entities decoded;
 * `contentHtml` is passed through `sanitizeHTML`.
 *
 * @param {Array<object>} pagesEN - Exported English pages.
 * @param {Array<object>} pagesDE - Exported German pages.
 * @param {object} translationMapping - Mapping whose optional `pages` section
 *   is keyed by slug with `{ en, de }` ids. A missing section is treated as
 *   "no translations" instead of throwing.
 * @returns {Array<object>} Processed page records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  const pageTranslations = (translationMapping && translationMapping.pages) || {};
  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (slug) =>
    Object.prototype.hasOwnProperty.call(pageTranslations, slug)
      ? pageTranslations[slug]
      : undefined;

  // Build the exported record for one page in the given locale.
  const toRecord = (page, locale) => {
    const isEnglish = locale === 'en';
    const otherLocale = isEnglish ? 'de' : 'en';
    const counterpart = findTranslation(page.slug);
    // Tolerate a missing title instead of throwing on `.replace`.
    const rawTitle = String(page.titleHtml || '').replace(/<[^>]*>/g, '');
    return {
      id: page.id,
      translationKey: page.slug,
      locale,
      slug: page.slug,
      path: isEnglish ? `/${page.slug}` : `/de/${page.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: page.titleHtml,
      contentHtml: sanitizeHTML(page.contentHtml),
      excerptHtml: page.excerptHtml || '',
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: counterpart ? { locale: otherLocale, id: counterpart[otherLocale] } : null
    };
  };

  return [
    ...pagesEN.map((page) => toRecord(page, 'en')),
    ...pagesDE.map((page) => toRecord(page, 'de'))
  ];
}
/**
 * Process posts.
 *
 * Produces one record per post, English posts first, then German posts.
 * `title` is `titleHtml` with tags stripped and entities decoded;
 * `contentHtml` is passed through `sanitizeHTML`.
 *
 * @param {Array<object>} postsEN - Exported English posts.
 * @param {Array<object>} postsDE - Exported German posts.
 * @param {object} translationMapping - Mapping whose optional `posts` section
 *   is keyed by slug with `{ en, de }` ids. A missing section is treated as
 *   "no translations" instead of throwing.
 * @returns {Array<object>} Processed post records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  const postTranslations = (translationMapping && translationMapping.posts) || {};
  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (slug) =>
    Object.prototype.hasOwnProperty.call(postTranslations, slug)
      ? postTranslations[slug]
      : undefined;

  // Build the exported record for one post in the given locale.
  const toRecord = (post, locale) => {
    const isEnglish = locale === 'en';
    const otherLocale = isEnglish ? 'de' : 'en';
    const counterpart = findTranslation(post.slug);
    // Tolerate a missing title instead of throwing on `.replace`.
    const rawTitle = String(post.titleHtml || '').replace(/<[^>]*>/g, '');
    return {
      id: post.id,
      translationKey: post.slug,
      locale,
      slug: post.slug,
      path: isEnglish ? `/blog/${post.slug}` : `/de/blog/${post.slug}`,
      title: decodeHTMLEntities(rawTitle),
      titleHtml: post.titleHtml,
      contentHtml: sanitizeHTML(post.contentHtml),
      excerptHtml: post.excerptHtml || '',
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: counterpart ? { locale: otherLocale, id: counterpart[otherLocale] } : null
    };
  };

  return [
    ...postsEN.map((post) => toRecord(post, 'en')),
    ...postsDE.map((post) => toRecord(post, 'de'))
  ];
}
/**
 * Process product categories.
 *
 * Produces one record per category, English categories first, then German.
 *
 * @param {Array<object>} categoriesEN - Exported English categories.
 * @param {Array<object>} categoriesDE - Exported German categories.
 * @param {object} translationMapping - Mapping whose optional
 *   `productCategories` section is keyed by slug with `{ en, de }` ids. A
 *   missing section is treated as "no translations" instead of throwing.
 * @returns {Array<object>} Processed category records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  const categoryTranslations =
    (translationMapping && translationMapping.productCategories) || {};
  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (slug) =>
    Object.prototype.hasOwnProperty.call(categoryTranslations, slug)
      ? categoryTranslations[slug]
      : undefined;

  // Build the exported record for one category in the given locale.
  const toRecord = (category, locale) => {
    const isEnglish = locale === 'en';
    const otherLocale = isEnglish ? 'de' : 'en';
    const counterpart = findTranslation(category.slug);
    return {
      id: category.id,
      translationKey: category.slug,
      locale,
      slug: category.slug,
      name: category.name,
      path: isEnglish
        ? `/product-category/${category.slug}`
        : `/de/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: counterpart ? { locale: otherLocale, id: counterpart[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map((category) => toRecord(category, 'en')),
    ...categoriesDE.map((category) => toRecord(category, 'de'))
  ];
}
/**
 * Build the media manifest.
 *
 * Each raw media item is reduced to the fields the site needs, `mime_type` is
 * renamed to `mimeType`, and a site-relative `localPath` is derived from the
 * file name.
 *
 * @param {Array<object>} media - Raw media items.
 * @returns {Array<object>} Manifest entries, in input order.
 */
function processMedia(media) {
  const toManifestEntry = ({ id, filename, url, alt, width, height, mime_type: mimeType }) => {
    const localPath = `/media/${filename}`;
    return { id, filename, url, localPath, alt, width, height, mimeType };
  };
  return media.map((item) => toManifestEntry(item));
}
/**
 * Generate asset map for URL replacement.
 *
 * Maps every media item's original URL to its site-relative `/media/<filename>`
 * path. Items without a URL are skipped; for duplicate URLs the last item wins.
 *
 * @param {Array<object>} media - Raw media items.
 * @returns {Object<string, string>} Original URL -> local path.
 */
function generateAssetMap(media) {
  return media.reduce((assetMap, entry) => {
    if (entry.url) {
      assetMap[entry.url] = `/media/${entry.filename}`;
    }
    return assetMap;
  }, {});
}
/**
 * Main processing function.
 *
 * Loads the latest raw export, processes pages, posts, categories, media and
 * products (products are enriched through the WooCommerce API), writes the
 * combined and per-type JSON files into PROCESSED_DIR and prints a summary.
 *
 * A raw file that cannot be read or parsed is reported and replaced by an
 * empty value of the matching shape, so processing continues with whatever
 * data is available.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js with WooCommerce Integration');
  console.log('==========================================================\n');
  // Load raw data. `fallback` is returned when the file is missing or invalid;
  // it must have the same shape (array vs. object) as the real file.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };
  // The mapping is an object with one section per content type. Start from
  // empty sections so a missing file or section means "no translations"
  // rather than a TypeError in the process* functions.
  const translationMapping = {
    pages: {},
    posts: {},
    products: {},
    productCategories: {},
    ...loadJSON('translation-mapping.json', {})
  };
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});
  console.log('📊 Processing content types...\n');
  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);
  // Process products with WooCommerce API
  const products = await processProductsWithWooCommerce(productsEN, productsDE, translationMapping);
  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };
  // Save processed data
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));
  // Save individual files for easier access
  fs.writeFileSync(path.join(PROCESSED_DIR, 'pages.json'), JSON.stringify(pages, null, 2));
  fs.writeFileSync(path.join(PROCESSED_DIR, 'posts.json'), JSON.stringify(posts, null, 2));
  // Always write products.json with the processed data
  // Even if WooCommerce data is missing, we still want the base product structure
  fs.writeFileSync(path.join(PROCESSED_DIR, 'products.json'), JSON.stringify(products, null, 2));
  // Report on WooCommerce data quality
  const productsWithPrices = products.filter(p => p.regularPrice).length;
  const productsWithVariations = products.filter(p => p.variations && p.variations.length > 0).length;
  console.log('📊 WooCommerce Data Quality:');
  console.log(` Products with prices: ${productsWithPrices}/${products.length}`);
  console.log(` Products with variations: ${productsWithVariations}/${products.length}`);
  if (productsWithPrices === 0 && productsWithVariations === 0) {
    console.log('⚠️ Warning: No WooCommerce pricing or variation data was retrieved');
    console.log(' Products written with empty price fields\n');
  } else {
    console.log('✅ WooCommerce data integrated successfully\n');
  }
  fs.writeFileSync(path.join(PROCESSED_DIR, 'categories.json'), JSON.stringify(categories, null, 2));
  fs.writeFileSync(path.join(PROCESSED_DIR, 'media.json'), JSON.stringify(processedMedia, null, 2));
  fs.writeFileSync(path.join(PROCESSED_DIR, 'asset-map.json'), JSON.stringify(assetMap, null, 2));
  // Summary
  console.log('\n✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);
  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  console.log(` ${path.join(PROCESSED_DIR, 'pages.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'posts.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'products.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'categories.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'media.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'asset-map.json')}\n`);
  // Sample data: prefer a product that actually received a price.
  if (products.length > 0) {
    console.log('📦 Sample Product with WooCommerce Data:');
    const sampleProduct = products.find(p => p.regularPrice) || products[0];
    console.log(` Name: ${sampleProduct.name}`);
    console.log(` SKU: ${sampleProduct.sku}`);
    console.log(` Price: ${sampleProduct.regularPrice} ${sampleProduct.currency}`);
    console.log(` Sale Price: ${sampleProduct.salePrice || 'N/A'}`);
    console.log(` Variations: ${sampleProduct.variations.length}`);
    console.log(` Locale: ${sampleProduct.locale}\n`);
  }
  console.log('💡 Next: Ready for Next.js project setup with complete product data!');
}
/**
 * Get the latest export directory.
 *
 * "Latest" is the sub-directory of RAW_DIR whose name sorts last in the
 * default string order.
 *
 * @returns {string} Absolute path of the latest export directory.
 * @throws {Error} If RAW_DIR contains no sub-directory (previously this
 *   surfaced as an opaque TypeError from `path.join`).
 */
function getLatestExportDir() {
  const exportDirs = fs
    .readdirSync(RAW_DIR)
    .filter((entry) => fs.statSync(path.join(RAW_DIR, entry)).isDirectory())
    .sort();
  if (exportDirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }
  return path.join(RAW_DIR, exportDirs[exportDirs.length - 1]);
}
if (require.main === module) {
main().catch(console.error);
}
module.exports = {
processPages,
processPosts,
processProductCategories,
processProductsWithWooCommerce,
processMedia,
generateAssetMap,
decodeHTMLEntities,
sanitizeHTML
};