#!/usr/bin/env node
/**
 * WordPress → Next.js Data Processing Pipeline
 * Transforms raw WordPress data into Next.js compatible format
 *
 * Reads the most recent export under data/raw/<export-dir>/ and writes the
 * combined and per-type JSON files to data/processed/.
 */

const fs = require('fs');
const path = require('path');

const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');

// URL prefix for each locale; English paths are unprefixed, German paths live under /de.
const LOCALE_PATH_PREFIX = { en: '', de: '/de' };

// Create processed directory
if (!fs.existsSync(PROCESSED_DIR)) {
  fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}

/**
 * Find the latest export directory inside RAW_DIR.
 *
 * "Latest" is the last directory name in lexicographic order.
 * NOTE(review): this presumably relies on export directories being named with
 * sortable timestamps - confirm against the export script.
 *
 * @returns {string} Absolute path of the selected export directory.
 * @throws {Error} If RAW_DIR does not exist or contains no directories.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`Raw export directory not found: ${RAW_DIR}`);
  }

  const dirs = fs.readdirSync(RAW_DIR).filter(name => {
    const stat = fs.statSync(path.join(RAW_DIR, name));
    return stat.isDirectory();
  });

  // Without this guard path.join(RAW_DIR, undefined) throws an opaque TypeError.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }

  dirs.sort().reverse();
  return path.join(RAW_DIR, dirs[0]);
}

/**
 * Return `value` when it is an array, otherwise an empty array.
 * Lets the processors tolerate a missing or malformed input file.
 */
function toArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Remove every HTML tag from a string, leaving only the text.
 *
 * @param {string} html - Markup to strip; null/undefined/empty yields ''.
 * @returns {string} The text with all `<...>` sequences removed.
 */
function stripTags(html) {
  if (!html) return '';
  return String(html).replace(/<[^>]*>/g, '');
}

/**
 * HTML sanitization - preserve content but clean dangerous elements.
 *
 * Removes <script> elements and inline on* handlers, converts WPBakery layout
 * shortcodes into <div> wrappers, strips every remaining [shortcode], drops
 * empty <p>/<div> elements and collapses whitespace runs to a single space.
 *
 * @param {string} html - Raw WordPress HTML (may contain shortcodes).
 * @returns {string} Sanitized HTML; '' for empty/null input.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  let sanitized = String(html);

  // Remove script tags and inline handlers (security).
  // The opening tag must be matched explicitly; otherwise everything that
  // precedes </script> would be deleted as well.
  sanitized = sanitized.replace(/<script\b[^>]*>.*?<\/script\s*>/gis, '');
  // Handles double-quoted, single-quoted and unquoted attribute values.
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Replace WPBakery layout shortcodes with divs to preserve structure.
  // NOTE(review): the wrapper class names below are a reconstruction - confirm
  // them against the stylesheet that consumes this markup.
  //
  // vc_column_text is handled first and the row/column patterns end in \b, so
  // "[vc_column_text]" is never swallowed by the "[vc_column" rule. The _inner
  // variants are matched on both the opening and the closing tag so the
  // generated divs stay balanced.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-column-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Remove other shortcodes but keep text content.
  // Caveat: this also removes any literal "[...]" text on a single line.
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs. Repeat until stable so wrappers that
  // only contained other empty wrappers are removed too; every pass that
  // changes the string shortens it, so the loop terminates.
  let previous;
  do {
    previous = sanitized;
    sanitized = sanitized.replace(/<p\b[^>]*>\s*<\/p>/gi, '');
    sanitized = sanitized.replace(/<div\b[^>]*>\s*<\/div>/gi, '');
  } while (sanitized !== previous);

  // Normalize whitespace but preserve HTML structure
  sanitized = sanitized.replace(/\s+/g, ' ').trim();

  return sanitized;
}

/**
 * Extract a plain-text excerpt from HTML content.
 *
 * @param {string} content - HTML content; null/undefined is treated as ''.
 * @param {number} [maxLength=200] - Maximum number of characters before '...'.
 * @returns {string} Tag-free text, truncated with a trailing '...' when longer
 *   than maxLength.
 */
function generateExcerpt(content, maxLength = 200) {
  const text = stripTags(content).replace(/\s+/g, ' ').trim();
  if (text.length <= maxLength) return text;
  return text.substring(0, maxLength) + '...';
}

/**
 * Look up the counterpart of an item in the translation mapping.
 *
 * @param {Object} translationMapping - Mapping object keyed by content type.
 * @param {string} contentType - 'pages', 'posts', 'products' or 'productCategories'.
 * @param {string} translationKey - Key of the item (its slug).
 * @param {string} targetLocale - Locale of the wanted counterpart ('en' or 'de').
 * @returns {{locale: string, id: *}|null} Translation reference, or null when
 *   the mapping has no entry for the key.
 */
function findTranslation(translationMapping, contentType, translationKey, targetLocale) {
  const table = translationMapping && translationMapping[contentType];
  // Own-property check: a slug such as "constructor" must not hit Object.prototype.
  if (!table || !Object.prototype.hasOwnProperty.call(table, translationKey)) {
    return null;
  }
  const match = table[translationKey];
  return match ? { locale: targetLocale, id: match[targetLocale] } : null;
}

/** Locale of the translation counterpart for a given locale. */
function otherLocale(locale) {
  return locale === 'en' ? 'de' : 'en';
}

/**
 * Run `build` over the English items and then the German items and return the
 * results as one list (English entries first).
 */
function processBothLocales(itemsEN, itemsDE, translationMapping, build) {
  const english = toArray(itemsEN).map(item => build(item, 'en', translationMapping));
  const german = toArray(itemsDE).map(item => build(item, 'de', translationMapping));
  return english.concat(german);
}

/** Build the processed record for one page in the given locale. */
function buildPage(page, locale, translationMapping) {
  const translationKey = page.slug;
  const contentHtml = sanitizeHTML(page.contentHtml);

  return {
    id: page.id,
    translationKey: translationKey,
    locale: locale,
    slug: page.slug,
    path: `${LOCALE_PATH_PREFIX[locale]}/${page.slug}`,
    title: stripTags(page.titleHtml),
    titleHtml: page.titleHtml,
    contentHtml: contentHtml,
    // Fall back to the sanitized content so shortcodes never leak into excerpts.
    excerptHtml: page.excerptHtml || generateExcerpt(contentHtml),
    featuredImage: page.featuredImage,
    updatedAt: page.updatedAt,
    translation: findTranslation(translationMapping, 'pages', translationKey, otherLocale(locale))
  };
}

/** Build the processed record for one blog post in the given locale. */
function buildPost(post, locale, translationMapping) {
  const translationKey = post.slug;
  const contentHtml = sanitizeHTML(post.contentHtml);

  return {
    id: post.id,
    translationKey: translationKey,
    locale: locale,
    slug: post.slug,
    path: `${LOCALE_PATH_PREFIX[locale]}/blog/${post.slug}`,
    title: stripTags(post.titleHtml),
    titleHtml: post.titleHtml,
    contentHtml: contentHtml,
    // Fall back to the sanitized content so shortcodes never leak into excerpts.
    excerptHtml: post.excerptHtml || generateExcerpt(contentHtml),
    featuredImage: post.featuredImage,
    datePublished: post.datePublished,
    updatedAt: post.updatedAt,
    translation: findTranslation(translationMapping, 'posts', translationKey, otherLocale(locale))
  };
}

/** Build the processed record for one product in the given locale. */
function buildProduct(product, locale, translationMapping) {
  const translationKey = product.slug;

  return {
    id: product.id,
    translationKey: translationKey,
    locale: locale,
    slug: product.slug,
    path: `${LOCALE_PATH_PREFIX[locale]}/product/${product.slug}`,
    name: product.name,
    shortDescriptionHtml: product.shortDescriptionHtml,
    descriptionHtml: sanitizeHTML(product.descriptionHtml),
    images: product.images,
    featuredImage: product.featuredImage,
    sku: product.sku,
    regularPrice: product.regularPrice,
    salePrice: product.salePrice,
    currency: product.currency,
    stockStatus: product.stockStatus,
    categories: product.categories,
    attributes: product.attributes,
    variations: product.variations,
    updatedAt: product.updatedAt,
    translation: findTranslation(translationMapping, 'products', translationKey, otherLocale(locale))
  };
}

/** Build the processed record for one product category in the given locale. */
function buildProductCategory(category, locale, translationMapping) {
  const translationKey = category.slug;

  return {
    id: category.id,
    translationKey: translationKey,
    locale: locale,
    slug: category.slug,
    name: category.name,
    path: `${LOCALE_PATH_PREFIX[locale]}/product-category/${category.slug}`,
    description: category.description,
    count: category.count,
    translation: findTranslation(translationMapping, 'productCategories', translationKey, otherLocale(locale))
  };
}

/**
 * Process pages.
 *
 * @param {Array} pagesEN - Raw English pages.
 * @param {Array} pagesDE - Raw German pages.
 * @param {Object} translationMapping - Mapping with a `pages` table keyed by slug.
 * @returns {Array<Object>} Processed pages, English first, then German.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  return processBothLocales(pagesEN, pagesDE, translationMapping, buildPage);
}

/**
 * Process posts.
 *
 * @param {Array} postsEN - Raw English posts.
 * @param {Array} postsDE - Raw German posts.
 * @param {Object} translationMapping - Mapping with a `posts` table keyed by slug.
 * @returns {Array<Object>} Processed posts, English first, then German.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  return processBothLocales(postsEN, postsDE, translationMapping, buildPost);
}

/**
 * Process products.
 *
 * @param {Array} productsEN - Raw English products.
 * @param {Array} productsDE - Raw German products.
 * @param {Object} translationMapping - Mapping with a `products` table keyed by slug.
 * @returns {Array<Object>} Processed products, English first, then German.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  return processBothLocales(productsEN, productsDE, translationMapping, buildProduct);
}

/**
 * Process product categories.
 *
 * @param {Array} categoriesEN - Raw English categories.
 * @param {Array} categoriesDE - Raw German categories.
 * @param {Object} translationMapping - Mapping with a `productCategories` table keyed by slug.
 * @returns {Array<Object>} Processed categories, English first, then German.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  return processBothLocales(categoriesEN, categoriesDE, translationMapping, buildProductCategory);
}

/**
 * Process media manifest.
 *
 * NOTE(review): the raw field is read as `mime_type` while every other raw
 * field is camelCase - confirm the key emitted by the export script.
 *
 * @param {Array} media - Raw media items.
 * @returns {Array<Object>} Media records with a site-relative `localPath`.
 */
function processMedia(media) {
  return toArray(media).map(item => ({
    id: item.id,
    filename: item.filename,
    url: item.url,
    localPath: `/media/${item.filename}`,
    alt: item.alt,
    width: item.width,
    height: item.height,
    mimeType: item.mime_type
  }));
}

/**
 * Generate asset map for URL replacement.
 *
 * @param {Array} media - Raw media items.
 * @returns {Object<string, string>} Original URL → local `/media/<filename>` path.
 */
function generateAssetMap(media) {
  const map = {};

  toArray(media).forEach(item => {
    // Skip items without a filename; they would otherwise map to "/media/undefined".
    if (item.url && item.filename) {
      map[item.url] = `/media/${item.filename}`;
    }
  });

  return map;
}

/**
 * Main processing function: load the latest raw export, transform every
 * content type and write the combined and per-type JSON files.
 *
 * @throws {Error} If no export directory can be found or an output file
 *   cannot be written.
 */
function main() {
  const exportDir = getLatestExportDir();

  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load raw data. `fallback` is returned when the file is missing, unreadable
  // or not valid JSON, so object-shaped files can fall back to {} instead of [].
  const loadJSON = (file, fallback = []) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
      return parsed === null || parsed === undefined ? fallback : parsed;
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  const translationMapping = loadJSON('translation-mapping-improved.json', {});
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Save processed data
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));

  // Save individual files for easier access
  const individualFiles = [
    ['pages.json', pages],
    ['posts.json', posts],
    ['products.json', products],
    ['categories.json', categories],
    ['media.json', processedMedia],
    ['asset-map.json', assetMap]
  ];
  individualFiles.forEach(([filename, data]) => {
    fs.writeFileSync(path.join(PROCESSED_DIR, filename), JSON.stringify(data, null, 2));
  });

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  console.log(` ${path.join(PROCESSED_DIR, 'pages.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'posts.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'products.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'categories.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'media.json')}`);
  console.log(` ${path.join(PROCESSED_DIR, 'asset-map.json')}\n`);

  // Sample data
  if (pages.length > 0) {
    console.log('📄 Sample Page:');
    console.log(` Title: ${pages[0].title}`);
    console.log(` Path: ${pages[0].path}`);
    console.log(` Locale: ${pages[0].locale}`);
    console.log(` Translation: ${pages[0].translation ? 'Yes' : 'No'}\n`);
  }

  if (posts.length > 0) {
    console.log('📝 Sample Post:');
    console.log(` Title: ${posts[0].title}`);
    console.log(` Path: ${posts[0].path}`);
    console.log(` Locale: ${posts[0].locale}`);
    console.log(` Date: ${posts[0].datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}

// Run only when executed directly, not when required by another module.
if (require.main === module) {
  main();
}