#!/usr/bin/env node

/**
 * WordPress Export Analysis Script
 *
 * Prints a quick summary report (counts, sizes, samples, sanity checks) for
 * the most recent export directory under data/raw.
 *
 * Note: every export file is read and parsed in full, so very large files
 * are loaded into memory while they are being analyzed.
 */

const fs = require('fs');
const path = require('path');

const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');

/**
 * Find the latest export directory inside DATA_DIR.
 *
 * "Latest" means the directory whose name sorts last; this relies on the
 * export directories being named with a sortable timestamp.
 * Terminates the process with exit code 1 when DATA_DIR does not exist or
 * contains no sub-directories.
 *
 * @returns {string} Absolute path of the latest export directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(DATA_DIR)) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  const dirs = fs
    .readdirSync(DATA_DIR)
    .filter((entry) => fs.statSync(path.join(DATA_DIR, entry)).isDirectory());

  if (dirs.length === 0) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Sort by name (timestamp); the last entry is the most recent export.
  dirs.sort();
  return path.join(DATA_DIR, dirs[dirs.length - 1]);
}

/**
 * Analyze a single exported JSON file.
 *
 * @param {string} filePath - Path of the JSON file to analyze.
 * @param {number} [sampleSize=3] - Maximum number of items kept in `sample`
 *   when the file holds a list.
 * @returns {null|object} `null` when the file does not exist. When the file
 *   is not valid JSON: `{ error, size, sizeHuman }`. Otherwise
 *   `{ size, sizeHuman, lines, count, sample, isArray }` where:
 *   - top-level array: `count` is its length, `sample` its first items;
 *   - object with a `menus` array: `count`/`sample` describe that array;
 *   - any other object: `count` is 0 and `sample` is the object itself, so
 *     callers can inspect its fields (e.g. site info, translation mapping);
 *   - anything else (null, number, string...): `count` 0 and `sample` `[]`.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const sizeInfo = {
    size: stats.size,
    sizeHuman: `${(stats.size / 1024).toFixed(1)} KB`,
  };
  const content = fs.readFileSync(filePath, 'utf8');

  // Parse JSON safely
  let data;
  try {
    data = JSON.parse(content);
  } catch {
    // Keep the size fields so the report can still list the broken file.
    return { error: 'Invalid JSON', ...sizeInfo };
  }

  const isArray = Array.isArray(data);
  const isObject = data !== null && typeof data === 'object';

  let count = 0;
  let sample = [];
  if (isArray) {
    count = data.length;
    sample = data.slice(0, sampleSize);
  } else if (isObject && Array.isArray(data.menus)) {
    count = data.menus.length;
    sample = data.menus.slice(0, sampleSize);
  } else if (isObject) {
    // Single-document files have no list to sample from: expose the parsed
    // object itself, otherwise its fields are unreachable for the caller.
    sample = data;
  }

  return {
    ...sizeInfo,
    lines: content.split('\n').length,
    count,
    sample,
    isArray,
  };
}

/**
 * Main analysis: analyze every known export file of the latest export and
 * print the report (summary, translation pairs, file sizes, sample data,
 * data quality checks, recommendations, next steps) to stdout.
 */
function main() {
  const exportDir = getLatestExportDir();

  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const files = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json',
  ];

  // Files missing from the export are simply left out of the results.
  const results = {};
  for (const file of files) {
    const analysis = analyzeFile(path.join(exportDir, file), 2);
    if (analysis) {
      results[file] = analysis;
    }
  }

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  // Content counts (missing or unparsable files count as 0)
  const countOf = (file) => results[file]?.count ?? 0;
  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  console.log(` Pages: EN: ${pagesEN} | DE: ${pagesDE} | Total: ${pagesEN + pagesDE}`);
  console.log(` Posts: EN: ${postsEN} | DE: ${postsDE} | Total: ${postsEN + postsDE}`);
  console.log(` Products: EN: ${productsEN} | DE: ${productsDE} | Total: ${productsEN + productsDE}`);
  console.log(` Categories: EN: ${categoriesEN} | DE: ${categoriesDE} | Total: ${categoriesEN + categoriesDE}`);
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping: accept either a single mapping object or a list
  // whose first element is the mapping.
  const mappingSample = results['translation-mapping.json']?.sample;
  const translation = Array.isArray(mappingSample) ? mappingSample[0] : mappingSample;
  const pairCount = (key) => Object.keys(translation?.[key] || {}).length;

  if (translation) {
    const pagePairs = pairCount('pages');
    const postPairs = pairCount('posts');
    const productPairs = pairCount('products');
    const categoryPairs = pairCount('productCategories');

    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes (unparsable files are flagged next to their size)
  console.log('💾 File Sizes:');
  for (const [file, data] of Object.entries(results)) {
    const errorNote = data.error ? ` (${data.error})` : '';
    console.log(` ${file.padEnd(30)} ${data.sizeHuman}${errorNote}`);
  }

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');

  const page = results['pages.en.json']?.sample?.[0];
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }

  const post = results['posts.en.json']?.sample?.[0];
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }

  const product = results['products.en.json']?.sample?.[0];
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }

  const menu = results['menus.en.json']?.sample?.[0];
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks
  console.log('\n✅ Data Quality Checks:');

  const checks = [
    {
      name: 'Both languages present',
      pass: pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0,
    },
    {
      name: 'Translation pairs exist',
      pass: pairCount('pages') > 0,
    },
    {
      name: 'Media files downloaded',
      pass: media > 0,
    },
    {
      name: 'Redirects generated',
      pass: redirects > 0,
    },
    {
      name: 'Site info complete',
      pass: results['site-info.json']?.sample?.siteTitle !== undefined,
    },
  ];

  for (const check of checks) {
    console.log(` ${check.pass ? '✅' : '❌'} ${check.name}`);
  }

  // Recommendations
  console.log('\n💡 Recommendations:');

  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }

  if (translation && pairCount('posts') === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }

  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  console.log(' 1. Review sample data above for accuracy');
  console.log(' 2. Check translation mapping for completeness');
  console.log(' 3. Verify media files are properly named');
  console.log(' 4. Proceed to Next.js data processing');
}

// Only run the report when executed directly, not when required as a module.
if (require.main === module) {
  main();
}