240 lines
7.4 KiB
JavaScript
Executable File
240 lines
7.4 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
|
|
/**
|
|
* WordPress Export Analysis Script
|
|
* Summarizes the latest export (counts, sizes, samples); each file is read fully into memory
|
|
*/
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Directory scanned for export subdirectories: <script dir>/../data/raw.
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
|
|
|
|
// Find the latest export directory
|
|
/**
 * Picks the export directory under DATA_DIR whose name sorts last.
 *
 * Logs an error and exits the process with status 1 when DATA_DIR does not
 * exist or contains no subdirectories.
 *
 * @returns {string} Path of the selected export directory.
 */
function getLatestExportDir() {
  const dataDirMissing = !fs.existsSync(DATA_DIR);
  if (dataDirMissing) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  // Keep only the entries that are directories.
  const exportNames = [];
  for (const entry of fs.readdirSync(DATA_DIR)) {
    if (fs.statSync(path.join(DATA_DIR, entry)).isDirectory()) {
      exportNames.push(entry);
    }
  }

  if (exportNames.length < 1) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Names are treated as timestamps, so the last one in ascending order is the latest.
  exportNames.sort();
  return path.join(DATA_DIR, exportNames[exportNames.length - 1]);
}
|
|
|
|
// Quick file analysis
|
|
/**
 * Summarizes one exported JSON file.
 *
 * The file is read fully into memory and parsed. Three shapes are recognized:
 *  - a top-level array: items are the array elements;
 *  - an object with an array `menus` property: items are the menus;
 *  - any other object: there are no items, and the object itself is returned
 *    as `sample` so callers can inspect its fields.
 * Any other JSON value (null, number, string, boolean) yields no items and an
 * empty sample.
 *
 * @param {string} filePath - Path of the JSON file to inspect.
 * @param {number} [sampleSize=3] - Maximum number of items kept in `sample`
 *   for list-like data.
 * @returns {null | {error: string} | {size: number, sizeHuman: string,
 *   lines: number, count: number, sample: (Array|Object), isArray: boolean}}
 *   `null` when the file does not exist, `{ error: 'Invalid JSON' }` when it
 *   cannot be parsed, otherwise the summary.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const content = fs.readFileSync(filePath, 'utf8');
  const lines = content.split('\n');

  let data;
  try {
    data = JSON.parse(content);
  } catch (e) {
    return { error: 'Invalid JSON' };
  }

  const isArray = Array.isArray(data);
  // `typeof null === 'object'`, so null must be excluded explicitly.
  const isPlainObject = !isArray && data !== null && typeof data === 'object';

  // Locate the list of items, if the document has one.
  let items = null;
  if (isArray) {
    items = data;
  } else if (isPlainObject && Array.isArray(data.menus)) {
    items = data.menus;
  }

  let sample;
  if (items) {
    sample = items.slice(0, sampleSize);
  } else if (isPlainObject) {
    // Single-record documents (e.g. site info) are their own sample.
    sample = data;
  } else {
    sample = [];
  }

  return {
    size: stats.size,
    sizeHuman: `${(stats.size / 1024).toFixed(1)} KB`,
    lines: lines.length,
    count: items ? items.length : 0,
    sample,
    isArray,
  };
}
|
|
|
|
// Main analysis
|
|
/**
 * Prints a report for the latest export directory.
 *
 * Steps: locate the directory via getLatestExportDir(), run analyzeFile() on
 * each known export file (files that do not exist are skipped), then print
 * content counts, translation pair counts, file sizes, one sample item per
 * content type, data quality checks, recommendations and next steps.
 *
 * Everything is written to stdout via console.log; nothing is returned.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const files = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json',
  ];

  const results = {};
  files.forEach((file) => {
    const analysis = analyzeFile(path.join(exportDir, file), 2);
    if (analysis) {
      results[file] = analysis;
    }
  });

  // Item count for a file; 0 when the file is missing or failed to parse.
  const countOf = (file) => results[file]?.count ?? 0;

  // First sampled item of a list-like file, if any.
  const firstItemOf = (file) => results[file]?.sample?.[0];

  // Single-record files (site info, translation mapping) may surface either
  // as the record itself or as the first element of a list; accept both so
  // every section below reads the record the same way.
  const recordOf = (file) => firstItemOf(file) || results[file]?.sample;

  const pairCount = (group) => Object.keys(group || {}).length;

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  // Content counts
  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  console.log(` Pages: EN: ${pagesEN} | DE: ${pagesDE} | Total: ${pagesEN + pagesDE}`);
  console.log(` Posts: EN: ${postsEN} | DE: ${postsDE} | Total: ${postsEN + postsDE}`);
  console.log(` Products: EN: ${productsEN} | DE: ${productsDE} | Total: ${productsEN + productsDE}`);
  console.log(` Categories: EN: ${categoriesEN} | DE: ${categoriesDE} | Total: ${categoriesEN + categoriesDE}`);
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping
  const translation = recordOf('translation-mapping.json');
  if (translation) {
    const pagePairs = pairCount(translation.pages);
    const postPairs = pairCount(translation.posts);
    const productPairs = pairCount(translation.products);
    const categoryPairs = pairCount(translation.productCategories);

    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes
  console.log('💾 File Sizes:');
  Object.entries(results).forEach(([file, data]) => {
    // Unparseable files carry only an `error` field; show it instead of "undefined".
    const sizeLabel = data.error ? `⚠️ ${data.error}` : data.sizeHuman;
    console.log(` ${file.padEnd(30)} ${sizeLabel}`);
  });

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');

  const page = firstItemOf('pages.en.json');
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }

  const post = firstItemOf('posts.en.json');
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }

  const product = firstItemOf('products.en.json');
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }

  const menu = firstItemOf('menus.en.json');
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks
  console.log('\n✅ Data Quality Checks:');

  const siteInfo = recordOf('site-info.json');
  const checks = [
    {
      name: 'Both languages present',
      pass: pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0,
    },
    {
      name: 'Translation pairs exist',
      pass: Boolean(translation) && pairCount(translation.pages) > 0,
    },
    {
      name: 'Media files downloaded',
      pass: media > 0,
    },
    {
      name: 'Redirects generated',
      pass: redirects > 0,
    },
    {
      name: 'Site info complete',
      pass: siteInfo?.siteTitle !== undefined,
    },
  ];

  checks.forEach((check) => {
    console.log(` ${check.pass ? '✅' : '❌'} ${check.name}`);
  });

  // Recommendations
  console.log('\n💡 Recommendations:');

  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }

  if (translation && pairCount(translation.posts) === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }

  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  console.log(' 1. Review sample data above for accuracy');
  console.log(' 2. Check translation mapping for completeness');
  console.log(' 3. Verify media files are properly named');
  console.log(' 4. Proceed to Next.js data processing');
}
|
|
|
|
// Run the report only when this file is the program's entry module,
// not when it is loaded through require() by another module.
const isEntryModule = require.main === module;
if (isEntryModule) {
  main();
}