initial migration
This commit is contained in:
240
scripts/analyze-export.js
Executable file
240
scripts/analyze-export.js
Executable file
@@ -0,0 +1,240 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress Export Analysis Script
|
||||
* Prints a quick summary of the latest export (each JSON file is read and parsed in full)
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Folder (relative to this script's directory) whose subdirectories are treated as export runs.
const DATA_DIR = path.join(__dirname, '../data/raw');
|
||||
|
||||
// Find the latest export directory
|
||||
/**
 * Picks the export directory under DATA_DIR whose name sorts last.
 *
 * Only entries that are directories are considered. When DATA_DIR does not
 * exist, or contains no directories, an error is logged and the process exits
 * with status 1.
 *
 * @returns {string} Path of the chosen export directory.
 */
function getLatestExportDir() {
  // Shared failure path: report the problem and terminate with a non-zero status.
  const abort = (message) => {
    console.error(message);
    process.exit(1);
  };

  if (!fs.existsSync(DATA_DIR)) {
    abort('❌ No data directory found');
  }

  const exportNames = [];
  for (const entry of fs.readdirSync(DATA_DIR)) {
    if (fs.statSync(path.join(DATA_DIR, entry)).isDirectory()) {
      exportNames.push(entry);
    }
  }

  if (exportNames.length === 0) {
    abort('❌ No export directories found');
  }

  // Default string ordering; the last name is the newest, assuming names are
  // sortable timestamps (NOTE(review): naming scheme is not visible here).
  exportNames.sort();
  const newest = exportNames[exportNames.length - 1];
  return path.join(DATA_DIR, newest);
}
|
||||
|
||||
// Quick file analysis
|
||||
/**
 * Reads one exported JSON file and summarizes its size and contents.
 *
 * The item list is the parsed value itself when it is an array, or its
 * `menus` property when that is an array. Any other JSON object (a single
 * record such as site info or a translation mapping) has no item list; it is
 * returned as `sample` unchanged so callers can read its fields directly.
 *
 * @param {string} filePath - Path of the JSON file to inspect.
 * @param {number} [sampleSize=3] - Maximum number of leading items to keep in
 *   `sample` when the file holds an item list.
 * @returns {null | {error: string} | {size: number, sizeHuman: string,
 *   lines: number, count: number, sample: (Array|Object), isArray: boolean}}
 *   `null` when the file does not exist, `{ error: 'Invalid JSON' }` when it
 *   cannot be parsed, otherwise the summary.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const content = fs.readFileSync(filePath, 'utf8');
  const lines = content.split('\n');

  let data;
  try {
    data = JSON.parse(content);
  } catch {
    return { error: 'Invalid JSON' };
  }

  const isArray = Array.isArray(data);

  // `data?.` guards against a file whose whole content is the JSON literal `null`.
  let items = null;
  if (isArray) {
    items = data;
  } else if (Array.isArray(data?.menus)) {
    items = data.menus;
  }

  let sample;
  if (items) {
    sample = items.slice(0, sampleSize);
  } else if (typeof data === 'object' && data !== null) {
    // Single-record file: expose the record itself rather than an empty list.
    sample = data;
  } else {
    sample = [];
  }

  return {
    size: stats.size,
    sizeHuman: `${(stats.size / 1024).toFixed(1)} KB`,
    lines: lines.length,
    count: items ? items.length : 0,
    sample,
    isArray,
  };
}
|
||||
|
||||
// Main analysis
|
||||
/**
 * Prints a report for the latest export directory to stdout.
 *
 * The report contains per-type item counts, translation pair counts, file
 * sizes, one sample item for pages/posts/products/menus, pass/fail quality
 * checks, recommendations and a list of next steps. Files that do not exist
 * are left out; files that could not be parsed are listed with their error
 * instead of a size and contribute a count of 0.
 */
function main() {
  const exportDir = getLatestExportDir();

  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const files = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json',
  ];

  // Keyed by file name; only files that exist get an entry.
  const results = {};
  for (const file of files) {
    const analysis = analyzeFile(path.join(exportDir, file), 2);
    if (analysis) {
      results[file] = analysis;
    }
  }

  // Missing files and parse failures have no `count`, so they fall back to 0.
  const countOf = (file) => results[file]?.count || 0;
  const firstSample = (file) => results[file]?.sample?.[0];

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  // Content counts
  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  console.log(` Pages: EN: ${pagesEN} | DE: ${pagesDE} | Total: ${pagesEN + pagesDE}`);
  console.log(` Posts: EN: ${postsEN} | DE: ${postsDE} | Total: ${postsEN + postsDE}`);
  console.log(` Products: EN: ${productsEN} | DE: ${productsDE} | Total: ${productsEN + productsDE}`);
  console.log(` Categories: EN: ${categoriesEN} | DE: ${categoriesDE} | Total: ${categoriesEN + categoriesDE}`);
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping: accept either the mapping object itself or a list
  // whose first element is the mapping. The same value feeds the pair counts,
  // the quality check and the recommendation so they cannot disagree.
  const mappingSample = results['translation-mapping.json']?.sample;
  const translation = mappingSample?.[0] || mappingSample;
  const pairCount = (section) => Object.keys(translation?.[section] || {}).length;

  if (translation) {
    const pagePairs = pairCount('pages');
    const postPairs = pairCount('posts');
    const productPairs = pairCount('products');
    const categoryPairs = pairCount('productCategories');

    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes
  console.log('💾 File Sizes:');
  for (const [file, data] of Object.entries(results)) {
    // Unparseable files carry only `error`; show it rather than "undefined".
    const detail = data.error ? `⚠️ ${data.error}` : data.sizeHuman;
    console.log(` ${file.padEnd(30)} ${detail}`);
  }

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');

  const page = firstSample('pages.en.json');
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }

  const post = firstSample('posts.en.json');
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }

  const product = firstSample('products.en.json');
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }

  const menu = firstSample('menus.en.json');
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks
  console.log('\n✅ Data Quality Checks:');

  const checks = [
    {
      name: 'Both languages present',
      pass: pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0,
    },
    {
      name: 'Translation pairs exist',
      pass: Boolean(translation) && pairCount('pages') > 0,
    },
    {
      name: 'Media files downloaded',
      pass: media > 0,
    },
    {
      name: 'Redirects generated',
      pass: redirects > 0,
    },
    {
      name: 'Site info complete',
      pass: results['site-info.json']?.sample?.siteTitle !== undefined,
    },
  ];

  for (const check of checks) {
    console.log(` ${check.pass ? '✅' : '❌'} ${check.name}`);
  }

  // Recommendations
  console.log('\n💡 Recommendations:');

  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }

  if (translation && pairCount('posts') === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }

  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  console.log(' 1. Review sample data above for accuracy');
  console.log(' 2. Check translation mapping for completeness');
  console.log(' 3. Verify media files are properly named');
  console.log(' 4. Proceed to Next.js data processing');
}
|
||||
|
||||
// Run the report only when this file is the program's entry module,
// not when it is loaded by another module through require().
if (module === require.main) {
  main();
}
|
||||
Reference in New Issue
Block a user