initial migration
This commit is contained in:
240
scripts/analyze-export.js
Executable file
240
scripts/analyze-export.js
Executable file
@@ -0,0 +1,240 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress Export Analysis Script
|
||||
* Summarizes each exported JSON file (size, line count, item count, sample items); note that every file is read and parsed in full
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
|
||||
|
||||
// Find the latest export directory
|
||||
/**
 * Resolve the newest export directory inside DATA_DIR.
 *
 * Prints an error and exits the process with status 1 when DATA_DIR does not
 * exist or contains no sub-directories.
 *
 * @returns {string} Path of the export directory whose name sorts last.
 */
function getLatestExportDir() {
  const abort = (message) => {
    console.error(message);
    process.exit(1);
  };

  if (!fs.existsSync(DATA_DIR)) {
    abort('❌ No data directory found');
  }

  const exportNames = [];
  for (const entry of fs.readdirSync(DATA_DIR)) {
    if (fs.statSync(path.join(DATA_DIR, entry)).isDirectory()) {
      exportNames.push(entry);
    }
  }

  if (exportNames.length === 0) {
    abort('❌ No export directories found');
  }

  // Directory names are treated as timestamps: the lexicographically greatest is the latest.
  exportNames.sort();
  return path.join(DATA_DIR, exportNames[exportNames.length - 1]);
}
|
||||
|
||||
// Quick file analysis
|
||||
/**
 * Summarize a single exported JSON file.
 *
 * The file is read and parsed in full. Three shapes are recognized:
 *  - a top-level array: counted and sampled directly;
 *  - an object with a `menus` array: the menus are counted and sampled;
 *  - any other object: `count` is 0 and `sample` is the object itself, which is
 *    what main() reads (`sample.pages`, `sample.siteTitle`).
 *
 * @param {string} filePath - Path of the JSON file to analyze.
 * @param {number} [sampleSize=3] - Maximum number of items copied into `sample`.
 * @returns {null|{error: string, size: number, sizeHuman: string}|{size: number, sizeHuman: string, lines: number, count: number, sample: *, isArray: boolean}}
 *   `null` when the file does not exist, an error record when it is not valid
 *   JSON, otherwise the summary.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const sizeHuman = `${(stats.size / 1024).toFixed(1)} KB`;
  const content = fs.readFileSync(filePath, 'utf8');

  let data;
  try {
    data = JSON.parse(content);
  } catch (e) {
    // Keep the size fields so callers that list file sizes do not print "undefined".
    return { error: 'Invalid JSON', size: stats.size, sizeHuman };
  }

  const isArray = Array.isArray(data);
  // JSON.parse may legitimately return null or a primitive; guard before property access.
  const isObject = data !== null && typeof data === 'object';
  const menus = !isArray && isObject && Array.isArray(data.menus) ? data.menus : null;
  const items = isArray ? data : menus;

  let sample;
  if (items) {
    sample = items.slice(0, sampleSize);
  } else if (isObject) {
    sample = data;
  } else {
    sample = [];
  }

  return {
    size: stats.size,
    sizeHuman,
    lines: content.split('\n').length,
    count: items ? items.length : 0,
    sample,
    isArray,
  };
}
|
||||
|
||||
// Main analysis
|
||||
/**
 * Print a console report for the latest export: per-type counts, translation
 * pair counts, file sizes, a sample item per content type, data quality checks
 * and recommendations.
 *
 * Files that do not exist are skipped (analyzeFile returns null for them).
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const files = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json'
  ];

  const results = {};
  files.forEach(file => {
    const analysis = analyzeFile(path.join(exportDir, file), 2);
    if (analysis) {
      results[file] = analysis;
    }
  });

  const countOf = (file) => results[file]?.count || 0;

  // Resolve the sample of an object-shaped file in one place so the summary and
  // the quality checks agree: an array sample yields its first element, any
  // other sample is used as-is.
  const objectSample = (file) => {
    const sample = results[file]?.sample;
    return Array.isArray(sample) ? sample[0] : sample;
  };

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  // Content counts
  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  console.log(` Pages: EN: ${pagesEN} | DE: ${pagesDE} | Total: ${pagesEN + pagesDE}`);
  console.log(` Posts: EN: ${postsEN} | DE: ${postsDE} | Total: ${postsEN + postsDE}`);
  console.log(` Products: EN: ${productsEN} | DE: ${productsDE} | Total: ${productsEN + productsDE}`);
  console.log(` Categories: EN: ${categoriesEN} | DE: ${categoriesDE} | Total: ${categoriesEN + categoriesDE}`);
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping: pair counts are computed once and reused by the
  // summary, the quality checks and the recommendations.
  const hasMappingSample = Boolean(results['translation-mapping.json']?.sample);
  const translation = objectSample('translation-mapping.json') || {};
  const pagePairs = Object.keys(translation.pages || {}).length;
  const postPairs = Object.keys(translation.posts || {}).length;
  const productPairs = Object.keys(translation.products || {}).length;
  const categoryPairs = Object.keys(translation.productCategories || {}).length;

  if (hasMappingSample) {
    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes
  console.log('💾 File Sizes:');
  Object.entries(results).forEach(([file, data]) => {
    // Entries for unparseable files may carry only an error; show it instead of "undefined".
    const sizeLabel = data.sizeHuman || `(${data.error || 'size unavailable'})`;
    console.log(` ${file.padEnd(30)} ${sizeLabel}`);
  });

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');

  const page = results['pages.en.json']?.sample?.[0];
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }

  const post = results['posts.en.json']?.sample?.[0];
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }

  const product = results['products.en.json']?.sample?.[0];
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }

  const menu = results['menus.en.json']?.sample?.[0];
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks
  console.log('\n✅ Data Quality Checks:');

  const checks = [
    {
      name: 'Both languages present',
      pass: pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0
    },
    {
      name: 'Translation pairs exist',
      pass: pagePairs > 0
    },
    {
      name: 'Media files downloaded',
      pass: media > 0
    },
    {
      name: 'Redirects generated',
      pass: redirects > 0
    },
    {
      name: 'Site info complete',
      pass: objectSample('site-info.json')?.siteTitle !== undefined
    }
  ];

  checks.forEach(check => {
    console.log(` ${check.pass ? '✅' : '❌'} ${check.name}`);
  });

  // Recommendations
  console.log('\n💡 Recommendations:');

  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }

  if (hasMappingSample && postPairs === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }

  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  console.log(' 1. Review sample data above for accuracy');
  console.log(' 2. Check translation mapping for completeness');
  console.log(' 3. Verify media files are properly named');
  console.log(' 4. Proceed to Next.js data processing');
}
|
||||
|
||||
// Run the report only when this file is executed directly, not when it is required.
if (require.main === module) main();
|
||||
230
scripts/create-manual-mapping.ts
Normal file
230
scripts/create-manual-mapping.ts
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env tsx
|
||||
|
||||
/**
|
||||
* Manual Translation Mapping Generator
|
||||
* Creates translationKey mappings for posts that couldn't be auto-detected
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
/** Shape of a post record as this script reads it from the raw JSON files. */
interface Post {
  id: number;
  slug: string;
  title: { rendered: string };
  date: string;
  lang: string;
  // Optional Polylang linkage fields; used for first-pass matching when present.
  pll_translation_id?: number;
  pll_master_post_id?: number;
}

/**
 * Translation pairs per content type: translationKey -> [en_id, de_id].
 *
 * The ids are numeric: every producer in this file stores `Post.id`-style
 * numbers, and main() compares the stored ids to `Post.id` with `===`.
 */
interface TranslationMapping {
  posts: Record<string, number[]>;
  products: Record<string, number[]>;
  pages: Record<string, number[]>;
}

/** Raw export data grouped by content type and language. */
interface RawData {
  posts: {
    en: Post[];
    de: Post[];
  };
  products: {
    en: any[];
    de: any[];
  };
  pages: {
    en: any[];
    de: any[];
  };
}
|
||||
|
||||
// Simple text similarity function
|
||||
/**
 * Word-overlap (Jaccard) similarity between two texts.
 *
 * Both texts are lower-cased, hyphens are treated as word separators (so slugs
 * are compared word by word), punctuation is removed and whitespace collapsed.
 * Letters of any script are kept, so German umlauts and ß are not stripped.
 *
 * @param text1 First text (title or slug).
 * @param text2 Second text (title or slug).
 * @returns A score in [0, 1]; 1.0 when the normalized texts are identical.
 */
function calculateSimilarity(text1: string, text2: string): number {
  const normalize = (str: string) =>
    str.toLowerCase()
      .replace(/-+/g, ' ')
      .replace(/[^\p{L}\p{N}_\s]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();

  const s1 = normalize(text1);
  const s2 = normalize(text2);

  if (s1 === s2) return 1.0;

  // Compare distinct words only; counting duplicates could push the ratio above 1.
  const words1 = new Set(s1.split(' ').filter(Boolean));
  const words2 = new Set(s2.split(' ').filter(Boolean));

  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) shared++;
  }

  const unionSize = words1.size + words2.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
||||
|
||||
// Generate translation key from title
|
||||
/**
 * Derive a slug-like translation key from a title.
 *
 * The title is lower-cased and trimmed, characters other than word characters,
 * whitespace and hyphens are dropped, whitespace runs become single hyphens,
 * and leading/trailing hyphens are removed.
 *
 * @param title Title text.
 * @returns The generated key; may be empty when the title has no word characters.
 */
function generateKeyFromTitle(title: string): string {
  return title
    .toLowerCase()
    // Trim before whitespace is turned into hyphens; trimming afterwards has no effect.
    .trim()
    .replace(/[^\w\s-]/g, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}
|
||||
|
||||
/**
 * Pair English posts with their German counterparts.
 *
 * Pass 1 uses Polylang metadata: a shared `pll_translation_id`, or an English
 * post whose `pll_master_post_id` is the id of a German post.
 * Pass 2 scores every remaining EN/DE combination (title 60%, slug 30%, exact
 * date match 10%) and accepts the best German candidate scoring above 0.7; each
 * German post is used at most once.
 *
 * @param postsEn English posts.
 * @param postsDe German posts.
 * @returns translationKey -> [enId, deId].
 */
function findPostTranslations(
  postsEn: Post[],
  postsDe: Post[]
): TranslationMapping['posts'] {
  const mapping: TranslationMapping['posts'] = {};

  // First pass: try to match by Polylang metadata
  const deById = new Map<number, Post>();
  const deByTranslationId = new Map<number, Post>();
  for (const dePost of postsDe) {
    deById.set(dePost.id, dePost);
    // Posts without a translation id must not be indexed under `undefined`.
    if (dePost.pll_translation_id) {
      deByTranslationId.set(dePost.pll_translation_id, dePost);
    }
  }

  for (const enPost of postsEn) {
    const byTranslationId = enPost.pll_translation_id
      ? deByTranslationId.get(enPost.pll_translation_id)
      : undefined;
    if (byTranslationId) {
      mapping[`post-${enPost.pll_translation_id}`] = [enPost.id, byTranslationId.id];
      continue;
    }

    const byMasterId = enPost.pll_master_post_id
      ? deById.get(enPost.pll_master_post_id)
      : undefined;
    if (byMasterId) {
      mapping[`post-${enPost.pll_master_post_id}`] = [enPost.id, byMasterId.id];
    }
  }

  // Second pass: content-based matching for remaining unmatched posts.
  // Track EN and DE ids separately and query the sets with has(): a Set has no includes().
  const firstPassPairs = Object.values(mapping);
  const matchedEnIds = new Set(firstPassPairs.map(pair => pair[0]));
  const matchedDeIds = new Set(firstPassPairs.map(pair => pair[1]));

  const unmatchedEn = postsEn.filter(p => !matchedEnIds.has(p.id));
  const unmatchedDe = postsDe.filter(p => !matchedDeIds.has(p.id));

  for (const enPost of unmatchedEn) {
    let bestMatch: { post: Post; score: number } | null = null;

    for (const dePost of unmatchedDe) {
      const titleScore = calculateSimilarity(enPost.title.rendered, dePost.title.rendered);
      const slugScore = calculateSimilarity(enPost.slug, dePost.slug);
      const dateScore = enPost.date === dePost.date ? 1.0 : 0.0;

      // Weighted average
      const score = (titleScore * 0.6) + (slugScore * 0.3) + (dateScore * 0.1);

      if (score > 0.7 && (!bestMatch || score > bestMatch.score)) {
        bestMatch = { post: dePost, score };
      }
    }

    if (bestMatch) {
      // Fall back to an id-based key for titles that yield an empty key, and
      // disambiguate keys that are already taken so no earlier pair is overwritten.
      const baseKey = generateKeyFromTitle(enPost.title.rendered) || `post-${enPost.id}`;
      const key = baseKey in mapping ? `${baseKey}-${enPost.id}` : baseKey;
      mapping[key] = [enPost.id, bestMatch.post.id];
      // Each German post may be paired only once.
      unmatchedDe.splice(unmatchedDe.indexOf(bestMatch.post), 1);
    }
  }

  return mapping;
}
|
||||
|
||||
/**
 * Pair English and German products that share a SKU.
 *
 * English products without a SKU, or whose SKU has no German counterpart, are
 * left out. When several German products share a SKU the last one wins.
 *
 * @param productsEn English products.
 * @param productsDe German products.
 * @returns `product-<sku>` -> [enId, deId].
 */
function findProductTranslations(
  productsEn: any[],
  productsDe: any[]
): TranslationMapping['products'] {
  const germanBySku = new Map<any, any>();
  productsDe.forEach(deProduct => {
    germanBySku.set(deProduct.sku, deProduct);
  });

  return productsEn.reduce((pairs, enProduct) => {
    const sku = enProduct.sku;
    if (!sku || !germanBySku.has(sku)) {
      return pairs;
    }
    pairs[`product-${sku}`] = [enProduct.id, germanBySku.get(sku).id];
    return pairs;
  }, {} as TranslationMapping['products']);
}
|
||||
|
||||
/**
 * Pair English and German pages that share a Polylang `pll_translation_id`.
 *
 * Pages without a translation id are never paired.
 *
 * @param pagesEn English pages.
 * @param pagesDe German pages.
 * @returns `page-<pll_translation_id>` -> [enId, deId].
 */
function findPageTranslations(
  pagesEn: any[],
  pagesDe: any[]
): TranslationMapping['pages'] {
  const mapping: TranslationMapping['pages'] = {};

  // Index German pages by translation id, skipping pages that have none so the
  // map is not polluted with an `undefined` key.
  const deByTranslationId = new Map<any, any>();
  for (const dePage of pagesDe) {
    if (dePage.pll_translation_id) {
      deByTranslationId.set(dePage.pll_translation_id, dePage);
    }
  }

  for (const enPage of pagesEn) {
    const dePage = enPage.pll_translation_id
      ? deByTranslationId.get(enPage.pll_translation_id)
      : undefined;
    if (dePage) {
      mapping[`page-${enPage.pll_translation_id}`] = [enPage.id, dePage.id];
    }
  }

  return mapping;
}
|
||||
|
||||
/**
 * Load the raw EN/DE posts, products and pages, build the translation mapping
 * for each content type, write it to data/manual-translation-mapping.json and
 * print a short summary with up to three example post pairs.
 *
 * Input and output paths are relative to the current working directory.
 */
function main() {
  console.log('🔍 Creating manual translation mapping...\n');

  // Read raw data
  const readJson = (file: string) => JSON.parse(readFileSync(file, 'utf8'));
  const rawData: RawData = {
    posts: {
      en: readJson('data/raw/posts.en.json'),
      de: readJson('data/raw/posts.de.json')
    },
    products: {
      en: readJson('data/raw/products.en.json'),
      de: readJson('data/raw/products.de.json')
    },
    pages: {
      en: readJson('data/raw/pages.en.json'),
      de: readJson('data/raw/pages.de.json')
    }
  };

  const { posts, products, pages } = rawData;
  console.log('📊 Raw data loaded:');
  console.log(` - Posts: ${posts.en.length} EN, ${posts.de.length} DE`);
  console.log(` - Products: ${products.en.length} EN, ${products.de.length} DE`);
  console.log(` - Pages: ${pages.en.length} EN, ${pages.de.length} DE`);
  console.log('');

  // Generate mappings
  const postMapping = findPostTranslations(posts.en, posts.de);
  const productMapping = findProductTranslations(products.en, products.de);
  const pageMapping = findPageTranslations(pages.en, pages.de);
  const mapping: TranslationMapping = {
    posts: postMapping,
    products: productMapping,
    pages: pageMapping
  };

  // Save mapping
  const outputPath = 'data/manual-translation-mapping.json';
  writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  console.log('✅ Manual translation mapping created:\n');
  console.log(`Posts: ${Object.keys(mapping.posts).length} pairs`);
  console.log(`Products: ${Object.keys(mapping.products).length} pairs`);
  console.log(`Pages: ${Object.keys(mapping.pages).length} pairs`);
  console.log(`\nSaved to: ${outputPath}`);

  // Show some examples
  const examples = Object.entries(mapping.posts);
  if (examples.length === 0) {
    return;
  }

  console.log('\n📝 Post mapping examples:');
  for (const [key, ids] of examples.slice(0, 3)) {
    const enPost = posts.en.find(p => p.id === ids[0]);
    const dePost = posts.de.find(p => p.id === ids[1]);
    console.log(` ${key}:`);
    console.log(` EN: [${ids[0]}] ${enPost?.title.rendered}`);
    console.log(` DE: [${ids[1]}] ${dePost?.title.rendered}`);
  }
}
|
||||
|
||||
// Script entry point: runs unconditionally whenever this module is loaded.
main();
|
||||
246
scripts/improve-translation-mapping.js
Executable file
246
scripts/improve-translation-mapping.js
Executable file
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Improved Translation Mapping Script
|
||||
* Creates translation pairs by analyzing content similarity and patterns
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
|
||||
|
||||
// Find the latest export directory
|
||||
/**
 * Resolve the newest export directory inside DATA_DIR.
 *
 * Prints an error and exits with status 1 when DATA_DIR is missing or holds no
 * sub-directories, instead of failing later inside path.join with an undefined
 * directory name.
 *
 * @returns {string} Path of the export directory whose name sorts last.
 */
function getLatestExportDir() {
  if (!fs.existsSync(DATA_DIR)) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  const dirs = fs.readdirSync(DATA_DIR).filter(f => {
    const stat = fs.statSync(path.join(DATA_DIR, f));
    return stat.isDirectory();
  });

  if (dirs.length === 0) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Pick the name that sorts last; the default string sort is used, as before.
  dirs.sort().reverse();
  return path.join(DATA_DIR, dirs[0]);
}
|
||||
|
||||
// Simple text similarity (Levenshtein-like)
|
||||
/**
 * Normalized edit-distance similarity of two strings.
 *
 * Computes the Levenshtein distance (insert, delete, substitute, each cost 1)
 * and scales it by the length of the longer string.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} 1.0 for identical strings (including two empty strings),
 *   0 when every character of the longer string has to change.
 */
function similarity(str1, str2) {
  const firstIsLonger = str1.length > str2.length;
  const longer = firstIsLonger ? str1 : str2;
  const shorter = firstIsLonger ? str2 : str1;

  if (longer.length === 0) return 1.0;

  // Row-by-row dynamic programme: `previous[j]` is the distance between the
  // first i-1 characters of `longer` and the first j characters of `shorter`.
  let previous = Array.from({ length: shorter.length + 1 }, (_, j) => j);

  for (let i = 1; i <= longer.length; i++) {
    const current = [i];
    for (let j = 1; j <= shorter.length; j++) {
      const substitutionCost = longer.charAt(i - 1) === shorter.charAt(j - 1) ? 0 : 1;
      current[j] = Math.min(
        previous[j - 1] + substitutionCost,
        previous[j] + 1,
        current[j - 1] + 1
      );
    }
    previous = current;
  }

  const distance = previous[shorter.length];
  return (longer.length - distance) / longer.length;
}
|
||||
|
||||
// Extract keywords from slug
|
||||
/**
 * Reduce a slug to its significant words.
 *
 * Splits on hyphens, keeps only parts longer than three characters, sorts them
 * with the default string ordering and joins them with hyphens again.
 *
 * @param {string} slug
 * @returns {string} Sorted keyword string; empty when no part is long enough.
 */
function extractKeywords(slug) {
  const keywords = [];
  for (const part of slug.split('-')) {
    if (part.length > 3) {
      keywords.push(part);
    }
  }
  keywords.sort();
  return keywords.join('-');
}
|
||||
|
||||
// Find translation pairs using multiple strategies
|
||||
/**
 * Greedily pair English items with German items by text similarity.
 *
 * For every English item (in input order) each unused German item is scored:
 *   40% slug keywords + 40% title + 20% content preview.
 * The title is taken from `titleHtml` or, when absent, `name`; the content from
 * `contentHtml` or, when absent, `descriptionHtml`, so items that only carry
 * `name` / `descriptionHtml` are scored on all three signals too. A signal
 * missing on either side contributes 0.
 * The best candidate is accepted when its score is strictly above `threshold`;
 * each German item is used at most once.
 *
 * @param {Array<Object>} itemsEN - English items (need `id` and `slug`).
 * @param {Array<Object>} itemsDE - German items (need `id` and `slug`).
 * @param {number} [threshold=0.6] - Minimum combined score (exclusive).
 * @returns {Array<{translationKey: string, en: *, de: *, score: number, enSlug: string, deSlug: string}>}
 */
function findTranslationPairs(itemsEN, itemsDE, threshold = 0.6) {
  const stripTags = (html) => html.replace(/<[^>]*>/g, '');
  const firstString = (...values) =>
    values.find(value => typeof value === 'string' && value !== '') || null;

  // Comparison features depend on one item only, so compute them once per item
  // instead of once per EN/DE combination.
  const describe = (item) => {
    const title = firstString(item.titleHtml, item.name);
    const content = firstString(item.contentHtml, item.descriptionHtml);
    return {
      item,
      keywords: extractKeywords(item.slug),
      title: title === null ? null : stripTags(title).toLowerCase(),
      // Strip tags before truncating so a tag cut in half cannot leak into the preview.
      preview: content === null ? null : stripTags(content).substring(0, 200).toLowerCase(),
    };
  };

  const candidates = itemsDE.map(describe);
  const pairs = [];
  const usedDE = new Set();

  itemsEN.forEach(enItem => {
    const en = describe(enItem);
    let bestMatch = null;
    let bestScore = 0;

    candidates.forEach(de => {
      if (usedDE.has(de.item.id)) return;

      // Strategy 1: Keyword similarity
      const keywordScore = similarity(en.keywords, de.keywords);

      // Strategy 2: Title similarity (if available on both sides)
      const titleScore = en.title !== null && de.title !== null
        ? similarity(en.title, de.title)
        : 0;

      // Strategy 3: Content preview similarity (if available on both sides)
      const contentScore = en.preview !== null && de.preview !== null
        ? similarity(en.preview, de.preview)
        : 0;

      // Combined score (weighted)
      const combinedScore = (keywordScore * 0.4) + (titleScore * 0.4) + (contentScore * 0.2);

      if (combinedScore > bestScore && combinedScore > threshold) {
        bestScore = combinedScore;
        bestMatch = de.item;
      }
    });

    if (bestMatch) {
      usedDE.add(bestMatch.id);
      pairs.push({
        translationKey: `${enItem.slug}`,
        en: enItem.id,
        de: bestMatch.id,
        score: bestScore,
        enSlug: enItem.slug,
        deSlug: bestMatch.slug
      });
    }
  });

  return pairs;
}
|
||||
|
||||
// Main function
|
||||
/**
 * Build an improved translation mapping for the latest export.
 *
 * Loads the EN/DE pages, posts, products and product categories, pairs them
 * with findTranslationPairs, writes translation-mapping-improved.json into the
 * export directory and prints a summary, sample pairs and unmatched counts.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔧 Improving Translation Mapping');
  console.log('================================\n');

  // Load data. A missing, unreadable or non-array file is treated as empty, but
  // the reason is reported instead of being swallowed silently.
  const loadJSON = (file) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
      if (Array.isArray(parsed)) {
        return parsed;
      }
      console.warn(`⚠️ ${file} does not contain a JSON array; treating it as empty`);
    } catch (e) {
      console.warn(`⚠️ Could not load ${file}: ${e.message}`);
    }
    return [];
  };

  const contentTypes = [
    { label: 'Pages', noun: 'pages', mappingKey: 'pages', file: 'pages', threshold: 0.5 },
    { label: 'Posts', noun: 'posts', mappingKey: 'posts', file: 'posts', threshold: 0.5 },
    { label: 'Products', noun: 'products', mappingKey: 'products', file: 'products', threshold: 0.6 },
    { label: 'Categories', noun: 'categories', mappingKey: 'productCategories', file: 'product-categories', threshold: 0.5 }
  ].map(type => ({
    ...type,
    en: loadJSON(`${type.file}.en.json`),
    de: loadJSON(`${type.file}.de.json`)
  }));

  console.log('📊 Content loaded:');
  contentTypes.forEach((type, index) => {
    const suffix = index === contentTypes.length - 1 ? '\n' : '';
    console.log(` ${type.label}: ${type.en.length} EN, ${type.de.length} DE${suffix}`);
  });

  // Find pairs
  console.log('🔍 Finding translation pairs...\n');
  contentTypes.forEach(type => {
    type.pairs = findTranslationPairs(type.en, type.de, type.threshold);
  });

  // Build mapping
  const mapping = {};
  contentTypes.forEach(type => {
    const entries = {};
    type.pairs.forEach(pair => {
      entries[pair.translationKey] = { en: pair.en, de: pair.de, score: pair.score };
    });
    mapping[type.mappingKey] = entries;
  });

  // Save improved mapping
  const outputPath = path.join(exportDir, 'translation-mapping-improved.json');
  fs.writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  // Summary
  const totalPairs = contentTypes.reduce((sum, type) => sum + type.pairs.length, 0);
  console.log('✅ Translation Mapping Complete\n');
  console.log('Pairs found:');
  contentTypes.forEach(type => {
    console.log(` ${type.label}: ${type.pairs.length}`);
  });
  console.log(` Total: ${totalPairs}\n`);

  // Show some examples
  const printSamples = (heading, pairs) => {
    if (pairs.length === 0) return;
    console.log(heading);
    pairs.slice(0, 3).forEach(pair => {
      console.log(` ${pair.enSlug} (${pair.score.toFixed(2)})`);
      console.log(` ↔ ${pair.deSlug}`);
      console.log('');
    });
  };
  printSamples('📝 Sample Post Pairs:', contentTypes[1].pairs);
  printSamples('📦 Sample Product Pairs:', contentTypes[2].pairs);

  // Show unmatched items. Matched ids are tracked per content type so an id
  // matched in one type cannot hide an unmatched item of another type.
  const unmatchedSummary = (side) => contentTypes
    .map(type => {
      const matchedIds = new Set(type.pairs.map(pair => pair[side]));
      const unmatched = type[side].filter(item => !matchedIds.has(item.id)).length;
      return `${unmatched} ${type.noun}`;
    })
    .join(', ');

  console.log('🔍 Unmatched Items (may need manual review):');
  console.log(` EN: ${unmatchedSummary('en')}`);
  console.log(` DE: ${unmatchedSummary('de')}`);

  console.log('\n💾 File saved:', outputPath);
  console.log('\n💡 Next steps:');
  console.log(' 1. Review the improved mapping for accuracy');
  console.log(' 2. Manually add any missing pairs');
  console.log(' 3. Use this mapping for Next.js i18n implementation');
}
|
||||
|
||||
// Build the mapping only when this file is executed directly, not when it is required.
if (require.main === module) main();
|
||||
411
scripts/process-data.js
Executable file
411
scripts/process-data.js
Executable file
@@ -0,0 +1,411 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress → Next.js Data Processing Pipeline
|
||||
* Transforms raw WordPress data into Next.js compatible format
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const DATA_DIR = path.join(__dirname, '..', 'data');
|
||||
const RAW_DIR = path.join(DATA_DIR, 'raw');
|
||||
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
|
||||
|
||||
// Create processed directory
|
||||
// Make sure the output directory exists before anything is written to it;
// nothing happens when it is already there.
const processedDirExists = fs.existsSync(PROCESSED_DIR);
if (processedDirExists === false) {
  fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
|
||||
|
||||
// Find latest export
|
||||
/**
 * Resolve the newest export directory inside RAW_DIR.
 *
 * Prints an error and exits with status 1 when RAW_DIR is missing or holds no
 * sub-directories, instead of failing later inside path.join with an undefined
 * directory name.
 *
 * @returns {string} Path of the export directory whose name sorts last.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  const dirs = fs.readdirSync(RAW_DIR).filter(f => {
    const stat = fs.statSync(path.join(RAW_DIR, f));
    return stat.isDirectory();
  });

  if (dirs.length === 0) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Pick the name that sorts last; the default string sort is used, as before.
  dirs.sort().reverse();
  return path.join(RAW_DIR, dirs[0]);
}
|
||||
|
||||
// HTML sanitization - preserve content but clean dangerous elements
|
||||
/**
 * Clean exported HTML for reuse: drop scripts and inline event handlers, turn
 * WPBakery layout shortcodes into divs, remove remaining shortcodes, delete
 * empty paragraphs/divs and collapse whitespace.
 *
 * NOTE(review): this is regex-based clean-up, not a complete HTML sanitizer
 * (for example `javascript:` URLs are not handled). Use a dedicated sanitizer
 * if the content is not trusted.
 *
 * @param {string} html - Raw HTML, possibly containing shortcodes.
 * @returns {string} Cleaned HTML; '' for empty or non-string input.
 */
function sanitizeHTML(html) {
  if (typeof html !== 'string' || html === '') return '';

  let sanitized = html;

  // Remove script elements (tolerating whitespace in the closing tag) and
  // inline handlers with double-quoted, single-quoted or unquoted values.
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Replace WPBakery wrappers with divs to preserve structure. Each pattern
  // matches the exact shortcode name (optionally `_inner`), so openers and
  // closers are always replaced in pairs and `vc_column_text` is not mistaken
  // for `vc_column`.
  sanitized = sanitized.replace(/\[vc_column_text(?:\s[^\]]*)?\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?(?:\s[^\]]*)?\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?(?:\s[^\]]*)?\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Remove other shortcodes but keep text content. Only bracketed tokens that
  // start with a shortcode-like name are removed, so text such as "[1]" survives.
  sanitized = sanitized.replace(/\[\/?[a-z_][\w-]*(?:\s[^\]]*)?\/?\]/gi, '');

  // Remove empty paragraphs and divs
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure
  sanitized = sanitized.replace(/\s+/g, ' ').trim();

  return sanitized;
}
|
||||
|
||||
// Extract excerpt from content
|
||||
/**
 * Build a plain-text excerpt by stripping tags and truncating.
 *
 * @param {string} content - HTML content; empty or non-string values yield ''.
 * @param {number} [maxLength=200] - Maximum number of characters kept before '...'.
 * @returns {string} The text itself when it fits, otherwise its first
 *   `maxLength` characters followed by '...'.
 */
function generateExcerpt(content, maxLength = 200) {
  // Items without content would otherwise throw on `.replace`.
  if (typeof content !== 'string' || content === '') return '';

  const text = content.replace(/<[^>]*>/g, '');
  if (text.length <= maxLength) return text;
  return text.substring(0, maxLength) + '...';
}
|
||||
|
||||
// Process pages
|
||||
/**
 * Convert raw EN/DE pages into the processed page records.
 *
 * `translationMapping.pages` is read as `{ [key]: { en: <id>, de: <id> } }`.
 * English pages look their pair up by slug, as before. German pages are
 * resolved through the pair's `de` id first, because the mapping produced by
 * the improve-translation-mapping script is keyed by the English slug; a
 * slug-keyed lookup remains as a fallback. Paired EN and DE records therefore
 * share the same `translationKey`.
 *
 * @param {Array<Object>} pagesEN - English pages.
 * @param {Array<Object>} pagesDE - German pages.
 * @param {Object} translationMapping - Mapping object with an optional `pages` section.
 * @returns {Array<Object>} Processed records: all English pages, then all German pages.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  const pairs = (translationMapping && translationMapping.pages) || {};

  // Reverse index: German page id -> { key, pair }.
  const pairByDeId = new Map();
  Object.entries(pairs).forEach(([key, pair]) => {
    if (pair && pair.de !== undefined && pair.de !== null) {
      pairByDeId.set(pair.de, { key, pair });
    }
  });

  // Shared record builder so both locales always produce the same fields.
  const toRecord = (page, locale, translationKey, translation) => {
    const contentHtml = sanitizeHTML(page.contentHtml);
    return {
      id: page.id,
      translationKey: translationKey,
      locale: locale,
      slug: page.slug,
      path: locale === 'de' ? `/de/${page.slug}` : `/${page.slug}`,
      // A missing title must not abort the whole run.
      title: (page.titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml: page.titleHtml,
      contentHtml: contentHtml,
      // Derive the fallback excerpt from the cleaned content so it carries no
      // shortcodes or scripts and never receives undefined.
      excerptHtml: page.excerptHtml || generateExcerpt(contentHtml),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: translation
    };
  };

  const processed = [];

  // Process English pages
  pagesEN.forEach(page => {
    const deMatch = pairs[page.slug];
    processed.push(toRecord(
      page,
      'en',
      page.slug,
      deMatch ? { locale: 'de', id: deMatch.de } : null
    ));
  });

  // Process German pages
  pagesDE.forEach(page => {
    const byId = pairByDeId.get(page.id);
    const enMatch = byId ? byId.pair : pairs[page.slug];
    processed.push(toRecord(
      page,
      'de',
      byId ? byId.key : page.slug,
      enMatch ? { locale: 'en', id: enMatch.en } : null
    ));
  });

  return processed;
}
|
||||
|
||||
// Process posts
|
||||
/**
 * Convert raw EN/DE posts into the processed post records.
 *
 * `translationMapping.posts` is read as `{ [key]: { en: <id>, de: <id> } }`.
 * English posts look their pair up by slug, as before. German posts are
 * resolved through the pair's `de` id first, because the mapping produced by
 * the improve-translation-mapping script is keyed by the English slug; a
 * slug-keyed lookup remains as a fallback. Paired EN and DE records therefore
 * share the same `translationKey`.
 *
 * @param {Array<Object>} postsEN - English posts.
 * @param {Array<Object>} postsDE - German posts.
 * @param {Object} translationMapping - Mapping object with an optional `posts` section.
 * @returns {Array<Object>} Processed records: all English posts, then all German posts.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  const pairs = (translationMapping && translationMapping.posts) || {};

  // Reverse index: German post id -> { key, pair }.
  const pairByDeId = new Map();
  Object.entries(pairs).forEach(([key, pair]) => {
    if (pair && pair.de !== undefined && pair.de !== null) {
      pairByDeId.set(pair.de, { key, pair });
    }
  });

  // Shared record builder so both locales always produce the same fields.
  const toRecord = (post, locale, translationKey, translation) => {
    const contentHtml = sanitizeHTML(post.contentHtml);
    return {
      id: post.id,
      translationKey: translationKey,
      locale: locale,
      slug: post.slug,
      path: locale === 'de' ? `/de/blog/${post.slug}` : `/blog/${post.slug}`,
      // A missing title must not abort the whole run.
      title: (post.titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml: post.titleHtml,
      contentHtml: contentHtml,
      // Derive the fallback excerpt from the cleaned content so it carries no
      // shortcodes or scripts and never receives undefined.
      excerptHtml: post.excerptHtml || generateExcerpt(contentHtml),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: translation
    };
  };

  const processed = [];

  postsEN.forEach(post => {
    const deMatch = pairs[post.slug];
    processed.push(toRecord(
      post,
      'en',
      post.slug,
      deMatch ? { locale: 'de', id: deMatch.de } : null
    ));
  });

  postsDE.forEach(post => {
    const byId = pairByDeId.get(post.id);
    const enMatch = byId ? byId.pair : pairs[post.slug];
    processed.push(toRecord(
      post,
      'de',
      byId ? byId.key : post.slug,
      enMatch ? { locale: 'en', id: enMatch.en } : null
    ));
  });

  return processed;
}
|
||||
|
||||
// Process products
|
||||
/**
 * Merge the exported English and German products into one list of normalised
 * product records.
 *
 * @param {Array<Object>} productsEN - English products from the export step.
 * @param {Array<Object>} productsDE - German products from the export step.
 * @param {Object} translationMapping - Mapping whose `products` entry maps a
 *   slug to `{ en: <id>, de: <id> }`.
 * @returns {Array<Object>} All English records followed by all German records.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  // The caller substitutes [] when the mapping file cannot be read; fall back
  // to an empty lookup rather than throwing on `undefined[slug]`.
  const pairs = (translationMapping && translationMapping.products) || {};
  const hasOwn = Object.prototype.hasOwnProperty;

  // Build one output record; `otherLocale` is the language the translation
  // link points at.
  const toRecord = (product, locale, otherLocale) => {
    const translationKey = product.slug;
    const match = hasOwn.call(pairs, translationKey) ? pairs[translationKey] : null;
    const prefix = locale === 'en' ? '' : `/${locale}`;

    return {
      id: product.id,
      translationKey,
      locale,
      slug: product.slug,
      path: `${prefix}/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...productsEN.map((product) => toRecord(product, 'en', 'de')),
    ...productsDE.map((product) => toRecord(product, 'de', 'en'))
  ];
}
|
||||
|
||||
// Process product categories
|
||||
/**
 * Merge the exported English and German product categories into one list of
 * normalised category records.
 *
 * @param {Array<Object>} categoriesEN - English categories from the export step.
 * @param {Array<Object>} categoriesDE - German categories from the export step.
 * @param {Object} translationMapping - Mapping whose `productCategories` entry
 *   maps a slug to `{ en: <id>, de: <id> }`.
 * @returns {Array<Object>} All English records followed by all German records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  // The caller substitutes [] when the mapping file cannot be read; fall back
  // to an empty lookup rather than throwing on `undefined[slug]`.
  const pairs = (translationMapping && translationMapping.productCategories) || {};
  const hasOwn = Object.prototype.hasOwnProperty;

  // Build one output record; `otherLocale` is the language the translation
  // link points at.
  const toRecord = (category, locale, otherLocale) => {
    const translationKey = category.slug;
    const match = hasOwn.call(pairs, translationKey) ? pairs[translationKey] : null;
    const prefix = locale === 'en' ? '' : `/${locale}`;

    return {
      id: category.id,
      translationKey,
      locale,
      slug: category.slug,
      name: category.name,
      path: `${prefix}/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map((category) => toRecord(category, 'en', 'de')),
    ...categoriesDE.map((category) => toRecord(category, 'de', 'en'))
  ];
}
|
||||
|
||||
// Process media manifest
|
||||
/**
 * Convert the raw media manifest into the shape consumed by the site.
 *
 * @param {Array<Object>} media - Manifest entries with `id`, `filename`, `url`,
 *   `alt`, `width`, `height` and `mime_type`.
 * @returns {Array<Object>} Entries with a `localPath` under `/media/` and the
 *   MIME type exposed as `mimeType`. Returns [] when `media` is not an array.
 */
function processMedia(media) {
  if (!Array.isArray(media)) {
    return [];
  }

  return media.map((item) => {
    const { id, filename, url, alt, width, height } = item;
    return {
      id,
      filename,
      url,
      localPath: `/media/${filename}`,
      alt,
      width,
      height,
      mimeType: item.mime_type
    };
  });
}
|
||||
|
||||
// Generate asset map for URL replacement
|
||||
/**
 * Build a lookup from original media URL to local `/media/<filename>` path.
 *
 * @param {Array<Object>} media - Manifest entries with `url` and `filename`.
 * @returns {Object<string, string>} Map of source URL to local path. Entries
 *   lacking a URL or a filename are skipped so no `/media/undefined` path is
 *   ever emitted.
 */
function generateAssetMap(media) {
  const map = {};
  if (!Array.isArray(media)) {
    return map;
  }

  for (const item of media) {
    if (item && item.url && item.filename) {
      map[item.url] = `/media/${item.filename}`;
    }
  }

  return map;
}
|
||||
|
||||
// Main processing function
|
||||
/**
 * Entry point: read the latest raw export, normalise every content type and
 * write the combined and per-type JSON files into PROCESSED_DIR, then print a
 * summary.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load one raw export file. `fallback` is returned when the file is missing
  // or invalid; it defaults to [] and is overridden for object-shaped files so
  // later property access does not hit `undefined`.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  const translationMapping = loadJSON('translation-mapping-improved.json', {
    pages: {},
    posts: {},
    products: {},
    productCategories: {}
  });
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Combined file first, then one file per content type for easier access.
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  const outputs = [
    [outputPath, processedData],
    [path.join(PROCESSED_DIR, 'pages.json'), pages],
    [path.join(PROCESSED_DIR, 'posts.json'), posts],
    [path.join(PROCESSED_DIR, 'products.json'), products],
    [path.join(PROCESSED_DIR, 'categories.json'), categories],
    [path.join(PROCESSED_DIR, 'media.json'), processedMedia],
    [path.join(PROCESSED_DIR, 'asset-map.json'), assetMap]
  ];

  // Recursive mkdir is a no-op when the directory already exists.
  fs.mkdirSync(PROCESSED_DIR, { recursive: true });
  for (const [file, data] of outputs) {
    fs.writeFileSync(file, JSON.stringify(data, null, 2));
  }

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  outputs.forEach(([file], index) => {
    // The last entry carries the blank line that closes the section.
    const suffix = index === outputs.length - 1 ? '\n' : '';
    console.log(` ${file}${suffix}`);
  });

  // Sample data
  if (pages.length > 0) {
    const [page] = pages;
    console.log('📄 Sample Page:');
    console.log(` Title: ${page.title}`);
    console.log(` Path: ${page.path}`);
    console.log(` Locale: ${page.locale}`);
    console.log(` Translation: ${page.translation ? 'Yes' : 'No'}\n`);
  }

  if (posts.length > 0) {
    const [post] = posts;
    console.log('📝 Sample Post:');
    console.log(` Title: ${post.title}`);
    console.log(` Path: ${post.path}`);
    console.log(` Locale: ${post.locale}`);
    console.log(` Date: ${post.datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}
|
||||
|
||||
// Run the processing only when this file is executed directly, not when it is
// loaded via require().
if (require.main === module) {
  main();
}
|
||||
706
scripts/wordpress-export.js
Executable file
706
scripts/wordpress-export.js
Executable file
@@ -0,0 +1,706 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress to Next.js Data Export Script
|
||||
* Gathers all required data from WordPress/WooCommerce for static site generation
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const https = require('https');
|
||||
|
||||
// Load environment variables
|
||||
require('dotenv').config();
|
||||
|
||||
// Connection settings come from the environment (.env). Trailing slashes are
// stripped from the base URL so `${BASE_URL}/wp-json/...` never contains `//`.
const BASE_URL = (process.env.WOOCOMMERCE_URL || '').replace(/\/+$/, '');
const CONSUMER_KEY = process.env.WOOCOMMERCE_CONSUMER_KEY;
const CONSUMER_SECRET = process.env.WOOCOMMERCE_CONSUMER_SECRET;
const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD;

// Validate environment
if (!BASE_URL || !CONSUMER_KEY || !CONSUMER_SECRET) {
  console.error('❌ Missing required environment variables');
  console.error('Please check .env file for:');
  console.error(' - WOOCOMMERCE_URL');
  console.error(' - WOOCOMMERCE_CONSUMER_KEY');
  console.error(' - WOOCOMMERCE_CONSUMER_SECRET');
  process.exit(1);
}

// The application password is optional here, but its absence is worth
// surfacing because the WordPress REST helpers read it.
if (!APP_PASSWORD) {
  console.warn('⚠️ WORDPRESS_APP_PASSWORD is not set; authenticated WordPress endpoints may fail');
}

// Configuration
// ':' and '.' are replaced so the timestamp is usable as a directory name.
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');

// Create output directories (recursive mkdir is a no-op if they already exist)
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
fs.mkdirSync(MEDIA_DIR, { recursive: true });
|
||||
|
||||
// API Helper Functions
|
||||
/**
 * Build the HTTP Basic `Authorization` value for the WooCommerce REST API
 * from the consumer key and secret.
 *
 * @returns {string} `Basic <base64(key:secret)>`.
 */
function buildAuthHeader() {
  const token = Buffer.from(`${CONSUMER_KEY}:${CONSUMER_SECRET}`).toString('base64');
  return `Basic ${token}`;
}
|
||||
|
||||
/**
 * Build request headers for the WordPress REST API using an application
 * password.
 *
 * The user name defaults to `admin` and can be overridden with the
 * `WORDPRESS_USERNAME` environment variable. When no application password is
 * configured the `Authorization` header is omitted entirely, instead of
 * sending the literal credentials `admin:undefined`.
 * NOTE(review): WordPress may reject requests that carry invalid Basic
 * credentials even on public endpoints; confirm against the target site.
 *
 * @returns {Object<string, string>} Header map for `makeRequest`.
 */
function buildWordPressAuth() {
  const headers = {};

  if (APP_PASSWORD) {
    const user = process.env.WORDPRESS_USERNAME || 'admin';
    const token = Buffer.from(`${user}:${APP_PASSWORD}`).toString('base64');
    headers['Authorization'] = `Basic ${token}`;
  }

  headers['Content-Type'] = 'application/json';
  return headers;
}
|
||||
|
||||
/**
 * Perform an HTTPS GET and resolve with the response body.
 *
 * @param {string} url - Absolute https URL.
 * @param {Object<string, string>} [headers] - Extra request headers; they
 *   override the default User-Agent when the same key is given.
 * @returns {Promise<*>} Parsed JSON for a 2xx response, or the raw body string
 *   when the body is not valid JSON. Rejects with `Error("HTTP <status>: <body>")`
 *   for any other status, and with the underlying error on transport failure.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-NextJS-Migration/1.0',
        ...headers
      }
    };

    const request = https.get(url, options, (res) => {
      // Decode as UTF-8 at the stream level: concatenating raw Buffer chunks
      // corrupts multi-byte characters that straddle a chunk boundary.
      res.setEncoding('utf8');

      let body = '';
      res.on('data', (chunk) => {
        body += chunk;
      });
      res.on('error', reject);

      res.on('end', () => {
        const succeeded = res.statusCode >= 200 && res.statusCode < 300;
        if (!succeeded) {
          reject(new Error(`HTTP ${res.statusCode}: ${body}`));
          return;
        }

        try {
          resolve(JSON.parse(body));
        } catch (e) {
          // Non-JSON success responses are handed back verbatim.
          resolve(body);
        }
      });
    });

    request.on('error', reject);
  });
}
|
||||
|
||||
/**
 * Fetch every item of a WordPress REST collection (`/wp-json/wp/v2/<endpoint>`),
 * following pages of 100 until a short or empty page is returned.
 *
 * @param {string} endpoint - Collection name, e.g. `pages` or `posts`.
 * @param {Object<string, string>} [params] - Extra query parameters.
 * @param {?string} [locale] - When given, sent as the `lang` query parameter.
 * @returns {Promise<Array<Object>>} All items collected. On a request error the
 *   items gathered so far are returned (best effort); this never rejects.
 */
async function fetchWithPagination(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const allItems = [];

  for (let page = 1; ; page++) {
    const queryString = new URLSearchParams({
      ...params,
      page: page.toString(),
      per_page: perPage.toString(),
      ...(locale ? { lang: locale } : {})
    }).toString();
    const url = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${queryString}`;

    console.log(`📥 Fetching ${endpoint} page ${page}${locale ? ` (${locale})` : ''}...`);

    let items;
    try {
      items = await makeRequest(url, buildWordPressAuth());
    } catch (error) {
      // When the collection size is an exact multiple of perPage, asking for
      // the following page yields an "invalid page number" error. That is the
      // normal end of the data, not a failure worth reporting.
      const ranPastLastPage = page > 1 && /rest_post_invalid_page_number/.test(String(error.message));
      if (!ranPastLastPage) {
        console.error(`❌ Error fetching ${endpoint} page ${page}:`, error.message);
      }
      break;
    }

    if (!Array.isArray(items) || items.length === 0) {
      break;
    }

    allItems.push(...items);

    // A short page means there is nothing further to fetch.
    if (items.length < perPage) {
      break;
    }
  }

  return allItems;
}
|
||||
|
||||
/**
 * Fetch every item of a WooCommerce REST collection (`/wp-json/wc/v3/<endpoint>`).
 *
 * Pages of 100 are requested until a short page is returned, so catalogues
 * with more than 100 entries are exported completely.
 *
 * @param {string} endpoint - Collection path, e.g. `products` or
 *   `products/categories`.
 * @param {Object<string, string>} [params] - Extra query parameters.
 * @param {?string} [locale] - When given, sent as the `lang` query parameter.
 * @returns {Promise<Array<Object>>} Collected items. A non-array response on
 *   the first page is wrapped in a one-element array. On a request error the
 *   items gathered so far are returned ([] if the first page failed); this
 *   never rejects.
 */
async function fetchWooCommerce(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const headers = {
    'Authorization': buildAuthHeader(),
    'Content-Type': 'application/json'
  };
  const collected = [];

  console.log(`📥 Fetching WooCommerce ${endpoint}${locale ? ` (${locale})` : ''}...`);

  try {
    for (let page = 1; ; page++) {
      const queryString = new URLSearchParams({
        ...params,
        per_page: perPage.toString(),
        page: page.toString(),
        ...(locale ? { lang: locale } : {})
      }).toString();
      const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`;

      const response = await makeRequest(url, headers);

      if (!Array.isArray(response)) {
        // Only the first response is wrapped; a non-array on a later page is
        // ignored rather than appended to the list.
        if (page === 1) {
          collected.push(response);
        }
        break;
      }

      collected.push(...response);

      if (response.length < perPage) {
        break;
      }
    }
  } catch (error) {
    console.error(`❌ Error fetching WooCommerce ${endpoint}:`, error.message);
  }

  return collected;
}
|
||||
|
||||
/**
 * Fetch a single media item from the WordPress REST API.
 *
 * @param {number|string} mediaId - Media attachment id.
 * @returns {Promise<?Object>} The response from `/wp-json/wp/v2/media/<id>`,
 *   or null when the request fails (the error is logged).
 */
async function fetchMedia(mediaId) {
  const url = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;

  try {
    return await makeRequest(url, buildWordPressAuth());
  } catch (error) {
    console.error(`❌ Error fetching media ${mediaId}:`, error.message);
    return null;
  }
}
|
||||
|
||||
/**
 * Download a media file into MEDIA_DIR unless it is already present.
 *
 * The body is streamed to `<filename>.part` and renamed once complete, so an
 * interrupted or rejected download never leaves a file that a later run would
 * mistake for "already downloaded".
 *
 * @param {string} url - Absolute https URL of the file.
 * @param {string} filename - Target file name inside MEDIA_DIR.
 * @returns {Promise<string>} Absolute path of the downloaded (or existing) file.
 *   Rejects with `Error("Failed to download: <status>")` for a non-200 response
 *   and with the underlying error on network or file-system failure.
 */
async function downloadMedia(url, filename) {
  const filePath = path.join(MEDIA_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Media already downloaded: ${filename}`);
    return filePath;
  }

  const tempPath = `${filePath}.part`;

  return new Promise((resolve, reject) => {
    // Remove the partial file (ignoring unlink errors), then report the failure.
    const fail = (err) => {
      fs.unlink(tempPath, () => reject(err));
    };

    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Discard the body so the socket is released.
        res.resume();
        reject(new Error(`Failed to download: ${res.statusCode}`));
        return;
      }

      // The file is created only once a successful response has arrived.
      const file = fs.createWriteStream(tempPath);

      file.on('error', (err) => {
        res.destroy();
        fail(err);
      });
      res.on('error', (err) => {
        file.destroy();
        fail(err);
      });

      file.on('finish', () => {
        file.close((closeErr) => {
          if (closeErr) {
            fail(closeErr);
            return;
          }
          fs.rename(tempPath, filePath, (renameErr) => {
            if (renameErr) {
              fail(renameErr);
              return;
            }
            console.log(`✅ Downloaded: ${filename}`);
            resolve(filePath);
          });
        });
      });

      res.pipe(file);
    }).on('error', reject);
  });
}
|
||||
|
||||
// Data Processing Functions
|
||||
/**
 * Pick the featured-image reference of a WordPress item.
 *
 * @param {Object} item - Raw REST item.
 * @returns {*} `item.featured_media` when truthy; otherwise the first entry of
 *   `item._embedded['wp:featuredmedia']`; otherwise null (including when the
 *   embedded list is empty).
 */
function extractFeaturedImage(item) {
  if (item.featured_media) {
    return item.featured_media;
  }

  const embedded = item._embedded?.['wp:featuredmedia'];
  if (embedded) {
    // An empty list has no first entry; report "no image" as null, not undefined.
    return embedded[0] ?? null;
  }

  return null;
}
|
||||
|
||||
/**
 * Reduce a raw WordPress page to the fields kept in the export.
 *
 * @param {Object} page - Raw page from the WordPress REST API.
 * @param {string} locale - Locale the page was fetched for; `en` pages get an
 *   unprefixed path, every other locale is prefixed with `/<locale>`.
 * @returns {Object} Export record for the page.
 */
function processPage(page, locale) {
  const { slug } = page;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;

  return {
    id: page.id,
    translationKey: `page-${slug}`, // Will be refined with Polylang data
    locale,
    slug,
    path: `${localePrefix}/${slug}`,
    titleHtml: page.title?.rendered || '',
    contentHtml: page.content?.rendered || '',
    excerptHtml: page.excerpt?.rendered || '',
    featuredImage: page.featured_media || null,
    updatedAt: page.modified || page.date
  };
}
|
||||
|
||||
/**
 * Reduce a raw WordPress post to the fields kept in the export.
 *
 * @param {Object} post - Raw post from the WordPress REST API.
 * @param {string} locale - Locale the post was fetched for; `en` posts live
 *   under `/blog/`, every other locale under `/<locale>/blog/`.
 * @returns {Object} Export record for the post.
 */
function processPost(post, locale) {
  const { slug } = post;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;

  return {
    id: post.id,
    translationKey: `post-${slug}`,
    locale,
    slug,
    path: `${localePrefix}/blog/${slug}`,
    titleHtml: post.title?.rendered || '',
    contentHtml: post.content?.rendered || '',
    excerptHtml: post.excerpt?.rendered || '',
    featuredImage: post.featured_media || null,
    datePublished: post.date,
    updatedAt: post.modified || post.date
  };
}
|
||||
|
||||
/**
 * Reduce a raw WooCommerce product to the fields kept in the export.
 *
 * @param {Object} product - Raw product from the WooCommerce REST API.
 * @param {string} locale - Locale the product was fetched for; `en` products
 *   live under `/product/`, every other locale under `/<locale>/product/`.
 * @returns {Object} Export record. `images` holds the image `src` URLs and
 *   `featuredImage` is the first of them (null when there are none).
 */
function processProduct(product, locale) {
  const { slug } = product;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;
  const imageUrls = product.images ? product.images.map((img) => img.src) : [];
  const categories = product.categories
    ? product.categories.map((cat) => ({ id: cat.id, name: cat.name, slug: cat.slug }))
    : [];

  return {
    id: product.id,
    translationKey: `product-${slug}`,
    locale,
    slug,
    path: `${localePrefix}/product/${slug}`,
    name: product.name,
    shortDescriptionHtml: product.short_description || '',
    descriptionHtml: product.description || '',
    images: imageUrls,
    featuredImage: imageUrls.length > 0 ? imageUrls[0] : null,
    sku: product.sku,
    regularPrice: product.regular_price,
    salePrice: product.sale_price,
    // Falls back to EUR when the product carries no currency of its own.
    currency: product.currency || 'EUR',
    stockStatus: product.stock_status,
    categories,
    attributes: product.attributes || [],
    variations: product.variations || [],
    updatedAt: product.date_modified
  };
}
|
||||
|
||||
/**
 * Reduce a raw WooCommerce product category to the fields kept in the export.
 *
 * @param {Object} category - Raw category from the WooCommerce REST API.
 * @param {string} locale - Locale the category was fetched for; `en` gets an
 *   unprefixed path, every other locale is prefixed with `/<locale>`.
 * @returns {Object} Export record for the category.
 */
function processProductCategory(category, locale) {
  const { slug } = category;
  const localePrefix = locale === 'en' ? '' : `/${locale}`;

  return {
    id: category.id,
    translationKey: `product-category-${slug}`,
    locale,
    slug,
    name: category.name,
    path: `${localePrefix}/product-category/${slug}`,
    description: category.description || '',
    count: category.count || 0
  };
}
|
||||
|
||||
/**
 * Reduce a raw menu object to its basic structure.
 *
 * @param {Object} menu - Raw menu; the id is read from `term_id`, falling back
 *   to `id`.
 * @param {string} locale - Locale the menu was fetched for.
 * @returns {Object} `{ id, slug, name, locale, items }`, with `items`
 *   defaulting to an empty list.
 */
function processMenu(menu, locale) {
  const { slug, name } = menu;

  // WordPress menus are complex, we'll extract basic structure
  return {
    id: menu.term_id || menu.id,
    slug,
    name,
    locale,
    items: menu.items || []
  };
}
|
||||
|
||||
// Main Export Functions
|
||||
/**
 * Fetch all published pages for both locales, normalise them and write
 * `pages.en.json` / `pages.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array<Object>, de: Array<Object>}>} Processed pages
 *   per locale.
 */
async function exportPages() {
  console.log('\n📊 EXPORTING PAGES');

  const processed = {};
  for (const locale of ['en', 'de']) {
    const rawPages = await fetchWithPagination('pages', { status: 'publish' }, locale);
    processed[locale] = rawPages.map((page) => processPage(page, locale));
  }

  for (const locale of ['en', 'de']) {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, `pages.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Pages: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Fetch all published posts for both locales, normalise them and write
 * `posts.en.json` / `posts.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array<Object>, de: Array<Object>}>} Processed posts
 *   per locale.
 */
async function exportPosts() {
  console.log('\n📊 EXPORTING POSTS');

  const processed = {};
  for (const locale of ['en', 'de']) {
    const rawPosts = await fetchWithPagination('posts', { status: 'publish' }, locale);
    processed[locale] = rawPosts.map((post) => processPost(post, locale));
  }

  for (const locale of ['en', 'de']) {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, `posts.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Posts: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Fetch the WooCommerce products for both locales, normalise them and write
 * `products.en.json` / `products.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array<Object>, de: Array<Object>}>} Processed
 *   products per locale.
 */
async function exportProducts() {
  console.log('\n📊 EXPORTING PRODUCTS');

  const processed = {};
  for (const locale of ['en', 'de']) {
    const rawProducts = await fetchWooCommerce('products', {}, locale);
    processed[locale] = rawProducts.map((product) => processProduct(product, locale));
  }

  for (const locale of ['en', 'de']) {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, `products.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Products: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Fetch the WooCommerce product categories for both locales, normalise them
 * and write `product-categories.en.json` / `product-categories.de.json` into
 * OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array<Object>, de: Array<Object>}>} Processed
 *   categories per locale.
 */
async function exportProductCategories() {
  console.log('\n📊 EXPORTING PRODUCT CATEGORIES');

  const processed = {};
  for (const locale of ['en', 'de']) {
    const rawCategories = await fetchWooCommerce('products/categories', {}, locale);
    processed[locale] = rawCategories.map((category) => processProductCategory(category, locale));
  }

  for (const locale of ['en', 'de']) {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, `product-categories.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Product Categories: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||||
|
||||
/**
 * Export navigation menus for both locales plus the menu-location map and
 * write `menus.en.json` / `menus.de.json` into OUTPUT_DIR.
 *
 * Both lookups are best effort: a failing menus request yields an empty list
 * and a failing menu-locations request leaves `locations` as `{}`.
 *
 * @returns {Promise<{en: Array<Object>, de: Array<Object>, locations: Object}>}
 */
async function exportMenus() {
  console.log('\n📊 EXPORTING MENUS');

  // Try to get menus via WordPress REST API
  // Note: This might require additional plugins or direct DB access
  const menusEN = await fetchWithPagination('menus', {}, 'en').catch(() => []);
  const menusDE = await fetchWithPagination('menus', {}, 'de').catch(() => []);

  // If menus endpoint doesn't work, try to get menu locations
  let menuLocations = {};
  try {
    const locations = await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth());
    // makeRequest hands back the raw body string for non-JSON responses;
    // only keep a real object as the locations map.
    if (locations && typeof locations === 'object') {
      menuLocations = locations;
    }
  } catch (e) {
    console.log('⚠️ Menu locations endpoint not available');
  }

  const processedEN = menusEN.map((menu) => processMenu(menu, 'en'));
  const processedDE = menusDE.map((menu) => processMenu(menu, 'de'));

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'menus.en.json'),
    JSON.stringify({ menus: processedEN, locations: menuLocations }, null, 2)
  );
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'menus.de.json'),
    JSON.stringify({ menus: processedDE, locations: menuLocations }, null, 2)
  );

  console.log(`✅ Menus: ${processedEN.length} EN, ${processedDE.length} DE`);

  return { en: processedEN, de: processedDE, locations: menuLocations };
}
|
||||
|
||||
/**
 * Collect every media reference found in the JSON files already written to
 * OUTPUT_DIR, download the files into the media directory and write the
 * manifest to `media.json`.
 *
 * References are either numeric attachment ids (resolved through fetchMedia)
 * or direct `http...` URLs. File names are derived deterministically from the
 * id or the URL path, so a repeated run maps the same reference to the same
 * file and can reuse what is already on disk.
 *
 * @returns {Promise<Array<Object>>} Manifest entries
 *   `{ id, url, filename, alt, width, height, mime_type }`. An entry is listed
 *   even when its download failed (a warning is logged in that case).
 */
async function exportMedia() {
  console.log('\n📊 EXPORTING MEDIA');

  // Last path segment of a URL, without query string or fragment.
  const baseNameOf = (mediaUrl) => path.basename(mediaUrl.split(/[?#]/)[0]);

  // Stable file name for a direct URL: the path below the uploads folder
  // with separators flattened to '-'.
  const fileNameForUrl = (mediaUrl) => {
    let pathname;
    try {
      pathname = new URL(mediaUrl).pathname;
    } catch (e) {
      pathname = mediaUrl.split(/[?#]/)[0];
    }
    const relative = pathname.split('/wp-content/uploads/').pop();
    return `media-${relative.replace(/^\/+/, '').replace(/[\\/]+/g, '-')}`;
  };

  // Get all unique media references from collected data
  const mediaRefs = new Set();
  const jsonFiles = fs.readdirSync(OUTPUT_DIR).filter((f) => f.endsWith('.json'));

  for (const file of jsonFiles) {
    const content = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, file), 'utf8'));
    const items = Array.isArray(content) ? content : (content.menus || []);

    for (const item of items) {
      if (item.featuredImage) {
        mediaRefs.add(item.featuredImage);
      }
      for (const img of item.images || []) {
        if (typeof img === 'string' && img.includes('/wp-content/')) {
          mediaRefs.add(img);
        }
      }
    }
  }

  const mediaManifest = [];
  const downloadPromises = [];

  for (const mediaRef of mediaRefs) {
    if (typeof mediaRef === 'number') {
      // Fetch media info
      const media = await fetchMedia(mediaRef);
      if (!media || !media.source_url) {
        continue;
      }

      const filename = `${mediaRef}-${baseNameOf(media.source_url)}`;
      mediaManifest.push({
        id: mediaRef,
        url: media.source_url,
        filename: filename,
        alt: media.alt_text || '',
        width: media.media_details?.width,
        height: media.media_details?.height,
        mime_type: media.mime_type
      });

      // Download file
      downloadPromises.push(
        downloadMedia(media.source_url, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media ${mediaRef}:`, err.message);
        })
      );
    } else if (typeof mediaRef === 'string' && mediaRef.startsWith('http')) {
      // Direct URL
      const filename = fileNameForUrl(mediaRef);
      mediaManifest.push({
        id: null,
        url: mediaRef,
        filename: filename,
        alt: '',
        width: null,
        height: null,
        mime_type: null
      });

      downloadPromises.push(
        downloadMedia(mediaRef, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media from URL:`, err.message);
        })
      );
    }
  }

  // Wait for all downloads
  await Promise.all(downloadPromises);

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'media.json'),
    JSON.stringify(mediaManifest, null, 2)
  );

  console.log(`✅ Media: ${mediaManifest.length} items`);

  return mediaManifest;
}
|
||||
|
||||
/**
 * Gather general site information (base URL, languages, Polylang presence,
 * title/description, permalink structure) and write it to `site-info.json`
 * in OUTPUT_DIR.
 *
 * Both remote lookups are best effort; a failure is logged and the
 * corresponding fields are simply left out.
 *
 * @returns {Promise<Object>} The site-info object that was written.
 */
async function exportSiteInfo() {
  console.log('\n📊 EXPORTING SITE INFORMATION');

  const siteInfo = {
    baseUrl: BASE_URL,
    exportDate: new Date().toISOString(),
    timestamp: TIMESTAMP,
    polylang: false,
    languages: ['en', 'de'],
    defaultLocale: 'en' // Will need to confirm
  };

  // Check for Polylang
  try {
    const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth());
    // Match case-insensitively on both the plugin path and the display name,
    // so a display name of "Polylang" is recognised.
    const polylangPlugin = plugins.find((p) => /polylang/i.test(`${p.plugin || ''} ${p.name || ''}`));
    if (polylangPlugin) {
      siteInfo.polylang = true;
      siteInfo.polylangVersion = polylangPlugin.version;
    }
  } catch (e) {
    console.log('⚠️ Could not check plugins');
  }

  // Get site settings and permalink structure; both are read from the same
  // settings response, so a single request serves the two.
  try {
    const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    siteInfo.siteTitle = settings.title;
    siteInfo.siteDescription = settings.description;
    siteInfo.defaultLanguage = settings.default_language || 'en';
    siteInfo.permalinkStructure = settings.permalink_structure;
  } catch (e) {
    console.log('⚠️ Could not fetch settings');
    console.log('⚠️ Could not fetch permalink structure');
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'site-info.json'),
    JSON.stringify(siteInfo, null, 2)
  );

  console.log('✅ Site info exported');

  return siteInfo;
}
|
||||
|
||||
/**
 * Pair English and German items that share a slug and write the result to
 * `translation-mapping.json` in OUTPUT_DIR.
 *
 * Matching is slug-based for now; it should be enhanced with Polylang data.
 *
 * @returns {Promise<Object>} `{ pages, posts, products, productCategories }`,
 *   each mapping a slug to `{ en: <id>, de: <id> }`.
 */
async function generateTranslationMapping() {
  console.log('\n📊 GENERATING TRANSLATION MAPPING');

  // Read one export file; a missing, unreadable or non-array file counts as
  // "no items".
  const loadFile = (filename) => {
    try {
      const data = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
      return Array.isArray(data) ? data : [];
    } catch (e) {
      return [];
    }
  };

  // Pair items by slug. The German items are indexed once so each English
  // item is resolved with a single lookup.
  const findTranslationPairs = (enItems, deItems) => {
    const deBySlug = new Map();
    for (const deItem of deItems) {
      // Keep the first item per slug when a slug occurs more than once.
      if (!deBySlug.has(deItem.slug)) {
        deBySlug.set(deItem.slug, deItem);
      }
    }

    const pairs = {};
    for (const enItem of enItems) {
      const deMatch = deBySlug.get(enItem.slug);
      if (deMatch) {
        pairs[`${enItem.slug}`] = { en: enItem.id, de: deMatch.id };
      }
    }
    return pairs;
  };

  const mapping = {
    pages: findTranslationPairs(loadFile('pages.en.json'), loadFile('pages.de.json')),
    posts: findTranslationPairs(loadFile('posts.en.json'), loadFile('posts.de.json')),
    products: findTranslationPairs(loadFile('products.en.json'), loadFile('products.de.json')),
    productCategories: findTranslationPairs(
      loadFile('product-categories.en.json'),
      loadFile('product-categories.de.json')
    )
  };

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'translation-mapping.json'),
    JSON.stringify(mapping, null, 2)
  );

  const totalPairs = Object.values(mapping).reduce((sum, obj) => sum + Object.keys(obj).length, 0);
  console.log(`✅ Translation mapping: ${totalPairs} pairs found`);

  return mapping;
}
|
||||
|
||||
/**
 * Generate permanent redirects from the root-level post URLs to their
 * `/blog/` locations and write them to `redirects.json` in OUTPUT_DIR.
 *
 *   English: /{slug}     -> /blog/{slug}
 *   German:  /de/{slug}  -> /de/blog/{slug}
 *
 * @returns {Promise<Array<Object>>} Rules of the form
 *   `{ source, destination, permanent, locale }`, English rules first.
 */
async function generateRedirects() {
  console.log('\n📊 GENERATING REDIRECT RULES');

  // Read an exported post list; a missing, unreadable or non-array file is
  // treated as "no posts".
  const loadPosts = (filename) => {
    try {
      const data = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
      return Array.isArray(data) ? data : [];
    } catch (e) {
      console.warn(`⚠️ Could not read ${filename}:`, e.message);
      return [];
    }
  };

  const toRule = (post, locale) => {
    const prefix = locale === 'en' ? '' : `/${locale}`;
    return {
      source: `${prefix}/${post.slug}`,
      destination: `${prefix}/blog/${post.slug}`,
      permanent: true,
      locale
    };
  };

  const redirects = [
    ...loadPosts('posts.en.json').map((post) => toRule(post, 'en')),
    ...loadPosts('posts.de.json').map((post) => toRule(post, 'de'))
  ];

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'redirects.json'),
    JSON.stringify(redirects, null, 2)
  );

  console.log(`✅ Redirects: ${redirects.length} rules generated`);

  return redirects;
}
|
||||
|
||||
// Main Execution
|
||||
/**
 * Run the complete export: content first, then the files derived from it.
 * Any error thrown by a step aborts the run with exit code 1.
 */
async function main() {
  console.log('🚀 WordPress → Next.js Data Export');
  console.log('=====================================');
  console.log(`Target: ${BASE_URL}`);
  console.log(`Output: ${OUTPUT_DIR}`);
  console.log('');

  try {
    // Step 1: Export all content
    // Media comes last because it scans the JSON files the earlier steps wrote.
    await exportSiteInfo();
    await exportPages();
    await exportPosts();
    await exportProducts();
    await exportProductCategories();
    await exportMenus();
    await exportMedia();

    // Step 2: Generate mappings and redirects
    await generateTranslationMapping();
    await generateRedirects();

    const closingLines = [
      '\n🎉 Export Complete!',
      '=====================================',
      `📁 Data directory: data/raw/${TIMESTAMP}`,
      `🖼️ Media directory: public/media/`,
      '',
      'Next steps:',
      '1. Review exported data for completeness',
      '2. Check for any missing translations',
      '3. Verify media downloads',
      '4. Proceed with Next.js data processing'
    ];
    for (const line of closingLines) {
      console.log(line);
    }
  } catch (error) {
    console.error('\n❌ Export failed:', error.message);
    process.exit(1);
  }
}
|
||||
|
||||
// Run if called directly
|
||||
if (require.main === module) {
|
||||
main();
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
exportPages,
|
||||
exportPosts,
|
||||
exportProducts,
|
||||
exportProductCategories,
|
||||
exportMenus,
|
||||
exportMedia,
|
||||
exportSiteInfo,
|
||||
generateTranslationMapping,
|
||||
generateRedirects
|
||||
};
|
||||
Reference in New Issue
Block a user