initial migration

This commit is contained in:
2025-12-28 23:28:31 +01:00
parent 1f99781458
commit 292975299d
284 changed files with 119466 additions and 0 deletions

240
scripts/analyze-export.js Executable file
View File

@@ -0,0 +1,240 @@
#!/usr/bin/env node
/**
* WordPress Export Analysis Script
* Quickly analyzes exported data without loading large files entirely
*/
const fs = require('fs');
const path = require('path');
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
/**
 * Resolve the most recent export directory inside DATA_DIR.
 *
 * Sub-directory names are sorted lexicographically and the last one wins,
 * so this relies on the directories being named with sortable timestamps.
 * Prints an error and exits the process with code 1 when DATA_DIR is
 * missing or contains no sub-directories.
 *
 * @returns {string} Path of the latest export directory.
 */
function getLatestExportDir() {
  const abort = (message) => {
    console.error(message);
    process.exit(1);
  };

  if (!fs.existsSync(DATA_DIR)) {
    abort('❌ No data directory found');
  }

  const exportNames = [];
  for (const entry of fs.readdirSync(DATA_DIR)) {
    if (fs.statSync(path.join(DATA_DIR, entry)).isDirectory()) {
      exportNames.push(entry);
    }
  }

  if (exportNames.length === 0) {
    abort('❌ No export directories found');
  }

  // Ascending sort: the newest timestamped name ends up last.
  exportNames.sort();
  return path.join(DATA_DIR, exportNames[exportNames.length - 1]);
}
/**
 * Summarise one exported JSON file.
 *
 * The whole file is read and parsed. Three payload shapes are recognised:
 *  - a top-level array: `count` is its length, `sample` its first items;
 *  - an object with a `menus` array: counted and sampled via `menus`;
 *  - any other object (e.g. site info, translation mapping): `count` is 0
 *    and `sample` is the object itself, so callers can read its fields.
 *
 * @param {string} filePath - Path of the JSON file to analyse.
 * @param {number} [sampleSize=3] - Maximum number of items kept in `sample`.
 * @returns {null|{error: string}|{size: number, sizeHuman: string, lines: number,
 *   count: number, sample: (Array|Object), isArray: boolean}}
 *   `null` when the file does not exist, `{ error }` when it is not valid JSON.
 */
function analyzeFile(filePath, sampleSize = 3) {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const stats = fs.statSync(filePath);
  const content = fs.readFileSync(filePath, 'utf8');

  let data;
  try {
    data = JSON.parse(content);
  } catch (e) {
    return { error: 'Invalid JSON' };
  }

  const isArray = Array.isArray(data);
  const isPlainObject = !isArray && data !== null && typeof data === 'object';

  // The list this file is "about": the array itself, or the menus it wraps.
  let items = null;
  if (isArray) {
    items = data;
  } else if (isPlainObject && Array.isArray(data.menus)) {
    items = data.menus;
  }

  let sample = [];
  if (items) {
    sample = items.slice(0, sampleSize);
  } else if (isPlainObject) {
    // Object-shaped exports have no items to slice; expose the object so
    // fields such as `siteTitle` or `pages` stay reachable through `sample`.
    sample = data;
  }

  return {
    size: stats.size,
    sizeHuman: `${(stats.size / 1024).toFixed(1)} KB`,
    lines: content.split('\n').length,
    count: items ? items.length : 0,
    sample,
    isArray
  };
}
/**
 * Print a report for the most recent export: per-type content counts,
 * translation pair counts, file sizes, one sample item per content type,
 * data-quality checks and recommendations.
 *
 * The translation mapping and site info are object-shaped files, so they are
 * read directly here instead of through the item sample of `analyzeFile`,
 * which describes list-shaped exports.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('📊 WordPress Export Analysis');
  console.log('============================');
  console.log(`Directory: ${path.basename(exportDir)}\n`);

  const files = [
    'site-info.json',
    'translation-mapping.json',
    'pages.en.json',
    'pages.de.json',
    'posts.en.json',
    'posts.de.json',
    'products.en.json',
    'products.de.json',
    'product-categories.en.json',
    'product-categories.de.json',
    'menus.en.json',
    'menus.de.json',
    'redirects.json',
    'media.json'
  ];

  const results = {};
  files.forEach(file => {
    const analysis = analyzeFile(path.join(exportDir, file), 2);
    if (analysis) {
      results[file] = analysis;
    }
  });

  // Read an object-shaped export. Returns null when the file is missing,
  // unparsable or not an object; a one-element array wrapper is unwrapped.
  const readObject = (file) => {
    let parsed;
    try {
      parsed = JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      // Absence is an expected state here; it is reported by the checks below.
      return null;
    }
    const root = Array.isArray(parsed) ? parsed[0] : parsed;
    return root !== null && typeof root === 'object' ? root : null;
  };
  const translation = readObject('translation-mapping.json');
  const siteInfo = readObject('site-info.json');

  // Summary
  console.log('📋 EXPORT SUMMARY');
  console.log('=================\n');

  // Content counts
  const countOf = (file) => results[file]?.count || 0;
  const pagesEN = countOf('pages.en.json');
  const pagesDE = countOf('pages.de.json');
  const postsEN = countOf('posts.en.json');
  const postsDE = countOf('posts.de.json');
  const productsEN = countOf('products.en.json');
  const productsDE = countOf('products.de.json');
  const categoriesEN = countOf('product-categories.en.json');
  const categoriesDE = countOf('product-categories.de.json');
  const media = countOf('media.json');
  const redirects = countOf('redirects.json');

  console.log('📄 Content by Type & Language:');
  console.log(` Pages: EN: ${pagesEN} | DE: ${pagesDE} | Total: ${pagesEN + pagesDE}`);
  console.log(` Posts: EN: ${postsEN} | DE: ${postsDE} | Total: ${postsEN + postsDE}`);
  console.log(` Products: EN: ${productsEN} | DE: ${productsDE} | Total: ${productsEN + productsDE}`);
  console.log(` Categories: EN: ${categoriesEN} | DE: ${categoriesDE} | Total: ${categoriesEN + categoriesDE}`);
  console.log(` Media: ${media} files`);
  console.log(` Redirects: ${redirects} rules\n`);

  // Translation mapping
  if (translation) {
    const pagePairs = Object.keys(translation.pages || {}).length;
    const postPairs = Object.keys(translation.posts || {}).length;
    const productPairs = Object.keys(translation.products || {}).length;
    const categoryPairs = Object.keys(translation.productCategories || {}).length;
    console.log('🌐 Translation Pairs:');
    console.log(` Pages: ${pagePairs}`);
    console.log(` Posts: ${postPairs}`);
    console.log(` Products: ${productPairs}`);
    console.log(` Categories: ${categoryPairs}`);
    console.log(` Total: ${pagePairs + postPairs + productPairs + categoryPairs}\n`);
  }

  // File sizes (files that failed to parse show their error instead of a size)
  console.log('💾 File Sizes:');
  Object.entries(results).forEach(([file, data]) => {
    console.log(` ${file.padEnd(30)} ${data.sizeHuman ?? `(${data.error})`}`);
  });

  // Sample data
  console.log('\n🔍 Sample Data (first item from each):');
  const page = results['pages.en.json']?.sample?.[0];
  if (page) {
    console.log(`\n Page (EN): "${page.titleHtml}"`);
    console.log(` Path: ${page.path}`);
    console.log(` Slug: ${page.slug}`);
  }
  const post = results['posts.en.json']?.sample?.[0];
  if (post) {
    console.log(`\n Post (EN): "${post.titleHtml}"`);
    console.log(` Path: ${post.path}`);
    console.log(` Date: ${post.datePublished}`);
  }
  const product = results['products.en.json']?.sample?.[0];
  if (product) {
    console.log(`\n Product (EN): "${product.name}"`);
    console.log(` Path: ${product.path}`);
    console.log(` SKU: ${product.sku}`);
    console.log(` Price: ${product.regularPrice} ${product.currency}`);
  }
  const menu = results['menus.en.json']?.sample?.[0];
  if (menu) {
    console.log(`\n Menu (EN): "${menu.name}"`);
    console.log(` Slug: ${menu.slug}`);
    console.log(` Items: ${menu.items ? menu.items.length : 0}`);
  }

  // Data quality checks
  console.log('\n✅ Data Quality Checks:');
  const checks = [
    {
      name: 'Both languages present',
      pass: pagesEN > 0 && pagesDE > 0 && postsEN > 0 && postsDE > 0
    },
    {
      name: 'Translation pairs exist',
      pass: translation !== null && Object.keys(translation.pages || {}).length > 0
    },
    {
      name: 'Media files downloaded',
      pass: media > 0
    },
    {
      name: 'Redirects generated',
      pass: redirects > 0
    },
    {
      name: 'Site info complete',
      pass: siteInfo?.siteTitle !== undefined
    }
  ];
  checks.forEach(check => {
    console.log(` ${check.pass ? '✅' : '❌'} ${check.name}`);
  });

  // Recommendations
  console.log('\n💡 Recommendations:');
  if (postsEN === 0 || postsDE === 0) {
    console.log(' ⚠️ No posts found in one or both languages');
  }
  if (translation && Object.keys(translation.posts || {}).length === 0) {
    console.log(' ⚠️ No post translation pairs found - check if posts have matching slugs');
  }
  if (media === 0) {
    console.log(' ⚠️ No media files downloaded - check API permissions');
  }

  console.log('\n🎯 Next Steps:');
  console.log(' 1. Review sample data above for accuracy');
  console.log(' 2. Check translation mapping for completeness');
  console.log(' 3. Verify media files are properly named');
  console.log(' 4. Proceed to Next.js data processing');
}
// Run the analysis only when this file is executed directly (not when it is
// loaded through require() by another module).
if (require.main === module) {
main();
}

View File

@@ -0,0 +1,230 @@
#!/usr/bin/env tsx
/**
* Manual Translation Mapping Generator
* Creates translationKey mappings for posts that couldn't be auto-detected
*/
import { readFileSync, writeFileSync } from 'fs';
import { join } from 'path';
// Shape of a raw post record as read by this script from the posts.*.json files.
interface Post {
id: number;
slug: string;
// Title wrapper; only the `rendered` string is read by this script.
title: { rendered: string };
// Compared for strict equality between EN and DE posts during fuzzy matching.
date: string;
lang: string;
// Optional ids used by the first matching pass ("Polylang metadata").
// NOTE(review): exact Polylang semantics of these two fields are not visible here — confirm.
pll_translation_id?: number;
pll_master_post_id?: number;
}
/**
 * Output of the mapping generator: for each content type, a dictionary from
 * translation key to the pair of ids `[en_id, de_id]`.
 *
 * Ids are numbers: every producer in this file stores `id` values taken from
 * the raw records (`Post.id` is declared as `number`).
 */
interface TranslationMapping {
  posts: Record<string, number[]>; // translationKey -> [en_id, de_id]
  products: Record<string, number[]>;
  pages: Record<string, number[]>;
}
// Raw export data grouped by content type, then by language ("en" / "de").
// Only posts are typed; products and pages are left as `any`.
interface RawData {
posts: {
en: Post[];
de: Post[];
};
products: {
en: any[];
de: any[];
};
pages: {
en: any[];
de: any[];
};
}
/**
 * Word-level Jaccard similarity between two texts.
 *
 * Both texts are lower-cased, stripped of punctuation and split on
 * whitespace; the score is |shared words| / |all distinct words|.
 * Word sets are used on both sides so repeated words cannot push the
 * score above 1, and Unicode letters (ä, ö, ü, ß, ...) are kept so that
 * different non-ASCII words are not collapsed into the same token.
 *
 * @param text1 - First text.
 * @param text2 - Second text.
 * @returns A score in [0, 1]; 1 when the normalized texts are identical.
 */
function calculateSimilarity(text1: string, text2: string): number {
  const normalize = (str: string) =>
    str.toLowerCase()
      .replace(/[^\p{L}\p{N}_\s]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();

  const s1 = normalize(text1);
  const s2 = normalize(text2);
  if (s1 === s2) return 1.0;

  const words1 = new Set(s1.split(' '));
  const words2 = new Set(s2.split(' '));

  let shared = 0;
  words1.forEach(word => {
    if (words2.has(word)) shared++;
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B|; never 0 here because s1 !== s2.
  const unionSize = words1.size + words2.size - shared;
  return shared / unionSize;
}
/**
 * Build a slug-like translation key from a title.
 *
 * The title is trimmed and lower-cased, punctuation is removed (Unicode
 * letters and digits are kept), whitespace runs become single hyphens and
 * leading/trailing hyphens are stripped.
 *
 * @param title - Title text.
 * @returns The key; an empty string when the title has no letters or digits.
 */
function generateKeyFromTitle(title: string): string {
  return title
    .trim() // must happen before whitespace is turned into hyphens
    .toLowerCase()
    .replace(/[^\p{L}\p{N}_\s-]/gu, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');
}
/**
 * Pair English posts with their German translations.
 *
 * Pass 1 uses the Polylang metadata on the posts: a shared
 * `pll_translation_id`, or an EN `pll_master_post_id` equal to a DE post id.
 * Pass 2 scores the remaining posts by title (60%), slug (30%) and identical
 * date (10%) and accepts the best German candidate scoring above 0.7; each
 * German post is used at most once in this pass.
 *
 * @param postsEn - English posts.
 * @param postsDe - German posts.
 * @returns Dictionary translationKey -> [en_id, de_id].
 */
function findPostTranslations(
  postsEn: Post[],
  postsDe: Post[]
): TranslationMapping['posts'] {
  const mapping: TranslationMapping['posts'] = {};
  // Tracked per language so an id from one side never hides a post on the other.
  const matchedEnIds = new Set<number>();
  const matchedDeIds = new Set<number>();

  const link = (key: string, enPost: Post, dePost: Post) => {
    mapping[key] = [enPost.id, dePost.id];
    matchedEnIds.add(enPost.id);
    matchedDeIds.add(dePost.id);
  };

  // First pass: try to match by Polylang metadata
  const deById = new Map<number, Post>();
  const deByTranslationId = new Map<number, Post>();
  for (const dePost of postsDe) {
    deById.set(dePost.id, dePost);
    if (dePost.pll_translation_id) {
      deByTranslationId.set(dePost.pll_translation_id, dePost);
    }
  }

  for (const enPost of postsEn) {
    const byTranslationId = enPost.pll_translation_id
      ? deByTranslationId.get(enPost.pll_translation_id)
      : undefined;
    if (byTranslationId) {
      link(`post-${enPost.pll_translation_id}`, enPost, byTranslationId);
      continue;
    }

    const byMasterId = enPost.pll_master_post_id
      ? deById.get(enPost.pll_master_post_id)
      : undefined;
    if (byMasterId) {
      link(`post-${enPost.pll_master_post_id}`, enPost, byMasterId);
    }
  }

  // Second pass: content-based matching for remaining unmatched posts
  const unmatchedEn = postsEn.filter(p => !matchedEnIds.has(p.id));
  const unmatchedDe = postsDe.filter(p => !matchedDeIds.has(p.id));
  // Slugs are hyphen-separated; expose their words to the word-based scorer.
  const slugToWords = (slug: string) => slug.replace(/-/g, ' ');

  for (const enPost of unmatchedEn) {
    let bestMatch: { post: Post; score: number } | null = null;

    for (const dePost of unmatchedDe) {
      const titleScore = calculateSimilarity(enPost.title.rendered, dePost.title.rendered);
      const slugScore = calculateSimilarity(slugToWords(enPost.slug), slugToWords(dePost.slug));
      const dateScore = enPost.date === dePost.date ? 1.0 : 0.0;
      // Weighted average
      const score = (titleScore * 0.6) + (slugScore * 0.3) + (dateScore * 0.1);
      if (score > 0.7 && (!bestMatch || score > bestMatch.score)) {
        bestMatch = { post: dePost, score };
      }
    }

    if (bestMatch) {
      const titleKey = generateKeyFromTitle(enPost.title.rendered);
      // Disambiguate empty or already-used keys instead of overwriting a pair.
      const key = titleKey && !(titleKey in mapping)
        ? titleKey
        : `${titleKey || 'post'}-${enPost.id}`;
      link(key, enPost, bestMatch.post);
      unmatchedDe.splice(unmatchedDe.indexOf(bestMatch.post), 1);
    }
  }

  return mapping;
}
/**
 * Pair English and German products that share the same SKU.
 *
 * English products without a SKU, or whose SKU has no German counterpart,
 * are left out. When several German products share a SKU the last one wins.
 *
 * @param productsEn - English products.
 * @param productsDe - German products.
 * @returns Dictionary `product-<sku>` -> [en_id, de_id].
 */
function findProductTranslations(
  productsEn: any[],
  productsDe: any[]
): TranslationMapping['products'] {
  const germanBySku = new Map<any, any>();
  productsDe.forEach(candidate => germanBySku.set(candidate.sku, candidate));

  const pairs: TranslationMapping['products'] = {};
  productsEn.forEach(english => {
    const sku = english.sku;
    if (!sku || !germanBySku.has(sku)) {
      return;
    }
    pairs[`product-${sku}`] = [english.id, germanBySku.get(sku)!.id];
  });
  return pairs;
}
/**
 * Pair English and German pages that share the same `pll_translation_id`.
 *
 * Pages without a translation id are ignored on both sides. When several
 * German pages share an id the last one wins.
 *
 * @param pagesEn - English pages.
 * @param pagesDe - German pages.
 * @returns Dictionary `page-<pll_translation_id>` -> [en_id, de_id].
 */
function findPageTranslations(
  pagesEn: any[],
  pagesDe: any[]
): TranslationMapping['pages'] {
  const mapping: TranslationMapping['pages'] = {};

  // Index only pages that actually carry a translation id.
  const deByTranslationId = new Map<any, any>();
  for (const dePage of pagesDe) {
    if (dePage.pll_translation_id) {
      deByTranslationId.set(dePage.pll_translation_id, dePage);
    }
  }

  for (const enPage of pagesEn) {
    const dePage = enPage.pll_translation_id
      ? deByTranslationId.get(enPage.pll_translation_id)
      : undefined;
    if (dePage) {
      mapping[`page-${enPage.pll_translation_id}`] = [enPage.id, dePage.id];
    }
  }

  return mapping;
}
/**
 * Load the raw EN/DE posts, products and pages (paths relative to the
 * current working directory), build the translation mapping for each
 * content type, write it to data/manual-translation-mapping.json and print
 * a summary with up to three example post pairs.
 */
function main() {
  console.log('🔍 Creating manual translation mapping...\n');

  // Read raw data
  const readRaw = (file: string) => JSON.parse(readFileSync(file, 'utf8'));
  const postsEn = readRaw('data/raw/posts.en.json');
  const postsDe = readRaw('data/raw/posts.de.json');
  const productsEn = readRaw('data/raw/products.en.json');
  const productsDe = readRaw('data/raw/products.de.json');
  const pagesEn = readRaw('data/raw/pages.en.json');
  const pagesDe = readRaw('data/raw/pages.de.json');
  const rawData: RawData = {
    posts: { en: postsEn, de: postsDe },
    products: { en: productsEn, de: productsDe },
    pages: { en: pagesEn, de: pagesDe }
  };
  const { posts, products, pages } = rawData;

  console.log('📊 Raw data loaded:');
  console.log(` - Posts: ${posts.en.length} EN, ${posts.de.length} DE`);
  console.log(` - Products: ${products.en.length} EN, ${products.de.length} DE`);
  console.log(` - Pages: ${pages.en.length} EN, ${pages.de.length} DE`);
  console.log('');

  // Generate mappings
  const postMapping = findPostTranslations(posts.en, posts.de);
  const productMapping = findProductTranslations(products.en, products.de);
  const pageMapping = findPageTranslations(pages.en, pages.de);
  const mapping: TranslationMapping = {
    posts: postMapping,
    products: productMapping,
    pages: pageMapping
  };

  // Save mapping
  const outputPath = 'data/manual-translation-mapping.json';
  writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  console.log('✅ Manual translation mapping created:\n');
  console.log(`Posts: ${Object.keys(mapping.posts).length} pairs`);
  console.log(`Products: ${Object.keys(mapping.products).length} pairs`);
  console.log(`Pages: ${Object.keys(mapping.pages).length} pairs`);
  console.log(`\nSaved to: ${outputPath}`);

  // Show some examples
  if (Object.keys(mapping.posts).length === 0) {
    return;
  }
  console.log('\n📝 Post mapping examples:');
  for (const [key, ids] of Object.entries(mapping.posts).slice(0, 3)) {
    const enPost = rawData.posts.en.find(p => p.id === ids[0]);
    const dePost = rawData.posts.de.find(p => p.id === ids[1]);
    console.log(` ${key}:`);
    console.log(` EN: [${ids[0]}] ${enPost?.title.rendered}`);
    console.log(` DE: [${ids[1]}] ${dePost?.title.rendered}`);
  }
}
// Script entry point: runs unconditionally whenever this module is loaded.
main();

View File

@@ -0,0 +1,246 @@
#!/usr/bin/env node
/**
* Improved Translation Mapping Script
* Creates translation pairs by analyzing content similarity and patterns
*/
const fs = require('fs');
const path = require('path');
const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');
/**
 * Resolve the most recent export directory inside DATA_DIR.
 *
 * Sub-directory names are sorted lexicographically and the last one wins,
 * so this relies on the directories being named with sortable timestamps.
 * Prints an error and exits with code 1 when DATA_DIR is missing or holds
 * no sub-directories, instead of failing later inside `path.join`.
 *
 * @returns {string} Path of the latest export directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(DATA_DIR)) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  const dirs = fs.readdirSync(DATA_DIR).filter(f => {
    const stat = fs.statSync(path.join(DATA_DIR, f));
    return stat.isDirectory();
  });

  if (dirs.length === 0) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Sort by name (timestamp) and get latest
  dirs.sort().reverse();
  return path.join(DATA_DIR, dirs[0]);
}
/**
 * Normalised edit-distance similarity between two strings.
 *
 * Computes the Levenshtein distance (insert, delete, substitute, each of
 * cost 1) and returns `(len - distance) / len`, where `len` is the length of
 * the longer string. Two empty strings are considered identical.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Score between 0 (nothing in common) and 1 (identical).
 */
function similarity(str1, str2) {
  const [shorter, longer] = str1.length > str2.length ? [str2, str1] : [str1, str2];
  if (longer.length === 0) return 1.0;

  // Row-by-row dynamic programming: `previous[j]` is the distance between
  // the first i-1 chars of `longer` and the first j chars of `shorter`.
  let previous = Array.from({ length: shorter.length + 1 }, (_, j) => j);
  for (let i = 1; i <= longer.length; i++) {
    const current = [i];
    for (let j = 1; j <= shorter.length; j++) {
      const sameChar = longer.charAt(i - 1) === shorter.charAt(j - 1);
      const substitute = previous[j - 1] + (sameChar ? 0 : 1);
      const remove = previous[j] + 1;
      const insert = current[j - 1] + 1;
      current[j] = Math.min(substitute, remove, insert);
    }
    previous = current;
  }

  const distance = previous[shorter.length];
  return (longer.length - distance) / longer.length;
}
/**
 * Reduce a slug to its significant words.
 *
 * Splits on hyphens, keeps only parts longer than three characters, sorts
 * them and joins them back with hyphens, so word order in the slug does not
 * influence later comparisons.
 *
 * @param {string} slug - Hyphen-separated slug.
 * @returns {string} Sorted keywords joined by '-'; '' when none qualify.
 */
function extractKeywords(slug) {
  const keywords = [];
  for (const part of slug.split('-')) {
    if (part.length > 3) {
      keywords.push(part);
    }
  }
  keywords.sort();
  return keywords.join('-');
}
/**
 * Greedily pair English items with German items by content similarity.
 *
 * For every English item (in input order) each still-unused German item is
 * scored as 40% slug-keyword similarity + 40% title similarity + 20%
 * similarity of the first 200 content characters. The best candidate above
 * `threshold` is paired and removed from the pool.
 *
 * Title text is taken from `titleHtml`, falling back to `name`; content from
 * `contentHtml`, falling back to `descriptionHtml` and then `description`.
 * Without these fallbacks, items that only carry `name`/`description*`
 * fields (products, categories) can never score above 0.4 and so never pass
 * the thresholds used by the caller.
 *
 * @param {Array<Object>} itemsEN - English items (need `id` and `slug`).
 * @param {Array<Object>} itemsDE - German items (need `id` and `slug`).
 * @param {number} [threshold=0.6] - Minimum combined score (exclusive).
 * @returns {Array<{translationKey: string, en: *, de: *, score: number,
 *   enSlug: string, deSlug: string}>} Accepted pairs.
 */
function findTranslationPairs(itemsEN, itemsDE, threshold = 0.6) {
  const pairs = [];
  const usedDE = new Set();

  const stripTags = (html) => html.replace(/<[^>]*>/g, '').toLowerCase();
  const firstText = (...candidates) =>
    candidates.find(value => typeof value === 'string' && value.length > 0) || '';

  // Comparison features of one item, computed once and cached per item.
  const featureCache = new Map();
  const featuresOf = (item) => {
    if (!featureCache.has(item)) {
      const rawTitle = firstText(item.titleHtml, item.name);
      const rawContent = firstText(item.contentHtml, item.descriptionHtml, item.description);
      featureCache.set(item, {
        keywords: extractKeywords(item.slug),
        hasTitle: rawTitle !== '',
        title: stripTags(rawTitle),
        hasContent: rawContent !== '',
        preview: stripTags(rawContent.substring(0, 200))
      });
    }
    return featureCache.get(item);
  };

  itemsEN.forEach(enItem => {
    let bestMatch = null;
    let bestScore = 0;

    itemsDE.forEach(deItem => {
      if (usedDE.has(deItem.id)) return;
      const en = featuresOf(enItem);
      const de = featuresOf(deItem);

      // Strategy 1: Keyword similarity
      const keywordScore = similarity(en.keywords, de.keywords);
      // Strategy 2: Title similarity (if available on both sides)
      const titleScore = en.hasTitle && de.hasTitle ? similarity(en.title, de.title) : 0;
      // Strategy 3: Content preview similarity (if available on both sides)
      const contentScore = en.hasContent && de.hasContent ? similarity(en.preview, de.preview) : 0;

      // Combined score (weighted)
      const combinedScore = (keywordScore * 0.4) + (titleScore * 0.4) + (contentScore * 0.2);
      if (combinedScore > bestScore && combinedScore > threshold) {
        bestScore = combinedScore;
        bestMatch = deItem;
      }
    });

    if (bestMatch) {
      usedDE.add(bestMatch.id);
      pairs.push({
        translationKey: `${enItem.slug}`,
        en: enItem.id,
        de: bestMatch.id,
        score: bestScore,
        enSlug: enItem.slug,
        deSlug: bestMatch.slug
      });
    }
  });

  return pairs;
}
/**
 * Build the improved translation mapping for the latest export.
 *
 * Loads the EN/DE pages, posts, products and product categories, pairs them
 * with `findTranslationPairs`, writes the result to
 * `translation-mapping-improved.json` inside the export directory and prints
 * a summary including how many items stayed unmatched per content type.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔧 Improving Translation Mapping');
  console.log('================================\n');

  // Load one list-shaped export; failures are reported and yield an empty list.
  const loadJSON = (file) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
      if (!Array.isArray(parsed)) {
        console.error(`❌ Failed to load ${file}:`, 'expected a JSON array');
        return [];
      }
      return parsed;
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return [];
    }
  };

  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');

  console.log('📊 Content loaded:');
  console.log(` Pages: ${pagesEN.length} EN, ${pagesDE.length} DE`);
  console.log(` Posts: ${postsEN.length} EN, ${postsDE.length} DE`);
  console.log(` Products: ${productsEN.length} EN, ${productsDE.length} DE`);
  console.log(` Categories: ${categoriesEN.length} EN, ${categoriesDE.length} DE\n`);

  // Find pairs
  console.log('🔍 Finding translation pairs...\n');
  const pagePairs = findTranslationPairs(pagesEN, pagesDE, 0.5);
  const postPairs = findTranslationPairs(postsEN, postsDE, 0.5);
  const productPairs = findTranslationPairs(productsEN, productsDE, 0.6);
  const categoryPairs = findTranslationPairs(categoriesEN, categoriesDE, 0.5);

  // Build mapping: translationKey -> { en, de, score } per content type
  const toSection = (pairs) => {
    const section = {};
    pairs.forEach(pair => {
      section[pair.translationKey] = { en: pair.en, de: pair.de, score: pair.score };
    });
    return section;
  };
  const mapping = {
    pages: toSection(pagePairs),
    posts: toSection(postPairs),
    products: toSection(productPairs),
    productCategories: toSection(categoryPairs)
  };

  // Save improved mapping
  const outputFile = path.join(exportDir, 'translation-mapping-improved.json');
  fs.writeFileSync(outputFile, JSON.stringify(mapping, null, 2));

  // Summary
  console.log('✅ Translation Mapping Complete\n');
  console.log('Pairs found:');
  console.log(` Pages: ${pagePairs.length}`);
  console.log(` Posts: ${postPairs.length}`);
  console.log(` Products: ${productPairs.length}`);
  console.log(` Categories: ${categoryPairs.length}`);
  console.log(` Total: ${pagePairs.length + postPairs.length + productPairs.length + categoryPairs.length}\n`);

  // Show some examples
  const printSamples = (heading, pairs) => {
    if (pairs.length === 0) return;
    console.log(heading);
    pairs.slice(0, 3).forEach(pair => {
      console.log(` ${pair.enSlug} (${pair.score.toFixed(2)})`);
      console.log(`${pair.deSlug}`);
      console.log('');
    });
  };
  printSamples('📝 Sample Post Pairs:', postPairs);
  printSamples('📦 Sample Product Pairs:', productPairs);

  // Show unmatched items. Counted per content type: ids of different types
  // are not guaranteed to be disjoint, so they must not share one id set.
  const countUnmatched = (items, pairs, side) => {
    const matchedIds = new Set(pairs.map(pair => pair[side]));
    return items.filter(item => !matchedIds.has(item.id)).length;
  };
  const unmatchedEN = {
    pages: countUnmatched(pagesEN, pagePairs, 'en'),
    posts: countUnmatched(postsEN, postPairs, 'en'),
    products: countUnmatched(productsEN, productPairs, 'en'),
    categories: countUnmatched(categoriesEN, categoryPairs, 'en')
  };
  const unmatchedDE = {
    pages: countUnmatched(pagesDE, pagePairs, 'de'),
    posts: countUnmatched(postsDE, postPairs, 'de'),
    products: countUnmatched(productsDE, productPairs, 'de'),
    categories: countUnmatched(categoriesDE, categoryPairs, 'de')
  };

  console.log('🔍 Unmatched Items (may need manual review):');
  console.log(` EN: ${unmatchedEN.pages} pages, ${unmatchedEN.posts} posts, ${unmatchedEN.products} products, ${unmatchedEN.categories} categories`);
  console.log(` DE: ${unmatchedDE.pages} pages, ${unmatchedDE.posts} posts, ${unmatchedDE.products} products, ${unmatchedDE.categories} categories`);
  console.log('\n💾 File saved:', outputFile);
  console.log('\n💡 Next steps:');
  console.log(' 1. Review the improved mapping for accuracy');
  console.log(' 2. Manually add any missing pairs');
  console.log(' 3. Use this mapping for Next.js i18n implementation');
}
// Run the mapping only when this file is executed directly (not when it is
// loaded through require() by another module).
if (require.main === module) {
main();
}

411
scripts/process-data.js Executable file
View File

@@ -0,0 +1,411 @@
#!/usr/bin/env node
/**
* WordPress → Next.js Data Processing Pipeline
* Transforms raw WordPress data into Next.js compatible format
*/
const fs = require('fs');
const path = require('path');
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');
// Create the processed-output directory (and any missing parents) at module
// load time, before any function in this file writes to it.
if (!fs.existsSync(PROCESSED_DIR)) {
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
}
/**
 * Resolve the most recent export directory inside RAW_DIR.
 *
 * Sub-directory names are sorted lexicographically and the last one wins,
 * so this relies on the directories being named with sortable timestamps.
 * Prints an error and exits with code 1 when RAW_DIR is missing or holds no
 * sub-directories, instead of failing later inside `path.join`.
 *
 * @returns {string} Path of the latest export directory.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    console.error('❌ No data directory found');
    process.exit(1);
  }

  const dirs = fs.readdirSync(RAW_DIR).filter(f => {
    const stat = fs.statSync(path.join(RAW_DIR, f));
    return stat.isDirectory();
  });

  if (dirs.length === 0) {
    console.error('❌ No export directories found');
    process.exit(1);
  }

  // Sort by name (timestamp) and get latest
  dirs.sort().reverse();
  return path.join(RAW_DIR, dirs[0]);
}
/**
 * Clean exported WordPress HTML while preserving its content.
 *
 *  - removes <script> elements and inline `on*` event-handler attributes
 *    (double-quoted, single-quoted and unquoted values);
 *  - turns WPBakery row / column / column-text shortcodes, including their
 *    `_inner` variants, into balanced <div> wrappers;
 *  - drops every other `[shortcode]`, empty <p>/<div> elements and collapses
 *    whitespace runs.
 *
 * NOTE: this is regex-based clean-up of trusted export data, not a complete
 * HTML sanitizer; untrusted markup should go through a real HTML parser.
 *
 * @param {string|null|undefined} html - Raw HTML, possibly with shortcodes.
 * @returns {string} Cleaned HTML; '' for empty input.
 */
function sanitizeHTML(html) {
  if (!html) return '';
  let sanitized = html;

  // Remove script tags and inline handlers (security)
  sanitized = sanitized.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Replace WPBakery structural shortcodes with divs. The `\b` after each
  // name keeps `[vc_column ...]` from swallowing `[vc_column_text ...]`, and
  // the `_inner` variants are handled on both the opening and closing side
  // so every emitted <div> gets its </div>.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Remove other shortcodes but keep text content
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure
  sanitized = sanitized.replace(/\s+/g, ' ').trim();
  return sanitized;
}
/**
 * Build a plain-text excerpt from HTML content.
 *
 * Tags are stripped; text longer than `maxLength` is cut to `maxLength`
 * characters and suffixed with '...'.
 *
 * @param {string|null|undefined} content - HTML content.
 * @param {number} [maxLength=200] - Maximum excerpt length before the ellipsis.
 * @returns {string} The excerpt; '' when there is no content.
 */
function generateExcerpt(content, maxLength = 200) {
  // Items without content must yield an empty excerpt, not a TypeError.
  if (!content) return '';
  const text = String(content).replace(/<[^>]*>/g, '');
  if (text.length <= maxLength) return text;
  return text.substring(0, maxLength) + '...';
}
/**
 * Convert raw EN/DE pages into the processed page records.
 *
 * Translations are resolved through `translationMapping.pages`, a dictionary
 * `translationKey -> { en: id, de: id }`. Items are matched by id, so a
 * German page finds its English counterpart even when the slugs differ, and
 * both sides of a pair share the same `translationKey`. English pages fall
 * back to a lookup by slug; unpaired items use their own slug as key. A
 * missing mapping section is treated as "no translations".
 *
 * @param {Array<Object>} pagesEN - Raw English pages.
 * @param {Array<Object>} pagesDE - Raw German pages.
 * @param {Object} translationMapping - Mapping with a `pages` section.
 * @returns {Array<Object>} English records followed by German records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  const pairs = (translationMapping && translationMapping.pages) || {};
  const hasKey = (key) => Object.prototype.hasOwnProperty.call(pairs, key);

  // Reverse index: item id -> translation key, per locale.
  const keyById = { en: new Map(), de: new Map() };
  Object.entries(pairs).forEach(([key, pair]) => {
    if (!pair) return;
    keyById.en.set(pair.en, key);
    keyById.de.set(pair.de, key);
  });

  const resolve = (item, locale) => {
    let key = keyById[locale].get(item.id);
    if (key === undefined && locale === 'en' && hasKey(item.slug)) {
      key = item.slug;
    }
    const pair = key === undefined ? null : pairs[key];
    const otherLocale = locale === 'en' ? 'de' : 'en';
    return {
      translationKey: key === undefined ? item.slug : key,
      translation: pair ? { locale: otherLocale, id: pair[otherLocale] } : null
    };
  };

  const toRecord = (page, locale, pathPrefix) => {
    const { translationKey, translation } = resolve(page, locale);
    const contentHtml = sanitizeHTML(page.contentHtml);
    return {
      id: page.id,
      translationKey: translationKey,
      locale: locale,
      slug: page.slug,
      path: `${pathPrefix}/${page.slug}`,
      title: page.titleHtml.replace(/<[^>]*>/g, ''),
      titleHtml: page.titleHtml,
      contentHtml: contentHtml,
      // Excerpt is derived from the cleaned content so it never shows shortcodes.
      excerptHtml: page.excerptHtml || generateExcerpt(contentHtml),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: translation
    };
  };

  return [
    ...pagesEN.map(page => toRecord(page, 'en', '')),
    ...pagesDE.map(page => toRecord(page, 'de', '/de'))
  ];
}
/**
 * Convert raw EN/DE posts into the processed post records.
 *
 * Translations are resolved through `translationMapping.posts`, a dictionary
 * `translationKey -> { en: id, de: id }`. Items are matched by id, so a
 * German post finds its English counterpart even when the slugs differ, and
 * both sides of a pair share the same `translationKey`. English posts fall
 * back to a lookup by slug; unpaired items use their own slug as key. A
 * missing mapping section is treated as "no translations".
 *
 * @param {Array<Object>} postsEN - Raw English posts.
 * @param {Array<Object>} postsDE - Raw German posts.
 * @param {Object} translationMapping - Mapping with a `posts` section.
 * @returns {Array<Object>} English records followed by German records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  const pairs = (translationMapping && translationMapping.posts) || {};
  const hasKey = (key) => Object.prototype.hasOwnProperty.call(pairs, key);

  // Reverse index: item id -> translation key, per locale.
  const keyById = { en: new Map(), de: new Map() };
  Object.entries(pairs).forEach(([key, pair]) => {
    if (!pair) return;
    keyById.en.set(pair.en, key);
    keyById.de.set(pair.de, key);
  });

  const resolve = (item, locale) => {
    let key = keyById[locale].get(item.id);
    if (key === undefined && locale === 'en' && hasKey(item.slug)) {
      key = item.slug;
    }
    const pair = key === undefined ? null : pairs[key];
    const otherLocale = locale === 'en' ? 'de' : 'en';
    return {
      translationKey: key === undefined ? item.slug : key,
      translation: pair ? { locale: otherLocale, id: pair[otherLocale] } : null
    };
  };

  const toRecord = (post, locale, pathPrefix) => {
    const { translationKey, translation } = resolve(post, locale);
    const contentHtml = sanitizeHTML(post.contentHtml);
    return {
      id: post.id,
      translationKey: translationKey,
      locale: locale,
      slug: post.slug,
      path: `${pathPrefix}/blog/${post.slug}`,
      title: post.titleHtml.replace(/<[^>]*>/g, ''),
      titleHtml: post.titleHtml,
      contentHtml: contentHtml,
      // Excerpt is derived from the cleaned content so it never shows shortcodes.
      excerptHtml: post.excerptHtml || generateExcerpt(contentHtml),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: translation
    };
  };

  return [
    ...postsEN.map(post => toRecord(post, 'en', '')),
    ...postsDE.map(post => toRecord(post, 'de', '/de'))
  ];
}
/**
 * Convert raw EN/DE products into the processed product records.
 *
 * Translations are resolved through `translationMapping.products`, a
 * dictionary `translationKey -> { en: id, de: id }`. Items are matched by
 * id, so a German product finds its English counterpart even when the slugs
 * differ, and both sides of a pair share the same `translationKey`. English
 * products fall back to a lookup by slug; unpaired items use their own slug
 * as key. A missing mapping section is treated as "no translations".
 *
 * @param {Array<Object>} productsEN - Raw English products.
 * @param {Array<Object>} productsDE - Raw German products.
 * @param {Object} translationMapping - Mapping with a `products` section.
 * @returns {Array<Object>} English records followed by German records.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  const pairs = (translationMapping && translationMapping.products) || {};
  const hasKey = (key) => Object.prototype.hasOwnProperty.call(pairs, key);

  // Reverse index: item id -> translation key, per locale.
  const keyById = { en: new Map(), de: new Map() };
  Object.entries(pairs).forEach(([key, pair]) => {
    if (!pair) return;
    keyById.en.set(pair.en, key);
    keyById.de.set(pair.de, key);
  });

  const resolve = (item, locale) => {
    let key = keyById[locale].get(item.id);
    if (key === undefined && locale === 'en' && hasKey(item.slug)) {
      key = item.slug;
    }
    const pair = key === undefined ? null : pairs[key];
    const otherLocale = locale === 'en' ? 'de' : 'en';
    return {
      translationKey: key === undefined ? item.slug : key,
      translation: pair ? { locale: otherLocale, id: pair[otherLocale] } : null
    };
  };

  const toRecord = (product, locale, pathPrefix) => {
    const { translationKey, translation } = resolve(product, locale);
    return {
      id: product.id,
      translationKey: translationKey,
      locale: locale,
      slug: product.slug,
      path: `${pathPrefix}/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: translation
    };
  };

  return [
    ...productsEN.map(product => toRecord(product, 'en', '')),
    ...productsDE.map(product => toRecord(product, 'de', '/de'))
  ];
}
/**
 * Convert raw EN/DE product categories into the processed category records.
 *
 * Translations are resolved through `translationMapping.productCategories`,
 * a dictionary `translationKey -> { en: id, de: id }`. Items are matched by
 * id, so a German category finds its English counterpart even when the
 * slugs differ, and both sides of a pair share the same `translationKey`.
 * English categories fall back to a lookup by slug; unpaired items use
 * their own slug as key. A missing mapping section is treated as
 * "no translations".
 *
 * @param {Array<Object>} categoriesEN - Raw English categories.
 * @param {Array<Object>} categoriesDE - Raw German categories.
 * @param {Object} translationMapping - Mapping with a `productCategories` section.
 * @returns {Array<Object>} English records followed by German records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  const pairs = (translationMapping && translationMapping.productCategories) || {};
  const hasKey = (key) => Object.prototype.hasOwnProperty.call(pairs, key);

  // Reverse index: item id -> translation key, per locale.
  const keyById = { en: new Map(), de: new Map() };
  Object.entries(pairs).forEach(([key, pair]) => {
    if (!pair) return;
    keyById.en.set(pair.en, key);
    keyById.de.set(pair.de, key);
  });

  const resolve = (item, locale) => {
    let key = keyById[locale].get(item.id);
    if (key === undefined && locale === 'en' && hasKey(item.slug)) {
      key = item.slug;
    }
    const pair = key === undefined ? null : pairs[key];
    const otherLocale = locale === 'en' ? 'de' : 'en';
    return {
      translationKey: key === undefined ? item.slug : key,
      translation: pair ? { locale: otherLocale, id: pair[otherLocale] } : null
    };
  };

  const toRecord = (category, locale, pathPrefix) => {
    const { translationKey, translation } = resolve(category, locale);
    return {
      id: category.id,
      translationKey: translationKey,
      locale: locale,
      slug: category.slug,
      name: category.name,
      path: `${pathPrefix}/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: translation
    };
  };

  return [
    ...categoriesEN.map(category => toRecord(category, 'en', '')),
    ...categoriesDE.map(category => toRecord(category, 'de', '/de'))
  ];
}
/**
 * Build the media manifest: one record per media item with its original
 * url, the local path it is served from (`/media/<filename>`) and its basic
 * metadata. The raw `mime_type` field is exposed as `mimeType`.
 *
 * @param {Array<Object>} media - Raw media items.
 * @returns {Array<Object>} Manifest records, in input order.
 */
function processMedia(media) {
  return media.map(({ id, filename, url, alt, width, height, mime_type: mimeType }) => {
    const localPath = `/media/${filename}`;
    return { id, filename, url, localPath, alt, width, height, mimeType };
  });
}
/**
 * Build the lookup used for URL replacement: original media url ->
 * local path (`/media/<filename>`). Items without a url are skipped; when a
 * url occurs more than once the last item wins.
 *
 * @param {Array<Object>} media - Raw media items.
 * @returns {Object<string, string>} Map from original url to local path.
 */
function generateAssetMap(media) {
  return media.reduce((assetMap, item) => {
    if (item.url) {
      assetMap[item.url] = `/media/${item.filename}`;
    }
    return assetMap;
  }, {});
}
/**
 * Run the processing pipeline on the latest export: load the raw JSON files,
 * transform every content type, write the combined `wordpress-data.json`
 * plus one file per content type into PROCESSED_DIR, and print a summary.
 *
 * A file that cannot be loaded is reported and replaced by an empty value of
 * the right shape (list or object), so one missing file does not abort the
 * whole run.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load raw data; `fallback` is what callers get when the file is unusable.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // The mapping and the site info are objects, not lists: give them object
  // fallbacks, and make sure every mapping section exists.
  const emptyMapping = { pages: {}, posts: {}, products: {}, productCategories: {} };
  const translationMapping = {
    ...emptyMapping,
    ...loadJSON('translation-mapping-improved.json', emptyMapping)
  };
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Save the combined file first, then individual files for easier access
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  const outputs = [
    [outputPath, processedData],
    [path.join(PROCESSED_DIR, 'pages.json'), pages],
    [path.join(PROCESSED_DIR, 'posts.json'), posts],
    [path.join(PROCESSED_DIR, 'products.json'), products],
    [path.join(PROCESSED_DIR, 'categories.json'), categories],
    [path.join(PROCESSED_DIR, 'media.json'), processedMedia],
    [path.join(PROCESSED_DIR, 'asset-map.json'), assetMap]
  ];
  outputs.forEach(([file, payload]) => {
    fs.writeFileSync(file, JSON.stringify(payload, null, 2));
  });

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  outputs.forEach(([file], index) => {
    // The last entry carries the blank line that closes the list.
    const suffix = index === outputs.length - 1 ? '\n' : '';
    console.log(` ${file}${suffix}`);
  });

  // Sample data
  if (pages.length > 0) {
    console.log('📄 Sample Page:');
    console.log(` Title: ${pages[0].title}`);
    console.log(` Path: ${pages[0].path}`);
    console.log(` Locale: ${pages[0].locale}`);
    console.log(` Translation: ${pages[0].translation ? 'Yes' : 'No'}\n`);
  }
  if (posts.length > 0) {
    console.log('📝 Sample Post:');
    console.log(` Title: ${posts[0].title}`);
    console.log(` Path: ${posts[0].path}`);
    console.log(` Locale: ${posts[0].locale}`);
    console.log(` Date: ${posts[0].datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}
// Invoke main() only when this file is the entry script (executed directly),
// not when it is loaded through require() from another module.
if (require.main === module) {
main();
}

706
scripts/wordpress-export.js Executable file
View File

@@ -0,0 +1,706 @@
#!/usr/bin/env node
/**
* WordPress to Next.js Data Export Script
* Gathers all required data from WordPress/WooCommerce for static site generation
*/
const crypto = require('crypto');
const fs = require('fs');
const https = require('https');
const path = require('path');
// Load environment variables from the .env file into process.env
require('dotenv').config();
// Site base URL; every REST endpoint below is built as `${BASE_URL}/wp-json/...`
const BASE_URL = process.env.WOOCOMMERCE_URL;
// WooCommerce REST API key pair, combined into a Basic auth header by buildAuthHeader()
const CONSUMER_KEY = process.env.WOOCOMMERCE_CONSUMER_KEY;
const CONSUMER_SECRET = process.env.WOOCOMMERCE_CONSUMER_SECRET;
// WordPress application password used by buildWordPressAuth().
// NOTE: unlike the three values above, this one is not validated below.
const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD;
// Validate environment: abort with exit code 1 if any required value is missing.
// This runs at module load time, so it also applies when the file is require()d.
if (!BASE_URL || !CONSUMER_KEY || !CONSUMER_SECRET) {
console.error('❌ Missing required environment variables');
console.error('Please check .env file for:');
console.error(' - WOOCOMMERCE_URL');
console.error(' - WOOCOMMERCE_CONSUMER_KEY');
console.error(' - WOOCOMMERCE_CONSUMER_SECRET');
process.exit(1);
}
// Configuration
// ISO timestamp with ':' and '.' replaced by '-' so it can be used as a directory name
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
// Each run writes its JSON export into its own data/raw/<TIMESTAMP> directory
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
// Downloaded media files are shared across runs in public/media
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
// Create output directories (including missing parent directories)
if (!fs.existsSync(OUTPUT_DIR)) {
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
}
if (!fs.existsSync(MEDIA_DIR)) {
fs.mkdirSync(MEDIA_DIR, { recursive: true });
}
// API Helper Functions
/**
 * Builds the HTTP Basic `Authorization` header value for WooCommerce requests
 * from the configured consumer key and consumer secret.
 *
 * @returns {string} `Basic ` followed by the base64 encoding of `key:secret`.
 */
function buildAuthHeader() {
  const keyAndSecret = Buffer.from(`${CONSUMER_KEY}:${CONSUMER_SECRET}`, 'utf8');
  const token = keyAndSecret.toString('base64');
  return `Basic ${token}`;
}
/**
 * Builds the request headers for the WordPress REST API.
 *
 * When an application password is configured, an HTTP Basic `Authorization`
 * header is included. The user name comes from the optional
 * `WORDPRESS_USERNAME` environment variable and defaults to `admin`.
 *
 * When no application password is configured, the `Authorization` header is
 * omitted instead of sending the literal credentials `admin:undefined`, so
 * requests go out unauthenticated rather than with known-bad credentials.
 *
 * @returns {Object<string, string>} Headers to pass to makeRequest().
 */
function buildWordPressAuth() {
  const baseHeaders = { 'Content-Type': 'application/json' };
  if (!APP_PASSWORD) {
    return baseHeaders;
  }

  const username = process.env.WORDPRESS_USERNAME || 'admin';
  const credentials = Buffer.from(`${username}:${APP_PASSWORD}`).toString('base64');
  return {
    'Authorization': `Basic ${credentials}`,
    ...baseHeaders,
  };
}
/**
 * Performs an HTTPS GET request and resolves with the response body.
 *
 * The body is collected as raw bytes and decoded as UTF-8 once the response
 * has ended. Decoding chunk by chunk would corrupt multi-byte characters that
 * happen to be split across two network chunks.
 *
 * @param {string} url - Absolute URL to request.
 * @param {Object<string, string>} [headers={}] - Extra request headers; they
 *   override the default `User-Agent` when the same key is given.
 * @returns {Promise<*>} The parsed JSON body for 2xx responses, or the raw
 *   body string when the body is not valid JSON.
 * @throws {Error} Rejects with `HTTP <status>: <body>` for non-2xx responses,
 *   and with the underlying error when the request or response stream fails.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-NextJS-Migration/1.0',
        ...headers,
      },
    };

    https
      .get(url, options, (res) => {
        const chunks = [];
        res.on('data', (chunk) => {
          chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
        });
        // A response that fails mid-body never emits 'end'; reject explicitly.
        res.on('error', reject);
        res.on('end', () => {
          const body = Buffer.concat(chunks).toString('utf8');
          if (res.statusCode < 200 || res.statusCode >= 300) {
            reject(new Error(`HTTP ${res.statusCode}: ${body}`));
            return;
          }
          try {
            resolve(JSON.parse(body));
          } catch (e) {
            // Not JSON: hand the raw text back to the caller.
            resolve(body);
          }
        });
      })
      .on('error', reject);
  });
}
/**
 * Collects every item of a paginated `wp/v2` collection endpoint.
 *
 * Pages of 100 items are requested one after another, starting at page 1.
 * Collection stops when a response is not an array, is empty, or holds fewer
 * than 100 items. A failed request is logged and also stops collection; the
 * items gathered so far are still returned.
 *
 * @param {string} endpoint - Path below `/wp-json/wp/v2/`, e.g. `pages`.
 * @param {Object} [params={}] - Extra query parameters.
 * @param {?string} [locale=null] - When set, sent as the `lang` query parameter.
 * @returns {Promise<Array>} All items collected before the stop condition.
 */
async function fetchWithPagination(endpoint, params = {}, locale = null) {
  const PAGE_SIZE = 100;
  const localeQuery = locale ? { lang: locale } : {};
  const localeLabel = locale ? ` (${locale})` : '';
  const collected = [];

  for (let pageNumber = 1; ; pageNumber += 1) {
    const query = new URLSearchParams({
      ...params,
      page: String(pageNumber),
      per_page: String(PAGE_SIZE),
      ...localeQuery,
    });
    const url = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${query.toString()}`;
    console.log(`📥 Fetching ${endpoint} page ${pageNumber}${localeLabel}...`);

    let batch;
    try {
      batch = await makeRequest(url, buildWordPressAuth());
    } catch (error) {
      console.error(`❌ Error fetching ${endpoint} page ${pageNumber}:`, error.message);
      return collected;
    }

    if (!Array.isArray(batch) || batch.length === 0) {
      return collected;
    }
    for (const entry of batch) {
      collected.push(entry);
    }
    // A short page means there is nothing left to fetch.
    if (batch.length < PAGE_SIZE) {
      return collected;
    }
  }
}
/**
 * Fetches all items of a WooCommerce (`wc/v3`) endpoint.
 *
 * Results are requested in pages of 100 items until a short page is returned,
 * so collections larger than one page are exported completely instead of
 * being cut off after the first 100 entries.
 *
 * @param {string} endpoint - Path below `/wp-json/wc/v3/`, e.g. `products`.
 * @param {Object} [params={}] - Extra query parameters.
 * @param {?string} [locale=null] - When set, sent as the `lang` query parameter.
 * @returns {Promise<Array>} All fetched items. A non-array response (single
 *   resource) is wrapped in a one-element array. If a request fails, the
 *   error is logged and the items collected so far are returned (an empty
 *   array when the first request fails).
 */
async function fetchWooCommerce(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const localeLabel = locale ? ` (${locale})` : '';
  const collected = [];

  try {
    for (let page = 1; ; page += 1) {
      const queryString = new URLSearchParams({
        ...params,
        per_page: String(perPage),
        page: String(page),
        ...(locale ? { lang: locale } : {}),
      }).toString();
      const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`;
      console.log(`📥 Fetching WooCommerce ${endpoint} page ${page}${localeLabel}...`);

      const response = await makeRequest(url, {
        'Authorization': buildAuthHeader(),
        'Content-Type': 'application/json',
      });

      if (!Array.isArray(response)) {
        // Single-resource endpoints answer with one object; there is no paging.
        if (page === 1) {
          collected.push(response);
        }
        break;
      }

      collected.push(...response);
      // A short (or empty) page means the collection is exhausted.
      if (response.length < perPage) {
        break;
      }
    }
  } catch (error) {
    console.error(`❌ Error fetching WooCommerce ${endpoint}:`, error.message);
  }

  return collected;
}
/**
 * Fetches a single media record from the `wp/v2/media` endpoint.
 *
 * @param {number|string} mediaId - Identifier appended to the media endpoint URL.
 * @returns {Promise<*>} The response from makeRequest(), or `null` when the
 *   request fails (the failure is logged).
 */
async function fetchMedia(mediaId) {
  const endpoint = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;
  let record = null;
  try {
    record = await makeRequest(endpoint, buildWordPressAuth());
  } catch (error) {
    console.error(`❌ Error fetching media ${mediaId}:`, error.message);
  }
  return record;
}
/**
 * Downloads a file over HTTPS into MEDIA_DIR unless it is already present.
 *
 * The destination file is only created once a 200 response has arrived, and
 * it is removed again if the transfer fails. A failed download therefore
 * never leaves an empty or partial file behind that a later run would treat
 * as "already downloaded".
 *
 * @param {string} url - Source URL of the file.
 * @param {string} filename - File name to use inside MEDIA_DIR.
 * @returns {Promise<string>} Absolute path of the (existing or new) file.
 * @throws {Error} Rejects with `Failed to download: <status>` for non-200
 *   responses, and with the underlying error for network or write failures.
 */
async function downloadMedia(url, filename) {
  const filePath = path.join(MEDIA_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Media already downloaded: ${filename}`);
    return filePath;
  }

  return new Promise((resolve, reject) => {
    // Remove whatever was written so far, then report the failure.
    const failAndCleanUp = (err) => {
      fs.unlink(filePath, () => reject(err));
    };

    https
      .get(url, (res) => {
        if (res.statusCode !== 200) {
          // Drain the response so the socket is released.
          res.resume();
          reject(new Error(`Failed to download: ${res.statusCode}`));
          return;
        }

        const file = fs.createWriteStream(filePath);
        file.on('error', failAndCleanUp);
        res.on('error', (err) => {
          file.destroy();
          failAndCleanUp(err);
        });
        file.on('finish', () => {
          console.log(`✅ Downloaded: ${filename}`);
          resolve(filePath);
        });
        res.pipe(file);
      })
      // The request failed before any response arrived: nothing was written.
      .on('error', reject);
  });
}
// Data Processing Functions
/**
 * Picks the featured-image reference of a REST item.
 *
 * @param {Object} item - Item as returned by the REST API.
 * @returns {*} `item.featured_media` when truthy; otherwise the first entry
 *   of `item._embedded['wp:featuredmedia']` when that list is present;
 *   otherwise `null`.
 */
function extractFeaturedImage(item) {
  const mediaId = item.featured_media;
  if (mediaId) {
    return mediaId;
  }
  const embeddedMedia = item._embedded?.['wp:featuredmedia'];
  return embeddedMedia ? embeddedMedia[0] : null;
}
/**
 * Maps a raw REST page object onto the flat export record for one locale.
 *
 * @param {Object} page - Page as returned by the `pages` endpoint.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record with id, translationKey, locale, slug, path,
 *   rendered HTML fields (empty string when missing), featuredImage (or
 *   `null`) and updatedAt (`modified`, falling back to `date`).
 */
function processPage(page, locale) {
  const { id, slug, title, content, excerpt, modified, date } = page;
  const localizedPath = locale === 'en' ? `/${slug}` : `/${locale}/${slug}`;

  return {
    id,
    translationKey: `page-${slug}`, // Will be refined with Polylang data
    locale,
    slug,
    path: localizedPath,
    titleHtml: title?.rendered || '',
    contentHtml: content?.rendered || '',
    excerptHtml: excerpt?.rendered || '',
    featuredImage: page.featured_media || null,
    updatedAt: modified || date,
  };
}
/**
 * Maps a raw REST post object onto the flat export record for one locale.
 *
 * @param {Object} post - Post as returned by the `posts` endpoint.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record; the path lives under `/blog/` and the
 *   record carries both datePublished (`date`) and updatedAt (`modified`,
 *   falling back to `date`).
 */
function processPost(post, locale) {
  const { id, slug, title, content, excerpt, date, modified } = post;
  const blogPath = `/blog/${slug}`;

  return {
    id,
    translationKey: `post-${slug}`,
    locale,
    slug,
    path: locale === 'en' ? blogPath : `/${locale}${blogPath}`,
    titleHtml: title?.rendered || '',
    contentHtml: content?.rendered || '',
    excerptHtml: excerpt?.rendered || '',
    featuredImage: post.featured_media || null,
    datePublished: date,
    updatedAt: modified || date,
  };
}
/**
 * Maps a raw WooCommerce product onto the flat export record for one locale.
 *
 * @param {Object} product - Product as returned by the `products` endpoint.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record. `images` is the list of image `src`
 *   values, `featuredImage` the first of them (or `null`), `categories` is
 *   reduced to `{ id, name, slug }`, and `currency` defaults to `EUR`.
 */
function processProduct(product, locale) {
  const { slug, images: rawImages, categories: rawCategories } = product;
  const productPath = `/product/${slug}`;
  const imageUrls = rawImages ? rawImages.map((image) => image.src) : [];
  const hasImages = Boolean(rawImages && rawImages.length > 0);
  const categorySummaries = rawCategories
    ? rawCategories.map((category) => ({
        id: category.id,
        name: category.name,
        slug: category.slug,
      }))
    : [];

  return {
    id: product.id,
    translationKey: `product-${slug}`,
    locale,
    slug,
    path: locale === 'en' ? productPath : `/${locale}${productPath}`,
    name: product.name,
    shortDescriptionHtml: product.short_description || '',
    descriptionHtml: product.description || '',
    images: imageUrls,
    featuredImage: hasImages ? rawImages[0].src : null,
    sku: product.sku,
    regularPrice: product.regular_price,
    salePrice: product.sale_price,
    currency: product.currency || 'EUR',
    stockStatus: product.stock_status,
    categories: categorySummaries,
    attributes: product.attributes || [],
    variations: product.variations || [],
    updatedAt: product.date_modified,
  };
}
/**
 * Maps a raw WooCommerce product category onto the flat export record.
 *
 * @param {Object} category - Category as returned by `products/categories`.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record; `description` defaults to an empty string
 *   and `count` to 0.
 */
function processProductCategory(category, locale) {
  const { id, slug, name } = category;
  const categoryPath = `/product-category/${slug}`;

  return {
    id,
    translationKey: `product-category-${slug}`,
    locale,
    slug,
    name,
    path: locale === 'en' ? categoryPath : `/${locale}${categoryPath}`,
    description: category.description || '',
    count: category.count || 0,
  };
}
/**
 * Reduces a raw menu object to its basic structure.
 *
 * @param {Object} menu - Menu as returned by the menus endpoint.
 * @param {string} locale - Locale code stored on the record.
 * @returns {Object} `{ id, slug, name, locale, items }`, where `id` is
 *   `term_id` (falling back to `id`) and `items` defaults to an empty array.
 */
function processMenu(menu, locale) {
  const { term_id: termId, id: plainId, slug, name, items } = menu;
  return {
    id: termId || plainId,
    slug,
    name,
    locale,
    items: items || [],
  };
}
// Main Export Functions
/**
 * Exports all published pages for English and German.
 *
 * Both locales are fetched first (English, then German), converted with
 * processPage(), and written to `pages.en.json` / `pages.de.json` in
 * OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array, de: Array}>} The processed pages per locale.
 */
async function exportPages() {
  console.log('\n📊 EXPORTING PAGES');

  const targets = [
    { locale: 'en', file: 'pages.en.json' },
    { locale: 'de', file: 'pages.de.json' },
  ];

  const fetched = [];
  for (const { locale } of targets) {
    fetched.push(await fetchWithPagination('pages', { status: 'publish' }, locale));
  }

  const [en, de] = fetched.map((rawPages, index) =>
    rawPages.map((rawPage) => processPage(rawPage, targets[index].locale)),
  );

  [en, de].forEach((processed, index) => {
    const destination = path.join(OUTPUT_DIR, targets[index].file);
    fs.writeFileSync(destination, JSON.stringify(processed, null, 2));
  });

  console.log(`✅ Pages: ${en.length} EN, ${de.length} DE`);
  return { en, de };
}
/**
 * Exports all published posts for English and German.
 *
 * Both locales are fetched first (English, then German), converted with
 * processPost(), and written to `posts.en.json` / `posts.de.json` in
 * OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array, de: Array}>} The processed posts per locale.
 */
async function exportPosts() {
  console.log('\n📊 EXPORTING POSTS');

  const rawEnglish = await fetchWithPagination('posts', { status: 'publish' }, 'en');
  const rawGerman = await fetchWithPagination('posts', { status: 'publish' }, 'de');

  const result = {
    en: rawEnglish.map((rawPost) => processPost(rawPost, 'en')),
    de: rawGerman.map((rawPost) => processPost(rawPost, 'de')),
  };

  const outputs = [
    ['posts.en.json', result.en],
    ['posts.de.json', result.de],
  ];
  for (const [file, processed] of outputs) {
    fs.writeFileSync(path.join(OUTPUT_DIR, file), JSON.stringify(processed, null, 2));
  }

  console.log(`✅ Posts: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Exports WooCommerce products for English and German.
 *
 * Both locales are fetched first (English, then German), converted with
 * processProduct(), and written to `products.en.json` / `products.de.json`
 * in OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array, de: Array}>} The processed products per locale.
 */
async function exportProducts() {
  console.log('\n📊 EXPORTING PRODUCTS');

  const rawEnglish = await fetchWooCommerce('products', {}, 'en');
  const rawGerman = await fetchWooCommerce('products', {}, 'de');

  const en = rawEnglish.map((rawProduct) => processProduct(rawProduct, 'en'));
  const de = rawGerman.map((rawProduct) => processProduct(rawProduct, 'de'));

  const writeExport = (file, records) => {
    fs.writeFileSync(path.join(OUTPUT_DIR, file), JSON.stringify(records, null, 2));
  };
  writeExport('products.en.json', en);
  writeExport('products.de.json', de);

  console.log(`✅ Products: ${en.length} EN, ${de.length} DE`);
  return { en, de };
}
/**
 * Exports WooCommerce product categories for English and German.
 *
 * Both locales are fetched first (English, then German), converted with
 * processProductCategory(), and written to `product-categories.en.json` /
 * `product-categories.de.json` in OUTPUT_DIR.
 *
 * @returns {Promise<{en: Array, de: Array}>} The processed categories per locale.
 */
async function exportProductCategories() {
  console.log('\n📊 EXPORTING PRODUCT CATEGORIES');

  const jobs = [
    { locale: 'en', file: 'product-categories.en.json' },
    { locale: 'de', file: 'product-categories.de.json' },
  ];

  // Fetch everything before processing or writing anything.
  for (const job of jobs) {
    job.raw = await fetchWooCommerce('products/categories', {}, job.locale);
  }
  for (const job of jobs) {
    job.processed = job.raw.map((rawCategory) => processProductCategory(rawCategory, job.locale));
  }
  for (const job of jobs) {
    fs.writeFileSync(path.join(OUTPUT_DIR, job.file), JSON.stringify(job.processed, null, 2));
  }

  const [{ processed: en }, { processed: de }] = jobs;
  console.log(`✅ Product Categories: ${en.length} EN, ${de.length} DE`);
  return { en, de };
}
/**
 * Exports menus for English and German together with the menu locations.
 *
 * Menus are requested from the `menus` endpoint per locale; a rejected fetch
 * yields an empty list. Menu locations come from `wp/v2/menu-locations`; if
 * that request fails, a warning is logged and an empty object is used.
 * Each locale is written to `menus.<locale>.json` as `{ menus, locations }`.
 *
 * @returns {Promise<{en: Array, de: Array, locations: *}>} Processed menus per
 *   locale plus the menu-locations response.
 */
async function exportMenus() {
  console.log('\n📊 EXPORTING MENUS');

  // Try to get menus via WordPress REST API
  // Note: This might require additional plugins or direct DB access
  const loadMenus = (locale) => fetchWithPagination('menus', {}, locale).catch(() => []);
  const rawEnglish = await loadMenus('en');
  const rawGerman = await loadMenus('de');

  // If menus endpoint doesn't work, try to get menu locations
  const loadLocations = async () => {
    try {
      return await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth());
    } catch (e) {
      console.log('⚠️ Menu locations endpoint not available');
      return {};
    }
  };
  const locations = await loadLocations();

  const en = rawEnglish.map((rawMenu) => processMenu(rawMenu, 'en'));
  const de = rawGerman.map((rawMenu) => processMenu(rawMenu, 'de'));

  const outputs = [
    ['menus.en.json', en],
    ['menus.de.json', de],
  ];
  for (const [file, menus] of outputs) {
    fs.writeFileSync(path.join(OUTPUT_DIR, file), JSON.stringify({ menus, locations }, null, 2));
  }

  console.log(`✅ Menus: ${en.length} EN, ${de.length} DE`);
  return { en, de, locations };
}
/**
 * Builds a stable local file name for media that is only known by its URL.
 *
 * The name is derived from a hash of the full URL, so the same URL always
 * maps to the same file (the download cache works across runs) and two URLs
 * that share a base name do not collide.
 *
 * @param {string} mediaUrl - Absolute URL of the media file.
 * @returns {string} `media-<12 hex chars>-<base name of the URL path>`.
 */
function buildUrlMediaFilename(mediaUrl) {
  const digest = crypto.createHash('sha1').update(mediaUrl).digest('hex').slice(0, 12);
  let baseName;
  try {
    // Use the path component only, so query strings do not end up in the name.
    baseName = path.basename(new URL(mediaUrl).pathname);
  } catch {
    baseName = path.basename(mediaUrl);
  }
  return `media-${digest}-${baseName}`;
}

/**
 * Collects the media referenced by the exported JSON files, downloads it and
 * writes the manifest to `media.json` in OUTPUT_DIR.
 *
 * Every `*.json` file in OUTPUT_DIR is scanned: an item's `featuredImage` and
 * any string in `images` that contains `/wp-content/` are treated as media
 * references. Numeric references are looked up through fetchMedia(); string
 * references starting with `http` are downloaded directly. Individual
 * download failures are logged as warnings and do not abort the export.
 *
 * @returns {Promise<Array<Object>>} Manifest entries with id, url, filename,
 *   alt, width, height and mime_type.
 */
async function exportMedia() {
  console.log('\n📊 EXPORTING MEDIA');

  // Get all unique media references from collected data
  const mediaRefs = new Set();
  const jsonFiles = fs.readdirSync(OUTPUT_DIR).filter((f) => f.endsWith('.json'));
  for (const file of jsonFiles) {
    const content = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, file), 'utf8'));
    const items = Array.isArray(content) ? content : (content?.menus || []);
    for (const item of items) {
      if (!item) {
        continue;
      }
      if (item.featuredImage) {
        mediaRefs.add(item.featuredImage);
      }
      if (item.images) {
        for (const img of item.images) {
          if (typeof img === 'string' && img.includes('/wp-content/')) {
            mediaRefs.add(img);
          }
        }
      }
    }
  }

  const mediaManifest = [];
  const downloadPromises = [];

  for (const mediaRef of mediaRefs) {
    if (typeof mediaRef === 'number') {
      // Numeric reference: look up the media record to learn its URL.
      const media = await fetchMedia(mediaRef);
      if (media && media.source_url) {
        const filename = `${mediaRef}-${path.basename(media.source_url)}`;
        mediaManifest.push({
          id: mediaRef,
          url: media.source_url,
          filename,
          alt: media.alt_text || '',
          width: media.media_details?.width,
          height: media.media_details?.height,
          mime_type: media.mime_type,
        });
        downloadPromises.push(
          downloadMedia(media.source_url, filename).catch((err) => {
            console.warn(`⚠️ Failed to download media ${mediaRef}:`, err.message);
          }),
        );
      }
    } else if (typeof mediaRef === 'string' && mediaRef.startsWith('http')) {
      // Direct URL: no media record is available, only the file itself.
      const filename = buildUrlMediaFilename(mediaRef);
      mediaManifest.push({
        id: null,
        url: mediaRef,
        filename,
        alt: '',
        width: null,
        height: null,
        mime_type: null,
      });
      downloadPromises.push(
        downloadMedia(mediaRef, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media from URL:`, err.message);
        }),
      );
    }
  }

  // Wait for all downloads
  await Promise.all(downloadPromises);

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'media.json'),
    JSON.stringify(mediaManifest, null, 2),
  );
  console.log(`✅ Media: ${mediaManifest.length} items`);
  return mediaManifest;
}
/**
 * Gathers general site information and writes it to `site-info.json` in
 * OUTPUT_DIR.
 *
 * Plugin and settings lookups are best effort: when a request fails, a
 * warning is logged and the corresponding fields keep their defaults or stay
 * unset.
 *
 * Polylang is detected case-insensitively from each plugin's `plugin`,
 * `name` and `textdomain` fields, because the display name is capitalized
 * ("Polylang") and would never match a lower-case comparison on its own.
 * The settings endpoint is queried once for title, description, default
 * language and permalink structure.
 *
 * @returns {Promise<Object>} The site-info record that was written.
 */
async function exportSiteInfo() {
  console.log('\n📊 EXPORTING SITE INFORMATION');

  const siteInfo = {
    baseUrl: BASE_URL,
    exportDate: new Date().toISOString(),
    timestamp: TIMESTAMP,
    polylang: false,
    languages: ['en', 'de'],
    defaultLocale: 'en', // Will need to confirm
  };

  const mentionsPolylang = (plugin) =>
    [plugin?.plugin, plugin?.name, plugin?.textdomain].some(
      (value) => typeof value === 'string' && value.toLowerCase().includes('polylang'),
    );

  // Check for Polylang
  try {
    const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth());
    if (!Array.isArray(plugins)) {
      throw new TypeError('Unexpected plugins response');
    }
    const polylangPlugin = plugins.find(mentionsPolylang);
    if (polylangPlugin) {
      siteInfo.polylang = true;
      siteInfo.polylangVersion = polylangPlugin.version;
    }
  } catch (e) {
    console.log('⚠️ Could not check plugins');
  }

  // Get site settings, including the permalink structure, in one request
  try {
    const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    siteInfo.siteTitle = settings.title;
    siteInfo.siteDescription = settings.description;
    siteInfo.defaultLanguage = settings.default_language || 'en';
    siteInfo.permalinkStructure = settings.permalink_structure;
  } catch (e) {
    console.log('⚠️ Could not fetch settings');
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'site-info.json'),
    JSON.stringify(siteInfo, null, 2),
  );
  console.log('✅ Site info exported');
  return siteInfo;
}
/**
 * Builds the EN/DE translation mapping from the exported JSON files and
 * writes it to `translation-mapping.json` in OUTPUT_DIR.
 *
 * Items are paired by identical slug (first German item with the same slug);
 * the slug is used as the mapping key. Files that cannot be read or parsed
 * count as empty lists. Slug matching is a placeholder that should be
 * enhanced with Polylang data.
 *
 * @returns {Promise<Object>} Mapping with the sections `pages`, `posts`,
 *   `products` and `productCategories`, each `{ [slug]: { en: id, de: id } }`.
 */
async function generateTranslationMapping() {
  console.log('\n📊 GENERATING TRANSLATION MAPPING');

  // Missing or unreadable export files are treated as "no items".
  const readExport = (filename) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
    } catch (e) {
      return [];
    }
  };

  // Pair each English item with the first German item that has the same slug.
  const pairBySlug = (enItems, deItems) =>
    enItems.reduce((pairs, enItem) => {
      const deMatch = deItems.find((candidate) => candidate.slug === enItem.slug);
      if (deMatch) {
        pairs[`${enItem.slug}`] = { en: enItem.id, de: deMatch.id };
      }
      return pairs;
    }, {});

  const sources = [
    ['pages', 'pages.en.json', 'pages.de.json'],
    ['posts', 'posts.en.json', 'posts.de.json'],
    ['products', 'products.en.json', 'products.de.json'],
    ['productCategories', 'product-categories.en.json', 'product-categories.de.json'],
  ];

  // Load all data before pairing anything.
  const loaded = sources.map(([section, enFile, deFile]) => [
    section,
    readExport(enFile),
    readExport(deFile),
  ]);

  const mapping = {};
  let totalPairs = 0;
  for (const [section, enItems, deItems] of loaded) {
    mapping[section] = pairBySlug(enItems, deItems);
    totalPairs += Object.keys(mapping[section]).length;
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'translation-mapping.json'),
    JSON.stringify(mapping, null, 2),
  );
  console.log(`✅ Translation mapping: ${totalPairs} pairs found`);
  return mapping;
}
/**
 * Generates redirect rules from the old post URLs to the `/blog/` URLs and
 * writes them to `redirects.json` in OUTPUT_DIR.
 *
 * English posts redirect `/{slug}` to `/blog/{slug}`; German posts redirect
 * `/de/{slug}` to `/de/blog/{slug}`. English rules come first in the output.
 *
 * @returns {Promise<Array<Object>>} Rules of the form
 *   `{ source, destination, permanent: true, locale }`.
 * @throws {Error} When `posts.en.json` or `posts.de.json` cannot be read or parsed.
 */
async function generateRedirects() {
  console.log('\n📊 GENERATING REDIRECT RULES');

  const readPosts = (filename) =>
    JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
  const postsEN = readPosts('posts.en.json');
  const postsDE = readPosts('posts.de.json');

  // Base redirect: /{postSlug} → /blog/{postSlug} (English)
  const englishRules = postsEN.map((post) => ({
    source: `/${post.slug}`,
    destination: `/blog/${post.slug}`,
    permanent: true,
    locale: 'en',
  }));

  // German redirects: /de/{postSlug} → /de/blog/{postSlug}
  const germanRules = postsDE.map((post) => ({
    source: `/de/${post.slug}`,
    destination: `/de/blog/${post.slug}`,
    permanent: true,
    locale: 'de',
  }));

  const redirects = [...englishRules, ...germanRules];
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'redirects.json'),
    JSON.stringify(redirects, null, 2),
  );
  console.log(`✅ Redirects: ${redirects.length} rules generated`);
  return redirects;
}
// Main Execution
/**
 * Runs the complete export: site info, pages, posts, products, product
 * categories, menus and media, followed by the translation mapping and the
 * redirect rules. Steps run strictly one after another.
 *
 * If any step throws, the error message is printed and the process exits
 * with code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const banner = [
    '🚀 WordPress → Next.js Data Export',
    '=====================================',
    `Target: ${BASE_URL}`,
    `Output: ${OUTPUT_DIR}`,
    '',
  ];
  for (const line of banner) {
    console.log(line);
  }

  try {
    const steps = [
      // Step 1: Export all content
      exportSiteInfo,
      exportPages,
      exportPosts,
      exportProducts,
      exportProductCategories,
      exportMenus,
      exportMedia,
      // Step 2: Generate mappings and redirects
      generateTranslationMapping,
      generateRedirects,
    ];
    for (const runStep of steps) {
      await runStep();
    }

    const summary = [
      '\n🎉 Export Complete!',
      '=====================================',
      `📁 Data directory: data/raw/${TIMESTAMP}`,
      `🖼️ Media directory: public/media/`,
      '',
      'Next steps:',
      '1. Review exported data for completeness',
      '2. Check for any missing translations',
      '3. Verify media downloads',
      '4. Proceed with Next.js data processing',
    ];
    for (const line of summary) {
      console.log(line);
    }
  } catch (error) {
    console.error('\n❌ Export failed:', error.message);
    process.exit(1);
  }
}
// Run if called directly
// Start the export only when this file is the entry script; when the file is
// loaded through require(), main() is not invoked.
if (require.main === module) {
main();
}
// Expose the individual export and generation steps so they can be called
// separately by other modules. main() itself is not exported.
module.exports = {
exportPages,
exportPosts,
exportProducts,
exportProductCategories,
exportMenus,
exportMedia,
exportSiteInfo,
generateTranslationMapping,
generateRedirects
};