Files
klz-cables.com/scripts/wordpress-export-enhanced.js
2025-12-30 16:19:42 +01:00

1178 lines
34 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* WordPress to Next.js Data Export Script - Enhanced Version
* Gathers all required data from WordPress/WooCommerce for static site generation
* PLUS: Crawls all contentHtml to find and download missing images
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
// Pull settings from the .env file into process.env before reading them.
require('dotenv').config();
const {
  WOOCOMMERCE_URL: BASE_URL,
  WOOCOMMERCE_CONSUMER_KEY: CONSUMER_KEY,
  WOOCOMMERCE_CONSUMER_SECRET: CONSUMER_SECRET,
  WORDPRESS_APP_PASSWORD: APP_PASSWORD
} = process.env;
// The three WooCommerce settings are mandatory; abort with exit code 1 when
// any of them is missing or empty. APP_PASSWORD is not checked here.
if (!(BASE_URL && CONSUMER_KEY && CONSUMER_SECRET)) {
  [
    '❌ Missing required environment variables',
    'Please check .env file for:',
    ' - WOOCOMMERCE_URL',
    ' - WOOCOMMERCE_CONSUMER_KEY',
    ' - WOOCOMMERCE_CONSUMER_SECRET'
  ].forEach((line) => console.error(line));
  process.exit(1);
}
// Output locations. Every run writes its raw export into a fresh folder named
// after the current ISO timestamp (":" and "." replaced by "-").
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PUBLIC_DIR = path.join(__dirname, '..', 'public');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
// Make sure the directories this script writes into exist.
for (const requiredDir of [OUTPUT_DIR, MEDIA_DIR, PROCESSED_DIR]) {
  if (!fs.existsSync(requiredDir)) {
    fs.mkdirSync(requiredDir, { recursive: true });
  }
}
// API Helper Functions
/**
 * Build the HTTP Basic Authorization header value for WooCommerce requests.
 * @returns {string} "Basic " followed by base64("CONSUMER_KEY:CONSUMER_SECRET").
 */
function buildAuthHeader() {
  const keyPair = [CONSUMER_KEY, CONSUMER_SECRET].map(String).join(':');
  const encoded = Buffer.from(keyPair).toString('base64');
  return 'Basic ' + encoded;
}
/**
 * Build the request headers used for WordPress core REST calls.
 * The user name is fixed to "admin" and paired with APP_PASSWORD.
 * @returns {{Authorization: string, 'Content-Type': string}} Header object.
 */
function buildWordPressAuth() {
  const token = Buffer.from(`admin:${APP_PASSWORD}`).toString('base64');
  const headers = {};
  headers['Authorization'] = `Basic ${token}`;
  headers['Content-Type'] = 'application/json';
  return headers;
}
/**
 * Perform an HTTPS GET request and resolve with the response body.
 *
 * @param {string} url - Absolute https URL to request.
 * @param {Object} [headers={}] - Extra request headers (merged over the default User-Agent).
 * @returns {Promise<*>} Parsed JSON for 2xx responses, or the raw text when the
 *   body is not valid JSON. Rejects with `Error("HTTP <status>: <body>")` for
 *   non-2xx responses and with the underlying error on network/stream failures.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-NextJS-Migration/1.0',
        ...headers
      }
    };
    https.get(url, options, (res) => {
      // Decode at the stream level: appending raw Buffer chunks to a string
      // decodes every chunk on its own and corrupts multi-byte UTF-8
      // characters that straddle a chunk boundary.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      // A response that fails mid-body must reject instead of hanging forever.
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode >= 200 && res.statusCode < 300) {
          try {
            resolve(JSON.parse(data));
          } catch (e) {
            // Not JSON: hand back the body text unchanged.
            resolve(data);
          }
        } else {
          reject(new Error(`HTTP ${res.statusCode}: ${data}`));
        }
      });
    }).on('error', reject);
  });
}
/**
 * Fetch every page of a WordPress core REST collection (wp/v2).
 *
 * Requests pages of 100 items until an empty, non-array or short page is
 * returned. A failing request is logged and ends the loop; whatever was
 * collected up to that point is returned.
 *
 * @param {string} endpoint - Collection path below /wp-json/wp/v2/.
 * @param {Object} [params={}] - Additional query parameters.
 * @param {?string} [locale=null] - When set, sent as the `lang` query parameter.
 * @returns {Promise<Array>} All collected items.
 */
async function fetchWithPagination(endpoint, params = {}, locale = null) {
  const pageSize = 100;
  const collected = [];
  const localeQuery = locale ? { lang: locale } : {};
  const localeLabel = locale ? ` (${locale})` : '';
  for (let page = 1; ; page += 1) {
    const query = new URLSearchParams({
      ...params,
      page: page.toString(),
      per_page: pageSize.toString(),
      ...localeQuery
    }).toString();
    const requestUrl = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${query}`;
    console.log(`📥 Fetching ${endpoint} page ${page}${localeLabel}...`);
    let batch;
    try {
      batch = await makeRequest(requestUrl, buildWordPressAuth());
    } catch (error) {
      console.error(`❌ Error fetching ${endpoint} page ${page}:`, error.message);
      break;
    }
    const isUsable = Array.isArray(batch) && batch.length > 0;
    if (!isUsable) {
      break;
    }
    collected.push(...batch);
    // A short page means there is nothing left to fetch.
    if (batch.length < pageSize) {
      break;
    }
  }
  return collected;
}
/**
 * Fetch a WooCommerce REST (wc/v3) resource, following pagination.
 *
 * Collections are requested 100 items at a time until a short or empty page
 * is returned, so catalogues with more than 100 entries are no longer
 * truncated. A non-array response (single resource) is wrapped in an array.
 *
 * @param {string} endpoint - Path below /wp-json/wc/v3/.
 * @param {Object} [params={}] - Additional query parameters.
 * @param {?string} [locale=null] - When set, sent as the `lang` query parameter.
 * @returns {Promise<Array>} Collected items. On a request error the failure is
 *   logged and the items gathered so far are returned (an empty array when
 *   the first request fails).
 */
async function fetchWooCommerce(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const collected = [];
  const localeLabel = locale ? ` (${locale})` : '';
  try {
    for (let page = 1; ; page += 1) {
      const queryString = new URLSearchParams({
        ...params,
        per_page: String(perPage),
        page: String(page),
        ...(locale ? { lang: locale } : {})
      }).toString();
      const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`;
      const pageLabel = page > 1 ? ` page ${page}` : '';
      console.log(`📥 Fetching WooCommerce ${endpoint}${pageLabel}${localeLabel}...`);
      const response = await makeRequest(url, {
        'Authorization': buildAuthHeader(),
        'Content-Type': 'application/json'
      });
      if (!Array.isArray(response)) {
        // Single-resource endpoint: keep the original "wrap in an array" contract.
        if (page === 1) {
          return [response];
        }
        break;
      }
      collected.push(...response);
      if (response.length < perPage) {
        break;
      }
    }
  } catch (error) {
    console.error(`❌ Error fetching WooCommerce ${endpoint}:`, error.message);
  }
  return collected;
}
/**
 * Look up a single media item through the WordPress REST API.
 * @param {number|string} mediaId - Media (attachment) identifier.
 * @returns {Promise<?Object>} The response from makeRequest, or null when the
 *   request fails (the error is logged).
 */
async function fetchMedia(mediaId) {
  const endpoint = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;
  let result = null;
  try {
    result = await makeRequest(endpoint, buildWordPressAuth());
  } catch (error) {
    console.error(`❌ Error fetching media ${mediaId}:`, error.message);
    return null;
  }
  return result;
}
/**
 * Shared download routine for downloadMedia / downloadFavicon.
 *
 * The destination file is created only after a 200 response arrives. Creating
 * it up front left an empty file behind on 404/redirect responses, and the
 * "already exists" short-circuit then treated that empty file as a finished
 * download on the next attempt.
 *
 * @param {string} url - HTTPS URL to download.
 * @param {string} filePath - Absolute destination path.
 * @param {{exists: string, done: string, failed: string}} messages - Log text
 *   for the skip and success cases, and the prefix of the status error.
 * @returns {Promise<string>} Resolves with filePath.
 */
function saveUrlToFile(url, filePath, messages) {
  return new Promise((resolve, reject) => {
    if (fs.existsSync(filePath)) {
      console.log(messages.exists);
      resolve(filePath);
      return;
    }
    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the body so the socket is released; nothing was written to disk.
        res.resume();
        reject(new Error(`${messages.failed}: ${res.statusCode}`));
        return;
      }
      const file = fs.createWriteStream(filePath);
      // Remove partial output so a later run does not mistake it for a complete file.
      const abort = (err) => {
        file.destroy();
        fs.unlink(filePath, () => {});
        reject(err);
      };
      res.on('error', abort);
      file.on('error', abort);
      file.on('finish', () => {
        console.log(messages.done);
        resolve(filePath);
      });
      res.pipe(file);
    }).on('error', reject);
  });
}
/**
 * Download a file into MEDIA_DIR unless a file with that name already exists.
 * @param {string} url - HTTPS URL of the media file.
 * @param {string} filename - Target file name inside MEDIA_DIR.
 * @returns {Promise<string>} Absolute path of the local file. Rejects on a
 *   non-200 status or on a network/file-system error.
 */
async function downloadMedia(url, filename) {
  return saveUrlToFile(url, path.join(MEDIA_DIR, filename), {
    exists: `✅ Media already downloaded: ${filename}`,
    done: `✅ Downloaded: ${filename}`,
    failed: 'Failed to download'
  });
}
/**
 * Download a file into PUBLIC_DIR unless a file with that name already exists.
 * @param {string} url - HTTPS URL of the icon.
 * @param {string} filename - Target file name inside PUBLIC_DIR.
 * @returns {Promise<string>} Absolute path of the local file. Rejects on a
 *   non-200 status or on a network/file-system error.
 */
async function downloadFavicon(url, filename) {
  return saveUrlToFile(url, path.join(PUBLIC_DIR, filename), {
    exists: `✅ Favicon already exists: ${filename}`,
    done: `✅ Downloaded favicon: ${filename}`,
    failed: 'Failed to download favicon'
  });
}
// Enhanced Image Extraction Functions
/**
 * Collect image references from an HTML / shortcode string.
 *
 * Two kinds of values are returned in one list, in discovery order and
 * without duplicates:
 *  - purely numeric strings taken from the attributes bg_image,
 *    background_image, images (comma separated), column_background_image
 *    and poster;
 *  - URL strings containing "/wp-content/uploads/" taken from src attributes,
 *    inline background-image styles and data-* attributes.
 *
 * @param {?string} html - Markup to scan; falsy input yields an empty list.
 * @returns {string[]} Unique numeric-id strings and upload URLs.
 */
function extractImageUrlsFromContent(html) {
  const found = new Set();
  if (!html) return [];
  const digitsOnly = /^\d+$/;
  const uploadsMarker = '/wp-content/uploads/';
  // First capture group of every match of a quoted-attribute pattern.
  const attributeValues = (pattern) => Array.from(html.matchAll(pattern), (hit) => hit[1]);
  const collectIds = (pattern) => {
    for (const value of attributeValues(pattern)) {
      if (digitsOnly.test(value)) found.add(value);
    }
  };
  const collectUploadUrls = (pattern) => {
    for (const value of attributeValues(pattern)) {
      if (value.includes(uploadsMarker)) found.add(value);
    }
  };
  // 1. bg_image attribute (WPBakery)
  collectIds(/bg_image=[\"']([^\"']+)[\"']/g);
  // 2. background_image attribute
  collectIds(/background_image=[\"']([^\"']+)[\"']/g);
  // 3. vc_gallery images: a comma separated list of ids
  for (const list of attributeValues(/images=[\"']([^\"']+)[\"']/g)) {
    for (const piece of list.split(',')) {
      const candidate = piece.trim();
      if (digitsOnly.test(candidate)) found.add(candidate);
    }
  }
  // 4. column_background_image
  collectIds(/column_background_image=[\"']([^\"']+)[\"']/g);
  // 5. poster attribute (video)
  collectIds(/poster=[\"']([^\"']+)[\"']/g);
  // 6. Direct WordPress image URLs in src
  collectUploadUrls(/src=[\"']([^\"']+)[\"']/g);
  // 7. Background image URLs in style
  for (const declaration of html.match(/background-image:[^;]*url\([^)]+\)/g) || []) {
    const inner = declaration.match(/url\([\"']?([^\"')]+)[\"']?\)/);
    if (inner && inner[1].includes(uploadsMarker)) found.add(inner[1]);
  }
  // 8. Data attributes with image URLs
  collectUploadUrls(/data-[a-z-]+=[\"']([^\"']+)[\"']/g);
  return [...found];
}
/**
 * Scan the exported page/post/product JSON files in OUTPUT_DIR and collect
 * every image reference found in them.
 *
 * Besides contentHtml/excerptHtml this also scans descriptionHtml and
 * shortDescriptionHtml: those are the fields processProduct() writes, so
 * images embedded in product descriptions used to be skipped.
 *
 * @returns {Promise<string[]>} Unique media-id strings and upload URLs.
 */
async function extractAllContentImages() {
  console.log('\n🔍 CRAWLING ALL CONTENT FOR IMAGES');
  const allUrls = new Set();
  const contentFiles = [
    'pages.en.json', 'pages.de.json',
    'posts.en.json', 'posts.de.json',
    'products.en.json', 'products.de.json'
  ];
  // HTML-bearing fields across all exported record types.
  const htmlFields = ['contentHtml', 'excerptHtml', 'descriptionHtml', 'shortDescriptionHtml'];
  const isUploadUrl = (value) => typeof value === 'string' && value.includes('/wp-content/uploads/');
  for (const file of contentFiles) {
    const filePath = path.join(OUTPUT_DIR, file);
    // Files are optional: an export step may not have produced them.
    if (!fs.existsSync(filePath)) continue;
    const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    for (const item of items) {
      for (const field of htmlFields) {
        if (typeof item[field] !== 'string') continue;
        extractImageUrlsFromContent(item[field]).forEach((ref) => allUrls.add(ref));
      }
      // Product gallery entries are plain URL strings.
      if (Array.isArray(item.images)) {
        item.images.filter(isUploadUrl).forEach((url) => allUrls.add(url));
      }
      // featuredImage is a numeric id for pages/posts and a URL for products;
      // only the URL form is collected here.
      if (isUploadUrl(item.featuredImage)) {
        allUrls.add(item.featuredImage);
      }
    }
  }
  return Array.from(allUrls);
}
// Data Processing Functions
/**
 * Convert a raw WordPress page object into the export record shape.
 * @param {Object} page - Page object as returned by the wp/v2 pages endpoint.
 * @param {string} locale - Locale code; 'en' paths carry no locale prefix.
 * @returns {Object} Record with id, translationKey, locale, slug, path,
 *   titleHtml, contentHtml, excerptHtml, featuredImage and updatedAt.
 */
function processPage(page, locale) {
  const { id, slug } = page;
  const prefix = locale === 'en' ? '' : `/${locale}`;
  const rendered = (field) => field?.rendered || '';
  const record = { id };
  record.translationKey = `page-${slug}`;
  record.locale = locale;
  record.slug = slug;
  record.path = `${prefix}/${slug}`;
  record.titleHtml = rendered(page.title);
  record.contentHtml = rendered(page.content);
  record.excerptHtml = rendered(page.excerpt);
  // A falsy featured_media (0 / missing) is stored as null.
  record.featuredImage = page.featured_media || null;
  record.updatedAt = page.modified || page.date;
  return record;
}
/**
 * Convert a raw WordPress post object into the export record shape.
 * @param {Object} post - Post object as returned by the wp/v2 posts endpoint.
 * @param {string} locale - Locale code; 'en' paths carry no locale prefix.
 * @returns {Object} Record with id, translationKey, locale, slug, path
 *   (under /blog/), titleHtml, contentHtml, excerptHtml, featuredImage,
 *   datePublished and updatedAt.
 */
function processPost(post, locale) {
  const { id, slug, date } = post;
  const blogPath = `/blog/${slug}`;
  const rendered = (field) => field?.rendered || '';
  const record = { id };
  record.translationKey = `post-${slug}`;
  record.locale = locale;
  record.slug = slug;
  record.path = locale === 'en' ? blogPath : `/${locale}${blogPath}`;
  record.titleHtml = rendered(post.title);
  record.contentHtml = rendered(post.content);
  record.excerptHtml = rendered(post.excerpt);
  // A falsy featured_media (0 / missing) is stored as null.
  record.featuredImage = post.featured_media || null;
  record.datePublished = date;
  record.updatedAt = post.modified || date;
  return record;
}
/**
 * Convert a raw WooCommerce product object into the export record shape.
 * @param {Object} product - Product object as returned by the wc/v3 products endpoint.
 * @param {string} locale - Locale code; 'en' paths carry no locale prefix.
 * @returns {Object} Record whose images are the `src` URLs of the product's
 *   images and whose featuredImage is the first of them (or null).
 */
function processProduct(product, locale) {
  const { id, slug, name, sku } = product;
  const productPath = `/product/${slug}`;
  const gallery = product.images ? product.images.map((image) => image.src) : [];
  const hasImages = product.images && product.images.length > 0;
  const categories = product.categories
    ? product.categories.map((cat) => ({ id: cat.id, name: cat.name, slug: cat.slug }))
    : [];
  return {
    id,
    translationKey: `product-${slug}`,
    locale,
    slug,
    path: locale === 'en' ? productPath : `/${locale}${productPath}`,
    name,
    shortDescriptionHtml: product.short_description || '',
    descriptionHtml: product.description || '',
    images: gallery,
    featuredImage: hasImages ? product.images[0].src : null,
    sku,
    regularPrice: product.regular_price,
    salePrice: product.sale_price,
    // Currency falls back to EUR when the product carries none.
    currency: product.currency || 'EUR',
    stockStatus: product.stock_status,
    categories,
    attributes: product.attributes || [],
    variations: product.variations || [],
    updatedAt: product.date_modified
  };
}
/**
 * Convert a raw WooCommerce product category into the export record shape.
 * @param {Object} category - Category object from the wc/v3 products/categories endpoint.
 * @param {string} locale - Locale code; 'en' paths carry no locale prefix.
 * @returns {Object} Record with id, translationKey, locale, slug, name, path,
 *   description and count.
 */
function processProductCategory(category, locale) {
  const { id, slug, name } = category;
  const categoryPath = `/product-category/${slug}`;
  const localizedPath = locale === 'en' ? categoryPath : `/${locale}${categoryPath}`;
  return {
    id,
    translationKey: `product-category-${slug}`,
    locale,
    slug,
    name,
    path: localizedPath,
    description: category.description || '',
    count: category.count || 0
  };
}
/**
 * Convert a raw menu object into the export record shape.
 * @param {Object} menu - Menu object; `term_id` is preferred over `id`.
 * @param {string} locale - Locale code stored on the record.
 * @returns {Object} Record with id, slug, name, locale and items
 *   (an empty array when the menu has none).
 */
function processMenu(menu, locale) {
  const { slug, name } = menu;
  const identifier = menu.term_id || menu.id;
  const entries = menu.items || [];
  return { id: identifier, slug, name, locale, items: entries };
}
// Main Export Functions
/**
 * Fetch the published pages for both locales, convert them with
 * processPage() and write pages.en.json / pages.de.json into OUTPUT_DIR.
 * @returns {Promise<{en: Object[], de: Object[]}>} Processed pages per locale.
 */
async function exportPages() {
  console.log('\n📊 EXPORTING PAGES');
  const rawEnglish = await fetchWithPagination('pages', { status: 'publish' }, 'en');
  const rawGerman = await fetchWithPagination('pages', { status: 'publish' }, 'de');
  const result = {
    en: rawEnglish.map((page) => processPage(page, 'en')),
    de: rawGerman.map((page) => processPage(page, 'de'))
  };
  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `pages.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }
  console.log(`✅ Pages: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Fetch the published posts for both locales, convert them with
 * processPost() and write posts.en.json / posts.de.json into OUTPUT_DIR.
 * @returns {Promise<{en: Object[], de: Object[]}>} Processed posts per locale.
 */
async function exportPosts() {
  console.log('\n📊 EXPORTING POSTS');
  const rawEnglish = await fetchWithPagination('posts', { status: 'publish' }, 'en');
  const rawGerman = await fetchWithPagination('posts', { status: 'publish' }, 'de');
  const result = {
    en: rawEnglish.map((post) => processPost(post, 'en')),
    de: rawGerman.map((post) => processPost(post, 'de'))
  };
  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `posts.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }
  console.log(`✅ Posts: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Fetch the WooCommerce products for both locales, convert them with
 * processProduct() and write products.en.json / products.de.json into OUTPUT_DIR.
 * @returns {Promise<{en: Object[], de: Object[]}>} Processed products per locale.
 */
async function exportProducts() {
  console.log('\n📊 EXPORTING PRODUCTS');
  const rawEnglish = await fetchWooCommerce('products', {}, 'en');
  const rawGerman = await fetchWooCommerce('products', {}, 'de');
  const result = {
    en: rawEnglish.map((product) => processProduct(product, 'en')),
    de: rawGerman.map((product) => processProduct(product, 'de'))
  };
  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `products.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }
  console.log(`✅ Products: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Fetch the WooCommerce product categories for both locales, convert them
 * with processProductCategory() and write product-categories.en.json /
 * product-categories.de.json into OUTPUT_DIR.
 * @returns {Promise<{en: Object[], de: Object[]}>} Processed categories per locale.
 */
async function exportProductCategories() {
  console.log('\n📊 EXPORTING PRODUCT CATEGORIES');
  const rawEnglish = await fetchWooCommerce('products/categories', {}, 'en');
  const rawGerman = await fetchWooCommerce('products/categories', {}, 'de');
  const result = {
    en: rawEnglish.map((category) => processProductCategory(category, 'en')),
    de: rawGerman.map((category) => processProductCategory(category, 'de'))
  };
  for (const [locale, records] of Object.entries(result)) {
    const target = path.join(OUTPUT_DIR, `product-categories.${locale}.json`);
    fs.writeFileSync(target, JSON.stringify(records, null, 2));
  }
  console.log(`✅ Product Categories: ${result.en.length} EN, ${result.de.length} DE`);
  return result;
}
/**
 * Export menus for both locales together with the menu locations.
 *
 * Writes menus.en.json / menus.de.json into OUTPUT_DIR, each shaped as
 * `{ menus, locations }`. When the menu-locations request fails, a warning is
 * logged and an empty object is used for the locations.
 *
 * @returns {Promise<{en: Object[], de: Object[], locations: *}>}
 */
async function exportMenus() {
  console.log('\n📊 EXPORTING MENUS');
  const rawEnglish = await fetchWithPagination('menus', {}, 'en').catch(() => []);
  const rawGerman = await fetchWithPagination('menus', {}, 'de').catch(() => []);
  let menuLocations = {};
  try {
    menuLocations = await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth());
  } catch (e) {
    console.log('⚠️ Menu locations endpoint not available');
  }
  const menusByLocale = {
    en: rawEnglish.map((menu) => processMenu(menu, 'en')),
    de: rawGerman.map((menu) => processMenu(menu, 'de'))
  };
  for (const locale of ['en', 'de']) {
    const payload = { menus: menusByLocale[locale], locations: menuLocations };
    fs.writeFileSync(
      path.join(OUTPUT_DIR, `menus.${locale}.json`),
      JSON.stringify(payload, null, 2)
    );
  }
  console.log(`✅ Menus: ${menusByLocale.en.length} EN, ${menusByLocale.de.length} DE`);
  return { en: menusByLocale.en, de: menusByLocale.de, locations: menuLocations };
}
/**
 * Download the media referenced by the JSON files already written to
 * OUTPUT_DIR and write a manifest to media.json.
 *
 * References are taken from each record's `featuredImage` and from string
 * entries of `images` containing "/wp-content/". Numeric references are
 * resolved through fetchMedia(); string references starting with "http" are
 * downloaded directly under a timestamped file name. Individual download
 * failures are logged as warnings and do not abort the export.
 *
 * @returns {Promise<Object[]>} The manifest entries that were written.
 */
async function exportMedia() {
  console.log('\n📊 EXPORTING MEDIA');
  const references = new Set();
  // Read all JSON files to find media references
  const exportedFiles = fs.readdirSync(OUTPUT_DIR).filter((name) => name.endsWith('.json'));
  for (const name of exportedFiles) {
    const parsed = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, name), 'utf8'));
    // Menu exports wrap their records in a `menus` property.
    const records = Array.isArray(parsed) ? parsed : (parsed.menus || []);
    records.forEach((record) => {
      if (record.featuredImage) references.add(record.featuredImage);
      if (!record.images) return;
      record.images.forEach((image) => {
        if (typeof image === 'string' && image.includes('/wp-content/')) {
          references.add(image);
        }
      });
    });
  }
  const manifest = [];
  const pendingDownloads = [];
  for (const reference of references) {
    if (typeof reference === 'number') {
      const media = await fetchMedia(reference);
      if (!media || !media.source_url) continue;
      const filename = `${reference}-${path.basename(media.source_url)}`;
      manifest.push({
        id: reference,
        url: media.source_url,
        filename: filename,
        alt: media.alt_text || '',
        width: media.media_details?.width,
        height: media.media_details?.height,
        mime_type: media.mime_type
      });
      pendingDownloads.push(
        downloadMedia(media.source_url, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media ${reference}:`, err.message);
        })
      );
      continue;
    }
    const isHttpUrl = typeof reference === 'string' && reference.startsWith('http');
    if (!isHttpUrl) continue;
    const filename = `media-${Date.now()}-${path.basename(reference)}`;
    manifest.push({
      id: null,
      url: reference,
      filename: filename,
      alt: '',
      width: null,
      height: null,
      mime_type: null
    });
    pendingDownloads.push(
      downloadMedia(reference, filename).catch((err) => {
        console.warn('⚠️ Failed to download media from URL:', err.message);
      })
    );
  }
  await Promise.all(pendingDownloads);
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'media.json'),
    JSON.stringify(manifest, null, 2)
  );
  console.log(`✅ Media: ${manifest.length} items`);
  return manifest;
}
/**
 * Collect general site information and write it to site-info.json in OUTPUT_DIR.
 *
 * Both lookups are best effort: when the plugins or settings request fails, a
 * warning is logged and the corresponding fields are left at their defaults
 * or omitted.
 *
 * @returns {Promise<Object>} The site info object that was written.
 */
async function exportSiteInfo() {
  console.log('\n📊 EXPORTING SITE INFORMATION');
  const siteInfo = {
    baseUrl: BASE_URL,
    exportDate: new Date().toISOString(),
    timestamp: TIMESTAMP,
    polylang: false,
    languages: ['en', 'de'],
    defaultLocale: 'en'
  };
  try {
    const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth());
    // Match case-insensitively and also against the plugin file/textdomain:
    // a display name such as "Polylang" never matched the previous
    // case-sensitive check on `name` alone.
    const mentionsPolylang = (value) =>
      typeof value === 'string' && value.toLowerCase().includes('polylang');
    const polylangPlugin = plugins.find((p) =>
      p && [p.plugin, p.name, p.textdomain].some(mentionsPolylang)
    );
    if (polylangPlugin) {
      siteInfo.polylang = true;
      siteInfo.polylangVersion = polylangPlugin.version;
    }
  } catch (e) {
    console.log('⚠️ Could not check plugins');
  }
  try {
    // One settings request feeds every settings-derived field; the endpoint
    // used to be requested a second time just to read permalink_structure.
    const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    siteInfo.siteTitle = settings.title;
    siteInfo.siteDescription = settings.description;
    siteInfo.defaultLanguage = settings.default_language || 'en';
    siteInfo.permalinkStructure = settings.permalink_structure;
  } catch (e) {
    console.log('⚠️ Could not fetch settings');
  }
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'site-info.json'),
    JSON.stringify(siteInfo, null, 2)
  );
  console.log('✅ Site info exported');
  return siteInfo;
}
/**
 * Download the site logo and favicon and record their public paths.
 *
 * Sources are tried in this order: the `custom_logo` / `site_icon` ids from
 * the WordPress settings endpoint, then (logo only) the first media item
 * whose title, slug or URL contains "logo", then (favicon only) two
 * well-known favicon URLs. The result is written to assets.json in
 * OUTPUT_DIR. Any error inside the try block is logged and the assets
 * gathered so far are returned; assets.json is not written in that case.
 *
 * @returns {Promise<{logo: ?string, favicon: ?string, appleTouchIcon: ?string}>}
 */
async function exportLogoAndFavicon() {
console.log('\n📊 EXPORTING LOGO AND FAVICON');
const assets = {
logo: null,
favicon: null,
appleTouchIcon: null
};
try {
const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
// Preferred source for the logo: the media id stored in the site settings.
if (settings.custom_logo) {
console.log(`📥 Found custom_logo ID: ${settings.custom_logo}`);
const logoMedia = await fetchMedia(settings.custom_logo);
if (logoMedia && logoMedia.source_url) {
// NOTE(review): the file is always named logo.webp, whatever the format of source_url is.
const logoFilename = 'logo.webp';
await downloadMedia(logoMedia.source_url, logoFilename);
assets.logo = `/media/${logoFilename}`;
console.log(`✅ Logo downloaded: ${logoFilename}`);
}
}
// Preferred source for the favicon: the site icon media id.
if (settings.site_icon) {
console.log(`📥 Found site_icon ID: ${settings.site_icon}`);
const iconMedia = await fetchMedia(settings.site_icon);
if (iconMedia && iconMedia.source_url) {
const faviconFilename = 'favicon.ico';
await downloadFavicon(iconMedia.source_url, faviconFilename);
assets.favicon = `/favicon.ico`;
console.log(`✅ Favicon downloaded: ${faviconFilename}`);
// The same source image is stored a second time as the Apple touch icon.
const appleTouchFilename = 'apple-touch-icon.png';
await downloadFavicon(iconMedia.source_url, appleTouchFilename);
assets.appleTouchIcon = `/apple-touch-icon.png`;
console.log(`✅ Apple touch icon downloaded: ${appleTouchFilename}`);
}
}
// Fallback for the logo: search the whole media library for "logo".
if (!assets.logo) {
console.log('⚠️ No logo found in settings, searching media...');
const allMedia = await fetchWithPagination('media', { per_page: 100 });
const logoCandidates = allMedia.filter(m =>
m.title?.rendered?.toLowerCase().includes('logo') ||
m.slug?.toLowerCase().includes('logo') ||
m.source_url?.toLowerCase().includes('logo')
);
if (logoCandidates.length > 0) {
// The first candidate wins; no further ranking is applied.
const logoMedia = logoCandidates[0];
const logoFilename = 'logo.webp';
await downloadMedia(logoMedia.source_url, logoFilename);
assets.logo = `/media/${logoFilename}`;
console.log(`✅ Logo found and downloaded: ${logoFilename}`);
}
}
// Fallback for the favicon: probe common locations and stop at the first that downloads.
if (!assets.favicon) {
console.log('⚠️ No favicon found in settings, trying common locations...');
const faviconUrls = [
`${BASE_URL}/favicon.ico`,
`${BASE_URL}/wp-content/uploads/favicon.ico`
];
for (const url of faviconUrls) {
try {
await downloadFavicon(url, 'favicon.ico');
assets.favicon = '/favicon.ico';
console.log(`✅ Favicon downloaded from: ${url}`);
await downloadFavicon(url, 'apple-touch-icon.png');
assets.appleTouchIcon = '/apple-touch-icon.png';
break;
} catch (e) {
// Continue to next URL
}
}
}
fs.writeFileSync(
path.join(OUTPUT_DIR, 'assets.json'),
JSON.stringify(assets, null, 2)
);
console.log('✅ Logo and favicon export complete');
} catch (error) {
// Best effort: a failure here is logged and does not propagate to the caller.
console.error('❌ Error exporting logo/favicon:', error.message);
}
return assets;
}
/**
 * Pair English and German records that share the same slug and write the
 * result to translation-mapping.json in OUTPUT_DIR.
 *
 * Export files that cannot be read or parsed are treated as empty lists.
 *
 * @returns {Promise<Object>} Mapping with the keys pages, posts, products and
 *   productCategories; each maps a slug to `{ en: <id>, de: <id> }`.
 */
async function generateTranslationMapping() {
  console.log('\n📊 GENERATING TRANSLATION MAPPING');
  const readExport = (filename) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
    } catch (e) {
      return [];
    }
  };
  // For every English item, take the first German item with an identical slug.
  const pairBySlug = (enItems, deItems) =>
    enItems.reduce((pairs, enItem) => {
      const counterpart = deItems.find((candidate) => candidate.slug === enItem.slug);
      if (counterpart) {
        pairs[`${enItem.slug}`] = { en: enItem.id, de: counterpart.id };
      }
      return pairs;
    }, {});
  const sources = [
    ['pages', 'pages'],
    ['posts', 'posts'],
    ['products', 'products'],
    ['productCategories', 'product-categories']
  ];
  // Load every file first, then build the pairs.
  const loaded = sources.map(([key, stem]) => [
    key,
    readExport(`${stem}.en.json`),
    readExport(`${stem}.de.json`)
  ]);
  const mapping = {};
  for (const [key, englishItems, germanItems] of loaded) {
    mapping[key] = pairBySlug(englishItems, germanItems);
  }
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'translation-mapping.json'),
    JSON.stringify(mapping, null, 2)
  );
  let totalPairs = 0;
  for (const group of Object.values(mapping)) {
    totalPairs += Object.keys(group).length;
  }
  console.log(`✅ Translation mapping: ${totalPairs} pairs found`);
  return mapping;
}
/**
 * Build permanent redirects from root-level post URLs to their /blog/
 * location for both locales and write them to redirects.json in OUTPUT_DIR.
 *
 * Reads posts.en.json and posts.de.json; throws when either file is missing
 * or not valid JSON.
 *
 * @returns {Promise<Object[]>} Redirect rules (source, destination, permanent, locale).
 */
async function generateRedirects() {
  console.log('\n📊 GENERATING REDIRECT RULES');
  const readPosts = (locale) =>
    JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, `posts.${locale}.json`), 'utf8'));
  const englishPosts = readPosts('en');
  const germanPosts = readPosts('de');
  const englishRules = englishPosts.map(({ slug }) => ({
    source: `/${slug}`,
    destination: `/blog/${slug}`,
    permanent: true,
    locale: 'en'
  }));
  const germanRules = germanPosts.map(({ slug }) => ({
    source: `/de/${slug}`,
    destination: `/de/blog/${slug}`,
    permanent: true,
    locale: 'de'
  }));
  const redirects = [...englishRules, ...germanRules];
  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'redirects.json'),
    JSON.stringify(redirects, null, 2)
  );
  console.log(`✅ Redirects: ${redirects.length} rules generated`);
  return redirects;
}
// NEW: Enhanced Media Download from Content
/**
 * Download every image referenced in the exported content and maintain
 * asset-map.json (remote URL -> local "/media/<file>" path) in PROCESSED_DIR.
 *
 * Numeric references are resolved to their source URL through fetchMedia();
 * references starting with "http" are used as-is; anything else is skipped.
 * Downloads run in batches of at most five. Failed downloads are logged and
 * left out of the map.
 *
 * @returns {Promise<Object>} The merged asset map that was written to disk.
 */
async function downloadMissingContentImages() {
  console.log('\n📥 DOWNLOADING MISSING IMAGES FROM CONTENT');
  // Extract all image URLs from content
  const imageUrls = await extractAllContentImages();
  console.log(`Found ${imageUrls.length} unique image URLs in content`);
  // Load the asset map of earlier runs up front. URL-based downloads get a
  // timestamped file name, so the "file exists" check alone never matches
  // them and every run would download them again under a new name.
  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  let existingMap = {};
  if (fs.existsSync(assetMapPath)) {
    existingMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
  }
  const assetMap = {};
  const downloadPromises = [];
  let downloadedCount = 0;
  for (const urlOrId of imageUrls) {
    let actualUrl = null;
    let mediaId = null;
    // Determine if it's an ID or URL
    if (typeof urlOrId === 'number' || /^\d+$/.test(urlOrId)) {
      // It's a media ID
      mediaId = Number.parseInt(urlOrId, 10);
      const media = await fetchMedia(mediaId);
      if (media && media.source_url) {
        actualUrl = media.source_url;
      }
    } else if (typeof urlOrId === 'string' && urlOrId.startsWith('http')) {
      // It's a direct URL
      actualUrl = urlOrId;
    }
    if (!actualUrl) continue;
    // Check if already handled during this run
    if (assetMap[actualUrl]) {
      console.log(`✅ Already mapped: ${actualUrl} → ${assetMap[actualUrl]}`);
      continue;
    }
    // Reuse the file recorded by a previous run when it is still on disk.
    const previousPath = existingMap[actualUrl];
    if (typeof previousPath === 'string' && previousPath !== '' &&
        fs.existsSync(path.join(PUBLIC_DIR, previousPath))) {
      console.log(`✅ File exists: ${path.basename(previousPath)}`);
      assetMap[actualUrl] = previousPath;
      continue;
    }
    // Generate filename
    const ext = path.extname(actualUrl) || '.webp';
    const baseName = path.basename(actualUrl, ext);
    const filename = mediaId ? `${mediaId}-${baseName}${ext}` : `media-${Date.now()}-${baseName}${ext}`;
    const localPath = `/media/${filename}`;
    // Check if file exists
    const filePath = path.join(MEDIA_DIR, filename);
    if (fs.existsSync(filePath)) {
      console.log(`✅ File exists: ${filename}`);
      assetMap[actualUrl] = localPath;
      continue;
    }
    // Download
    console.log(`⬇️ Downloading: ${actualUrl}`);
    downloadPromises.push(
      downloadMedia(actualUrl, filename).then(() => {
        assetMap[actualUrl] = localPath;
        downloadedCount++;
      }).catch(err => {
        console.warn(`⚠️ Failed to download ${actualUrl}:`, err.message);
      })
    );
    // Cap concurrency: wait for the current batch before queuing more.
    if (downloadPromises.length >= 5) {
      await Promise.all(downloadPromises);
      downloadPromises.length = 0;
    }
  }
  await Promise.all(downloadPromises);
  // Merge with new mappings (entries of this run win)
  const mergedMap = { ...existingMap, ...assetMap };
  // Save updated asset map
  fs.writeFileSync(
    assetMapPath,
    JSON.stringify(mergedMap, null, 2)
  );
  console.log(`✅ Downloaded ${downloadedCount} new images`);
  console.log(`✅ Asset map now has ${Object.keys(mergedMap).length} mappings`);
  return mergedMap;
}
// NEW: Update processed files with local image paths
/**
 * Rewrite remote image URLs in the exported page/post/product JSON files
 * (in OUTPUT_DIR) to the local paths recorded in asset-map.json.
 *
 * Rewrites the HTML fields contentHtml, excerptHtml, descriptionHtml and
 * shortDescriptionHtml, the `images` list and a string `featuredImage`.
 * The product fields (descriptions, featuredImage) were previously left
 * untouched. A file is only written back when something changed.
 *
 * @returns {Promise<void>}
 */
async function updateProcessedFiles() {
  console.log('\n🔄 UPDATING PROCESSED FILES WITH LOCAL PATHS');
  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  if (!fs.existsSync(assetMapPath)) {
    console.log('⚠️ No asset map found, skipping update');
    return;
  }
  const assetMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
  // Longest URLs first, so a URL that is a prefix of another one cannot
  // rewrite part of the longer URL before that one is handled.
  const replacements = Object.entries(assetMap)
    .filter(([wpUrl, localPath]) => wpUrl !== '' && typeof localPath === 'string')
    .sort(([a], [b]) => b.length - a.length);
  // Literal replacement via split/join: no regex escaping needed, and "$" in
  // a local path is not interpreted as a replacement pattern.
  const replaceImageUrls = (content) => {
    if (typeof content !== 'string' || content === '') return content;
    return replacements.reduce(
      (text, [wpUrl, localPath]) => text.split(wpUrl).join(localPath),
      content
    );
  };
  // Exact-match lookup for values that are a URL on their own.
  const localPathFor = (value) =>
    typeof value === 'string' &&
    Object.prototype.hasOwnProperty.call(assetMap, value) &&
    typeof assetMap[value] === 'string'
      ? assetMap[value]
      : undefined;
  const htmlFields = ['contentHtml', 'excerptHtml', 'descriptionHtml', 'shortDescriptionHtml'];
  // Process each content file
  const contentFiles = [
    'pages.en.json', 'pages.de.json',
    'posts.en.json', 'posts.de.json',
    'products.en.json', 'products.de.json'
  ];
  for (const file of contentFiles) {
    const filePath = path.join(OUTPUT_DIR, file);
    if (!fs.existsSync(filePath)) continue;
    const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    let updated = false;
    for (const item of items) {
      for (const field of htmlFields) {
        const before = item[field];
        const after = replaceImageUrls(before);
        if (after !== before) {
          item[field] = after;
          updated = true;
        }
      }
      if (Array.isArray(item.images)) {
        item.images = item.images.map((img) => {
          const local = localPathFor(img);
          if (local === undefined || local === img) return img;
          updated = true;
          return local;
        });
      }
      // Products store featuredImage as a URL; pages/posts store a numeric id.
      const featuredLocal = localPathFor(item.featuredImage);
      if (featuredLocal !== undefined && featuredLocal !== item.featuredImage) {
        item.featuredImage = featuredLocal;
        updated = true;
      }
    }
    if (updated) {
      fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
      console.log(`✅ Updated ${file}`);
    } else {
      console.log(` No changes needed for ${file}`);
    }
  }
  console.log('✅ All processed files updated');
}
// NEW: Process data for Next.js
/**
 * Combine the raw per-locale export files from OUTPUT_DIR into the files
 * written to PROCESSED_DIR: pages.json, posts.json, products.json,
 * categories.json, media.json and wordpress-data.json.
 *
 * A raw file that is missing or unparsable is treated as an empty array.
 *
 * @returns {Promise<void>}
 */
async function processDataForNextJs() {
  console.log('\n🔄 PROCESSING DATA FOR NEXT.JS');
  // Load raw data
  const readRaw = (filename) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
    } catch (e) {
      return [];
    }
  };
  // English records first, German records after them.
  const bothLocales = (stem) => [...readRaw(`${stem}.en.json`), ...readRaw(`${stem}.de.json`)];
  const collections = {
    'pages.json': bothLocales('pages'),
    'posts.json': bothLocales('posts'),
    'products.json': bothLocales('products'),
    'categories.json': bothLocales('product-categories'),
    'media.json': readRaw('media.json')
  };
  const siteInfo = readRaw('site-info.json');
  const assets = readRaw('assets.json');
  const translationMapping = readRaw('translation-mapping.json');
  const redirects = readRaw('redirects.json');
  // Save to processed directory
  for (const [filename, data] of Object.entries(collections)) {
    fs.writeFileSync(path.join(PROCESSED_DIR, filename), JSON.stringify(data, null, 2));
  }
  const summary = {
    siteInfo,
    assets,
    translationMapping,
    redirects,
    exportDate: new Date().toISOString()
  };
  fs.writeFileSync(
    path.join(PROCESSED_DIR, 'wordpress-data.json'),
    JSON.stringify(summary, null, 2)
  );
  console.log('✅ Data processed for Next.js');
}
// Main Execution
/**
 * Run the complete export pipeline, one step after another. Any error that
 * escapes a step is logged and ends the process with exit code 1.
 * @returns {Promise<void>}
 */
async function main() {
  const banner = [
    '🚀 WordPress → Next.js Data Export (Enhanced)',
    '=====================================',
    `Target: ${BASE_URL}`,
    `Output: ${OUTPUT_DIR}`,
    ''
  ];
  banner.forEach((line) => console.log(line));
  // Steps run strictly in this order; later steps read files written by earlier ones.
  const pipeline = [
    // Step 1: Export all content
    exportSiteInfo,
    exportPages,
    exportPosts,
    exportProducts,
    exportProductCategories,
    exportMenus,
    exportMedia,
    exportLogoAndFavicon,
    // Step 2: Generate mappings and redirects
    generateTranslationMapping,
    generateRedirects,
    // Step 3: NEW - Download missing images from content
    downloadMissingContentImages,
    // Step 4: NEW - Update processed files with local paths
    updateProcessedFiles,
    // Step 5: NEW - Process for Next.js
    processDataForNextJs
  ];
  try {
    for (const step of pipeline) {
      await step();
    }
    const summary = [
      '\n🎉 Export Complete!',
      '=====================================',
      `📁 Data directory: data/raw/${TIMESTAMP}`,
      `📁 Processed: data/processed/`,
      `🖼️ Media directory: public/media/`,
      `🎨 Logo/Favicon: public/`,
      '',
      'Next steps:',
      '1. Review exported data for completeness',
      '2. Check asset-map.json for all mappings',
      '3. Verify all images downloaded',
      '4. Ready for Next.js integration'
    ];
    summary.forEach((line) => console.log(line));
  } catch (error) {
    console.error('\n❌ Export failed:', error.message);
    process.exit(1);
  }
}
// Run if called directly
// (`require.main === module` is false when this file is loaded via require(),
// so importing it does not start an export.)
if (require.main === module) {
main();
}
// Public API: the individual pipeline steps, exported so other scripts can
// call them one at a time. Low-level helpers are not exported.
module.exports = {
exportPages,
exportPosts,
exportProducts,
exportProductCategories,
exportMenus,
exportMedia,
exportSiteInfo,
exportLogoAndFavicon,
generateTranslationMapping,
generateRedirects,
downloadMissingContentImages,
updateProcessedFiles,
processDataForNextJs
};