1178 lines
34 KiB
JavaScript
1178 lines
34 KiB
JavaScript
#!/usr/bin/env node
|
||
|
||
/**
|
||
* WordPress to Next.js Data Export Script - Enhanced Version
|
||
* Gathers all required data from WordPress/WooCommerce for static site generation
|
||
* PLUS: Crawls all contentHtml to find and download missing images
|
||
*/
|
||
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
const https = require('https');
|
||
|
||
// Load environment variables
|
||
require('dotenv').config();
|
||
|
||
// Connection settings, read from the environment (populated from .env above).
// Trailing slashes are stripped from the base URL so that `${BASE_URL}/wp-json/...`
// never produces a double slash.
const BASE_URL = (process.env.WOOCOMMERCE_URL || '').replace(/\/+$/, '');
const CONSUMER_KEY = process.env.WOOCOMMERCE_CONSUMER_KEY;
const CONSUMER_SECRET = process.env.WOOCOMMERCE_CONSUMER_SECRET;
const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD;

// Validate environment: the WooCommerce URL and API key pair are mandatory,
// the WordPress application password is optional.
if (!BASE_URL || !CONSUMER_KEY || !CONSUMER_SECRET) {
  console.error('❌ Missing required environment variables');
  console.error('Please check .env file for:');
  console.error(' - WOOCOMMERCE_URL');
  console.error(' - WOOCOMMERCE_CONSUMER_KEY');
  console.error(' - WOOCOMMERCE_CONSUMER_SECRET');
  process.exit(1);
}
|
||
|
||
// Configuration
|
||
// Each run writes its raw export into its own timestamped directory
// (':' and '.' are replaced so the ISO timestamp is a valid directory name).
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PUBLIC_DIR = path.join(__dirname, '..', 'public');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');

// Create output directories.
// With `recursive: true`, mkdirSync does nothing when the directory already
// exists, so no separate existence check is needed.
for (const dir of [OUTPUT_DIR, MEDIA_DIR, PROCESSED_DIR]) {
  fs.mkdirSync(dir, { recursive: true });
}
|
||
|
||
// API Helper Functions
|
||
/**
 * Builds the HTTP Basic `Authorization` header value used for WooCommerce requests.
 *
 * @returns {string} `Basic ` followed by base64 of `CONSUMER_KEY:CONSUMER_SECRET`.
 */
function buildAuthHeader() {
  const token = Buffer.from(`${CONSUMER_KEY}:${CONSUMER_SECRET}`).toString('base64');
  return `Basic ${token}`;
}
|
||
|
||
/**
 * Builds the request headers used for WordPress (wp/v2) REST calls.
 *
 * The `Authorization` header is only included when an application password is
 * configured; previously a missing password produced credentials for
 * `admin:undefined`. The user name defaults to `admin` and can be overridden
 * with the WORDPRESS_USERNAME environment variable.
 *
 * @returns {Object<string, string>} Header map with `Content-Type` and, when
 *   credentials are available, `Authorization`.
 */
function buildWordPressAuth() {
  const headers = {};

  if (APP_PASSWORD) {
    const username = process.env.WORDPRESS_USERNAME || 'admin';
    const token = Buffer.from(`${username}:${APP_PASSWORD}`).toString('base64');
    headers['Authorization'] = `Basic ${token}`;
  }

  headers['Content-Type'] = 'application/json';
  return headers;
}
|
||
|
||
/**
 * Performs an HTTPS GET request and resolves with the response body.
 *
 * @param {string} url - Absolute https URL to request.
 * @param {Object<string, string>} [headers={}] - Extra request headers; they
 *   override the default `User-Agent` when the same key is given.
 * @returns {Promise<*>} The parsed JSON body for 2xx responses, or the raw
 *   body text when it is not valid JSON.
 *   Rejects with `Error("HTTP <status>: <body>")` for non-2xx responses and
 *   with the underlying error on connection/stream failures.
 */
function makeRequest(url, headers = {}) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'WordPress-NextJS-Migration/1.0',
        ...headers
      }
    };

    const request = https.get(url, options, (res) => {
      // Collect raw buffers and decode once at the end: decoding chunk by chunk
      // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
      const chunks = [];

      res.on('data', (chunk) => {
        chunks.push(chunk);
      });

      res.on('error', reject);

      res.on('end', () => {
        const body = Buffer.concat(chunks).toString('utf8');

        if (res.statusCode < 200 || res.statusCode >= 300) {
          reject(new Error(`HTTP ${res.statusCode}: ${body}`));
          return;
        }

        try {
          resolve(JSON.parse(body));
        } catch (e) {
          // Not JSON: hand the raw text back to the caller.
          resolve(body);
        }
      });
    });

    request.on('error', reject);
  });
}
|
||
|
||
/**
 * Fetches every item of a WordPress `wp/v2` collection, 100 items per request.
 *
 * Paging stops on the first empty or non-array response, on a page with fewer
 * than 100 items, or on a request error. Errors never propagate: whatever was
 * collected so far is returned.
 *
 * @param {string} endpoint - Collection path below `/wp-json/wp/v2/` (e.g. `pages`).
 * @param {Object<string, string>} [params={}] - Extra query parameters; `page`
 *   and `per_page` are always set by this function.
 * @param {?string} [locale=null] - When given, sent as the `lang` query parameter.
 * @returns {Promise<Array>} All collected items (possibly empty).
 */
async function fetchWithPagination(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const allItems = [];

  for (let page = 1; ; page++) {
    const queryString = new URLSearchParams({
      ...params,
      page: String(page),
      per_page: String(perPage),
      ...(locale ? { lang: locale } : {})
    }).toString();

    const url = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${queryString}`;

    console.log(`📥 Fetching ${endpoint} page ${page}${locale ? ` (${locale})` : ''}...`);

    let items;
    try {
      items = await makeRequest(url, buildWordPressAuth());
    } catch (error) {
      // When the total is an exact multiple of perPage, the request for the
      // page after the last one is answered with HTTP 400
      // "rest_post_invalid_page_number". That is the normal end of the
      // collection, not a failure.
      const pastLastPage =
        page > 1 && String(error.message).includes('rest_post_invalid_page_number');
      if (!pastLastPage) {
        console.error(`❌ Error fetching ${endpoint} page ${page}:`, error.message);
      }
      break;
    }

    if (!Array.isArray(items) || items.length === 0) {
      break;
    }

    allItems.push(...items);

    // A short page is the last page.
    if (items.length < perPage) {
      break;
    }
  }

  return allItems;
}
|
||
|
||
/**
 * Fetches a WooCommerce `wc/v3` resource, following pagination.
 *
 * Previously only the first 100 entries were requested, so larger catalogues
 * were silently truncated. Pages of 100 are now requested until a short page
 * is returned.
 *
 * @param {string} endpoint - Path below `/wp-json/wc/v3/` (e.g. `products`).
 * @param {Object<string, string>} [params={}] - Extra query parameters;
 *   `per_page` and `page` are always set by this function.
 * @param {?string} [locale=null] - When given, sent as the `lang` query parameter.
 * @returns {Promise<Array>} The collected items. A non-array response is
 *   wrapped in a one-element array. On a request error the items collected so
 *   far are returned (an empty array when the first request fails).
 */
async function fetchWooCommerce(endpoint, params = {}, locale = null) {
  const perPage = 100;
  const collected = [];

  console.log(`📥 Fetching WooCommerce ${endpoint}${locale ? ` (${locale})` : ''}...`);

  try {
    for (let page = 1; ; page++) {
      const queryString = new URLSearchParams({
        ...params,
        per_page: String(perPage),
        page: String(page),
        ...(locale ? { lang: locale } : {})
      }).toString();

      const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`;

      const response = await makeRequest(url, {
        'Authorization': buildAuthHeader(),
        'Content-Type': 'application/json'
      });

      // Single-resource endpoints answer with one object rather than a list.
      if (!Array.isArray(response)) {
        collected.push(response);
        break;
      }

      collected.push(...response);

      // A short (or empty) page is the last page.
      if (response.length < perPage) {
        break;
      }
    }
  } catch (error) {
    console.error(`❌ Error fetching WooCommerce ${endpoint}:`, error.message);
  }

  return collected;
}
|
||
|
||
/**
 * Fetches a single media item from the WordPress `wp/v2/media` endpoint.
 *
 * @param {number|string} mediaId - Media attachment ID.
 * @returns {Promise<?Object>} The response of the media endpoint, or `null`
 *   when the request fails (the error is logged, not thrown).
 */
async function fetchMedia(mediaId) {
  const url = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`;

  try {
    return await makeRequest(url, buildWordPressAuth());
  } catch (error) {
    console.error(`❌ Error fetching media ${mediaId}:`, error.message);
    return null;
  }
}
|
||
|
||
/**
 * Downloads a file over HTTPS into MEDIA_DIR, skipping files that already exist.
 *
 * The destination file is only created once a 200 response has arrived, and it
 * is removed again if the transfer fails. Previously a failed request left an
 * empty file behind, which later runs then reported as "already downloaded".
 *
 * @param {string} url - Absolute https URL of the file.
 * @param {string} filename - File name to store it under inside MEDIA_DIR.
 * @returns {Promise<string>} Absolute path of the (existing or newly written)
 *   file. Rejects on a non-200 status, a connection error or a write error.
 */
async function downloadMedia(url, filename) {
  const filePath = path.join(MEDIA_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Media already downloaded: ${filename}`);
    return filePath;
  }

  return new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the response so the socket is released.
        res.resume();
        reject(new Error(`Failed to download: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(filePath);

      // Remove the partial file so a later run retries the download.
      const fail = (err) => {
        file.destroy();
        fs.unlink(filePath, () => reject(err));
      };

      file.on('error', fail);
      res.on('error', fail);
      file.on('finish', () => {
        console.log(`✅ Downloaded: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    });

    request.on('error', reject);
  });
}
|
||
|
||
/**
 * Downloads a file over HTTPS into PUBLIC_DIR, skipping files that already exist.
 *
 * The destination file is only created once a 200 response has arrived, and it
 * is removed again if the transfer fails, so a failed attempt never leaves an
 * empty file that would be mistaken for an existing favicon.
 *
 * @param {string} url - Absolute https URL of the icon.
 * @param {string} filename - File name to store it under inside PUBLIC_DIR.
 * @returns {Promise<string>} Absolute path of the (existing or newly written)
 *   file. Rejects on a non-200 status, a connection error or a write error.
 */
async function downloadFavicon(url, filename) {
  const filePath = path.join(PUBLIC_DIR, filename);

  if (fs.existsSync(filePath)) {
    console.log(`✅ Favicon already exists: ${filename}`);
    return filePath;
  }

  return new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the response so the socket is released.
        res.resume();
        reject(new Error(`Failed to download favicon: ${res.statusCode}`));
        return;
      }

      const file = fs.createWriteStream(filePath);

      // Remove the partial file so a later attempt starts clean.
      const fail = (err) => {
        file.destroy();
        fs.unlink(filePath, () => reject(err));
      };

      file.on('error', fail);
      res.on('error', fail);
      file.on('finish', () => {
        console.log(`✅ Downloaded favicon: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    });

    request.on('error', reject);
  });
}
|
||
|
||
// Enhanced Image Extraction Functions
|
||
/**
 * Collects image references from an HTML/shortcode string.
 *
 * Two kinds of references are returned, mixed in one list:
 *  - purely numeric attribute values (returned as digit strings) from
 *    `bg_image`, `background_image`, `images` (comma-separated),
 *    `column_background_image` and `poster`;
 *  - URLs containing `/wp-content/uploads/` found in `src`, inline
 *    `background-image: url(...)` styles, `data-*` attributes and `srcset`.
 *
 * @param {?string} html - Markup to scan; anything that is not a non-empty
 *   string yields an empty result.
 * @returns {string[]} Unique references in order of first discovery.
 */
function extractImageUrlsFromContent(html) {
  if (typeof html !== 'string' || html === '') return [];

  const UPLOADS_MARKER = '/wp-content/uploads/';
  const found = new Set();

  // Values of the first capture group for every match of a /g pattern.
  const valuesOf = (pattern) => Array.from(html.matchAll(pattern), (m) => m[1]);

  const addIfNumericId = (value) => {
    if (/^\d+$/.test(value)) found.add(value);
  };
  const addIfUploadUrl = (value) => {
    if (value.includes(UPLOADS_MARKER)) found.add(value);
  };

  // 1. bg_image attribute (WPBakery)
  valuesOf(/bg_image=["']([^"']+)["']/g).forEach((v) => addIfNumericId(v));

  // 2. background_image attribute
  valuesOf(/background_image=["']([^"']+)["']/g).forEach((v) => addIfNumericId(v));

  // 3. vc_gallery images: comma-separated list of IDs
  for (const list of valuesOf(/images=["']([^"']+)["']/g)) {
    list.split(',').forEach((id) => addIfNumericId(id.trim()));
  }

  // 4. column_background_image
  valuesOf(/column_background_image=["']([^"']+)["']/g).forEach((v) => addIfNumericId(v));

  // 5. poster attribute (video)
  valuesOf(/poster=["']([^"']+)["']/g).forEach((v) => addIfNumericId(v));

  // 6. Direct WordPress image URLs in src
  valuesOf(/src=["']([^"']+)["']/g).forEach((v) => addIfUploadUrl(v));

  // 7. Background image URLs in style
  for (const declaration of html.match(/background-image:[^;]*url\([^)]+\)/g) || []) {
    const urlMatch = declaration.match(/url\(["']?([^"')]+)["']?\)/);
    if (urlMatch) addIfUploadUrl(urlMatch[1]);
  }

  // 8. Data attributes with image URLs
  valuesOf(/data-[a-z-]+=["']([^"']+)["']/g).forEach((v) => addIfUploadUrl(v));

  // 9. srcset candidates: "url descriptor, url descriptor, ..." - the URL is
  //    the first whitespace-delimited token of each candidate.
  for (const list of valuesOf(/srcset=["']([^"']+)["']/g)) {
    for (const candidate of list.split(',')) {
      const candidateUrl = candidate.trim().split(/\s+/)[0];
      if (candidateUrl) addIfUploadUrl(candidateUrl);
    }
  }

  return Array.from(found);
}
|
||
|
||
/**
 * Scans the exported page, post and product files in OUTPUT_DIR and collects
 * every image reference found in them.
 *
 * For each item the HTML fields are passed through
 * `extractImageUrlsFromContent`; this includes the product fields
 * `descriptionHtml` and `shortDescriptionHtml`, which were previously skipped
 * because only `contentHtml` / `excerptHtml` were inspected. In addition,
 * `images` entries and `featuredImage` are collected when they are strings
 * containing `/wp-content/uploads/`.
 *
 * @returns {Promise<string[]>} Unique references (media IDs as digit strings
 *   and URLs). Missing files are skipped.
 */
async function extractAllContentImages() {
  console.log('\n🔍 CRAWLING ALL CONTENT FOR IMAGES');

  const UPLOADS_MARKER = '/wp-content/uploads/';
  const HTML_FIELDS = ['contentHtml', 'excerptHtml', 'descriptionHtml', 'shortDescriptionHtml'];

  // Load all content files
  const contentFiles = [
    'pages.en.json', 'pages.de.json',
    'posts.en.json', 'posts.de.json',
    'products.en.json', 'products.de.json'
  ];

  const allUrls = new Set();
  const isUploadUrl = (value) => typeof value === 'string' && value.includes(UPLOADS_MARKER);

  for (const file of contentFiles) {
    const filePath = path.join(OUTPUT_DIR, file);
    if (!fs.existsSync(filePath)) continue;

    const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));

    for (const item of items) {
      // HTML bodies (pages/posts and products use different field names)
      for (const field of HTML_FIELDS) {
        for (const ref of extractImageUrlsFromContent(item[field])) {
          allUrls.add(ref);
        }
      }

      // Product image URLs
      if (Array.isArray(item.images)) {
        item.images.filter(isUploadUrl).forEach((url) => allUrls.add(url));
      }

      // featuredImage is only collected here when it is a URL
      if (isUploadUrl(item.featuredImage)) {
        allUrls.add(item.featuredImage);
      }
    }
  }

  return Array.from(allUrls);
}
|
||
|
||
// Data Processing Functions
|
||
/**
 * Maps a raw WordPress page object to the export shape.
 *
 * @param {Object} page - Raw page as returned by the `wp/v2/pages` endpoint.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record with id, translationKey, locale, slug, path,
 *   titleHtml, contentHtml, excerptHtml, featuredImage and updatedAt.
 */
function processPage(page, locale) {
  const { id, slug } = page;

  return {
    id,
    translationKey: `page-${slug}`,
    locale,
    slug,
    path: locale === 'en' ? `/${slug}` : `/${locale}/${slug}`,
    titleHtml: page.title?.rendered || '',
    contentHtml: page.content?.rendered || '',
    excerptHtml: page.excerpt?.rendered || '',
    // Any falsy featured_media (including 0) is exported as null.
    featuredImage: page.featured_media || null,
    updatedAt: page.modified || page.date
  };
}
|
||
|
||
/**
 * Maps a raw WordPress post object to the export shape.
 *
 * @param {Object} post - Raw post as returned by the `wp/v2/posts` endpoint.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record with id, translationKey, locale, slug, path
 *   (under `/blog/`), titleHtml, contentHtml, excerptHtml, featuredImage,
 *   datePublished and updatedAt.
 */
function processPost(post, locale) {
  const { id, slug } = post;
  const blogPath = `/blog/${slug}`;

  return {
    id,
    translationKey: `post-${slug}`,
    locale,
    slug,
    path: locale === 'en' ? blogPath : `/${locale}${blogPath}`,
    titleHtml: post.title?.rendered || '',
    contentHtml: post.content?.rendered || '',
    excerptHtml: post.excerpt?.rendered || '',
    // Any falsy featured_media (including 0) is exported as null.
    featuredImage: post.featured_media || null,
    datePublished: post.date,
    updatedAt: post.modified || post.date
  };
}
|
||
|
||
/**
 * Maps a raw WooCommerce product object to the export shape.
 *
 * @param {Object} product - Raw product as returned by the `wc/v3/products` endpoint.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record. `images` is the list of image `src` values
 *   and `featuredImage` the first of them (or null); `categories` is reduced
 *   to `{ id, name, slug }`; `currency` falls back to `EUR`.
 */
function processProduct(product, locale) {
  const productPath = `/product/${product.slug}`;
  const imageUrls = product.images ? product.images.map((img) => img.src) : [];
  const categories = product.categories
    ? product.categories.map((cat) => ({ id: cat.id, name: cat.name, slug: cat.slug }))
    : [];

  return {
    id: product.id,
    translationKey: `product-${product.slug}`,
    locale,
    slug: product.slug,
    path: locale === 'en' ? productPath : `/${locale}${productPath}`,
    name: product.name,
    shortDescriptionHtml: product.short_description || '',
    descriptionHtml: product.description || '',
    images: imageUrls,
    featuredImage: imageUrls.length > 0 ? imageUrls[0] : null,
    sku: product.sku,
    regularPrice: product.regular_price,
    salePrice: product.sale_price,
    currency: product.currency || 'EUR',
    stockStatus: product.stock_status,
    categories,
    attributes: product.attributes || [],
    variations: product.variations || [],
    updatedAt: product.date_modified
  };
}
|
||
|
||
/**
 * Maps a raw WooCommerce product category to the export shape.
 *
 * @param {Object} category - Raw category as returned by `wc/v3/products/categories`.
 * @param {string} locale - Locale code; `en` paths carry no locale prefix.
 * @returns {Object} Export record with id, translationKey, locale, slug, name,
 *   path (under `/product-category/`), description and count.
 */
function processProductCategory(category, locale) {
  const categoryPath = `/product-category/${category.slug}`;

  return {
    id: category.id,
    translationKey: `product-category-${category.slug}`,
    locale,
    slug: category.slug,
    name: category.name,
    path: locale === 'en' ? categoryPath : `/${locale}${categoryPath}`,
    description: category.description || '',
    count: category.count || 0
  };
}
|
||
|
||
/**
 * Maps a raw menu object to the export shape.
 *
 * @param {Object} menu - Raw menu; its id is taken from `term_id`, falling
 *   back to `id`.
 * @param {string} locale - Locale code the menu was fetched for.
 * @returns {Object} Export record with id, slug, name, locale and items
 *   (an empty array when the menu has none).
 */
function processMenu(menu, locale) {
  const { slug, name } = menu;

  return {
    id: menu.term_id || menu.id,
    slug,
    name,
    locale,
    items: menu.items || []
  };
}
|
||
|
||
// Main Export Functions
|
||
/**
 * Exports all published pages for the `en` and `de` locales.
 *
 * Writes `pages.en.json` and `pages.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed pages per locale.
 */
async function exportPages() {
  console.log('\n📊 EXPORTING PAGES');

  const processed = {};

  for (const locale of ['en', 'de']) {
    const rawPages = await fetchWithPagination('pages', { status: 'publish' }, locale);
    processed[locale] = rawPages.map((page) => processPage(page, locale));

    fs.writeFileSync(
      path.join(OUTPUT_DIR, `pages.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Pages: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||
|
||
/**
 * Exports all published posts for the `en` and `de` locales.
 *
 * Writes `posts.en.json` and `posts.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed posts per locale.
 */
async function exportPosts() {
  console.log('\n📊 EXPORTING POSTS');

  const processed = {};

  for (const locale of ['en', 'de']) {
    const rawPosts = await fetchWithPagination('posts', { status: 'publish' }, locale);
    processed[locale] = rawPosts.map((post) => processPost(post, locale));

    fs.writeFileSync(
      path.join(OUTPUT_DIR, `posts.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Posts: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||
|
||
/**
 * Exports WooCommerce products for the `en` and `de` locales.
 *
 * Writes `products.en.json` and `products.de.json` into OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed products per locale.
 */
async function exportProducts() {
  console.log('\n📊 EXPORTING PRODUCTS');

  const processed = {};

  for (const locale of ['en', 'de']) {
    const rawProducts = await fetchWooCommerce('products', {}, locale);
    processed[locale] = rawProducts.map((product) => processProduct(product, locale));

    fs.writeFileSync(
      path.join(OUTPUT_DIR, `products.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Products: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||
|
||
/**
 * Exports WooCommerce product categories for the `en` and `de` locales.
 *
 * Writes `product-categories.en.json` and `product-categories.de.json` into
 * OUTPUT_DIR.
 *
 * @returns {Promise<{en: Object[], de: Object[]}>} The processed categories per locale.
 */
async function exportProductCategories() {
  console.log('\n📊 EXPORTING PRODUCT CATEGORIES');

  const processed = {};

  for (const locale of ['en', 'de']) {
    const rawCategories = await fetchWooCommerce('products/categories', {}, locale);
    processed[locale] = rawCategories.map((category) => processProductCategory(category, locale));

    fs.writeFileSync(
      path.join(OUTPUT_DIR, `product-categories.${locale}.json`),
      JSON.stringify(processed[locale], null, 2)
    );
  }

  console.log(`✅ Product Categories: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de };
}
|
||
|
||
/**
 * Exports navigation menus for the `en` and `de` locales plus menu locations.
 *
 * Writes `menus.en.json` and `menus.de.json` into OUTPUT_DIR, each shaped as
 * `{ menus, locations }`. A failing menus or menu-locations request is
 * tolerated: menus fall back to an empty list and locations to `{}`.
 *
 * @returns {Promise<{en: Object[], de: Object[], locations: *}>} Processed
 *   menus per locale and the menu-locations response.
 */
async function exportMenus() {
  console.log('\n📊 EXPORTING MENUS');

  const menusEN = await fetchWithPagination('menus', {}, 'en').catch(() => []);
  const menusDE = await fetchWithPagination('menus', {}, 'de').catch(() => []);

  let menuLocations = {};
  try {
    menuLocations = await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth());
  } catch (e) {
    console.log('⚠️ Menu locations endpoint not available');
  }

  const processed = {
    en: menusEN.map((menu) => processMenu(menu, 'en')),
    de: menusDE.map((menu) => processMenu(menu, 'de'))
  };

  for (const locale of ['en', 'de']) {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, `menus.${locale}.json`),
      JSON.stringify({ menus: processed[locale], locations: menuLocations }, null, 2)
    );
  }

  console.log(`✅ Menus: ${processed.en.length} EN, ${processed.de.length} DE`);

  return { en: processed.en, de: processed.de, locations: menuLocations };
}
|
||
|
||
/**
 * Downloads every media item referenced by the JSON files in OUTPUT_DIR and
 * writes a manifest of them to `media.json`.
 *
 * References are taken from each item's `featuredImage` and from string
 * `images` entries containing `/wp-content/`. Numeric references are resolved
 * through `fetchMedia`; string references starting with `http` are downloaded
 * directly. Download failures are logged as warnings and do not abort the export.
 *
 * File names for URL references are derived from the URL path (without query
 * string) instead of the current time, so repeated runs map the same URL to
 * the same file and can reuse what is already on disk.
 *
 * @returns {Promise<Object[]>} The manifest entries
 *   (`id`, `url`, `filename`, `alt`, `width`, `height`, `mime_type`).
 */
async function exportMedia() {
  console.log('\n📊 EXPORTING MEDIA');

  const UPLOADS_MARKER = '/wp-content/uploads/';

  // Stable, filesystem-safe name for a media URL, e.g.
  // ".../wp-content/uploads/2024/01/a.jpg?ver=2" -> "media-2024-01-a.jpg".
  const stableNameForUrl = (mediaUrl) => {
    let pathname;
    try {
      pathname = new URL(mediaUrl).pathname;
    } catch (e) {
      pathname = mediaUrl.split(/[?#]/)[0];
    }
    const markerAt = pathname.indexOf(UPLOADS_MARKER);
    const relative = markerAt === -1 ? pathname : pathname.slice(markerAt + UPLOADS_MARKER.length);
    const safe = relative.replace(/^\/+/, '').replace(/[^A-Za-z0-9._-]+/g, '-');
    return `media-${safe || 'file'}`;
  };

  const mediaRefs = new Set();

  // Read all JSON files to find media references (sorted for a stable manifest order)
  const jsonFiles = fs.readdirSync(OUTPUT_DIR).filter((f) => f.endsWith('.json')).sort();

  for (const file of jsonFiles) {
    const content = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, file), 'utf8'));
    const candidates = Array.isArray(content) ? content : (content?.menus || []);
    const items = Array.isArray(candidates) ? candidates : [];

    for (const item of items) {
      if (!item || typeof item !== 'object') continue;

      if (item.featuredImage) mediaRefs.add(item.featuredImage);

      if (Array.isArray(item.images)) {
        for (const img of item.images) {
          if (typeof img === 'string' && img.includes('/wp-content/')) {
            mediaRefs.add(img);
          }
        }
      }
    }
  }

  const mediaManifest = [];
  const downloadPromises = [];

  for (const mediaRef of mediaRefs) {
    if (typeof mediaRef === 'number') {
      // Media ID: look up its source URL first.
      const media = await fetchMedia(mediaRef);
      if (!media || !media.source_url) continue;

      const filename = `${mediaRef}-${path.basename(media.source_url)}`;
      mediaManifest.push({
        id: mediaRef,
        url: media.source_url,
        filename: filename,
        alt: media.alt_text || '',
        width: media.media_details?.width,
        height: media.media_details?.height,
        mime_type: media.mime_type
      });

      downloadPromises.push(
        downloadMedia(media.source_url, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media ${mediaRef}:`, err.message);
        })
      );
    } else if (typeof mediaRef === 'string' && mediaRef.startsWith('http')) {
      // Direct URL: nothing is known about it beyond the address.
      const filename = stableNameForUrl(mediaRef);
      mediaManifest.push({
        id: null,
        url: mediaRef,
        filename: filename,
        alt: '',
        width: null,
        height: null,
        mime_type: null
      });

      downloadPromises.push(
        downloadMedia(mediaRef, filename).catch((err) => {
          console.warn(`⚠️ Failed to download media from URL:`, err.message);
        })
      );
    }
  }

  await Promise.all(downloadPromises);

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'media.json'),
    JSON.stringify(mediaManifest, null, 2)
  );

  console.log(`✅ Media: ${mediaManifest.length} items`);

  return mediaManifest;
}
|
||
|
||
/**
 * Collects general site information and writes it to `site-info.json` in OUTPUT_DIR.
 *
 * Starts from fixed defaults (languages `en`/`de`, default locale `en`) and
 * enriches them, best effort, with:
 *  - Polylang detection from the plugins endpoint (matched case-insensitively
 *    against each plugin's `plugin` and `name` fields; a case-sensitive match
 *    on `name` alone misses a plugin whose name is capitalised);
 *  - title, description, default language and permalink structure from the
 *    settings endpoint, which is requested once.
 * A failing request only produces a warning.
 *
 * @returns {Promise<Object>} The collected site information.
 */
async function exportSiteInfo() {
  console.log('\n📊 EXPORTING SITE INFORMATION');

  const siteInfo = {
    baseUrl: BASE_URL,
    exportDate: new Date().toISOString(),
    timestamp: TIMESTAMP,
    polylang: false,
    languages: ['en', 'de'],
    defaultLocale: 'en'
  };

  try {
    const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth());
    const polylangPlugin = plugins.find((p) =>
      `${p?.plugin ?? ''} ${p?.name ?? ''}`.toLowerCase().includes('polylang')
    );
    if (polylangPlugin) {
      siteInfo.polylang = true;
      siteInfo.polylangVersion = polylangPlugin.version;
    }
  } catch (e) {
    console.log('⚠️ Could not check plugins');
  }

  try {
    const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    siteInfo.siteTitle = settings.title;
    siteInfo.siteDescription = settings.description;
    siteInfo.defaultLanguage = settings.default_language || 'en';
    siteInfo.permalinkStructure = settings.permalink_structure;
  } catch (e) {
    // Both values come from the same response, so both are unavailable.
    console.log('⚠️ Could not fetch settings');
    console.log('⚠️ Could not fetch permalink structure');
  }

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'site-info.json'),
    JSON.stringify(siteInfo, null, 2)
  );

  console.log('✅ Site info exported');

  return siteInfo;
}
|
||
|
||
/**
 * Exports the site logo, favicon and apple-touch icon and writes the resulting
 * public paths to `assets.json` in OUTPUT_DIR.
 *
 * Sources, in order:
 *  1. `custom_logo` / `site_icon` media IDs from the settings endpoint;
 *  2. for the logo: the first media item whose title, slug or URL contains "logo";
 *  3. for the favicon: `/favicon.ico` and `/wp-content/uploads/favicon.ico`.
 *
 * Every step is isolated: a failure (for example a logo download error) is
 * logged and the remaining steps still run, so `assets.json` is written with
 * whatever could be exported. Previously the first failure aborted the whole
 * function and no `assets.json` was produced.
 *
 * NOTE(review): the logo is always stored as `logo.webp` and the site icon as
 * `favicon.ico` / `apple-touch-icon.png`, whatever the source file's real
 * format is - confirm consumers do not depend on the extension.
 *
 * @returns {Promise<{logo: ?string, favicon: ?string, appleTouchIcon: ?string}>}
 *   Public paths of the exported assets; `null` for anything not found.
 */
async function exportLogoAndFavicon() {
  console.log('\n📊 EXPORTING LOGO AND FAVICON');

  const assets = {
    logo: null,
    favicon: null,
    appleTouchIcon: null
  };

  // Runs one export step; an error is logged and does not stop the later steps.
  const runStep = async (step) => {
    try {
      await step();
    } catch (error) {
      console.error('❌ Error exporting logo/favicon:', error.message);
    }
  };

  let settings = {};
  await runStep(async () => {
    const response = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth());
    if (response && typeof response === 'object') {
      settings = response;
    }
  });

  // Logo from settings
  await runStep(async () => {
    if (!settings.custom_logo) return;

    console.log(`📥 Found custom_logo ID: ${settings.custom_logo}`);
    const logoMedia = await fetchMedia(settings.custom_logo);
    if (!logoMedia || !logoMedia.source_url) return;

    const logoFilename = 'logo.webp';
    await downloadMedia(logoMedia.source_url, logoFilename);
    assets.logo = `/media/${logoFilename}`;
    console.log(`✅ Logo downloaded: ${logoFilename}`);
  });

  // Favicon and apple-touch icon from settings (both from the same site icon)
  await runStep(async () => {
    if (!settings.site_icon) return;

    console.log(`📥 Found site_icon ID: ${settings.site_icon}`);
    const iconMedia = await fetchMedia(settings.site_icon);
    if (!iconMedia || !iconMedia.source_url) return;

    const faviconFilename = 'favicon.ico';
    await downloadFavicon(iconMedia.source_url, faviconFilename);
    assets.favicon = `/favicon.ico`;
    console.log(`✅ Favicon downloaded: ${faviconFilename}`);

    const appleTouchFilename = 'apple-touch-icon.png';
    await downloadFavicon(iconMedia.source_url, appleTouchFilename);
    assets.appleTouchIcon = `/apple-touch-icon.png`;
    console.log(`✅ Apple touch icon downloaded: ${appleTouchFilename}`);
  });

  // Logo fallback: search the media library
  await runStep(async () => {
    if (assets.logo) return;

    console.log('⚠️ No logo found in settings, searching media...');
    const allMedia = await fetchWithPagination('media', { per_page: 100 });
    const logoMedia = allMedia.find((m) =>
      m.source_url && (
        m.title?.rendered?.toLowerCase().includes('logo') ||
        m.slug?.toLowerCase().includes('logo') ||
        m.source_url.toLowerCase().includes('logo')
      )
    );
    if (!logoMedia) return;

    const logoFilename = 'logo.webp';
    await downloadMedia(logoMedia.source_url, logoFilename);
    assets.logo = `/media/${logoFilename}`;
    console.log(`✅ Logo found and downloaded: ${logoFilename}`);
  });

  // Favicon fallback: try common locations
  await runStep(async () => {
    if (assets.favicon) return;

    console.log('⚠️ No favicon found in settings, trying common locations...');
    const faviconUrls = [
      `${BASE_URL}/favicon.ico`,
      `${BASE_URL}/wp-content/uploads/favicon.ico`
    ];

    for (const url of faviconUrls) {
      try {
        await downloadFavicon(url, 'favicon.ico');
        assets.favicon = '/favicon.ico';
        console.log(`✅ Favicon downloaded from: ${url}`);

        await downloadFavicon(url, 'apple-touch-icon.png');
        assets.appleTouchIcon = '/apple-touch-icon.png';
        break;
      } catch (e) {
        // Expected when the location does not exist: continue to next URL
      }
    }
  });

  await runStep(async () => {
    fs.writeFileSync(
      path.join(OUTPUT_DIR, 'assets.json'),
      JSON.stringify(assets, null, 2)
    );
    console.log('✅ Logo and favicon export complete');
  });

  return assets;
}
|
||
|
||
/**
 * Pairs EN and DE items that share the same slug and writes the result to
 * `translation-mapping.json` in OUTPUT_DIR.
 *
 * Pages, posts, products and product categories are read from the exported
 * JSON files; a missing, unreadable or non-array file counts as an empty list.
 * When several DE items share a slug, the first one is used.
 *
 * @returns {Promise<{pages: Object, posts: Object, products: Object, productCategories: Object}>}
 *   Per content type, a map of slug to `{ en: <id>, de: <id> }`.
 */
async function generateTranslationMapping() {
  console.log('\n📊 GENERATING TRANSLATION MAPPING');

  // Reads an exported list; anything unusable becomes an empty list.
  const loadFile = (filename) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
      return Array.isArray(parsed) ? parsed : [];
    } catch (e) {
      return [];
    }
  };

  // Pairs items by identical slug, using a lookup table instead of a nested scan.
  const findTranslationPairs = (enItems, deItems) => {
    const deBySlug = new Map();
    for (const deItem of deItems) {
      // Keep the first item per slug.
      if (!deBySlug.has(deItem.slug)) {
        deBySlug.set(deItem.slug, deItem);
      }
    }

    const pairs = {};
    for (const enItem of enItems) {
      const deMatch = deBySlug.get(enItem.slug);
      if (deMatch) {
        pairs[`${enItem.slug}`] = {
          en: enItem.id,
          de: deMatch.id
        };
      }
    }
    return pairs;
  };

  const pairsFor = (baseName) =>
    findTranslationPairs(loadFile(`${baseName}.en.json`), loadFile(`${baseName}.de.json`));

  const mapping = {
    pages: pairsFor('pages'),
    posts: pairsFor('posts'),
    products: pairsFor('products'),
    productCategories: pairsFor('product-categories')
  };

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'translation-mapping.json'),
    JSON.stringify(mapping, null, 2)
  );

  const totalPairs = Object.values(mapping).reduce((sum, obj) => sum + Object.keys(obj).length, 0);
  console.log(`✅ Translation mapping: ${totalPairs} pairs found`);

  return mapping;
}
|
||
|
||
/**
 * Generates permanent redirects from root-level post URLs to their `/blog/`
 * location and writes them to `redirects.json` in OUTPUT_DIR.
 *
 * EN posts redirect `/<slug>` to `/blog/<slug>`, DE posts `/de/<slug>` to
 * `/de/blog/<slug>`. A missing or unreadable posts file is reported with a
 * warning and contributes no redirects (previously it threw).
 *
 * @returns {Promise<Object[]>} Redirect rules
 *   (`source`, `destination`, `permanent`, `locale`), EN rules first.
 */
async function generateRedirects() {
  console.log('\n📊 GENERATING REDIRECT RULES');

  // Reads an exported posts list; anything unusable becomes an empty list.
  const loadPosts = (filename) => {
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
      return Array.isArray(parsed) ? parsed : [];
    } catch (error) {
      console.warn(`⚠️ Could not read ${filename}, no redirects generated for it:`, error.message);
      return [];
    }
  };

  // Builds the rule factory for one locale; `prefix` is '' for the default locale.
  const toRedirect = (prefix, locale) => (post) => ({
    source: `${prefix}/${post.slug}`,
    destination: `${prefix}/blog/${post.slug}`,
    permanent: true,
    locale
  });

  const redirects = [
    ...loadPosts('posts.en.json').map(toRedirect('', 'en')),
    ...loadPosts('posts.de.json').map(toRedirect('/de', 'de'))
  ];

  fs.writeFileSync(
    path.join(OUTPUT_DIR, 'redirects.json'),
    JSON.stringify(redirects, null, 2)
  );

  console.log(`✅ Redirects: ${redirects.length} rules generated`);

  return redirects;
}
|
||
|
||
// NEW: Enhanced Media Download from Content
|
||
/**
 * Downloads every image referenced in the exported content that is not on
 * disk yet and records `source URL -> local path` in
 * `PROCESSED_DIR/asset-map.json` (merged with an existing map).
 *
 * References come from `extractAllContentImages`: numeric IDs are resolved to
 * a URL through `fetchMedia`, strings starting with `http` are used as they
 * are, anything else is skipped. Downloads run in batches of five.
 *
 * File names are derived from the URL path without its query string:
 * `<id>-<basename>` for ID references, `media-<upload path>` for URLs. They no
 * longer contain the current time, so the "file exists" check works across
 * runs and nothing is downloaded twice.
 *
 * @returns {Promise<Object<string, string>>} The merged asset map.
 */
async function downloadMissingContentImages() {
  console.log('\n📥 DOWNLOADING MISSING IMAGES FROM CONTENT');

  const UPLOADS_MARKER = '/wp-content/uploads/';
  const BATCH_SIZE = 5;
  const DEFAULT_EXT = '.webp';

  // Path part of a URL, i.e. without query string and fragment.
  const pathnameOf = (rawUrl) => {
    try {
      return new URL(rawUrl).pathname;
    } catch (e) {
      return rawUrl.split(/[?#]/)[0];
    }
  };

  // Stable local file name for a source URL (and media ID, when known).
  const fileNameFor = (sourceUrl, mediaId) => {
    const pathname = pathnameOf(sourceUrl);

    if (mediaId) {
      const ext = path.extname(pathname) || DEFAULT_EXT;
      return `${mediaId}-${path.basename(pathname, ext)}${ext}`;
    }

    // Keep the upload sub-path so equally named files from different folders
    // do not collide: "2024/01/a.jpg" -> "media-2024-01-a.jpg".
    const markerAt = pathname.indexOf(UPLOADS_MARKER);
    const relative = markerAt === -1 ? pathname : pathname.slice(markerAt + UPLOADS_MARKER.length);
    const safe = relative.replace(/^\/+/, '').replace(/[^A-Za-z0-9._-]+/g, '-') || 'file';
    return path.extname(safe) ? `media-${safe}` : `media-${safe}${DEFAULT_EXT}`;
  };

  // Extract all image URLs from content
  const imageUrls = await extractAllContentImages();
  console.log(`Found ${imageUrls.length} unique image URLs in content`);

  const assetMap = {};
  const downloadPromises = [];
  let downloadedCount = 0;

  for (const urlOrId of imageUrls) {
    let actualUrl = null;
    let mediaId = null;

    // Determine if it's an ID or URL
    if (typeof urlOrId === 'number' || /^\d+$/.test(urlOrId)) {
      mediaId = Number.parseInt(urlOrId, 10);
      const media = await fetchMedia(mediaId);
      if (media && media.source_url) {
        actualUrl = media.source_url;
      }
    } else if (typeof urlOrId === 'string' && urlOrId.startsWith('http')) {
      actualUrl = urlOrId;
    }

    if (!actualUrl) continue;

    // Check if already in asset map
    if (assetMap[actualUrl]) {
      console.log(`✅ Already mapped: ${actualUrl} → ${assetMap[actualUrl]}`);
      continue;
    }

    const filename = fileNameFor(actualUrl, mediaId);
    const localPath = `/media/${filename}`;

    // Check if file exists
    if (fs.existsSync(path.join(MEDIA_DIR, filename))) {
      console.log(`✅ File exists: ${filename}`);
      assetMap[actualUrl] = localPath;
      continue;
    }

    // Download
    console.log(`⬇️ Downloading: ${actualUrl}`);
    downloadPromises.push(
      downloadMedia(actualUrl, filename).then(() => {
        assetMap[actualUrl] = localPath;
        downloadedCount++;
      }).catch((err) => {
        console.warn(`⚠️ Failed to download ${actualUrl}:`, err.message);
      })
    );

    // Wait for the current batch before starting more downloads,
    // to avoid overwhelming the server.
    if (downloadPromises.length >= BATCH_SIZE) {
      await Promise.all(downloadPromises);
      downloadPromises.length = 0;
    }
  }

  await Promise.all(downloadPromises);

  // Load existing asset map; an unreadable map must not discard this run's work.
  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  let existingMap = {};
  if (fs.existsSync(assetMapPath)) {
    try {
      existingMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
    } catch (error) {
      console.warn(`⚠️ Ignoring unreadable asset map ${assetMapPath}:`, error.message);
    }
  }

  // Merge with new mappings (new entries win)
  const mergedMap = { ...existingMap, ...assetMap };

  // Save updated asset map
  fs.writeFileSync(
    assetMapPath,
    JSON.stringify(mergedMap, null, 2)
  );

  console.log(`✅ Downloaded ${downloadedCount} new images`);
  console.log(`✅ Asset map now has ${Object.keys(mergedMap).length} mappings`);

  return mergedMap;
}
|
||
|
||
// NEW: Update processed files with local image paths
|
||
/**
 * Rewrites WordPress image URLs in the exported page, post and product files
 * (in OUTPUT_DIR) to the local paths recorded in `PROCESSED_DIR/asset-map.json`.
 *
 * Per item, the HTML fields `contentHtml`, `excerptHtml`, `descriptionHtml`
 * and `shortDescriptionHtml` are rewritten, as are exact matches in `images`
 * and a URL-valued `featuredImage`. The product fields were previously never
 * touched. A file is only written back when something in it changed. Does
 * nothing when no asset map exists.
 *
 * @returns {Promise<void>}
 */
async function updateProcessedFiles() {
  console.log('\n🔄 UPDATING PROCESSED FILES WITH LOCAL PATHS');

  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  if (!fs.existsSync(assetMapPath)) {
    console.log('⚠️ No asset map found, skipping update');
    return;
  }

  const assetMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
  const HTML_FIELDS = ['contentHtml', 'excerptHtml', 'descriptionHtml', 'shortDescriptionHtml'];

  // Longest URLs first, so a URL that is a prefix of another one (for example
  // the same file with and without a query string) cannot clobber the longer match.
  const replacements = Object.entries(assetMap)
    .filter(([wpUrl]) => wpUrl !== '')
    .sort(([a], [b]) => b.length - a.length);

  // Literal (non-regex) replacement of every mapped URL in a string;
  // split/join avoids both regex escaping and `$` patterns in the replacement.
  function replaceImageUrls(content, pairs) {
    if (typeof content !== 'string' || content === '') return content;

    let updated = content;
    for (const [wpUrl, localPath] of pairs) {
      updated = updated.split(wpUrl).join(localPath);
    }
    return updated;
  }

  const mapExactUrl = (value) =>
    (typeof value === 'string' && assetMap[value]) ? assetMap[value] : value;

  // Process each content file
  const contentFiles = [
    'pages.en.json', 'pages.de.json',
    'posts.en.json', 'posts.de.json',
    'products.en.json', 'products.de.json'
  ];

  for (const file of contentFiles) {
    const filePath = path.join(OUTPUT_DIR, file);
    if (!fs.existsSync(filePath)) continue;

    const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    let updated = false;

    for (const item of items) {
      for (const field of HTML_FIELDS) {
        const before = item[field];
        const after = replaceImageUrls(before, replacements);
        if (after !== before) {
          item[field] = after;
          updated = true;
        }
      }

      if (Array.isArray(item.images)) {
        const mappedImages = item.images.map(mapExactUrl);
        if (JSON.stringify(mappedImages) !== JSON.stringify(item.images)) {
          item.images = mappedImages;
          updated = true;
        }
      }

      const mappedFeatured = mapExactUrl(item.featuredImage);
      if (mappedFeatured !== item.featuredImage) {
        item.featuredImage = mappedFeatured;
        updated = true;
      }
    }

    if (updated) {
      fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
      console.log(`✅ Updated ${file}`);
    } else {
      console.log(`ℹ️ No changes needed for ${file}`);
    }
  }

  console.log('✅ All processed files updated');
}
|
||
|
||
// NEW: Process data for Next.js
|
||
/**
 * Consolidates the raw export in OUTPUT_DIR into the files written to
 * PROCESSED_DIR.
 *
 * Output files:
 *   - pages.json / posts.json / products.json / categories.json: the English
 *     list followed by the German list of the corresponding raw files;
 *   - media.json: the parsed content of the raw media.json;
 *   - wordpress-data.json: site info, assets, translation mapping, redirects
 *     and an `exportDate` timestamp taken at call time.
 *
 * A raw file that is absent is treated as an empty array. A raw file that
 * exists but cannot be read or parsed is also replaced by an empty array, but
 * a warning is printed so a corrupt export does not pass unnoticed.
 *
 * @returns {Promise<void>}
 */
async function processDataForNextJs() {
  console.log('\n🔄 PROCESSING DATA FOR NEXT.JS');

  // Parses one raw export file; falls back to [] when it cannot be loaded.
  const loadRaw = (filename) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8'));
    } catch (err) {
      // A missing file is an expected case (that export produced nothing);
      // anything else means the export is damaged and is worth reporting.
      if (!err || err.code !== 'ENOENT') {
        const reason = err && err.message ? err.message : String(err);
        console.warn(`⚠️ Could not load ${filename}: ${reason}`);
      }
      return [];
    }
  };

  // Like loadRaw, but guarantees an array so the result can be spread safely.
  const loadList = (filename) => {
    const parsed = loadRaw(filename);
    if (Array.isArray(parsed)) return parsed;

    console.warn(`⚠️ ${filename} does not contain a JSON array, ignoring it`);
    return [];
  };

  // English entries first, then German, matching the raw file naming scheme.
  const loadBothLanguages = (stem) => [
    ...loadList(`${stem}.en.json`),
    ...loadList(`${stem}.de.json`)
  ];

  const pages = loadBothLanguages('pages');
  const posts = loadBothLanguages('posts');
  const products = loadBothLanguages('products');
  const categories = loadBothLanguages('product-categories');
  const media = loadRaw('media.json');
  const siteInfo = loadRaw('site-info.json');
  const assets = loadRaw('assets.json');
  const translationMapping = loadRaw('translation-mapping.json');
  const redirects = loadRaw('redirects.json');

  // Output file name -> payload, written in this order.
  const outputs = [
    ['pages.json', pages],
    ['posts.json', posts],
    ['products.json', products],
    ['categories.json', categories],
    ['media.json', media],
    ['wordpress-data.json', {
      siteInfo,
      assets,
      translationMapping,
      redirects,
      exportDate: new Date().toISOString()
    }]
  ];

  for (const [filename, payload] of outputs) {
    fs.writeFileSync(
      path.join(PROCESSED_DIR, filename),
      JSON.stringify(payload, null, 2)
    );
  }

  console.log('✅ Data processed for Next.js');
}
|
||
|
||
// Main Execution
|
||
/**
 * Command-line entry point: runs the whole export pipeline, one stage after
 * another, and prints a summary when everything succeeded.
 *
 * Any error raised by a stage is reported on stderr and terminates the
 * process with exit code 1.
 */
async function main() {
  const banner = [
    '🚀 WordPress → Next.js Data Export (Enhanced)',
    '=====================================',
    `Target: ${BASE_URL}`,
    `Output: ${OUTPUT_DIR}`,
    ''
  ];
  banner.forEach((line) => console.log(line));

  try {
    // Stages run strictly in sequence; each one is awaited before the next starts.
    const pipeline = [
      // Step 1: Export all content
      exportSiteInfo,
      exportPages,
      exportPosts,
      exportProducts,
      exportProductCategories,
      exportMenus,
      exportMedia,
      exportLogoAndFavicon,
      // Step 2: Generate mappings and redirects
      generateTranslationMapping,
      generateRedirects,
      // Step 3: Download missing images from content
      downloadMissingContentImages,
      // Step 4: Update processed files with local paths
      updateProcessedFiles,
      // Step 5: Process for Next.js
      processDataForNextJs
    ];

    for (const runStage of pipeline) {
      await runStage();
    }

    const summary = [
      '\n🎉 Export Complete!',
      '=====================================',
      `📁 Data directory: data/raw/${TIMESTAMP}`,
      `📁 Processed: data/processed/`,
      `🖼️ Media directory: public/media/`,
      `🎨 Logo/Favicon: public/`,
      '',
      'Next steps:',
      '1. Review exported data for completeness',
      '2. Check asset-map.json for all mappings',
      '3. Verify all images downloaded',
      '4. Ready for Next.js integration'
    ];
    summary.forEach((line) => console.log(line));
  } catch (error) {
    console.error('\n❌ Export failed:', error.message);
    process.exit(1);
  }
}
|
||
|
||
// Run if called directly
|
||
if (require.main === module) {
|
||
main();
|
||
}
|
||
|
||
module.exports = {
|
||
exportPages,
|
||
exportPosts,
|
||
exportProducts,
|
||
exportProductCategories,
|
||
exportMenus,
|
||
exportMedia,
|
||
exportSiteInfo,
|
||
exportLogoAndFavicon,
|
||
generateTranslationMapping,
|
||
generateRedirects,
|
||
downloadMissingContentImages,
|
||
updateProcessedFiles,
|
||
processDataForNextJs
|
||
};
|