#!/usr/bin/env node /** * WordPress to Next.js Data Export Script - Enhanced Version * Gathers all required data from WordPress/WooCommerce for static site generation * PLUS: Crawls all contentHtml to find and download missing images */ const fs = require('fs'); const path = require('path'); const https = require('https'); // Load environment variables require('dotenv').config(); const BASE_URL = process.env.WOOCOMMERCE_URL; const CONSUMER_KEY = process.env.WOOCOMMERCE_CONSUMER_KEY; const CONSUMER_SECRET = process.env.WOOCOMMERCE_CONSUMER_SECRET; const APP_PASSWORD = process.env.WORDPRESS_APP_PASSWORD; // Validate environment if (!BASE_URL || !CONSUMER_KEY || !CONSUMER_SECRET) { console.error('āŒ Missing required environment variables'); console.error('Please check .env file for:'); console.error(' - WOOCOMMERCE_URL'); console.error(' - WOOCOMMERCE_CONSUMER_KEY'); console.error(' - WOOCOMMERCE_CONSUMER_SECRET'); process.exit(1); } // Configuration const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-'); const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP); const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media'); const PUBLIC_DIR = path.join(__dirname, '..', 'public'); const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed'); // Create output directories if (!fs.existsSync(OUTPUT_DIR)) { fs.mkdirSync(OUTPUT_DIR, { recursive: true }); } if (!fs.existsSync(MEDIA_DIR)) { fs.mkdirSync(MEDIA_DIR, { recursive: true }); } if (!fs.existsSync(PROCESSED_DIR)) { fs.mkdirSync(PROCESSED_DIR, { recursive: true }); } // API Helper Functions function buildAuthHeader() { const credentials = Buffer.from(`${CONSUMER_KEY}:${CONSUMER_SECRET}`).toString('base64'); return `Basic ${credentials}`; } function buildWordPressAuth() { return { 'Authorization': `Basic ${Buffer.from(`admin:${APP_PASSWORD}`).toString('base64')}`, 'Content-Type': 'application/json' }; } function makeRequest(url, headers = {}) { return new Promise((resolve, reject) => { const options = { headers: { 'User-Agent': 'WordPress-NextJS-Migration/1.0', ...headers } }; https.get(url, options, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { if (res.statusCode >= 200 && res.statusCode < 300) { try { resolve(JSON.parse(data)); } catch (e) { resolve(data); } } else { reject(new Error(`HTTP ${res.statusCode}: ${data}`)); } }); }).on('error', reject); }); } async function fetchWithPagination(endpoint, params = {}, locale = null) { const allItems = []; let page = 1; const perPage = 100; while (true) { const queryString = new URLSearchParams({ ...params, page: page.toString(), per_page: perPage.toString(), ...(locale ? { lang: locale } : {}) }).toString(); const url = `${BASE_URL}/wp-json/wp/v2/${endpoint}?${queryString}`; console.log(`šŸ“„ Fetching ${endpoint} page ${page}${locale ? ` (${locale})` : ''}...`); try { const items = await makeRequest(url, buildWordPressAuth()); if (!Array.isArray(items) || items.length === 0) { break; } allItems.push(...items); if (items.length < perPage) { break; } page++; } catch (error) { console.error(`āŒ Error fetching ${endpoint} page ${page}:`, error.message); break; } } return allItems; } async function fetchWooCommerce(endpoint, params = {}, locale = null) { const queryString = new URLSearchParams({ ...params, per_page: '100', ...(locale ? { lang: locale } : {}) }).toString(); const url = `${BASE_URL}/wp-json/wc/v3/${endpoint}?${queryString}`; console.log(`šŸ“„ Fetching WooCommerce ${endpoint}${locale ? ` (${locale})` : ''}...`); try { const response = await makeRequest(url, { 'Authorization': buildAuthHeader(), 'Content-Type': 'application/json' }); return Array.isArray(response) ? response : [response]; } catch (error) { console.error(`āŒ Error fetching WooCommerce ${endpoint}:`, error.message); return []; } } async function fetchMedia(mediaId) { const url = `${BASE_URL}/wp-json/wp/v2/media/${mediaId}`; try { const media = await makeRequest(url, buildWordPressAuth()); return media; } catch (error) { console.error(`āŒ Error fetching media ${mediaId}:`, error.message); return null; } } async function downloadMedia(url, filename) { return new Promise((resolve, reject) => { const filePath = path.join(MEDIA_DIR, filename); // Check if file already exists if (fs.existsSync(filePath)) { console.log(`āœ… Media already downloaded: ${filename}`); resolve(filePath); return; } const file = fs.createWriteStream(filePath); https.get(url, (res) => { if (res.statusCode === 200) { res.pipe(file); file.on('finish', () => { console.log(`āœ… Downloaded: ${filename}`); resolve(filePath); }); } else { reject(new Error(`Failed to download: ${res.statusCode}`)); } }).on('error', (err) => { fs.unlink(filePath, () => {}); reject(err); }); }); } async function downloadFavicon(url, filename) { return new Promise((resolve, reject) => { const filePath = path.join(PUBLIC_DIR, filename); if (fs.existsSync(filePath)) { console.log(`āœ… Favicon already exists: ${filename}`); resolve(filePath); return; } const file = fs.createWriteStream(filePath); https.get(url, (res) => { if (res.statusCode === 200) { res.pipe(file); file.on('finish', () => { console.log(`āœ… Downloaded favicon: ${filename}`); resolve(filePath); }); } else { reject(new Error(`Failed to download favicon: ${res.statusCode}`)); } }).on('error', (err) => { fs.unlink(filePath, () => {}); reject(err); }); }); } // Enhanced Image Extraction Functions function extractImageUrlsFromContent(html) { const urls = new Set(); if (!html) return []; // 1. bg_image attribute (WPBakery) const bgImages = html.match(/bg_image=[\"']([^\"']+)[\"']/g) || []; bgImages.forEach(m => { const id = m.split(/[\"']/)[1]; if (id && !id.includes(' ') && /^\d+$/.test(id)) { urls.add(id); } }); // 2. background_image attribute const bgImages2 = html.match(/background_image=[\"']([^\"']+)[\"']/g) || []; bgImages2.forEach(m => { const id = m.split(/[\"']/)[1]; if (id && !id.includes(' ') && /^\d+$/.test(id)) { urls.add(id); } }); // 3. vc_gallery images const galleries = html.match(/images=[\"']([^\"']+)[\"']/g) || []; galleries.forEach(m => { const ids = m.split(/[\"']/)[1]; if (ids) { ids.split(',').forEach(id => { const cleanId = id.trim(); if (cleanId && /^\d+$/.test(cleanId)) { urls.add(cleanId); } }); } }); // 4. column_background_image const colBg = html.match(/column_background_image=[\"']([^\"']+)[\"']/g) || []; colBg.forEach(m => { const id = m.split(/[\"']/)[1]; if (id && !id.includes(' ') && /^\d+$/.test(id)) { urls.add(id); } }); // 5. poster attribute (video) const posters = html.match(/poster=[\"']([^\"']+)[\"']/g) || []; posters.forEach(m => { const id = m.split(/[\"']/)[1]; if (id && /^\d+$/.test(id)) { urls.add(id); } }); // 6. Direct WordPress image URLs in src const imgSrc = html.match(/src=[\"']([^\"']+)[\"']/g) || []; imgSrc.forEach(m => { const url = m.split(/[\"']/)[1]; if (url && url.includes('/wp-content/uploads/')) { urls.add(url); } }); // 7. Background image URLs in style const bgStyle = html.match(/background-image:[^;]*url\([^)]+\)/g) || []; bgStyle.forEach(m => { const match = m.match(/url\([\"']?([^\"')]+)[\"']?\)/); if (match && match[1].includes('/wp-content/uploads/')) { urls.add(match[1]); } }); // 8. Data attributes with image URLs const dataAttrs = html.match(/data-[a-z-]+=[\"']([^\"']+)[\"']/g) || []; dataAttrs.forEach(m => { const url = m.split(/[\"']/)[1]; if (url && url.includes('/wp-content/uploads/')) { urls.add(url); } }); return Array.from(urls); } async function extractAllContentImages() { console.log('\nšŸ” CRAWLING ALL CONTENT FOR IMAGES'); const allUrls = new Set(); // Load all content files const contentFiles = [ 'pages.en.json', 'pages.de.json', 'posts.en.json', 'posts.de.json', 'products.en.json', 'products.de.json' ]; for (const file of contentFiles) { const filePath = path.join(OUTPUT_DIR, file); if (!fs.existsSync(filePath)) continue; const items = JSON.parse(fs.readFileSync(filePath, 'utf8')); items.forEach(item => { // Extract from contentHtml const contentImages = extractImageUrlsFromContent(item.contentHtml); contentImages.forEach(url => allUrls.add(url)); // Extract from excerptHtml const excerptImages = extractImageUrlsFromContent(item.excerptHtml); excerptImages.forEach(url => allUrls.add(url)); // Extract from product images if (item.images) { item.images.forEach(img => { if (typeof img === 'string' && img.includes('/wp-content/uploads/')) { allUrls.add(img); } }); } // Extract from featuredImage if it's a URL if (item.featuredImage && typeof item.featuredImage === 'string' && item.featuredImage.includes('/wp-content/uploads/')) { allUrls.add(item.featuredImage); } }); } return Array.from(allUrls); } // Data Processing Functions function processPage(page, locale) { return { id: page.id, translationKey: `page-${page.slug}`, locale: locale, slug: page.slug, path: locale === 'en' ? `/${page.slug}` : `/${locale}/${page.slug}`, titleHtml: page.title?.rendered || '', contentHtml: page.content?.rendered || '', excerptHtml: page.excerpt?.rendered || '', featuredImage: page.featured_media || null, updatedAt: page.modified || page.date }; } function processPost(post, locale) { return { id: post.id, translationKey: `post-${post.slug}`, locale: locale, slug: post.slug, path: locale === 'en' ? `/blog/${post.slug}` : `/${locale}/blog/${post.slug}`, titleHtml: post.title?.rendered || '', contentHtml: post.content?.rendered || '', excerptHtml: post.excerpt?.rendered || '', featuredImage: post.featured_media || null, datePublished: post.date, updatedAt: post.modified || post.date }; } function processProduct(product, locale) { return { id: product.id, translationKey: `product-${product.slug}`, locale: locale, slug: product.slug, path: locale === 'en' ? `/product/${product.slug}` : `/${locale}/product/${product.slug}`, name: product.name, shortDescriptionHtml: product.short_description || '', descriptionHtml: product.description || '', images: product.images ? product.images.map(img => img.src) : [], featuredImage: product.images && product.images.length > 0 ? product.images[0].src : null, sku: product.sku, regularPrice: product.regular_price, salePrice: product.sale_price, currency: product.currency || 'EUR', stockStatus: product.stock_status, categories: product.categories ? product.categories.map(cat => ({ id: cat.id, name: cat.name, slug: cat.slug })) : [], attributes: product.attributes || [], variations: product.variations || [], updatedAt: product.date_modified }; } function processProductCategory(category, locale) { return { id: category.id, translationKey: `product-category-${category.slug}`, locale: locale, slug: category.slug, name: category.name, path: locale === 'en' ? `/product-category/${category.slug}` : `/${locale}/product-category/${category.slug}`, description: category.description || '', count: category.count || 0 }; } function processMenu(menu, locale) { return { id: menu.term_id || menu.id, slug: menu.slug, name: menu.name, locale: locale, items: menu.items || [] }; } // Main Export Functions async function exportPages() { console.log('\nšŸ“Š EXPORTING PAGES'); const pagesEN = await fetchWithPagination('pages', { status: 'publish' }, 'en'); const pagesDE = await fetchWithPagination('pages', { status: 'publish' }, 'de'); const processedEN = pagesEN.map(p => processPage(p, 'en')); const processedDE = pagesDE.map(p => processPage(p, 'de')); fs.writeFileSync( path.join(OUTPUT_DIR, 'pages.en.json'), JSON.stringify(processedEN, null, 2) ); fs.writeFileSync( path.join(OUTPUT_DIR, 'pages.de.json'), JSON.stringify(processedDE, null, 2) ); console.log(`āœ… Pages: ${processedEN.length} EN, ${processedDE.length} DE`); return { en: processedEN, de: processedDE }; } async function exportPosts() { console.log('\nšŸ“Š EXPORTING POSTS'); const postsEN = await fetchWithPagination('posts', { status: 'publish' }, 'en'); const postsDE = await fetchWithPagination('posts', { status: 'publish' }, 'de'); const processedEN = postsEN.map(p => processPost(p, 'en')); const processedDE = postsDE.map(p => processPost(p, 'de')); fs.writeFileSync( path.join(OUTPUT_DIR, 'posts.en.json'), JSON.stringify(processedEN, null, 2) ); fs.writeFileSync( path.join(OUTPUT_DIR, 'posts.de.json'), JSON.stringify(processedDE, null, 2) ); console.log(`āœ… Posts: ${processedEN.length} EN, ${processedDE.length} DE`); return { en: processedEN, de: processedDE }; } async function exportProducts() { console.log('\nšŸ“Š EXPORTING PRODUCTS'); const productsEN = await fetchWooCommerce('products', {}, 'en'); const productsDE = await fetchWooCommerce('products', {}, 'de'); const processedEN = productsEN.map(p => processProduct(p, 'en')); const processedDE = productsDE.map(p => processProduct(p, 'de')); fs.writeFileSync( path.join(OUTPUT_DIR, 'products.en.json'), JSON.stringify(processedEN, null, 2) ); fs.writeFileSync( path.join(OUTPUT_DIR, 'products.de.json'), JSON.stringify(processedDE, null, 2) ); console.log(`āœ… Products: ${processedEN.length} EN, ${processedDE.length} DE`); return { en: processedEN, de: processedDE }; } async function exportProductCategories() { console.log('\nšŸ“Š EXPORTING PRODUCT CATEGORIES'); const categoriesEN = await fetchWooCommerce('products/categories', {}, 'en'); const categoriesDE = await fetchWooCommerce('products/categories', {}, 'de'); const processedEN = categoriesEN.map(c => processProductCategory(c, 'en')); const processedDE = categoriesDE.map(c => processProductCategory(c, 'de')); fs.writeFileSync( path.join(OUTPUT_DIR, 'product-categories.en.json'), JSON.stringify(processedEN, null, 2) ); fs.writeFileSync( path.join(OUTPUT_DIR, 'product-categories.de.json'), JSON.stringify(processedDE, null, 2) ); console.log(`āœ… Product Categories: ${processedEN.length} EN, ${processedDE.length} DE`); return { en: processedEN, de: processedDE }; } async function exportMenus() { console.log('\nšŸ“Š EXPORTING MENUS'); const menusEN = await fetchWithPagination('menus', {}, 'en').catch(() => []); const menusDE = await fetchWithPagination('menus', {}, 'de').catch(() => []); let menuLocations = {}; try { const locations = await makeRequest(`${BASE_URL}/wp-json/wp/v2/menu-locations`, buildWordPressAuth()); menuLocations = locations; } catch (e) { console.log('āš ļø Menu locations endpoint not available'); } const processedEN = menusEN.map(m => processMenu(m, 'en')); const processedDE = menusDE.map(m => processMenu(m, 'de')); fs.writeFileSync( path.join(OUTPUT_DIR, 'menus.en.json'), JSON.stringify({ menus: processedEN, locations: menuLocations }, null, 2) ); fs.writeFileSync( path.join(OUTPUT_DIR, 'menus.de.json'), JSON.stringify({ menus: processedDE, locations: menuLocations }, null, 2) ); console.log(`āœ… Menus: ${processedEN.length} EN, ${processedDE.length} DE`); return { en: processedEN, de: processedDE, locations: menuLocations }; } async function exportMedia() { console.log('\nšŸ“Š EXPORTING MEDIA'); const mediaIds = new Set(); // Read all JSON files to find media references const jsonFiles = fs.readdirSync(OUTPUT_DIR).filter(f => f.endsWith('.json')); for (const file of jsonFiles) { const content = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, file), 'utf8')); const items = Array.isArray(content) ? content : (content.menus || []); items.forEach(item => { if (item.featuredImage) mediaIds.add(item.featuredImage); if (item.images) item.images.forEach(img => { if (typeof img === 'string' && img.includes('/wp-content/')) { mediaIds.add(img); } }); }); } const mediaManifest = []; const downloadPromises = []; for (const mediaRef of mediaIds) { if (typeof mediaRef === 'number') { const media = await fetchMedia(mediaRef); if (media && media.source_url) { const filename = `${mediaRef}-${path.basename(media.source_url)}`; mediaManifest.push({ id: mediaRef, url: media.source_url, filename: filename, alt: media.alt_text || '', width: media.media_details?.width, height: media.media_details?.height, mime_type: media.mime_type }); downloadPromises.push( downloadMedia(media.source_url, filename).catch(err => { console.warn(`āš ļø Failed to download media ${mediaRef}:`, err.message); }) ); } } else if (typeof mediaRef === 'string' && mediaRef.startsWith('http')) { const filename = `media-${Date.now()}-${path.basename(mediaRef)}`; mediaManifest.push({ id: null, url: mediaRef, filename: filename, alt: '', width: null, height: null, mime_type: null }); downloadPromises.push( downloadMedia(mediaRef, filename).catch(err => { console.warn(`āš ļø Failed to download media from URL:`, err.message); }) ); } } await Promise.all(downloadPromises); fs.writeFileSync( path.join(OUTPUT_DIR, 'media.json'), JSON.stringify(mediaManifest, null, 2) ); console.log(`āœ… Media: ${mediaManifest.length} items`); return mediaManifest; } async function exportSiteInfo() { console.log('\nšŸ“Š EXPORTING SITE INFORMATION'); const siteInfo = { baseUrl: BASE_URL, exportDate: new Date().toISOString(), timestamp: TIMESTAMP, polylang: false, languages: ['en', 'de'], defaultLocale: 'en' }; try { const plugins = await makeRequest(`${BASE_URL}/wp-json/wp/v2/plugins`, buildWordPressAuth()); const polylangPlugin = plugins.find(p => p.name.includes('polylang')); if (polylangPlugin) { siteInfo.polylang = true; siteInfo.polylangVersion = polylangPlugin.version; } } catch (e) { console.log('āš ļø Could not check plugins'); } try { const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth()); siteInfo.siteTitle = settings.title; siteInfo.siteDescription = settings.description; siteInfo.defaultLanguage = settings.default_language || 'en'; } catch (e) { console.log('āš ļø Could not fetch settings'); } try { const permalink = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth()); siteInfo.permalinkStructure = permalink.permalink_structure; } catch (e) { console.log('āš ļø Could not fetch permalink structure'); } fs.writeFileSync( path.join(OUTPUT_DIR, 'site-info.json'), JSON.stringify(siteInfo, null, 2) ); console.log('āœ… Site info exported'); return siteInfo; } async function exportLogoAndFavicon() { console.log('\nšŸ“Š EXPORTING LOGO AND FAVICON'); const assets = { logo: null, favicon: null, appleTouchIcon: null }; try { const settings = await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth()); if (settings.custom_logo) { console.log(`šŸ“„ Found custom_logo ID: ${settings.custom_logo}`); const logoMedia = await fetchMedia(settings.custom_logo); if (logoMedia && logoMedia.source_url) { const logoFilename = 'logo.webp'; await downloadMedia(logoMedia.source_url, logoFilename); assets.logo = `/media/${logoFilename}`; console.log(`āœ… Logo downloaded: ${logoFilename}`); } } if (settings.site_icon) { console.log(`šŸ“„ Found site_icon ID: ${settings.site_icon}`); const iconMedia = await fetchMedia(settings.site_icon); if (iconMedia && iconMedia.source_url) { const faviconFilename = 'favicon.ico'; await downloadFavicon(iconMedia.source_url, faviconFilename); assets.favicon = `/favicon.ico`; console.log(`āœ… Favicon downloaded: ${faviconFilename}`); const appleTouchFilename = 'apple-touch-icon.png'; await downloadFavicon(iconMedia.source_url, appleTouchFilename); assets.appleTouchIcon = `/apple-touch-icon.png`; console.log(`āœ… Apple touch icon downloaded: ${appleTouchFilename}`); } } if (!assets.logo) { console.log('āš ļø No logo found in settings, searching media...'); const allMedia = await fetchWithPagination('media', { per_page: 100 }); const logoCandidates = allMedia.filter(m => m.title?.rendered?.toLowerCase().includes('logo') || m.slug?.toLowerCase().includes('logo') || m.source_url?.toLowerCase().includes('logo') ); if (logoCandidates.length > 0) { const logoMedia = logoCandidates[0]; const logoFilename = 'logo.webp'; await downloadMedia(logoMedia.source_url, logoFilename); assets.logo = `/media/${logoFilename}`; console.log(`āœ… Logo found and downloaded: ${logoFilename}`); } } if (!assets.favicon) { console.log('āš ļø No favicon found in settings, trying common locations...'); const faviconUrls = [ `${BASE_URL}/favicon.ico`, `${BASE_URL}/wp-content/uploads/favicon.ico` ]; for (const url of faviconUrls) { try { await downloadFavicon(url, 'favicon.ico'); assets.favicon = '/favicon.ico'; console.log(`āœ… Favicon downloaded from: ${url}`); await downloadFavicon(url, 'apple-touch-icon.png'); assets.appleTouchIcon = '/apple-touch-icon.png'; break; } catch (e) { // Continue to next URL } } } fs.writeFileSync( path.join(OUTPUT_DIR, 'assets.json'), JSON.stringify(assets, null, 2) ); console.log('āœ… Logo and favicon export complete'); } catch (error) { console.error('āŒ Error exporting logo/favicon:', error.message); } return assets; } async function generateTranslationMapping() { console.log('\nšŸ“Š GENERATING TRANSLATION MAPPING'); const mapping = { pages: {}, posts: {}, products: {}, productCategories: {} }; const loadFile = (filename) => { try { return JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8')); } catch (e) { return []; } }; const pagesEN = loadFile('pages.en.json'); const pagesDE = loadFile('pages.de.json'); const postsEN = loadFile('posts.en.json'); const postsDE = loadFile('posts.de.json'); const productsEN = loadFile('products.en.json'); const productsDE = loadFile('products.de.json'); const categoriesEN = loadFile('product-categories.en.json'); const categoriesDE = loadFile('product-categories.de.json'); function findTranslationPairs(enItems, deItems) { const pairs = {}; enItems.forEach(enItem => { const deMatch = deItems.find(de => de.slug === enItem.slug); if (deMatch) { const translationKey = `${enItem.slug}`; pairs[translationKey] = { en: enItem.id, de: deMatch.id }; } }); return pairs; } mapping.pages = findTranslationPairs(pagesEN, pagesDE); mapping.posts = findTranslationPairs(postsEN, postsDE); mapping.products = findTranslationPairs(productsEN, productsDE); mapping.productCategories = findTranslationPairs(categoriesEN, categoriesDE); fs.writeFileSync( path.join(OUTPUT_DIR, 'translation-mapping.json'), JSON.stringify(mapping, null, 2) ); const totalPairs = Object.values(mapping).reduce((sum, obj) => sum + Object.keys(obj).length, 0); console.log(`āœ… Translation mapping: ${totalPairs} pairs found`); return mapping; } async function generateRedirects() { console.log('\nšŸ“Š GENERATING REDIRECT RULES'); const redirects = []; const postsEN = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, 'posts.en.json'), 'utf8')); const postsDE = JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, 'posts.de.json'), 'utf8')); postsEN.forEach(post => { redirects.push({ source: `/${post.slug}`, destination: `/blog/${post.slug}`, permanent: true, locale: 'en' }); }); postsDE.forEach(post => { redirects.push({ source: `/de/${post.slug}`, destination: `/de/blog/${post.slug}`, permanent: true, locale: 'de' }); }); fs.writeFileSync( path.join(OUTPUT_DIR, 'redirects.json'), JSON.stringify(redirects, null, 2) ); console.log(`āœ… Redirects: ${redirects.length} rules generated`); return redirects; } // NEW: Enhanced Media Download from Content async function downloadMissingContentImages() { console.log('\nšŸ“„ DOWNLOADING MISSING IMAGES FROM CONTENT'); // Extract all image URLs from content const imageUrls = await extractAllContentImages(); console.log(`Found ${imageUrls.length} unique image URLs in content`); const assetMap = {}; const downloadPromises = []; let downloadedCount = 0; for (const urlOrId of imageUrls) { let actualUrl = null; let mediaId = null; // Determine if it's an ID or URL if (typeof urlOrId === 'number' || /^\d+$/.test(urlOrId)) { // It's a media ID mediaId = parseInt(urlOrId); const media = await fetchMedia(mediaId); if (media && media.source_url) { actualUrl = media.source_url; } } else if (typeof urlOrId === 'string' && urlOrId.startsWith('http')) { // It's a direct URL actualUrl = urlOrId; } if (!actualUrl) continue; // Generate filename const ext = path.extname(actualUrl) || '.webp'; const baseName = path.basename(actualUrl, ext); const filename = mediaId ? `${mediaId}-${baseName}${ext}` : `media-${Date.now()}-${baseName}${ext}`; const localPath = `/media/${filename}`; // Check if already in asset map if (assetMap[actualUrl]) { console.log(`āœ… Already mapped: ${actualUrl} → ${assetMap[actualUrl]}`); continue; } // Check if file exists const filePath = path.join(MEDIA_DIR, filename); if (fs.existsSync(filePath)) { console.log(`āœ… File exists: ${filename}`); assetMap[actualUrl] = localPath; continue; } // Download console.log(`ā¬‡ļø Downloading: ${actualUrl}`); downloadPromises.push( downloadMedia(actualUrl, filename).then(() => { assetMap[actualUrl] = localPath; downloadedCount++; }).catch(err => { console.warn(`āš ļø Failed to download ${actualUrl}:`, err.message); }) ); // Small delay to avoid overwhelming the server if (downloadPromises.length >= 5) { await Promise.all(downloadPromises); downloadPromises.length = 0; } } await Promise.all(downloadPromises); // Load existing asset map const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json'); let existingMap = {}; if (fs.existsSync(assetMapPath)) { existingMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8')); } // Merge with new mappings const mergedMap = { ...existingMap, ...assetMap }; // Save updated asset map fs.writeFileSync( assetMapPath, JSON.stringify(mergedMap, null, 2) ); console.log(`āœ… Downloaded ${downloadedCount} new images`); console.log(`āœ… Asset map now has ${Object.keys(mergedMap).length} mappings`); return mergedMap; } // NEW: Update processed files with local image paths async function updateProcessedFiles() { console.log('\nšŸ”„ UPDATING PROCESSED FILES WITH LOCAL PATHS'); const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json'); if (!fs.existsSync(assetMapPath)) { console.log('āš ļø No asset map found, skipping update'); return; } const assetMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8')); function replaceImageUrls(content, map) { if (!content) return content; let updated = content; // Replace all URLs in the map for (const [wpUrl, localPath] of Object.entries(map)) { // Escape special characters in URL const escapedUrl = wpUrl.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); const regex = new RegExp(escapedUrl, 'g'); updated = updated.replace(regex, localPath); } return updated; } // Process each content file const contentFiles = [ 'pages.en.json', 'pages.de.json', 'posts.en.json', 'posts.de.json', 'products.en.json', 'products.de.json' ]; for (const file of contentFiles) { const filePath = path.join(OUTPUT_DIR, file); if (!fs.existsSync(filePath)) continue; const items = JSON.parse(fs.readFileSync(filePath, 'utf8')); let updated = false; items.forEach(item => { const originalContent = item.contentHtml; const originalExcerpt = item.excerptHtml; const originalImages = item.images; item.contentHtml = replaceImageUrls(item.contentHtml, assetMap); item.excerptHtml = replaceImageUrls(item.excerptHtml, assetMap); if (item.images && Array.isArray(item.images)) { item.images = item.images.map(img => { if (typeof img === 'string' && assetMap[img]) { return assetMap[img]; } return img; }); } if (originalContent !== item.contentHtml || originalExcerpt !== item.excerptHtml || (originalImages && JSON.stringify(originalImages) !== JSON.stringify(item.images))) { updated = true; } }); if (updated) { fs.writeFileSync(filePath, JSON.stringify(items, null, 2)); console.log(`āœ… Updated ${file}`); } else { console.log(`ā„¹ļø No changes needed for ${file}`); } } console.log('āœ… All processed files updated'); } // NEW: Process data for Next.js async function processDataForNextJs() { console.log('\nšŸ”„ PROCESSING DATA FOR NEXT.JS'); // Load raw data const loadRaw = (filename) => { try { return JSON.parse(fs.readFileSync(path.join(OUTPUT_DIR, filename), 'utf8')); } catch (e) { return []; } }; const pages = [...loadRaw('pages.en.json'), ...loadRaw('pages.de.json')]; const posts = [...loadRaw('posts.en.json'), ...loadRaw('posts.de.json')]; const products = [...loadRaw('products.en.json'), ...loadRaw('products.de.json')]; const categories = [...loadRaw('product-categories.en.json'), ...loadRaw('product-categories.de.json')]; const media = loadRaw('media.json'); const siteInfo = loadRaw('site-info.json'); const assets = loadRaw('assets.json'); const translationMapping = loadRaw('translation-mapping.json'); const redirects = loadRaw('redirects.json'); // Save to processed directory fs.writeFileSync( path.join(PROCESSED_DIR, 'pages.json'), JSON.stringify(pages, null, 2) ); fs.writeFileSync( path.join(PROCESSED_DIR, 'posts.json'), JSON.stringify(posts, null, 2) ); fs.writeFileSync( path.join(PROCESSED_DIR, 'products.json'), JSON.stringify(products, null, 2) ); fs.writeFileSync( path.join(PROCESSED_DIR, 'categories.json'), JSON.stringify(categories, null, 2) ); fs.writeFileSync( path.join(PROCESSED_DIR, 'media.json'), JSON.stringify(media, null, 2) ); fs.writeFileSync( path.join(PROCESSED_DIR, 'wordpress-data.json'), JSON.stringify({ siteInfo, assets, translationMapping, redirects, exportDate: new Date().toISOString() }, null, 2) ); console.log('āœ… Data processed for Next.js'); } // Main Execution async function main() { console.log('šŸš€ WordPress → Next.js Data Export (Enhanced)'); console.log('====================================='); console.log(`Target: ${BASE_URL}`); console.log(`Output: ${OUTPUT_DIR}`); console.log(''); try { // Step 1: Export all content await exportSiteInfo(); await exportPages(); await exportPosts(); await exportProducts(); await exportProductCategories(); await exportMenus(); await exportMedia(); await exportLogoAndFavicon(); // Step 2: Generate mappings and redirects await generateTranslationMapping(); await generateRedirects(); // Step 3: NEW - Download missing images from content await downloadMissingContentImages(); // Step 4: NEW - Update processed files with local paths await updateProcessedFiles(); // Step 5: NEW - Process for Next.js await processDataForNextJs(); console.log('\nšŸŽ‰ Export Complete!'); console.log('====================================='); console.log(`šŸ“ Data directory: data/raw/${TIMESTAMP}`); console.log(`šŸ“ Processed: data/processed/`); console.log(`šŸ–¼ļø Media directory: public/media/`); console.log(`šŸŽØ Logo/Favicon: public/`); console.log(''); console.log('Next steps:'); console.log('1. Review exported data for completeness'); console.log('2. Check asset-map.json for all mappings'); console.log('3. Verify all images downloaded'); console.log('4. Ready for Next.js integration'); } catch (error) { console.error('\nāŒ Export failed:', error.message); process.exit(1); } } // Run if called directly if (require.main === module) { main(); } module.exports = { exportPages, exportPosts, exportProducts, exportProductCategories, exportMenus, exportMedia, exportSiteInfo, exportLogoAndFavicon, generateTranslationMapping, generateRedirects, downloadMissingContentImages, updateProcessedFiles, processDataForNextJs };