411 lines
13 KiB
JavaScript
Executable File
411 lines
13 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
|
|
/**
|
|
* WordPress → Next.js Data Processing Pipeline
|
|
* Transforms raw WordPress data into Next.js compatible format
|
|
*/
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Directory layout, resolved relative to the folder containing this script:
//   ../data/raw        raw exports (one sub-directory per export)
//   ../data/processed  files written by this script
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');

// Make sure the output directory exists. With `recursive: true`, mkdirSync
// creates any missing parents and is a no-op when the directory is already
// present, so a separate existsSync check is unnecessary (and racy).
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
|
|
|
|
// Find latest export
|
|
/**
 * Locate the raw export directory to process.
 *
 * Sub-directories of RAW_DIR are ordered with a plain string sort and the
 * last one is chosen, so directory names are presumably expected to sort
 * chronologically (e.g. timestamps) — confirm against the export script.
 *
 * @returns {string} Path of the chosen export directory inside RAW_DIR.
 * @throws {Error} If RAW_DIR does not exist or has no sub-directories.
 */
function getLatestExportDir() {
  if (!fs.existsSync(RAW_DIR)) {
    throw new Error(`Raw export directory not found: ${RAW_DIR}`);
  }

  const exportDirs = fs
    .readdirSync(RAW_DIR)
    .filter((entry) => fs.statSync(path.join(RAW_DIR, entry)).isDirectory())
    .sort();

  // Without this guard path.join would receive `undefined` and fail with an
  // unhelpful "path argument must be of type string" TypeError.
  if (exportDirs.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }

  return path.join(RAW_DIR, exportDirs[exportDirs.length - 1]);
}
|
|
|
|
// HTML sanitization - preserve content but clean dangerous elements
|
|
/**
 * Clean WordPress / WPBakery markup while keeping the readable content.
 *
 * Steps, in order:
 *  1. strip <script> elements and inline `on*` event-handler attributes,
 *  2. turn WPBakery layout shortcodes into plain <div> wrappers,
 *  3. drop every remaining [shortcode],
 *  4. remove empty <p>/<div> elements and collapse whitespace.
 *
 * This is a regex-based best-effort cleanup, not a full HTML sanitizer.
 *
 * @param {string|null|undefined} html Raw HTML, possibly containing shortcodes.
 * @returns {string} Cleaned HTML; '' for empty/missing input.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  let sanitized = html;

  // Security: remove script elements and inline handlers. Handlers may be
  // double-quoted, single-quoted or unquoted, so all three forms are covered.
  sanitized = sanitized.replace(/<script\b[^>]*>.*?<\/script\s*>/gis, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Text blocks first. The trailing \b keeps each pattern from matching a
  // longer shortcode name that merely starts with the same prefix.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // Rows and columns, including their *_inner variants. Opening and closing
  // tags are matched by the same names so the generated <div>s stay balanced.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Any other shortcode is dropped; text between its tags is kept.
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs (single pass, as before).
  sanitized = sanitized.replace(/<p[^>]*>\s*<\/p>/gi, '');
  sanitized = sanitized.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace but preserve HTML structure.
  return sanitized.replace(/\s+/g, ' ').trim();
}
|
|
|
|
// Extract excerpt from content
|
|
/**
 * Build a plain-text excerpt from an HTML fragment.
 *
 * Tags are removed with a simple regex; text longer than `maxLength`
 * characters is cut at that length and suffixed with '...'.
 *
 * @param {string|null|undefined} content HTML to summarize.
 * @param {number} [maxLength=200] Maximum number of characters kept.
 * @returns {string} The excerpt; '' when `content` is empty or missing.
 */
function generateExcerpt(content, maxLength = 200) {
  // Items without a body would otherwise crash on `.replace`.
  if (!content) return '';

  const plainText = content.replace(/<[^>]*>/g, '');
  return plainText.length <= maxLength
    ? plainText
    : `${plainText.substring(0, maxLength)}...`;
}
|
|
|
|
// Process pages
|
|
/**
 * Convert raw English and German pages into one list of page records.
 *
 * English records come first, then German ones. English paths are
 * `/<slug>`, German paths are `/de/<slug>`. A record's `translation` points
 * at the other locale when `translationMapping.pages` has an entry for the
 * page's slug (entries are read as `{ en: <id>, de: <id> }`), else it is null.
 *
 * @param {Array<Object>} pagesEN Raw English pages.
 * @param {Array<Object>} pagesDE Raw German pages.
 * @param {Object} translationMapping Mapping object with a `pages` section.
 * @returns {Array<Object>} Processed page records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  // The mapping may be missing its section (e.g. when the mapping file failed
  // to load and an empty placeholder was passed), so fall back to "no matches".
  const pageTranslations = (translationMapping && translationMapping.pages) || {};

  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (key) =>
    Object.prototype.hasOwnProperty.call(pageTranslations, key) ? pageTranslations[key] : null;

  const toRecord = (page, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(page.slug);
    const contentHtml = sanitizeHTML(page.contentHtml);

    return {
      id: page.id,
      translationKey: page.slug,
      locale,
      slug: page.slug,
      path: locale === 'en' ? `/${page.slug}` : `/de/${page.slug}`,
      title: (page.titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml: page.titleHtml,
      contentHtml,
      // Derive the fallback excerpt from the sanitized content so shortcodes
      // and script bodies never leak into it.
      excerptHtml: page.excerptHtml || generateExcerpt(contentHtml),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...pagesEN.map((page) => toRecord(page, 'en')),
    ...pagesDE.map((page) => toRecord(page, 'de'))
  ];
}
|
|
|
|
// Process posts
|
|
/**
 * Convert raw English and German blog posts into one list of post records.
 *
 * English records come first, then German ones. English paths are
 * `/blog/<slug>`, German paths are `/de/blog/<slug>`. A record's
 * `translation` points at the other locale when `translationMapping.posts`
 * has an entry for the post's slug (entries are read as
 * `{ en: <id>, de: <id> }`), otherwise it is null.
 *
 * @param {Array<Object>} postsEN Raw English posts.
 * @param {Array<Object>} postsDE Raw German posts.
 * @param {Object} translationMapping Mapping object with a `posts` section.
 * @returns {Array<Object>} Processed post records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  // The mapping may be missing its section (e.g. when the mapping file failed
  // to load and an empty placeholder was passed), so fall back to "no matches".
  const postTranslations = (translationMapping && translationMapping.posts) || {};

  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (key) =>
    Object.prototype.hasOwnProperty.call(postTranslations, key) ? postTranslations[key] : null;

  const toRecord = (post, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(post.slug);
    const contentHtml = sanitizeHTML(post.contentHtml);

    return {
      id: post.id,
      translationKey: post.slug,
      locale,
      slug: post.slug,
      path: locale === 'en' ? `/blog/${post.slug}` : `/de/blog/${post.slug}`,
      title: (post.titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml: post.titleHtml,
      contentHtml,
      // Derive the fallback excerpt from the sanitized content so shortcodes
      // and script bodies never leak into it.
      excerptHtml: post.excerptHtml || generateExcerpt(contentHtml),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...postsEN.map((post) => toRecord(post, 'en')),
    ...postsDE.map((post) => toRecord(post, 'de'))
  ];
}
|
|
|
|
// Process products
|
|
/**
 * Convert raw English and German products into one list of product records.
 *
 * English records come first, then German ones. English paths are
 * `/product/<slug>`, German paths are `/de/product/<slug>`. Only
 * `descriptionHtml` is passed through sanitizeHTML; the remaining fields are
 * copied as-is. A record's `translation` points at the other locale when
 * `translationMapping.products` has an entry for the product's slug (entries
 * are read as `{ en: <id>, de: <id> }`), otherwise it is null.
 *
 * @param {Array<Object>} productsEN Raw English products.
 * @param {Array<Object>} productsDE Raw German products.
 * @param {Object} translationMapping Mapping object with a `products` section.
 * @returns {Array<Object>} Processed product records.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  // The mapping may be missing its section (e.g. when the mapping file failed
  // to load and an empty placeholder was passed), so fall back to "no matches".
  const productTranslations = (translationMapping && translationMapping.products) || {};

  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (key) =>
    Object.prototype.hasOwnProperty.call(productTranslations, key) ? productTranslations[key] : null;

  const toRecord = (product, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(product.slug);

    return {
      id: product.id,
      translationKey: product.slug,
      locale,
      slug: product.slug,
      path: locale === 'en' ? `/product/${product.slug}` : `/de/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...productsEN.map((product) => toRecord(product, 'en')),
    ...productsDE.map((product) => toRecord(product, 'de'))
  ];
}
|
|
|
|
// Process product categories
|
|
/**
 * Convert raw English and German product categories into one list.
 *
 * English records come first, then German ones. English paths are
 * `/product-category/<slug>`, German paths are `/de/product-category/<slug>`.
 * A record's `translation` points at the other locale when
 * `translationMapping.productCategories` has an entry for the category's slug
 * (entries are read as `{ en: <id>, de: <id> }`), otherwise it is null.
 *
 * @param {Array<Object>} categoriesEN Raw English categories.
 * @param {Array<Object>} categoriesDE Raw German categories.
 * @param {Object} translationMapping Mapping object with a `productCategories` section.
 * @returns {Array<Object>} Processed category records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  // The mapping may be missing its section (e.g. when the mapping file failed
  // to load and an empty placeholder was passed), so fall back to "no matches".
  const categoryTranslations =
    (translationMapping && translationMapping.productCategories) || {};

  // Own-property lookup so slugs like "constructor" do not hit Object.prototype.
  const findTranslation = (key) =>
    Object.prototype.hasOwnProperty.call(categoryTranslations, key)
      ? categoryTranslations[key]
      : null;

  const toRecord = (category, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(category.slug);

    return {
      id: category.id,
      translationKey: category.slug,
      locale,
      slug: category.slug,
      name: category.name,
      path:
        locale === 'en'
          ? `/product-category/${category.slug}`
          : `/de/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map((category) => toRecord(category, 'en')),
    ...categoriesDE.map((category) => toRecord(category, 'de'))
  ];
}
|
|
|
|
// Process media manifest
|
|
/**
 * Build the media manifest from the raw media list.
 *
 * Each entry keeps id, filename, url, alt, width and height, gains a
 * `localPath` of `/media/<filename>`, and exposes the raw `mime_type`
 * field under the camel-cased key `mimeType`.
 *
 * @param {Array<Object>} media Raw media items.
 * @returns {Array<Object>} One manifest entry per input item, in input order.
 */
function processMedia(media) {
  const toManifestEntry = ({ id, filename, url, alt, width, height, mime_type: mimeType }) => ({
    id,
    filename,
    url,
    localPath: `/media/${filename}`,
    alt,
    width,
    height,
    mimeType
  });

  return media.map((item) => toManifestEntry(item));
}
|
|
|
|
// Generate asset map for URL replacement
|
|
/**
 * Build a lookup from original media URL to local `/media/<filename>` path.
 *
 * Items without a truthy `url` are skipped; if the same URL occurs more than
 * once, the last occurrence wins.
 *
 * @param {Array<Object>} media Raw media items.
 * @returns {Object<string, string>} Map of source URL to local path.
 */
function generateAssetMap(media) {
  return media
    .filter((item) => item.url)
    .reduce((lookup, { url, filename }) => {
      lookup[url] = `/media/${filename}`;
      return lookup;
    }, {});
}
|
|
|
|
// Main processing function
|
|
/**
 * Run the whole pipeline: load the latest raw export, transform every content
 * type, write the combined and per-type JSON files into PROCESSED_DIR and
 * print a summary to the console.
 *
 * A raw file that cannot be read or parsed is reported and replaced by an
 * empty value of the expected shape, so processing continues with the rest.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load raw data. `fallback` is what the caller gets when the file is missing
  // or invalid; it must match the shape the consumer expects (list vs object).
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // Object-shaped files get object fallbacks: an array here would make the
  // per-type lookups (`translationMapping.pages[...]`) throw.
  const translationMapping = loadJSON('translation-mapping-improved.json', {
    pages: {},
    posts: {},
    products: {},
    productCategories: {}
  });
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Combined file first, then one file per content type for easier access.
  const outputs = [
    ['wordpress-data.json', processedData],
    ['pages.json', pages],
    ['posts.json', posts],
    ['products.json', products],
    ['categories.json', categories],
    ['media.json', processedMedia],
    ['asset-map.json', assetMap]
  ];

  const writtenPaths = outputs.map(([fileName, data]) => {
    const filePath = path.join(PROCESSED_DIR, fileName);
    fs.writeFileSync(filePath, JSON.stringify(data, null, 2));
    return filePath;
  });

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  writtenPaths.forEach((filePath, index) => {
    // The last entry carries the blank line that separates the sections.
    const isLast = index === writtenPaths.length - 1;
    console.log(` ${filePath}${isLast ? '\n' : ''}`);
  });

  // Sample data
  if (pages.length > 0) {
    const [samplePage] = pages;
    console.log('📄 Sample Page:');
    console.log(` Title: ${samplePage.title}`);
    console.log(` Path: ${samplePage.path}`);
    console.log(` Locale: ${samplePage.locale}`);
    console.log(` Translation: ${samplePage.translation ? 'Yes' : 'No'}\n`);
  }

  if (posts.length > 0) {
    const [samplePost] = posts;
    console.log('📝 Sample Post:');
    console.log(` Title: ${samplePost.title}`);
    console.log(` Path: ${samplePost.path}`);
    console.log(` Locale: ${samplePost.locale}`);
    console.log(` Date: ${samplePost.datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}
|
|
|
|
// Run the pipeline only when this file is executed directly with Node,
// not when it is loaded from another module via require().
const isDirectRun = require.main === module;
if (isDirectRun) {
  main();
}