migration wip
This commit is contained in:
230
scripts/fix-images.js
Normal file
230
scripts/fix-images.js
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Locations of the processed export data this script rewrites in place.
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const ASSET_MAP_PATH = path.join(PROCESSED_DIR, 'asset-map.json');

// asset-map.json is read as an object whose keys are source URLs and whose
// values are the local paths they were downloaded to.
const assetMap = JSON.parse(fs.readFileSync(ASSET_MAP_PATH, 'utf8'));

// Ways a numeric media ID can show up inside a source URL, tried in order:
// "/<id>-", "/<id>." and "id=<id>". Only the first matching form is used.
const MEDIA_ID_PATTERNS = [/\/(\d+)-/, /\/(\d+)\./, /id=(\d+)/];

// Media ID (as a string) -> local path. Later URLs that yield the same ID
// overwrite earlier ones.
const idToPath = {};
Object.entries(assetMap).forEach(([sourceUrl, mediaPath]) => {
  const firstHit = MEDIA_ID_PATTERNS
    .map((idPattern) => sourceUrl.match(idPattern))
    .find((hit) => hit !== null);

  if (firstHit) {
    idToPath[firstHit[1]] = mediaPath;
  }
});

// Hand-maintained overrides; these win over anything derived from the asset map.
Object.assign(idToPath, {
  '45569': '/media/45569-Still-2025-02-10-104337_1.1.1.webp',
  '10648': '/media/10648-low-voltage-scaled.webp',
  '6486': '/media/6486-Low-Voltage.svg',
  '10649': '/media/10649-medium-voltage-scaled.webp',
  '6487': '/media/6487-Medium-Voltage.svg',
  '46786': '/media/46786-na2xsfl2y-rendered.webp',
  '6485': '/media/6485-High-Voltage.svg',
  '46359': '/media/46359-3.webp',
  '6484': '/media/6484-Solar.svg',
  '6527': '/media/6527-high-voltage-category.webp',
  '6519': '/media/6519-solar-category.webp',
  '6521': '/media/6521-low-voltage-category.webp',
  '6517': '/media/6517-medium-voltage-category.webp',
});

console.log('Found', Object.keys(idToPath).length, 'media ID mappings');
|
||||
|
||||
// Named entities decodeHTMLEntities understands, keyed by bare entity name.
// Quote-like and dash-like entities map straight to their ASCII stand-ins so
// shortcode attributes end up delimited by plain quotes.
// amp/lt/gt/quot/apos are deliberately absent: the original table did not
// decode them either (presumably because the output is still treated as HTML
// and decoding them would change the markup - confirm before adding).
const NAMED_ENTITY_CHARS = new Map([
  ['nbsp', ' '],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['Prime', '"'], // double prime (U+2033)
  ['ndash', '-'],
  ['mdash', '\u2014'],
  ['hellip', '\u2026'],
  ['bull', '\u2022'],
  ['euro', '\u20AC'],
  ['copy', '\u00A9'],
  ['reg', '\u00AE'],
  ['trade', '\u2122'],
  ['deg', '\u00B0'],
  ['plusmn', '\u00B1'],
  ['times', '\u00D7'],
  ['divide', '\u00F7'],
  ['minus', '\u2212'],
  ['cent', '\u00A2'],
  ['pound', '\u00A3'],
  ['yen', '\u00A5'],
  ['sect', '\u00A7'],
  ['para', '\u00B6'],
  ['micro', '\u00B5'],
  ['laquo', '\u00AB'],
  ['raquo', '\u00BB'],
  ['middot', '\u00B7'],
]);

// Literal typographic characters that are normalised to ASCII after decoding.
const TYPOGRAPHIC_CHARS = new Map([
  ['\u00A0', ' '], // no-break space (what &nbsp; decodes to)
  ['\u2018', "'"], // left single quotation mark
  ['\u2019', "'"], // right single quotation mark
  ['\u201C', '"'], // left double quotation mark
  ['\u201D', '"'], // right double quotation mark
  ['\u2033', '"'], // double prime (&#8243;)
  ['\u2013', '-'], // en dash
]);
const TYPOGRAPHIC_PATTERN = /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013]/g;

// One alternation for decimal (&#NNN;), hex (&#xHHH;) and named (&name;)
// references so every reference is decoded exactly once.
const ENTITY_PATTERN = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g;

/**
 * Decode HTML character references in `text` and normalise typographic
 * quotes/dashes to ASCII.
 *
 * - Numeric references are decoded with String.fromCodePoint, so characters
 *   beyond U+FFFF (e.g. emoji) survive. A reference above U+10FFFF is not a
 *   valid code point and is left untouched.
 * - Named references are decoded only when listed in NAMED_ENTITY_CHARS;
 *   unknown names are left untouched.
 * - References are decoded in a single pass, so text produced by decoding is
 *   never decoded a second time.
 * - Finally, literal curly quotes, double prime, en dash and no-break space
 *   are replaced by their ASCII equivalents.
 *
 * @param {string} text - Text that may contain character references.
 * @returns {string} The decoded text, or '' when `text` is falsy.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  const decoded = text.replace(ENTITY_PATTERN, (match, dec, hex, name) => {
    if (name !== undefined) {
      return NAMED_ENTITY_CHARS.has(name) ? NAMED_ENTITY_CHARS.get(name) : match;
    }

    const codePoint = dec !== undefined
      ? Number.parseInt(dec, 10)
      : Number.parseInt(hex, 16);

    // String.fromCodePoint throws a RangeError above U+10FFFF.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });

  return decoded.replace(TYPOGRAPHIC_PATTERN, (char) => TYPOGRAPHIC_CHARS.get(char));
}
|
||||
|
||||
// Process files
const files = ['pages.json', 'posts.json', 'products.json'];

/**
 * Apply literal search/replace pairs to `html`, in order.
 *
 * @param {string} html - Text to rewrite.
 * @param {Array<[string, string]>} swaps - [needle, substitute] pairs.
 * @returns {{html: string, touched: boolean}} The rewritten text, and whether
 *   any needle was present (true even if the substitute equals the needle).
 */
function applyLiteralSwaps(html, swaps) {
  let current = html;
  let touched = false;

  for (const [needle, substitute] of swaps) {
    if (!current.includes(needle)) continue;
    current = current.split(needle).join(substitute);
    touched = true;
  }

  return { html: current, touched };
}

/**
 * Search/replace pairs used on contentHtml for one media ID: six shortcode
 * attributes with plain quotes, then three bg_image quote variants.
 */
function contentSwapsFor(id, localPath) {
  const attributeNames = [
    'bg_image',
    'background_image',
    'image_url',
    'custom_icon_image',
    'poster',
    'column_background_image',
  ];
  const swaps = attributeNames.map((attribute) => [
    `${attribute}="${id}"`,
    `${attribute}="${localPath}"`,
  ]);

  // Quote variants of bg_image; every variant is rewritten to plain quotes.
  const bgImageTarget = `bg_image="${localPath}"`;
  swaps.push(
    [`bg_image=”${id}″`, bgImageTarget],
    [`bg_image=”${id}”`, bgImageTarget],
    [`bg_image="${id}"`, bgImageTarget],
  );

  return swaps;
}

/**
 * Search/replace pairs used on excerptHtml for one media ID. Only bg_image is
 * handled here; repeated entries mirror the original pattern list.
 */
function excerptSwapsFor(id, localPath) {
  const bgImageTarget = `bg_image="${localPath}"`;

  return [
    [`bg_image="${id}"`, bgImageTarget], // Standard pattern
    [`bg_image="${id}"`, bgImageTarget], // Already decoded
    [`bg_image="${id}″`, bgImageTarget], // Opening regular, closing double prime
    [`bg_image="${id}"`, bgImageTarget], // Both regular
  ];
}

for (const file of files) {
  const filePath = path.join(PROCESSED_DIR, file);
  if (!fs.existsSync(filePath)) continue;

  const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  let fileDirty = false;
  let updateCount = 0;
  let decodeCount = 0;

  items.forEach((item) => {
    let contentChanged = false;
    let wasDecoded = false;

    if (item.contentHtml) {
      // Decode entities first so the attribute quotes are plain ASCII.
      const decodedContent = decodeHTMLEntities(item.contentHtml);
      if (decodedContent !== item.contentHtml) {
        wasDecoded = true;
        decodeCount += 1;
      }
      item.contentHtml = decodedContent;

      // Then swap bare media IDs for their local paths.
      for (const [id, localPath] of Object.entries(idToPath)) {
        const outcome = applyLiteralSwaps(item.contentHtml, contentSwapsFor(id, localPath));
        item.contentHtml = outcome.html;
        if (outcome.touched) contentChanged = true;
      }
    }

    if (item.excerptHtml) {
      const excerptBefore = item.excerptHtml;
      item.excerptHtml = decodeHTMLEntities(excerptBefore);

      for (const [id, localPath] of Object.entries(idToPath)) {
        const outcome = applyLiteralSwaps(item.excerptHtml, excerptSwapsFor(id, localPath));
        item.excerptHtml = outcome.html;
        if (outcome.touched) contentChanged = true;
      }

      // Any difference in the excerpt (decoding included) counts as a content change.
      if (item.excerptHtml !== excerptBefore) contentChanged = true;
    }

    if (!contentChanged && !wasDecoded) return;

    fileDirty = true;
    if (contentChanged) updateCount += 1;
  });

  if (fileDirty) {
    fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
    console.log(`✅ Updated ${file} (${updateCount} replacements, ${decodeCount} decoded)`);
  } else {
    console.log(`ℹ️ No changes for ${file}`);
  }
}
|
||||
|
||||
// Verify
// Re-read pages.json from disk and report how many /media/ references the
// EN and DE home pages now contain, plus any bg_image attributes on the EN
// home page that still hold a bare numeric ID.
const pagesPath = path.join(PROCESSED_DIR, 'pages.json');

if (!fs.existsSync(pagesPath)) {
  // The processing loop above tolerates a missing pages.json, so verification
  // must not crash with ENOENT in that case either.
  console.log(`\nVerification skipped: ${pagesPath} not found`);
} else {
  const pages = JSON.parse(fs.readFileSync(pagesPath, 'utf8'));
  const homeEn = pages.find((p) => p.slug === 'corporate-3-landing-2' && p.locale === 'en');
  const homeDe = pages.find((p) => p.slug === 'start' && p.locale === 'de');

  console.log('\n✅ Verification:');

  // Without these warnings a missing page would print "0" and "None" below,
  // which reads like a clean result.
  if (!homeEn) console.warn('EN home page (slug "corporate-3-landing-2") not found in pages.json');
  if (!homeDe) console.warn('DE home page (slug "start") not found in pages.json');

  console.log('EN home images:', (homeEn?.contentHtml?.match(/\/media\//g) || []).length);
  console.log('DE home images:', (homeDe?.contentHtml?.match(/\/media\//g) || []).length);

  // Check for remaining IDs
  const remainingIds = homeEn?.contentHtml?.match(/bg_image="\d+"/g) || [];
  console.log('Remaining IDs in EN:', remainingIds.length > 0 ? remainingIds : 'None');

  // Show examples
  const examples = homeEn?.contentHtml?.match(/bg_image="[^"]+"/g);
  if (examples) {
    console.log('\nEN bg_image examples:', examples.slice(0, 3));
  }
}
|
||||
@@ -27,52 +27,83 @@ function getLatestExportDir() {
|
||||
return path.join(RAW_DIR, dirs[0]);
|
||||
}
|
||||
|
||||
// Decode HTML entities in text
|
||||
// Decode HTML entities in text - comprehensive handling
|
||||
function decodeHTMLEntities(text) {
|
||||
if (!text) return '';
|
||||
|
||||
return text
|
||||
// Decode numeric HTML entities first
|
||||
.replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec))
|
||||
.replace(/&#x([0-9a-fA-F]+);/g, (match, hex) => String.fromCharCode(parseInt(hex, 16)))
|
||||
|
||||
// Handle common named entities
|
||||
.replace(/ /g, ' ')
|
||||
.replace(/&/g, '&')
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, "'")
|
||||
.replace(/‘/g, "'")
|
||||
.replace(/’/g, "'")
|
||||
.replace(/“/g, '"')
|
||||
.replace(/”/g, '"')
|
||||
.replace(/–/g, '-')
|
||||
.replace(/—/g, '—')
|
||||
.replace(/…/g, '…')
|
||||
.replace(/•/g, '•')
|
||||
.replace(/€/g, '€')
|
||||
|
||||
// Handle Unicode characters that might appear
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, "'")
|
||||
.replace(/‘/g, "'")
|
||||
.replace(/’/g, "'")
|
||||
.replace(/“/g, '"')
|
||||
.replace(/”/g, '"')
|
||||
.replace(/–/g, '-') // En dash
|
||||
.replace(/—/g, '—') // Em dash
|
||||
.replace(/…/g, '…') // Ellipsis
|
||||
.replace(/•/g, '•') // Bullet
|
||||
.replace(/€/g, '€'); // Euro
|
||||
// First, handle numeric entities (decimal and hex)
|
||||
let result = text
|
||||
.replace(/&#(\d+);/g, (match, dec) => {
|
||||
const char = String.fromCharCode(parseInt(dec, 10));
|
||||
return char;
|
||||
})
|
||||
.replace(/&#x([0-9a-fA-F]+);/g, (match, hex) => {
|
||||
const char = String.fromCharCode(parseInt(hex, 16));
|
||||
return char;
|
||||
});
|
||||
|
||||
// Handle common named entities and Unicode characters
|
||||
const entityMap = {
|
||||
' ': ' ',
|
||||
'‘': "'",
|
||||
'’': "'",
|
||||
'“': '"',
|
||||
'”': '"',
|
||||
'″': '"', // Double prime (8243)
|
||||
'–': '-',
|
||||
'—': '—',
|
||||
'…': '…',
|
||||
'•': '•',
|
||||
'€': '€',
|
||||
'©': '©',
|
||||
'®': '®',
|
||||
'™': '™',
|
||||
'°': '°',
|
||||
'±': '±',
|
||||
'×': '×',
|
||||
'÷': '÷',
|
||||
'−': '−',
|
||||
'¢': '¢',
|
||||
'£': '£',
|
||||
'¥': '¥',
|
||||
'§': '§',
|
||||
'¶': '¶',
|
||||
'µ': 'µ',
|
||||
'«': '«',
|
||||
'»': '»',
|
||||
'·': '·'
|
||||
};
|
||||
|
||||
// Replace all named entities
|
||||
for (const [entity, char] of Object.entries(entityMap)) {
|
||||
result = result.replace(new RegExp(entity, 'g'), char);
|
||||
}
|
||||
|
||||
// Clean up any remaining ampersand patterns
|
||||
result = result.replace(/&([a-zA-Z]+);/g, (match, name) => {
|
||||
// If it's not in our map, try to decode it or leave as is
|
||||
return entityMap[`&${name};`] || match;
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// HTML sanitization - preserve content but clean dangerous elements
|
||||
// Also preserves bg_image attributes for later processing by fix-images.js
|
||||
function sanitizeHTML(html) {
|
||||
if (!html) return '';
|
||||
|
||||
let sanitized = html;
|
||||
|
||||
// Temporarily preserve bg_image attributes by replacing them with placeholders
|
||||
// Handle both regular quotes and Unicode quotes
|
||||
const bgImagePlaceholders = [];
|
||||
sanitized = sanitized.replace(/(bg_image=)(["”])([^"”]*?)["”]/gi, (match) => {
|
||||
const placeholder = `__BG_IMAGE_${bgImagePlaceholders.length}__`;
|
||||
bgImagePlaceholders.push(match);
|
||||
return placeholder;
|
||||
});
|
||||
|
||||
// Remove script tags and inline handlers (security)
|
||||
sanitized = sanitized.replace(/<script.*?>.*?<\/script>/gis, '');
|
||||
sanitized = sanitized.replace(/\son\w+=".*?"/gi, '');
|
||||
@@ -103,6 +134,11 @@ function sanitizeHTML(html) {
|
||||
// Normalize whitespace but preserve HTML structure
|
||||
sanitized = sanitized.replace(/\s+/g, ' ').trim();
|
||||
|
||||
// Restore bg_image placeholders
|
||||
bgImagePlaceholders.forEach((placeholder, index) => {
|
||||
sanitized = sanitized.replace(`__BG_IMAGE_${index}__`, placeholder);
|
||||
});
|
||||
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
@@ -115,15 +151,29 @@ function processExcerptShortcodes(excerptHtml) {
|
||||
// First, decode HTML entities to regular characters
|
||||
processed = decodeHTMLEntities(processed);
|
||||
|
||||
// Temporarily preserve bg_image attributes (handle both regular and Unicode quotes)
|
||||
const bgImagePlaceholders = [];
|
||||
processed = processed.replace(/(bg_image=)(["”])([^"”]*?)["”]/gi, (match) => {
|
||||
const placeholder = `__BG_IMAGE_${bgImagePlaceholders.length}__`;
|
||||
bgImagePlaceholders.push(match);
|
||||
return placeholder;
|
||||
});
|
||||
|
||||
// Process WPBakery shortcodes with HTML entities
|
||||
processed = processed
|
||||
// vc_row - convert to div with classes (handle both complete and truncated)
|
||||
// Preserve any placeholders in the attributes
|
||||
.replace(/\[vc_row([^\]]*)\]/gi, (match, attrs) => {
|
||||
const classes = ['vc-row'];
|
||||
if (attrs.includes('full_width_background')) classes.push('full-width-bg');
|
||||
if (attrs.includes('in_container')) classes.push('in-container');
|
||||
if (attrs.includes('full_width_content')) classes.push('full-width-content');
|
||||
return `<div class="${classes.join(' ')}">`;
|
||||
|
||||
// Extract and preserve placeholders from attrs
|
||||
const placeholderMatches = attrs.match(/__BG_IMAGE_\d+__/g) || [];
|
||||
const preservedAttrs = placeholderMatches.join(' ');
|
||||
|
||||
return `<div class="${classes.join(' ')}" ${preservedAttrs}>`;
|
||||
})
|
||||
// Handle truncated vc_row (no closing bracket)
|
||||
.replace(/\[vc_row([^\]]*)$/gi, (match, attrs) => {
|
||||
@@ -131,7 +181,12 @@ function processExcerptShortcodes(excerptHtml) {
|
||||
if (attrs.includes('full_width_background')) classes.push('full-width-bg');
|
||||
if (attrs.includes('in_container')) classes.push('in-container');
|
||||
if (attrs.includes('full_width_content')) classes.push('full-width-content');
|
||||
return `<div class="${classes.join(' ')}">`;
|
||||
|
||||
// Extract and preserve placeholders from attrs
|
||||
const placeholderMatches = attrs.match(/__BG_IMAGE_\d+__/g) || [];
|
||||
const preservedAttrs = placeholderMatches.join(' ');
|
||||
|
||||
return `<div class="${classes.join(' ')}" ${preservedAttrs}>`;
|
||||
})
|
||||
.replace(/\[\/vc_row\]/gi, '</div>')
|
||||
|
||||
@@ -172,15 +227,15 @@ function processExcerptShortcodes(excerptHtml) {
|
||||
.replace(/\[\/vc_column_text\]/gi, '</div>')
|
||||
|
||||
// nectar_cta - convert to button
|
||||
.replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
|
||||
.replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
|
||||
'<a href="$4" class="nectar-cta">$2</a>')
|
||||
|
||||
// nectar_highlighted_text - convert to span
|
||||
.replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
|
||||
.replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
|
||||
'<span class="nectar-highlighted">$2</span>')
|
||||
|
||||
// nectar_responsive_text - convert to span
|
||||
.replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
|
||||
.replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
|
||||
'<span class="nectar-responsive">$2</span>')
|
||||
|
||||
// nectar_icon_list - convert to ul
|
||||
@@ -188,15 +243,15 @@ function processExcerptShortcodes(excerptHtml) {
|
||||
.replace(/\[\/nectar_icon_list\]/gi, '</ul>')
|
||||
|
||||
// nectar_icon_list_item - convert to li
|
||||
.replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
|
||||
.replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
|
||||
'<li><strong>$2</strong>: $4</li>')
|
||||
|
||||
// nectar_btn - convert to button
|
||||
.replace(/\[nectar_btn([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
|
||||
.replace(/\[nectar_btn([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
|
||||
'<a href="$4" class="nectar-btn">$2</a>')
|
||||
|
||||
// split_line_heading - convert to heading
|
||||
.replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
|
||||
.replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
|
||||
'<h2 class="split-line-heading">$2</h2>')
|
||||
|
||||
// vc_row_inner - convert to div
|
||||
@@ -229,6 +284,11 @@ function processExcerptShortcodes(excerptHtml) {
|
||||
// Normalize whitespace
|
||||
processed = processed.replace(/\s+/g, ' ').trim();
|
||||
|
||||
// Restore bg_image placeholders
|
||||
bgImagePlaceholders.forEach((placeholder, index) => {
|
||||
processed = processed.replace(`__BG_IMAGE_${index}__`, placeholder);
|
||||
});
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
@@ -498,7 +558,7 @@ function main() {
|
||||
}
|
||||
};
|
||||
|
||||
const translationMapping = loadJSON('translation-mapping-improved.json');
|
||||
const translationMapping = loadJSON('translation-mapping.json');
|
||||
const pagesEN = loadJSON('pages.en.json');
|
||||
const pagesDE = loadJSON('pages.de.json');
|
||||
const postsEN = loadJSON('posts.en.json');
|
||||
|
||||
1177
scripts/wordpress-export-enhanced.js
Normal file
1177
scripts/wordpress-export-enhanced.js
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user