Files
klz-cables.com/scripts/fix-images.js
2025-12-30 16:19:42 +01:00

231 lines
7.7 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
// Directory holding the processed JSON exports, and the asset map inside it.
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const ASSET_MAP_PATH = path.join(PROCESSED_DIR, 'asset-map.json');

// Load asset map (keys are iterated as URLs, values are stored as the replacement paths)
const assetMap = JSON.parse(fs.readFileSync(ASSET_MAP_PATH, 'utf8'));

// Create ID to path mapping.
// The first of these shapes found in a URL supplies the numeric ID:
//   "/<digits>-", then "/<digits>.", then "id=<digits>".
// URLs matching none of them are skipped; a later URL with the same ID overwrites an earlier one.
const idToPath = {};
for (const wpUrl of Object.keys(assetMap)) {
  const idMatch =
    wpUrl.match(/\/(\d+)-/) || wpUrl.match(/\/(\d+)\./) || wpUrl.match(/id=(\d+)/);
  if (idMatch) {
    idToPath[idMatch[1]] = assetMap[wpUrl];
  }
}

// Add manual mappings (applied last, so they override anything derived from the asset map)
Object.assign(idToPath, {
  45569: '/media/45569-Still-2025-02-10-104337_1.1.1.webp',
  10648: '/media/10648-low-voltage-scaled.webp',
  6486: '/media/6486-Low-Voltage.svg',
  10649: '/media/10649-medium-voltage-scaled.webp',
  6487: '/media/6487-Medium-Voltage.svg',
  46786: '/media/46786-na2xsfl2y-rendered.webp',
  6485: '/media/6485-High-Voltage.svg',
  46359: '/media/46359-3.webp',
  6484: '/media/6484-Solar.svg',
  6527: '/media/6527-high-voltage-category.webp',
  6519: '/media/6519-solar-category.webp',
  6521: '/media/6521-low-voltage-category.webp',
  6517: '/media/6517-medium-voltage-category.webp',
});

console.log('Found', Object.keys(idToPath).length, 'media ID mappings');
// HTML entity decoding - handles decimal, hex, and named entities

// Named entities that are decoded, keyed by entity name (without "&" and ";").
// Every character is written as a \u escape so that no invisible or look-alike
// character has to live in this source file.
// NOTE(review): the keys for lsquo, rsquo, ndash and shy were not visible in the
// reviewed source (they rendered as empty strings); they are reconstructed from
// their replacement values ("'", "'", "-", "") - confirm against the original intent.
const NAMED_ENTITIES = new Map([
  ['nbsp', '\u00A0'],
  ['lsquo', '\u2018'],
  ['rsquo', '\u2019'],
  ['ldquo', '\u201C'],
  ['rdquo', '\u201D'],
  ['Prime', '\u2033'], // Double prime (8243)
  ['ndash', '\u2013'],
  ['mdash', '\u2014'],
  ['hellip', '\u2026'],
  ['bull', '\u2022'],
  ['euro', '\u20AC'],
  ['copy', '\u00A9'],
  ['reg', '\u00AE'],
  ['trade', '\u2122'],
  ['deg', '\u00B0'],
  ['plusmn', '\u00B1'],
  ['times', '\u00D7'],
  ['divide', '\u00F7'],
  ['shy', '\u00AD'],
  ['cent', '\u00A2'],
  ['pound', '\u00A3'],
  ['yen', '\u00A5'],
  ['sect', '\u00A7'],
  ['para', '\u00B6'],
  ['micro', '\u00B5'],
  ['laquo', '\u00AB'],
  ['raquo', '\u00BB'],
  ['middot', '\u00B7'],
]);

// Typographic characters that are flattened to plain ASCII after decoding, so that
// attribute values such as bg_image=&#8221;123&#8243; end up in straight quotes.
const CHARACTER_NORMALIZATION = new Map([
  ['\u00A0', ' '], // no-break space
  ['\u2018', "'"], // left single quote (reconstructed key, see note above)
  ['\u2019', "'"], // right single quote (reconstructed key, see note above)
  ['\u201C', '"'], // left double quote
  ['\u201D', '"'], // right double quote
  ['\u2033', '"'], // double prime
  ['\u2013', '-'], // en dash (reconstructed key, see note above)
  ['\u00AD', ''], // soft hyphen (reconstructed key, see note above)
]);
const NORMALIZED_CHARACTERS = /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013\u00AD]/g;

// One entity reference: decimal (&#123;), hex (&#x1F;) or named (&nbsp;).
const ENTITY_REFERENCE = /&(?:#(\d+)|#x([0-9a-fA-F]+)|([a-zA-Z]+));/g;

/**
 * Decodes HTML entity references in `text` and normalizes typographic
 * quotes/spaces to their ASCII equivalents.
 *
 * - Numeric references are decoded by code point, so characters outside the
 *   Basic Multilingual Plane (e.g. emoji) survive intact.
 * - Named references are decoded only when listed in NAMED_ENTITIES; unknown
 *   names (including &amp;, &lt;, &gt;, &quot;) are left untouched so the
 *   markup structure is not altered.
 * - Numeric references beyond U+10FFFF are left untouched instead of being
 *   turned into an unrelated character.
 * - All references are decoded in a single pass, so decoded output is never
 *   decoded a second time.
 *
 * @param {string} text - HTML fragment; any falsy value yields ''.
 * @returns {string} The decoded and normalized text.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  const decoded = text.replace(ENTITY_REFERENCE, (reference, decimal, hex, name) => {
    if (name !== undefined) {
      return NAMED_ENTITIES.has(name) ? NAMED_ENTITIES.get(name) : reference;
    }
    const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex, 16);
    // String.fromCodePoint throws a RangeError above U+10FFFF, so guard first.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
  });

  return decoded.replace(NORMALIZED_CHARACTERS, (char) => CHARACTER_NORMALIZATION.get(char));
}
// Process files
const files = ['pages.json', 'posts.json', 'products.json'];

// contentHtml: any of these attributes holding a bare numeric media ID in straight quotes.
const CONTENT_ID_ATTRIBUTE =
  /(bg_image|background_image|image_url|custom_icon_image|poster|column_background_image)="(\d+)"/g;
// contentHtml: bg_image whose value is still wrapped in typographic quotes
// (U+201D to open, U+201D or U+2033 to close).
const CONTENT_TYPOGRAPHIC_BG_IMAGE = /(bg_image)=\u201D(\d+)[\u201D\u2033]/g;
// excerptHtml: only bg_image is rewritten; the closing quote may be straight or U+2033.
const EXCERPT_BG_IMAGE = /(bg_image)="(\d+)["\u2033]/g;

/**
 * Rewrites `attribute="<id>"` occurrences to `attribute="<local path>"` in one pass.
 *
 * @param {string} html - Markup to scan.
 * @param {RegExp} pattern - Global regex capturing (1) the attribute name and (2) the numeric ID.
 * @returns {string} `html` with every ID that has an entry in idToPath replaced;
 *   IDs without an entry are left exactly as they were.
 */
function replaceMediaIds(html, pattern) {
  return html.replace(pattern, (match, attribute, id) =>
    Object.prototype.hasOwnProperty.call(idToPath, id) ? `${attribute}="${idToPath[id]}"` : match
  );
}

files.forEach((file) => {
  const filePath = path.join(PROCESSED_DIR, file);
  if (!fs.existsSync(filePath)) return;

  const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  let updateCount = 0; // items in which at least one media ID was replaced
  let decodeCount = 0; // items in which entity decoding changed a field

  items.forEach((item) => {
    let replaced = false;
    let decoded = false;

    if (item.contentHtml) {
      // Decode entities first so attribute quotes are in their plain form
      const decodedHtml = decodeHTMLEntities(item.contentHtml);
      // Now replace IDs with local paths
      const rewrittenHtml = replaceMediaIds(
        replaceMediaIds(decodedHtml, CONTENT_ID_ATTRIBUTE),
        CONTENT_TYPOGRAPHIC_BG_IMAGE
      );
      decoded = decodedHtml !== item.contentHtml;
      replaced = rewrittenHtml !== decodedHtml;
      item.contentHtml = rewrittenHtml;
    }

    if (item.excerptHtml) {
      const decodedExcerpt = decodeHTMLEntities(item.excerptHtml);
      const rewrittenExcerpt = replaceMediaIds(decodedExcerpt, EXCERPT_BG_IMAGE);
      // A decode-only change is reported as "decoded", not as a replacement.
      decoded = decoded || decodedExcerpt !== item.excerptHtml;
      replaced = replaced || rewrittenExcerpt !== decodedExcerpt;
      item.excerptHtml = rewrittenExcerpt;
    }

    if (decoded) decodeCount++;
    if (replaced) updateCount++;
  });

  // Only rewrite the file when at least one item actually changed.
  if (updateCount > 0 || decodeCount > 0) {
    fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
    console.log(`✅ Updated ${file} (${updateCount} replacements, ${decodeCount} decoded)`);
  } else {
    console.log(` No changes for ${file}`);
  }
});
// Verify: re-read pages.json from disk and report on the two home pages.
const pages = JSON.parse(fs.readFileSync(path.join(PROCESSED_DIR, 'pages.json'), 'utf8'));
const homeEn = pages.find((page) => page.locale === 'en' && page.slug === 'corporate-3-landing-2');
const homeDe = pages.find((page) => page.locale === 'de' && page.slug === 'start');

// Number of "/media/" occurrences in a page's contentHtml (0 when the page or field is missing).
const countMediaRefs = (page) => {
  const hits = page?.contentHtml?.match(/\/media\//g);
  return hits ? hits.length : 0;
};

console.log('\n✅ Verification:');
console.log('EN home images:', countMediaRefs(homeEn));
console.log('DE home images:', countMediaRefs(homeDe));

// Check for remaining IDs: bg_image attributes that still hold a bare number
const remainingIds = homeEn?.contentHtml?.match(/bg_image="\d+"/g) || [];
console.log('Remaining IDs in EN:', remainingIds.length > 0 ? remainingIds : 'None');

// Show examples: up to three bg_image attributes from the EN home page
const bgImageExamples = homeEn?.contentHtml
  ? homeEn.contentHtml.match(/bg_image="[^"]+"/g)
  : null;
if (bgImageExamples) {
  console.log('\nEN bg_image examples:', bgImageExamples.slice(0, 3));
}