#!/usr/bin/env node
/**
 * Post-processes the JSON exports in data/processed:
 *   1. decodes HTML entities in each item's `contentHtml` / `excerptHtml`;
 *   2. replaces numeric media IDs in image attributes (e.g. bg_image="45569")
 *      with the local asset path recorded for that ID.
 *
 * Files that change are rewritten in place.
 */
const fs = require('fs');
const path = require('path');

const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const ASSET_MAP_PATH = path.join(PROCESSED_DIR, 'asset-map.json');

// Load asset map (object of source URL -> local path).
const assetMap = JSON.parse(fs.readFileSync(ASSET_MAP_PATH, 'utf8'));

// Ways a media ID can appear inside a source URL, tried in this order;
// the first pattern that matches wins.
const MEDIA_ID_PATTERNS = [/\/(\d+)-/, /\/(\d+)\./, /id=(\d+)/];

// Create ID to path mapping. When several URLs yield the same ID, the entry
// that appears last in the asset map overwrites the earlier ones.
const idToPath = {};
for (const [wpUrl, localPath] of Object.entries(assetMap)) {
  for (const pattern of MEDIA_ID_PATTERNS) {
    const match = wpUrl.match(pattern);
    if (match) {
      idToPath[match[1]] = localPath;
      break;
    }
  }
}

// Add manual mappings. These override anything derived from the asset map.
Object.assign(idToPath, {
  '45569': '/media/45569-Still-2025-02-10-104337_1.1.1.webp',
  '10648': '/media/10648-low-voltage-scaled.webp',
  '6486': '/media/6486-Low-Voltage.svg',
  '10649': '/media/10649-medium-voltage-scaled.webp',
  '6487': '/media/6487-Medium-Voltage.svg',
  '46786': '/media/46786-na2xsfl2y-rendered.webp',
  '6485': '/media/6485-High-Voltage.svg',
  '46359': '/media/46359-3.webp',
  '6484': '/media/6484-Solar.svg',
  '6527': '/media/6527-high-voltage-category.webp',
  '6519': '/media/6519-solar-category.webp',
  '6521': '/media/6521-low-voltage-category.webp',
  '6517': '/media/6517-medium-voltage-category.webp',
});

console.log('Found', Object.keys(idToPath).length, 'media ID mappings');

// Named entities this script decodes, keyed by the name between `&` and `;`.
// Quotes, the double prime, the en dash and the non-breaking space decode to
// their plain ASCII counterpart so that attribute values end up in the
// attr="value" form the ID replacement below searches for.
// A Map (rather than an object) keeps names such as `constructor` from
// resolving to inherited properties.
const NAMED_ENTITIES = new Map([
  ['nbsp', ' '],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['Prime', '"'], // Double prime (8243)
  ['ndash', '-'],
  ['mdash', '—'],
  ['hellip', '…'],
  ['bull', '•'],
  ['euro', '€'],
  ['copy', '©'],
  ['reg', '®'],
  ['trade', '™'],
  ['deg', '°'],
  ['plusmn', '±'],
  ['times', '×'],
  ['divide', '÷'],
  ['minus', '−'],
  ['cent', '¢'],
  ['pound', '£'],
  ['yen', '¥'],
  ['sect', '§'],
  ['para', '¶'],
  ['micro', 'µ'],
  ['laquo', '«'],
  ['raquo', '»'],
  ['middot', '·'],
]);

// Literal typographic characters (already present in the text, or produced by
// decoding a numeric entity such as &#8221;) that are folded to ASCII.
const TYPOGRAPHIC_TO_ASCII = new Map([
  ['\u00A0', ' '], // non-breaking space
  ['\u2018', "'"], // left single quote
  ['\u2019', "'"], // right single quote
  ['\u201C', '"'], // left double quote
  ['\u201D', '"'], // right double quote
  ['\u2033', '"'], // double prime
  ['\u2013', '-'], // en dash
]);

// One alternation covering decimal (&#8221;), hex (&#x201D;) and named
// (&rdquo;) references, so every reference is decoded exactly once.
const ENTITY_PATTERN = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g;
const TYPOGRAPHIC_PATTERN = /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013]/g;

/**
 * Converts a numeric character reference to its character.
 *
 * @param {number} codePoint - Parsed value of the reference.
 * @param {string} fallback - Text to return when the value is not a valid
 *   Unicode code point (String.fromCodePoint would throw a RangeError).
 * @returns {string} The decoded character, or `fallback`.
 */
function decodeCodePoint(codePoint, fallback) {
  const isValid =
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF,
  // e.g. emoji, instead of truncating them to 16 bits.
  return isValid ? String.fromCodePoint(codePoint) : fallback;
}

/**
 * HTML entity decoding - handles decimal, hex, and named entities.
 *
 * Unknown named entities and out-of-range numeric references are left
 * untouched. After decoding, typographic quotes, the double prime, the en
 * dash and the non-breaking space are replaced by their ASCII counterpart.
 *
 * @param {string} text - Markup to decode.
 * @returns {string} The decoded text; '' when `text` is empty or missing.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  const decoded = text.replace(ENTITY_PATTERN, (match, dec, hex, name) => {
    if (dec !== undefined) {
      return decodeCodePoint(Number.parseInt(dec, 10), match);
    }
    if (hex !== undefined) {
      return decodeCodePoint(Number.parseInt(hex, 16), match);
    }
    return NAMED_ENTITIES.get(name) ?? match;
  });

  return decoded.replace(TYPOGRAPHIC_PATTERN, (char) =>
    TYPOGRAPHIC_TO_ASCII.get(char),
  );
}

// Attributes whose numeric value is a media ID. Excerpts only ever had
// bg_image rewritten, so they keep their own shorter list.
const CONTENT_IMAGE_ATTRIBUTES = [
  'bg_image',
  'background_image',
  'image_url',
  'custom_icon_image',
  'poster',
  'column_background_image',
];
const EXCERPT_IMAGE_ATTRIBUTES = ['bg_image'];

/**
 * Builds a global regex matching `<attribute>="<digits>"` for the given names.
 * The names are deliberately not anchored on a word boundary: a match inside
 * a longer attribute name is rewritten too, as a plain substring search would.
 *
 * @param {string[]} attributes - Attribute names (plain identifiers).
 * @returns {RegExp} Pattern with groups (1) attribute name and (2) media ID.
 */
function buildAttributePattern(attributes) {
  return new RegExp(`(${attributes.join('|')})="(\\d+)"`, 'g');
}

const CONTENT_ATTRIBUTE_PATTERN = buildAttributePattern(CONTENT_IMAGE_ATTRIBUTES);
const EXCERPT_ATTRIBUTE_PATTERN = buildAttributePattern(EXCERPT_IMAGE_ATTRIBUTES);

/**
 * Replaces media IDs in image attributes with their local path.
 * A single pass over the markup looks each ID up in `idToPath`; IDs without
 * a mapping are left as they are.
 *
 * @param {string} html - Markup that has already been entity-decoded.
 * @param {RegExp} attributePattern - Result of buildAttributePattern().
 * @returns {string} Markup with every known ID replaced.
 */
function replaceMediaIds(html, attributePattern) {
  return html.replace(attributePattern, (match, attribute, id) => {
    const localPath = idToPath[id];
    return localPath === undefined ? match : `${attribute}="${localPath}"`;
  });
}

// Process files
const files = ['pages.json', 'posts.json', 'products.json'];

for (const file of files) {
  const filePath = path.join(PROCESSED_DIR, file);
  // A missing export is not an error; there is simply nothing to rewrite.
  if (!fs.existsSync(filePath)) continue;

  const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  let updated = false;
  let updateCount = 0; // items whose IDs were replaced or whose excerpt changed
  let decodeCount = 0; // items whose contentHtml changed through decoding

  for (const item of items) {
    let contentChanged = false;
    let wasDecoded = false;

    if (item.contentHtml) {
      // Decode entities first so attribute quotes are plain ASCII.
      const decoded = decodeHTMLEntities(item.contentHtml);
      if (decoded !== item.contentHtml) {
        wasDecoded = true;
        decodeCount++;
      }

      // Now replace IDs with local paths.
      const rewritten = replaceMediaIds(decoded, CONTENT_ATTRIBUTE_PATTERN);
      if (rewritten !== decoded) contentChanged = true;
      item.contentHtml = rewritten;
    }

    if (item.excerptHtml) {
      const original = item.excerptHtml;
      item.excerptHtml = replaceMediaIds(
        decodeHTMLEntities(original),
        EXCERPT_ATTRIBUTE_PATTERN,
      );
      // Any excerpt difference (decoding or ID replacement) counts as a change.
      if (item.excerptHtml !== original) contentChanged = true;
    }

    if (contentChanged || wasDecoded) {
      updated = true;
      if (contentChanged) updateCount++;
    }
  }

  if (updated) {
    fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
    console.log(`✅ Updated ${file} (${updateCount} replacements, ${decodeCount} decoded)`);
  } else {
    console.log(`ℹ️ No changes for ${file}`);
  }
}
// Verify: report how many local media references the two home pages contain
// and whether any numeric bg_image IDs are left in the EN home page.
const pagesPath = path.join(PROCESSED_DIR, 'pages.json');
// pages.json is optional for the processing step above, so its absence must
// not crash the verification either.
const pagesFileExists = fs.existsSync(pagesPath);
const pages = pagesFileExists
  ? JSON.parse(fs.readFileSync(pagesPath, 'utf8'))
  : [];

// The home pages are identified by these slug/locale pairs.
const homeEn = pages.find(
  (page) => page.slug === 'corporate-3-landing-2' && page.locale === 'en',
);
const homeDe = pages.find(
  (page) => page.slug === 'start' && page.locale === 'de',
);

// Check for remaining IDs (bg_image values that are still purely numeric).
const remainingIds = homeEn?.contentHtml?.match(/bg_image="\d+"/g) || [];

if (!pagesFileExists) {
  console.warn(`⚠️ Skipping verification: ${pagesPath} not found`);
} else {
  console.log('\n✅ Verification:');
  console.log('EN home images:', (homeEn?.contentHtml?.match(/\/media\//g) || []).length);
  console.log('DE home images:', (homeDe?.contentHtml?.match(/\/media\//g) || []).length);
  console.log('Remaining IDs in EN:', remainingIds.length > 0 ? remainingIds : 'None');

  // Show examples: the first three bg_image attributes of the EN home page.
  if (homeEn?.contentHtml) {
    const matches = homeEn.contentHtml.match(/bg_image="[^"]+"/g);
    if (matches) {
      console.log('\nEN bg_image examples:', matches.slice(0, 3));
    }
  }
}