klz-cables.com/scripts/fix-images.js

#!/usr/bin/env node

const fs = require('fs');
const path = require('path');

const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');
const ASSET_MAP_PATH = path.join(PROCESSED_DIR, 'asset-map.json');

// Read and parse the asset map; its entries are iterated below as
// (wpUrl, localPath) pairs.
const assetMap = JSON.parse(fs.readFileSync(ASSET_MAP_PATH, 'utf8'));

// Places where a numeric media ID may appear inside an asset-map key.
// They are tried in this order and the first one that matches wins.
const MEDIA_ID_PATTERNS = [/\/(\d+)-/, /\/(\d+)\./, /id=(\d+)/];

// Media ID (string of digits) -> local path. When several asset-map keys
// yield the same ID, the entry visited last overwrites the earlier ones.
const idToPath = {};
Object.keys(assetMap).forEach((wpUrl) => {
  const firstHit = MEDIA_ID_PATTERNS.reduce(
    (found, pattern) => found || wpUrl.match(pattern),
    null
  );
  if (firstHit) {
    idToPath[firstHit[1]] = assetMap[wpUrl];
  }
});

// Hand-maintained mappings. They are applied after the derived ones, so they
// take precedence over anything extracted from the asset map.
const MANUAL_MAPPINGS = [
  ['45569', '/media/45569-Still-2025-02-10-104337_1.1.1.webp'],
  ['10648', '/media/10648-low-voltage-scaled.webp'],
  ['6486', '/media/6486-Low-Voltage.svg'],
  ['10649', '/media/10649-medium-voltage-scaled.webp'],
  ['6487', '/media/6487-Medium-Voltage.svg'],
  ['46786', '/media/46786-na2xsfl2y-rendered.webp'],
  ['6485', '/media/6485-High-Voltage.svg'],
  ['46359', '/media/46359-3.webp'],
  ['6484', '/media/6484-Solar.svg'],
  ['6527', '/media/6527-high-voltage-category.webp'],
  ['6519', '/media/6519-solar-category.webp'],
  ['6521', '/media/6521-low-voltage-category.webp'],
  ['6517', '/media/6517-medium-voltage-category.webp'],
];
for (const [mediaId, mediaPath] of MANUAL_MAPPINGS) {
  idToPath[mediaId] = mediaPath;
}

console.log('Found', Object.keys(idToPath).length, 'media ID mappings');

// HTML entity decoding - handles decimal, hex, and named entities

// Named entities this script decodes, keyed by the name between "&" and ";".
// Names are case-sensitive. Anything not listed here (including &amp;, &lt;,
// &gt; and &quot;) is left encoded in the output.
const NAMED_ENTITY_CHARS = new Map([
  ['nbsp', '\u00A0'],
  ['lsquo', '\u2018'],
  ['rsquo', '\u2019'],
  ['ldquo', '\u201C'],
  ['rdquo', '\u201D'],
  ['Prime', '\u2033'],
  ['ndash', '\u2013'],
  ['mdash', '\u2014'],
  ['hellip', '\u2026'],
  ['bull', '\u2022'],
  ['euro', '\u20AC'],
  ['copy', '\u00A9'],
  ['reg', '\u00AE'],
  ['trade', '\u2122'],
  ['deg', '\u00B0'],
  ['plusmn', '\u00B1'],
  ['times', '\u00D7'],
  ['divide', '\u00F7'],
  ['minus', '\u2212'],
  ['cent', '\u00A2'],
  ['pound', '\u00A3'],
  ['yen', '\u00A5'],
  ['sect', '\u00A7'],
  ['para', '\u00B6'],
  ['micro', '\u00B5'],
  ['laquo', '\u00AB'],
  ['raquo', '\u00BB'],
  ['middot', '\u00B7'],
]);

// Typographic characters that are folded to plain ASCII after decoding, so
// that attribute values wrapped in curly quotes or a double prime end up
// delimited by regular double quotes.
const ASCII_FOLDS = new Map([
  ['\u00A0', ' '], // non-breaking space
  ['\u2018', "'"], // left single quote
  ['\u2019', "'"], // right single quote
  ['\u201C', '"'], // left double quote
  ['\u201D', '"'], // right double quote
  ['\u2033', '"'], // double prime (8243)
  ['\u2013', '-'], // en dash
]);
const ASCII_FOLD_PATTERN = /[\u00A0\u2018\u2019\u201C\u201D\u2033\u2013]/g;

// Largest valid Unicode code point; String.fromCodePoint throws above it.
const MAX_CODE_POINT = 0x10ffff;

// Turns a parsed numeric reference into its character, or returns `fallback`
// (the original entity text) when the number is not a valid code point.
function decodeCodePoint(codePoint, fallback) {
  return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : fallback;
}

/**
 * Decodes HTML character references in `text` and normalises typographic
 * quotes to ASCII.
 *
 * Steps, in order:
 *   1. decimal references (`&#8221;`),
 *   2. hexadecimal references (`&#x201D;` / `&#X201D;`),
 *   3. the named entities listed in NAMED_ENTITY_CHARS,
 *   4. folding of the characters in ASCII_FOLDS to their ASCII equivalents.
 *
 * Numeric references are decoded with String.fromCodePoint so characters
 * outside the Basic Multilingual Plane (e.g. emoji) are preserved instead of
 * being truncated to 16 bits. References that do not denote a valid code point
 * and unknown named entities are left untouched.
 *
 * @param {string} text - HTML or text that may contain character references.
 * @returns {string} The decoded text, or '' when `text` is falsy.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  return text
    .replace(/&#(\d+);/g, (match, dec) => decodeCodePoint(Number.parseInt(dec, 10), match))
    .replace(/&#[xX]([0-9a-fA-F]+);/g, (match, hex) => decodeCodePoint(Number.parseInt(hex, 16), match))
    .replace(/&([a-zA-Z]+);/g, (match, name) =>
      NAMED_ENTITY_CHARS.has(name) ? NAMED_ENTITY_CHARS.get(name) : match
    )
    .replace(ASCII_FOLD_PATTERN, (char) => ASCII_FOLDS.get(char));
}

// Process files
const files = ['pages.json', 'posts.json', 'products.json'];

// Item fields that are rewritten, with the attributes whose numeric media ID
// is swapped for a local path. Excerpts only have `bg_image` rewritten.
const HTML_FIELDS = [
  {
    field: 'contentHtml',
    attributes: [
      'bg_image',
      'background_image',
      'image_url',
      'custom_icon_image',
      'poster',
      'column_background_image',
    ],
  },
  { field: 'excerptHtml', attributes: ['bg_image'] },
];

/**
 * Replaces `attribute="<id>"` with `attribute="<local path>"` for every listed
 * attribute whose all-digit value is a key of `mapping`.
 *
 * One regex pass replaces the former per-ID substring scans. Values that are
 * not known IDs are left untouched. The replacement is produced by a callback,
 * so characters such as `$` in a path are inserted literally.
 *
 * @param {string} html - Already entity-decoded markup.
 * @param {string[]} attributes - Attribute names to rewrite.
 * @param {Object<string, string>} mapping - Media ID -> local path.
 * @returns {string} The markup with known IDs replaced.
 */
function replaceImageIds(html, attributes, mapping) {
  const pattern = new RegExp('(' + attributes.join('|') + ')="(\\d+)"', 'g');
  return html.replace(pattern, (match, attribute, id) =>
    Object.prototype.hasOwnProperty.call(mapping, id)
      ? attribute + '="' + mapping[id] + '"'
      : match
  );
}

files.forEach(file => {
  const filePath = path.join(PROCESSED_DIR, file);
  // Missing inputs are skipped silently.
  if (!fs.existsSync(filePath)) return;

  const items = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  let updateCount = 0; // items in which at least one image ID was replaced
  let decodeCount = 0; // items in which entity decoding changed content or excerpt

  items.forEach(item => {
    let replaced = false;
    let decoded = false;

    for (const { field, attributes } of HTML_FIELDS) {
      const original = item[field];
      if (!original) continue;

      // Decode first: encoded or curly quotes around attribute values become
      // regular double quotes, so only the `attr="id"` form has to be matched.
      const decodedHtml = decodeHTMLEntities(original);
      const rewritten = replaceImageIds(decodedHtml, attributes, idToPath);

      if (decodedHtml !== original) decoded = true;
      if (rewritten !== decodedHtml) replaced = true;
      item[field] = rewritten;
    }

    if (replaced) updateCount++;
    if (decoded) decodeCount++;
  });

  // Rewrite the file only when at least one item actually changed.
  if (updateCount > 0 || decodeCount > 0) {
    fs.writeFileSync(filePath, JSON.stringify(items, null, 2));
    console.log('✅ Updated ' + file + ' (' + updateCount + ' replacements, ' + decodeCount + ' decoded)');
  } else {
    console.log('ℹ️  No changes for ' + file);
  }
});

// Verify
// Spot-check the two home pages after processing. pages.json is optional for
// the processing loop, so its absence must not crash the script here either.
const pagesPath = path.join(PROCESSED_DIR, 'pages.json');
const pages = fs.existsSync(pagesPath)
  ? JSON.parse(fs.readFileSync(pagesPath, 'utf8'))
  : null;

if (pages === null) {
  console.log('\nℹ️  Skipping verification: pages.json not found');
} else {
  const homeEn = pages.find(p => p.slug === 'corporate-3-landing-2' && p.locale === 'en');
  const homeDe = pages.find(p => p.slug === 'start' && p.locale === 'de');

  // Number of "/media/" occurrences in each home page (0 when the page or its
  // contentHtml is missing).
  console.log('\n✅ Verification:');
  console.log('EN home images:', (homeEn?.contentHtml?.match(/\/media\//g) || []).length);
  console.log('DE home images:', (homeDe?.contentHtml?.match(/\/media\//g) || []).length);

  // Check for remaining IDs: bg_image attributes that still hold a bare number.
  const remainingIds = homeEn?.contentHtml?.match(/bg_image="\d+"/g) || [];
  console.log('Remaining IDs in EN:', remainingIds.length > 0 ? remainingIds : 'None');

  // Show examples: the first three bg_image attributes of the EN home page.
  if (homeEn?.contentHtml) {
    const matches = homeEn.contentHtml.match(/bg_image="[^"]+"/g);
    if (matches) {
      console.log('\nEN bg_image examples:', matches.slice(0, 3));
    }
  }
}