// Test decoding
//
// Debug script: prints the raw text and bytes that follow the first
// `bg_image=` in the processed pages file, then checks that the typographic
// quote characters around the attribute value can be replaced with plain
// ASCII double quotes.
const fs = require('fs');

const PAGES_PATH = 'data/processed/pages.json';
const ATTRIBUTE_MARKER = 'bg_image=';
const SNIPPET_LENGTH = 30;

// Characters that make up HTML numeric-entity syntax (e.g. "&#8221;").
const ENTITY_SYNTAX_CHARS = new Set(['&', '#', ';']);

/**
 * Returns up to `length` characters of `content` starting at the first
 * occurrence of `marker`.
 *
 * @param {string} content - Text to search.
 * @param {string} marker - Substring that marks the start of the snippet.
 * @param {number} length - Maximum snippet length in UTF-16 code units.
 * @returns {string|null} The snippet, or null when `marker` is absent.
 */
function extractSnippet(content, marker, length) {
  const start = content.indexOf(marker);
  // indexOf returns -1 on a miss; substring would clamp that to 0 and
  // silently return the beginning of the file instead of reporting the miss.
  if (start === -1) {
    return null;
  }
  return content.substring(start, start + length);
}

/**
 * Replaces the two typographic quote characters seen around the attribute
 * value with an ASCII double quote.
 *
 * @param {string} text - Input text.
 * @returns {string} Text with both characters replaced by '"'.
 */
function normalizeQuotes(text) {
  return text
    .replace(/”/g, '"') // U+201D RIGHT DOUBLE QUOTATION MARK -> "
    .replace(/″/g, '"'); // U+2033 DOUBLE PRIME -> "
}

/**
 * Lists every UTF-16 code unit in `text` that is non-ASCII (> 127) or is one
 * of the entity-syntax characters '&', '#', ';'.
 *
 * @param {string} text - Input text.
 * @returns {{index: number, char: string, code: number, hex: string}[]}
 *   One record per matching position, in order of appearance.
 */
function findNotableChars(text) {
  const found = [];
  for (let i = 0; i < text.length; i++) {
    const char = text[i];
    const code = text.charCodeAt(i);
    if (code > 127 || ENTITY_SYNTAX_CHARS.has(char)) {
      found.push({ index: i, char, code, hex: `0x${code.toString(16)}` });
    }
  }
  return found;
}

// Read actual file content. A missing file only skips this section: the
// hard-coded test-string checks below do not depend on it.
let content = null;
try {
  content = fs.readFileSync(PAGES_PATH, 'utf8');
} catch (err) {
  if (err.code !== 'ENOENT') {
    throw err;
  }
  console.log(`File not found, skipping file inspection: ${PAGES_PATH}`);
}

if (content !== null) {
  const snippet = extractSnippet(content, ATTRIBUTE_MARKER, SNIPPET_LENGTH);
  if (snippet === null) {
    console.log(`No '${ATTRIBUTE_MARKER}' found in ${PAGES_PATH}`);
  } else {
    console.log('File snippet:', snippet);
    console.log('Bytes:', Buffer.from(snippet).toString('hex'));
  }
}

// The file has: bg_image=”10432″
// Which is: bg_image= + ” + 10432 + ″
//
// NOTE(review): earlier comments in this script called these "decimal
// entities", and the character analysis below looks for '&', '#' and ';',
// which suggests the string was meant to hold the entity text (presumably
// "&#8221;" / "&#8243;"). The literals here are the characters themselves.
// Confirm which form pages.json actually contains before relying on this.
const test = 'bg_image=”10432″';
console.log('\nTest string:', test);

const decoded = normalizeQuotes(test);
console.log('After replace:', decoded);

// Show what the non-ASCII / entity-syntax characters in the test string
// actually are.
console.log('\nCharacter analysis:');
for (const { index, char, code, hex } of findNotableChars(test)) {
  console.log(index, char, code, hex);
}