36 lines
1.0 KiB
JavaScript
36 lines
1.0 KiB
JavaScript
// Test decoding
|
|
const fs = require('fs');
|
|
|
|
// Read the actual file content and show the first occurrence of the
// `bg_image=` attribute, both as text and as hex bytes, so the exact
// characters that follow the `=` can be identified.
const content = fs.readFileSync('data/processed/pages.json', 'utf8');
const idx = content.indexOf('bg_image=');

if (idx === -1) {
  // indexOf() returns -1 when the marker is absent, and substring() clamps a
  // negative start to 0, so without this guard the first 29 characters of the
  // file would be printed as if they were the match.
  console.log('File snippet: "bg_image=" not found in data/processed/pages.json');
} else {
  // 30 UTF-16 code units starting at the marker: the attribute name plus
  // whatever value and quoting follow it.
  const snippet = content.substring(idx, idx + 30);

  console.log('File snippet:', snippet);
  // Hex dump of the snippet's UTF-8 encoding.
  console.log('Bytes:', Buffer.from(snippet, 'utf8').toString('hex'));
}

// Synthetic reproduction of the attribute. Per the original note the file
// has:  bg_image=”10432″
// which is:  bg_image= + U+201D (”, decimal 8221) + 10432 + U+2033 (″, decimal 8243)
//
// The two marks are written as \u escapes so the exact code points are
// visible in the source and cannot be confused with each other or with an
// ASCII double quote; the runtime string is the same as with the raw glyphs.
const test = 'bg_image=\u201D10432\u2033';
console.log('\nTest string:', test);

// Replace both typographic marks with a plain ASCII double quote in one pass.
// NOTE(review): this matches the literal characters only. A numeric entity
// spelling such as `&#8221;` would pass through unchanged — confirm which
// form the file actually contains before relying on this.
const decoded = test.replace(/[\u201D\u2033]/g, '"');

console.log('After replace:', decoded);

// Character analysis of `test`: print every character that is either
// non-ASCII (code point above 127) or one of `&`, `#`, `;`.
// Presumably the punctuation check is there to reveal an undecoded numeric
// entity (e.g. `&#8221;`) as opposed to the literal character — verify
// against the data being debugged.
console.log('\nCharacter analysis:');

const ENTITY_PUNCTUATION = new Set(['&', '#', ';']);

// Iterate by code point so a character outside the BMP is reported once
// instead of as two lone surrogates; `offset` still tracks the UTF-16 index
// so positions line up with String#indexOf / String#substring.
let offset = 0;
for (const char of test) {
  const code = char.codePointAt(0);
  if (code > 127 || ENTITY_PUNCTUATION.has(char)) {
    // Columns: UTF-16 offset, the character, decimal code point, hex code point.
    console.log(offset, char, code, `0x${code.toString(16)}`);
  }
  offset += char.length;
}