58 lines
2.6 KiB
JavaScript
58 lines
2.6 KiB
JavaScript
#!/usr/bin/env node
// Debug what entities are actually in the raw data.
//
// Standalone diagnostic: prints a sample excerpt, lists the numeric HTML
// entities and typographic Unicode characters found in it, and compares two
// clean-up strategies ("manual decoding" vs. the "current function").

const rawExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none”…</p>';

// Largest valid Unicode code point; String.fromCodePoint throws above it.
const MAX_CODE_POINT = 0x10ffff;

// Replacements applied by the "current function" approach.
const CURRENT_REPLACEMENTS = [
  ['”', '"'],
  ['“', '"'],
  ['–', '-'],
  ['‘', "'"],
  ['’', "'"],
];

// The "manual decoding" approach additionally normalises prime marks.
// Em dash (—) and ellipsis (…) are intentionally left untouched.
const MANUAL_REPLACEMENTS = [
  ...CURRENT_REPLACEMENTS,
  ['″', '"'],
  ['′', "'"],
];

/**
 * Decode a single decimal numeric entity such as "&#8221;".
 *
 * @param {string} entity - Text of the form `&#<digits>;`.
 * @returns {{code: number, char: string}} The parsed code point and the
 *   character it denotes. Code points outside the Unicode range yield
 *   U+FFFD instead of throwing.
 */
function describeNumericEntity(entity) {
  // Strip the leading "&#" and trailing ";" and always parse as base 10.
  const code = Number.parseInt(entity.slice(2, -1), 10);
  // fromCodePoint (unlike fromCharCode) handles code points above 0xFFFF.
  const char = code <= MAX_CODE_POINT ? String.fromCodePoint(code) : '\uFFFD';
  return { code, char };
}

/**
 * Apply a list of literal substring replacements to a string, in order.
 *
 * @param {string} text - Input text.
 * @param {Array<[string, string]>} replacements - `[from, to]` pairs.
 * @returns {string} Text with every occurrence of each `from` replaced.
 */
function applyReplacements(text, replacements) {
  let result = text;
  for (const [from, to] of replacements) {
    result = result.split(from).join(to);
  }
  return result;
}

console.log('=== Raw Data Analysis ===');
console.log('Original excerpt:');
console.log(rawExcerpt);
console.log('\n=== Entity Analysis ===');

// Check for numeric entities (match() returns null when there are none).
const numericEntities = rawExcerpt.match(/&#\d+;/g);
console.log('Numeric entities found:', numericEntities);

// Check for Unicode characters
const unicodeChars = rawExcerpt.match(/[”“‘’–—″′]/g);
console.log('Unicode characters found:', unicodeChars);

// Test what each numeric entity represents
if (numericEntities) {
  console.log('\n=== Numeric Entity Decoding ===');
  const uniqueEntities = [...new Set(numericEntities)];
  for (const entity of uniqueEntities) {
    const { code, char } = describeNumericEntity(entity);
    console.log(`${entity} (code ${code}) → "${char}"`);
  }
}

// Test manual decoding
console.log('\n=== Manual Decoding Test ===');
const decoded = applyReplacements(rawExcerpt, MANUAL_REPLACEMENTS);

console.log('After manual decoding:');
console.log(decoded);

// Test the current function approach
console.log('\n=== Current Function Test ===');
// NOTE(review): the label below claims this approach won't work. With the
// literal ” characters in rawExcerpt it does match; it would only fail if the
// data carried numeric entities (e.g. &#8221;) instead. Confirm which form the
// real feed delivers.
const processed = applyReplacements(rawExcerpt, CURRENT_REPLACEMENTS);

console.log('After current function (which won\'t work):');
console.log(processed);