migration wip

This commit is contained in:
2025-12-30 00:06:54 +01:00
parent 3efbac78cb
commit 89dbf8af87
94 changed files with 5674 additions and 308 deletions

58
scripts/debug-entities.js Normal file
View File

@@ -0,0 +1,58 @@
#!/usr/bin/env node
// Debug what entities are actually in the raw data.
//
// Prints the sample excerpt, lists any decimal numeric entities (&#NNNN;) and
// typographic Unicode characters found in it, and then shows the excerpt after
// two character-normalisation passes: the full "manual" table and the smaller
// table attributed to the "current function".

const rawExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none”…</p>';

// Typographic characters the script reports on, written as escapes so the
// pattern cannot be damaged by tools that mangle non-ASCII text:
// U+201D, U+201C, U+2018, U+2019, U+2013, U+2014, U+2033, U+2032.
const TYPOGRAPHIC_CHARS = /[\u201D\u201C\u2018\u2019\u2013\u2014\u2033\u2032]/g;

// Replacement table for the "manual decoding" pass.
// NOTE(review): the patterns for U+2013, U+2018, U+2019 and U+2032 were empty
// (`//g`) in the reviewed text, which JavaScript parses as a line comment and
// therefore as a syntax error. They are reconstructed here from the detection
// class above and the replacement values that survived - confirm against the
// real decoder. The em dash (U+2014) and ellipsis (U+2026) were mapped to
// themselves, so they are intentionally absent and pass through unchanged.
const MANUAL_REPLACEMENTS = new Map([
  ['\u201D', '"'], // right double quotation mark
  ['\u201C', '"'], // left double quotation mark
  ['\u2013', '-'], // en dash
  ['\u2018', "'"], // left single quotation mark
  ['\u2019', "'"], // right single quotation mark
  ['\u2033', '"'], // double prime
  ['\u2032', "'"], // prime
]);

// Replacement table for the "current function" pass: the same as the manual
// table minus the prime and double-prime rules.
// NOTE(review): an earlier comment claimed the U+201D rule cannot match the
// raw data; with the excerpt as written above it does match. What this table
// leaves behind is the double prime (U+2033) after numeric attribute values.
const CURRENT_FUNCTION_REPLACEMENTS = new Map([
  ['\u201D', '"'],
  ['\u201C', '"'],
  ['\u2013', '-'],
  ['\u2018', "'"],
  ['\u2019', "'"],
]);

/**
 * Decodes a decimal numeric character reference.
 *
 * @param {string} entity - Text of the form `&#<digits>;`, e.g. `&#8221;`.
 * @returns {{ code: number, char: string }} The parsed code point and the
 *   character it denotes; U+FFFD when the number is outside the Unicode range.
 */
function decodeNumericEntity(entity) {
  // Strip the leading "&#" and the trailing ";" and parse as base 10.
  const code = Number.parseInt(entity.slice(2, -1), 10);
  // fromCodePoint handles code points above 0xFFFF but throws past 0x10FFFF.
  const char = code <= 0x10ffff ? String.fromCodePoint(code) : '\uFFFD';
  return { code, char };
}

/**
 * Replaces every character that has an entry in `replacements`.
 *
 * @param {string} text - Input text.
 * @param {Map<string, string>} replacements - Single character to replacement.
 * @returns {string} Text with mapped characters substituted, others untouched.
 */
function replaceMappedChars(text, replacements) {
  return Array.from(text, (ch) => (replacements.has(ch) ? replacements.get(ch) : ch)).join('');
}

console.log('=== Raw Data Analysis ===');
console.log('Original excerpt:');
console.log(rawExcerpt);

console.log('\n=== Entity Analysis ===');

// Check for numeric entities (null when the excerpt contains none).
const numericEntities = rawExcerpt.match(/&#\d+;/g);
console.log('Numeric entities found:', numericEntities);

// Check for Unicode characters.
const unicodeChars = rawExcerpt.match(TYPOGRAPHIC_CHARS);
console.log('Unicode characters found:', unicodeChars);

// Show what each distinct numeric entity represents.
if (numericEntities) {
  console.log('\n=== Numeric Entity Decoding ===');
  for (const entity of new Set(numericEntities)) {
    const { code, char } = decodeNumericEntity(entity);
    console.log(`${entity} (code ${code}) → "${char}"`);
  }
}

// Test manual decoding.
console.log('\n=== Manual Decoding Test ===');
const decoded = replaceMappedChars(rawExcerpt, MANUAL_REPLACEMENTS);
console.log('After manual decoding:');
console.log(decoded);

// Test the current function approach.
console.log('\n=== Current Function Test ===');
const processed = replaceMappedChars(rawExcerpt, CURRENT_FUNCTION_REPLACEMENTS);
console.log('After current function (which won\'t work):');
console.log(processed);