Files
klz-cables.com/scripts/test-entity-decoding.js
2025-12-30 00:06:54 +01:00

132 lines
6.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
// Test script to verify HTML entity decoding works correctly
const testExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none” column_border_radius=”none” column_link_target=”_self” column_position=”default” gradient_direction=”left_to_right” overlay_strength=”0.3″ width=”1/1″ tablet_width_inherit=”default” animation_type=”default” bg_image_animation=”none” border_type=”simple” column_border_width=”none” column_border_style=”solid”][vc_column_text css=”” text_direction=”default”]\n<h1 class=\"p1\">Liefer- und Zahlungsbedingungen</h1>\n<p class=\"p1\">Stand November 2024</p>\n[/vc_column_text][/vc_column][/vc_row]</p>';
// Process excerpts specifically to handle shortcodes comprehensively
function processExcerptShortcodes(excerptHtml) {
if (!excerptHtml) return '';
let processed = excerptHtml;
// First, decode HTML entities to regular characters
// Use a comprehensive approach that handles both numeric and named entities
processed = processed
// Numeric HTML entities commonly found in WordPress raw data
.replace(/”/g, '"') // ” - Right double quote
.replace(/“/g, '"') // “ - Left double quote
.replace(/„/g, ',') // „ - Low double quote
.replace(/‟/g, '"') // ‟ - High double quote
.replace(//g, "'") // - Left single quote
.replace(//g, "'") // - Right single quote
.replace(//g, '-') // - En dash
.replace(/—/g, '—') // — - Em dash
.replace(/…/g, '…') // … - Ellipsis
.replace(/″/g, '"') // ″ - Inches/Prime
.replace(//g, "'") // - Feet/Prime
.replace(//g, ',') // - Single low quote
.replace(//g, '`') // - Single high reversed quote
.replace(/“/g, '"') // “ - Left double quote
.replace(/”/g, '"') // ” - Right double quote
.replace(/„/g, ',') // „ - Low double quote
.replace(/‟/g, '"') // ‟ - High double quote
.replace(/•/g, '•') // • - Bullet
.replace(/…/g, '…') // … - Ellipsis
.replace(/€/g, '€') // € - Euro
// Unicode characters (from rendered content)
.replace(/"/g, '"') // Right double quote
.replace(/"/g, '"') // Left double quote
.replace(/„/g, ',') // Low double quote
.replace(/‟/g, '"') // High double quote
.replace(/'/g, "'") // Left single quote
.replace(/'/g, "'") // Right single quote
.replace(//g, '-') // En dash
.replace(/—/g, '—') // Em dash
.replace(/…/g, '…') // Ellipsis
.replace(/″/g, '"') // Inches/Prime
.replace(//g, "'") // Feet/Prime
.replace(/•/g, '•') // Bullet
// Named HTML entities
.replace(/"/g, '"')
.replace(/'/g, "'")
.replace(//g, "'")
.replace(//g, "'")
.replace(/“/g, '"')
.replace(/”/g, '"')
.replace(//g, '-')
.replace(/—/g, '—')
.replace(/…/g, '…')
.replace(/•/g, '•')
.replace(/€/g, '€');
// Process WPBakery shortcodes with HTML entities
processed = processed
// vc_row - convert to div with classes
.replace(/\[vc_row([^\]]*)\]/gi, (match, attrs) => {
const classes = ['vc-row'];
if (attrs.includes('full_width_background')) classes.push('full-width-bg');
if (attrs.includes('in_container')) classes.push('in-container');
if (attrs.includes('full_width_content')) classes.push('full-width-content');
return `<div class="${classes.join(' ')}">`;
})
.replace(/\[\/vc_row\]/gi, '</div>')
// vc_column - convert to div with classes
.replace(/\[vc_column([^\]]*)\]/gi, (match, attrs) => {
const classes = ['vc-column'];
if (attrs.includes('1/1')) classes.push('col-1-1');
if (attrs.includes('1/2')) classes.push('col-1-2');
if (attrs.includes('1/3')) classes.push('col-1-3');
if (attrs.includes('2/3')) classes.push('col-2-3');
if (attrs.includes('1/4')) classes.push('col-1-4');
if (attrs.includes('3/4')) classes.push('col-3-4');
if (attrs.includes('5/12')) classes.push('col-5-12');
if (attrs.includes('7/12')) classes.push('col-7-12');
return `<div class="${classes.join(' ')}">`;
})
.replace(/\[\/vc_column\]/gi, '</div>')
// vc_column_text - convert to div
.replace(/\[vc_column_text([^\]]*)\]/gi, '<div class="vc-column-text">')
.replace(/\[\/vc_column_text\]/gi, '</div>');
// Remove any remaining shortcodes
processed = processed.replace(/\[.*?\]/g, '');
// Clean up any HTML that might be broken
processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');
// Normalize whitespace
processed = processed.replace(/\s+/g, ' ').trim();
return processed;
}
console.log('=== HTML Entity Decoding Test ===\n');
console.log('Original excerpt:');
console.log(testExcerpt);
console.log('\n--- After processing ---\n');
const result = processExcerptShortcodes(testExcerpt);
console.log(result);
// Test specific entity decoding
console.log('\n=== Specific Entity Tests ===');
const entityTests = [
{ input: '”', expected: '"', name: 'Right double quote' },
{ input: '“', expected: '"', name: 'Left double quote' },
{ input: '', expected: '-', name: 'En dash' },
{ input: '—', expected: '—', name: 'Em dash' },
{ input: '', expected: "'", name: 'Left single quote' },
{ input: '', expected: "'", name: 'Right single quote' },
{ input: 'type=”in_container”', expected: 'type="in_container"', name: 'Full attribute' }
];
entityTests.forEach(test => {
const processed = test.input.replace(/”/g, '"').replace(/“/g, '"').replace(//g, '-').replace(/—/g, '—').replace(//g, "'").replace(//g, "'");
const passed = processed === test.expected;
console.log(`${test.name}: ${passed ? '✅' : '❌'} "${test.input}" → "${processed}" (expected: "${test.expected}")`);
});