#!/usr/bin/env node // Test script to verify HTML entity decoding works correctly const testExcerpt = '
[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none” column_border_radius=”none” column_link_target=”_self” column_position=”default” gradient_direction=”left_to_right” overlay_strength=”0.3″ width=”1/1″ tablet_width_inherit=”default” animation_type=”default” bg_image_animation=”none” border_type=”simple” column_border_width=”none” column_border_style=”solid”][vc_column_text css=”” text_direction=”default”]\n
Stand November 2024
\n[/vc_column_text][/vc_column][/vc_row]'; // Process excerpts specifically to handle shortcodes comprehensively function processExcerptShortcodes(excerptHtml) { if (!excerptHtml) return ''; let processed = excerptHtml; // First, decode HTML entities to regular characters // Use a comprehensive approach that handles both numeric and named entities processed = processed // Numeric HTML entities commonly found in WordPress raw data .replace(/”/g, '"') // ” - Right double quote .replace(/“/g, '"') // “ - Left double quote .replace(/„/g, ',') // „ - Low double quote .replace(/‟/g, '"') // ‟ - High double quote .replace(/‘/g, "'") // ‘ - Left single quote .replace(/’/g, "'") // ’ - Right single quote .replace(/–/g, '-') // – - En dash .replace(/—/g, '—') // — - Em dash .replace(/…/g, '…') // … - Ellipsis .replace(/″/g, '"') // ″ - Inches/Prime .replace(/′/g, "'") // ′ - Feet/Prime .replace(/‚/g, ',') // ‚ - Single low quote .replace(/‛/g, '`') // ‛ - Single high reversed quote .replace(/“/g, '"') // “ - Left double quote .replace(/”/g, '"') // ” - Right double quote .replace(/„/g, ',') // „ - Low double quote .replace(/‟/g, '"') // ‟ - High double quote .replace(/•/g, '•') // • - Bullet .replace(/…/g, '…') // … - Ellipsis .replace(/€/g, '€') // € - Euro // Unicode characters (from rendered content) .replace(/"/g, '"') // Right double quote .replace(/"/g, '"') // Left double quote .replace(/„/g, ',') // Low double quote .replace(/‟/g, '"') // High double quote .replace(/'/g, "'") // Left single quote .replace(/'/g, "'") // Right single quote .replace(/–/g, '-') // En dash .replace(/—/g, '—') // Em dash .replace(/…/g, '…') // Ellipsis .replace(/″/g, '"') // Inches/Prime .replace(/′/g, "'") // Feet/Prime .replace(/•/g, '•') // Bullet // Named HTML entities .replace(/"/g, '"') .replace(/'/g, "'") .replace(/‘/g, "'") .replace(/’/g, "'") .replace(/“/g, '"') .replace(/”/g, '"') .replace(/–/g, '-') .replace(/—/g, '—') .replace(/…/g, '…') .replace(/•/g, '•') .replace(/€/g, '€'); // Process WPBakery shortcodes with HTML entities processed = processed // vc_row - convert to div with classes .replace(/\[vc_row([^\]]*)\]/gi, (match, attrs) => { const classes = ['vc-row']; if (attrs.includes('full_width_background')) classes.push('full-width-bg'); if (attrs.includes('in_container')) classes.push('in-container'); if (attrs.includes('full_width_content')) classes.push('full-width-content'); return `]*>\s*<\/p>/gi, ''); processed = processed.replace(/