Files
klz-cables.com/scripts/test-final-function.js
2025-12-30 00:06:54 +01:00

125 lines
4.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
// Test the final function with actual raw data
const fs = require('fs');
const path = require('path');
// Read the raw pages.en.json snapshot and take the first entry's excerpt as the sample input
const pagesJson = fs.readFileSync('data/raw/2025-12-27T21-26-12-521Z/pages.en.json', 'utf8');
const rawData = JSON.parse(pagesJson);
const firstPage = rawData[0];
const testExcerpt = firstPage.excerptHtml;

// Print a banner followed by a 200-character preview of the unprocessed excerpt
console.log('=== Testing Final Function ===');
console.log('Raw excerpt (first 200 chars):');
console.log(testExcerpt.slice(0, 200));
console.log('');
// The function from process-data.js

// Named HTML entities and the plain character each one is replaced with.
const EXCERPT_NAMED_ENTITIES = {
  quot: '"',
  apos: "'",
  lsquo: "'",
  rsquo: "'",
  ldquo: '"',
  rdquo: '"',
  ndash: '-',
  mdash: '\u2014', // em dash is kept as a real em dash
  hellip: '\u2026', // ellipsis is kept
  bull: '\u2022', // bullet is kept
  euro: '\u20AC', // euro sign is kept
};

// Typographic characters that are flattened to ASCII. Written as \u escapes so the
// patterns cannot be lost or confused with look-alike characters.
// Em dash, ellipsis, bullet and euro are intentionally absent: they pass through unchanged.
const EXCERPT_TYPOGRAPHIC_CHARS = {
  '\u201D': '"', // right double quote
  '\u201C': '"', // left double quote
  '\u201E': ',', // low double quote
  '\u201F': '"', // high reversed double quote
  '\u2018': "'", // left single quote
  '\u2019': "'", // right single quote
  '\u2013': '-', // en dash
  '\u2033': '"', // double prime (inches)
  '\u2032': "'", // prime (feet)
  '\u201A': ',', // single low quote
  '\u201B': '`', // single high reversed quote
};

// Substring found in a [vc_row] attribute list -> extra CSS class on the generated div.
const EXCERPT_ROW_CLASS_RULES = [
  ['full_width_background', 'full-width-bg'],
  ['in_container', 'in-container'],
  ['full_width_content', 'full-width-content'],
];

// Width fraction found in a [vc_column] attribute list -> extra CSS class on the generated div.
const EXCERPT_COLUMN_CLASS_RULES = [
  ['1/1', 'col-1-1'],
  ['1/2', 'col-1-2'],
  ['1/3', 'col-1-3'],
  ['2/3', 'col-2-3'],
  ['1/4', 'col-1-4'],
  ['3/4', 'col-3-4'],
  ['5/12', 'col-5-12'],
  ['7/12', 'col-7-12'],
];

// Builds the opening <div> for a shortcode: the base class plus every rule whose needle occurs in attrs.
function buildShortcodeDiv(baseClass, attrs, classRules) {
  const classes = [baseClass];
  for (const [needle, className] of classRules) {
    if (attrs.includes(needle)) classes.push(className);
  }
  return `<div class="${classes.join(' ')}">`;
}

/**
 * Converts an excerpt containing WPBakery shortcodes and HTML entities into plain HTML.
 *
 * Steps, in order: decode decimal numeric entities, flatten typographic quotes/dashes
 * to ASCII, decode a fixed set of named entities, turn vc_row / vc_column /
 * vc_column_text shortcodes into <div> elements, drop every other [bracketed]
 * shortcode, remove empty <p>/<div> elements, and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML; any falsy value yields ''.
 * @returns {string} The processed, trimmed, single-spaced HTML string.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  const decoded = excerptHtml
    // Decimal numeric entities first, so the characters they produce are normalized below.
    // fromCodePoint handles code points above 0xFFFF; out-of-range values are left as-is
    // instead of throwing a RangeError.
    .replace(/&#(\d+);/g, (match, dec) => {
      const codePoint = Number(dec);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    })
    // \u2018-\u201F covers all eight quote variants listed in EXCERPT_TYPOGRAPHIC_CHARS
    .replace(/[\u2013\u2018-\u201F\u2032\u2033]/g, (ch) => EXCERPT_TYPOGRAPHIC_CHARS[ch])
    .replace(
      /&(quot|apos|lsquo|rsquo|ldquo|rdquo|ndash|mdash|hellip|bull|euro);/g,
      (match, name) => EXCERPT_NAMED_ENTITIES[name],
    );

  // The \b after each shortcode name stops [vc_row ...] / [vc_column ...] from also
  // matching longer names such as [vc_row_inner] or [vc_column_text] ("_" is a word character).
  const withDivs = decoded
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) =>
      buildShortcodeDiv('vc-row', attrs, EXCERPT_ROW_CLASS_RULES))
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) =>
      buildShortcodeDiv('vc-column', attrs, EXCERPT_COLUMN_CLASS_RULES))
    .replace(/\[\/vc_column\]/gi, '</div>')
    .replace(/\[vc_column_text\b([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  return withDivs
    // Remove any remaining shortcodes (opening and closing tags alike)
    .replace(/\[.*?\]/g, '')
    // Drop elements left empty by the removals above
    .replace(/<p[^>]*>\s*<\/p>/gi, '')
    .replace(/<div[^>]*>\s*<\/div>/gi, '')
    // Normalize whitespace
    .replace(/\s+/g, ' ')
    .trim();
}
// Run the sample excerpt through the function and print the outcome
const result = processExcerptShortcodes(testExcerpt);
console.log('After processing:');
console.log(result);
console.log('');

// Check for leftover typographic quotes / en dashes (written as \u escapes so the
// pattern does not depend on look-alike literal characters). The em dash is not
// checked: processExcerptShortcodes keeps it, so flagging it would be a false failure.
const hasEntities = /[\u201C\u201D\u2018\u2019\u2013]/.test(result);
const hasNumericEntities = /&#\d+;/.test(result);
const hasShortcodes = /\[vc_row|\[vc_column/.test(result);
// Accept the base class with or without modifier classes after it
// (e.g. class="vc-row in-container"), but not longer names such as vc-column-text.
const hasRowDiv = /<div class="vc-row[ "]/.test(result);
const hasColumnDiv = /<div class="vc-column[ "]/.test(result);

console.log('=== Verification ===');
console.log('Has Unicode entities:', hasEntities);
console.log('Has numeric entities:', hasNumericEntities);
console.log('Has shortcodes:', hasShortcodes);
console.log('Has proper HTML:', hasRowDiv || hasColumnDiv);
console.log('');

// Success requires a clean result AND at least one converted row div
if (!hasEntities && !hasNumericEntities && !hasShortcodes && hasRowDiv) {
  console.log('✅ SUCCESS: Function works correctly!');
} else {
  console.log('❌ Issues found');
}