migration wip
This commit is contained in:
125
scripts/test-final-function.js
Normal file
125
scripts/test-final-function.js
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test the final function with actual raw data
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Load the captured raw page export. The path is relative, so fs resolves it
// against the current working directory of the process.
const rawData = JSON.parse(fs.readFileSync('data/raw/2025-12-27T21-26-12-521Z/pages.en.json', 'utf8'));

// Fail with a readable message instead of a TypeError on `rawData[0]`.
if (!Array.isArray(rawData) || rawData.length === 0 || !rawData[0]) {
  throw new Error('pages.en.json does not contain any pages to test against');
}

// A page without a string excerpt is previewed and processed as an empty
// excerpt, so the substring() call below cannot throw.
const firstExcerpt = rawData[0].excerptHtml;
const testExcerpt = typeof firstExcerpt === 'string' ? firstExcerpt : '';

console.log('=== Testing Final Function ===');
console.log('Raw excerpt (first 200 chars):');
console.log(testExcerpt.substring(0, 200));
console.log('');
|
||||
|
||||
// Local copy of the excerpt processor (the original comment attributes it to
// process-data.js), with its lookup tables and helpers.

// Named entities (written here without the leading ampersand and trailing
// semicolon) and the text each one decodes to. Curly quotes and the en dash
// are flattened to ASCII; em dash, ellipsis, bullet and euro stay as-is.
const NAMED_ENTITY_TEXT = new Map([
  ['quot', '"'],
  ['apos', "'"],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['ndash', '-'],
  ['mdash', '\u2014'],
  ['hellip', '\u2026'],
  ['bull', '\u2022'],
  ['euro', '\u20AC'],
]);

// Typographic characters that are flattened to an ASCII look-alike. Escapes
// are used instead of literal glyphs so the table survives re-encoding.
const TYPOGRAPHIC_ASCII = new Map([
  ['\u201D', '"'], // right double quote
  ['\u201C', '"'], // left double quote
  ['\u201E', '"'], // low double quote (opening quote in some locales)
  ['\u201F', '"'], // high reversed double quote
  ['\u2018', "'"], // left single quote
  ['\u2019', "'"], // right single quote
  ['\u2013', '-'], // en dash
  ['\u2033', '"'], // double prime (inches)
  ['\u2032', "'"], // prime (feet)
  ['\u201A', ','], // single low quote
  ['\u201B', '`'], // single high reversed quote
]);

// vc_row attribute markers and the CSS class each one adds.
const ROW_FLAG_CLASSES = [
  ['full_width_background', 'full-width-bg'],
  ['in_container', 'in-container'],
  ['full_width_content', 'full-width-content'],
];

// Column width fractions that have a matching `col-N-M` class.
const COLUMN_WIDTH_FRACTIONS = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

// Decode one numeric entity match (decimal or hex); out-of-range values are left untouched.
function decodeNumericEntity(match, hex, dec) {
  const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
  // fromCodePoint throws above U+10FFFF, and fromCharCode would silently wrap.
  if (!(codePoint <= 0x10ffff)) return match;
  return String.fromCodePoint(codePoint);
}

// Decode numeric and known named entities and flatten typographic punctuation.
function decodeExcerptEntities(text) {
  return text
    .replace(/&#(?:[xX]([0-9a-fA-F]+)|(\d+));/g, decodeNumericEntity)
    .replace(/[\u201D\u201C\u201E\u201F\u2018\u2019\u2013\u2033\u2032\u201A\u201B]/g, (ch) => TYPOGRAPHIC_ASCII.get(ch))
    .replace(/&([a-z]+);/g, (match, name) => (NAMED_ENTITY_TEXT.has(name) ? NAMED_ENTITY_TEXT.get(name) : match));
}

// Map the width fractions found in a vc_column attribute string to `col-N-M` classes.
function columnWidthClasses(attrs) {
  // Greedy digit runs yield whole fractions, so "1/12" is never read as "1/1".
  const fractions = new Set(attrs.match(/\d+\/\d+/g) || []);
  return COLUMN_WIDTH_FRACTIONS
    .filter((fraction) => fractions.has(fraction))
    .map((fraction) => `col-${fraction.replace('/', '-')}`);
}

/**
 * Convert an excerpt containing entity-encoded WPBakery shortcodes into HTML.
 *
 * Steps: decode entities, turn vc_row / vc_column / vc_column_text shortcodes
 * into classed divs, drop every other bracketed shortcode, remove empty
 * p/div elements, and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt markup; any falsy value yields ''.
 * @returns {string} The processed, single-line HTML string.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  let processed = decodeExcerptEntities(excerptHtml);

  // The (?![\w-]) lookahead stops a tag name from matching its longer
  // relatives (vc_row_inner, vc_column_inner, vc_column_text). Those would
  // otherwise open a div whose closing tag is stripped below.
  processed = processed
    .replace(/\[vc_row(?![\w-])([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-row'];
      for (const [flag, cssClass] of ROW_FLAG_CLASSES) {
        if (attrs.includes(flag)) classes.push(cssClass);
      }
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column(?![\w-])([^\]]*)\]/gi, (match, attrs) => {
      const classes = ['vc-column', ...columnWidthClasses(attrs)];
      return `<div class="${classes.join(' ')}">`;
    })
    .replace(/\[\/vc_column\]/gi, '</div>')
    .replace(/\[vc_column_text(?![\w-])([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes (opening and closing tags alike).
  processed = processed.replace(/\[.*?\]/g, '');

  // Drop elements left empty by the removals above.
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace.
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Run the processor on the sampled excerpt and report what is left over.
const result = processExcerptShortcodes(testExcerpt);

console.log('After processing:');
console.log(result);
console.log('');

// Curly quotes and the en dash are expected to be flattened to ASCII. The em
// dash is not checked because processExcerptShortcodes keeps it in the output.
const hasEntities = /[\u201D\u201C\u2018\u2019\u2013]/.test(result);
const hasNumericEntities = /&#\d+;/.test(result);
const hasShortcodes = /\[vc_row|\[vc_column/.test(result);

// The class attribute may carry extra classes (e.g. "vc-row in-container"),
// so accept either a space or the closing quote after the base class name.
const hasRowDiv = /<div class="vc-row[ "]/.test(result);
const hasColumnDiv = /<div class="vc-column[ "]/.test(result);

console.log('=== Verification ===');
console.log('Has Unicode entities:', hasEntities);
console.log('Has numeric entities:', hasNumericEntities);
console.log('Has shortcodes:', hasShortcodes);
console.log('Has proper HTML:', hasRowDiv || hasColumnDiv);
console.log('');

if (!hasEntities && !hasNumericEntities && !hasShortcodes && hasRowDiv) {
  console.log('✅ SUCCESS: Function works correctly!');
} else {
  console.log('❌ Issues found');
}
|
||||
Reference in New Issue
Block a user