Files
klz-cables.com/scripts/verify-output.js
2025-12-30 00:06:54 +01:00

88 lines
3.2 KiB
JavaScript

#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
// Read the processed page and post collections from ../data/processed (relative to this script)
const processedDir = path.join(__dirname, '..', 'data', 'processed');
const readProcessedJson = fileName =>
  JSON.parse(fs.readFileSync(path.join(processedDir, fileName), 'utf8'));
const pages = readProcessedJson('pages.json');
const posts = readProcessedJson('posts.json');
console.log('=== Verification of HTML Entity Decoding ===\n');

/**
 * Print a short report for one processed item and flag leftover entities.
 *
 * Logs the item's title, path and the first 150 characters of its excerpt,
 * then reports whether the excerpt still contains smart quotes/dashes or
 * numeric character references.
 *
 * @param {string} label - Heading printed before the title ("Page" or "Post").
 * @param {{title: *, path: *, excerptHtml: *}} item - Entry from pages.json or posts.json.
 * @returns {void}
 */
function reportEntityCheck(label, item) {
  console.log(`\n${label}: ${item.title}`);
  console.log(`Path: ${item.path}`);

  const excerpt = item.excerptHtml;
  if (typeof excerpt !== 'string') {
    // Without this guard one item lacking an excerpt aborts the whole run with a TypeError.
    console.log('⚠️ No excerptHtml string on this item - entity check skipped');
    return;
  }
  console.log(`Excerpt preview: ${excerpt.substring(0, 150)}...`);

  // Check for problematic entities
  const hasEntities = /[”“‘’–—]/.test(excerpt);
  // Match both decimal (&#8217;) and hexadecimal (&#x2019;) character references.
  const hasNumericEntities = /&#(?:\d+|x[0-9a-f]+);/i.test(excerpt);
  if (!hasEntities && !hasNumericEntities) {
    console.log('✅ Clean - no HTML entities found');
    return;
  }
  console.log('❌ Still contains HTML entities!');
  if (hasEntities) console.log(' - Found smart quotes/dashes');
  if (hasNumericEntities) console.log(' - Found numeric entities');
}

// Check pages (only the first three are sampled)
console.log('📄 PAGES:');
pages.slice(0, 3).forEach(page => reportEntityCheck('Page', page));

// Check posts (only the first three are sampled)
console.log('\n📝 POSTS:');
posts.slice(0, 3).forEach(post => reportEntityCheck('Post', post));
// Look for [vc_row, [vc_column or [nectar shortcode openers left in any excerpt
console.log('\n🔍 SHORTCODE CHECK:');
const allPages = [...pages, ...posts];
const shortcodesFound = [];
for (const entry of allPages) {
  if (/\[vc_row|\[vc_column|\[nectar/.test(entry.excerptHtml)) {
    shortcodesFound.push(entry);
  }
}
console.log(`Pages/posts with shortcodes in excerpt: ${shortcodesFound.length}`);
if (shortcodesFound.length === 0) {
  console.log('✅ No shortcodes found in excerpts');
} else {
  console.log('\nSample of items with shortcodes:');
  // Only the first two matches are listed, each cut to 100 characters
  for (const { title, excerptHtml } of shortcodesFound.slice(0, 2)) {
    console.log(`- ${title}: ${excerptHtml.substring(0, 100)}...`);
  }
}
// Count excerpts that contain a <div> opened with a vc-row, vc-column or nectar* class
console.log('\n📊 HTML STRUCTURE CHECK:');
const convertedMarkup = ['<div class="vc-row"', '<div class="vc-column"', '<div class="nectar'];
const withProperHTML = allPages.filter(({ excerptHtml }) =>
  convertedMarkup.some(fragment => excerptHtml.includes(fragment))
);
const convertedCount = withProperHTML.length;
console.log(`Items with converted shortcode HTML: ${convertedCount}`);

// Totals across pages and posts combined
console.log('\n=== Summary ===');
console.log(`Total items checked: ${allPages.length}`);
console.log(`Items with proper HTML structure: ${convertedCount}`);
console.log(`Items with remaining shortcodes: ${shortcodesFound.length}`);
// Print the full excerpt of the first page whose excerpt mentions "vc-row", if there is one
console.log('\n=== SAMPLE PROCESSED EXCERPTS ===');
const sample = pages.find(({ excerptHtml }) => excerptHtml.includes('vc-row'));
if (sample) {
  const { title, excerptHtml } = sample;
  console.log(`\nTitle: ${title}`);
  console.log(`Excerpt: ${excerptHtml}`);
}