#!/usr/bin/env node

/**
 * Verification of HTML entity decoding in the processed data.
 *
 * Loads pages.json and posts.json from ../data/processed (relative to this
 * script), prints a preview of the first three excerpts of each, and reports
 * whether an excerpt still contains smart quotes/dashes or numeric character
 * references. It then collects every page/post whose excerpt still contains
 * page-builder shortcodes.
 */

const fs = require('fs');
const path = require('path');

// Load the processed data
const processedDir = path.join(__dirname, '..', 'data', 'processed');
const pages = JSON.parse(fs.readFileSync(path.join(processedDir, 'pages.json'), 'utf8'));
const posts = JSON.parse(fs.readFileSync(path.join(processedDir, 'posts.json'), 'utf8'));

// Curly double quotes, curly single quotes, en dash and em dash. Spelled as
// \u escapes so the character class cannot be corrupted when the file is
// re-saved or transcoded in a different encoding.
const SMART_PUNCTUATION_RE = /[\u201C\u201D\u2018\u2019\u2013\u2014]/;

// Decimal character references that were left undecoded, e.g. &#8217;
const NUMERIC_ENTITY_RE = /&#\d+;/;

/**
 * Returns the excerpt HTML of an item, or an empty string when the item has
 * no string excerpt, so one incomplete record cannot abort the whole report.
 *
 * @param {{ excerptHtml?: unknown }} item - A page or post record.
 * @returns {string} The excerpt HTML, or '' when it is missing.
 */
function excerptOf(item) {
  return typeof item.excerptHtml === 'string' ? item.excerptHtml : '';
}

/**
 * Prints the title, path and a 150-character excerpt preview of one item,
 * followed by the result of the entity check on its excerpt.
 *
 * @param {string} label - Heading used for the item ('Page' or 'Post').
 * @param {{ title: string, path: string, excerptHtml?: unknown }} item - The record to check.
 * @returns {void}
 */
function reportEntityStatus(label, item) {
  const excerpt = excerptOf(item);

  console.log(`\n${label}: ${item.title}`);
  console.log(`Path: ${item.path}`);
  console.log(`Excerpt preview: ${excerpt.substring(0, 150)}...`);

  // Check for problematic entities
  const hasEntities = SMART_PUNCTUATION_RE.test(excerpt);
  const hasNumericEntities = NUMERIC_ENTITY_RE.test(excerpt);

  if (!hasEntities && !hasNumericEntities) {
    console.log('✅ Clean - no HTML entities found');
    return;
  }

  console.log('❌ Still contains HTML entities!');
  if (hasEntities) {
    console.log(' - Found smart quotes/dashes');
  }
  if (hasNumericEntities) {
    console.log(' - Found numeric entities');
  }
}

console.log('=== Verification of HTML Entity Decoding ===\n');

// Check pages
console.log('📄 PAGES:');
pages.slice(0, 3).forEach((page) => reportEntityStatus('Page', page));

// Check posts
console.log('\n📝 POSTS:');
posts.slice(0, 3).forEach((post) => reportEntityStatus('Post', post));

// Check for shortcode patterns
console.log('\n🔍 SHORTCODE CHECK:');
const allPages = [...pages, ...posts];
const shortcodesFound = allPages.filter((item) => /\[vc_row|\[vc_column|\[nectar/.test(excerptOf(item)));
console.log(`Pages/posts with shortcodes in excerpt: ${shortcodesFound.length}`); if (shortcodesFound.length > 0) { console.log('\nSample of items with shortcodes:'); shortcodesFound.slice(0, 2).forEach(item => { console.log(`- ${item.title}: ${item.excerptHtml.substring(0, 100)}...`); }); } else { console.log('āœ… No shortcodes found in excerpts'); } // Check for proper HTML structure console.log('\nšŸ“Š HTML STRUCTURE CHECK:'); const withProperHTML = allPages.filter(item => item.excerptHtml.includes('