migration wip
This commit is contained in:
88
scripts/verify-output.js
Normal file
88
scripts/verify-output.js
Normal file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Read the processed JSON exports (pages first, then posts) from
// ../data/processed, resolved relative to this script's own directory.
const processedDir = path.join(__dirname, '..', 'data', 'processed');
const [pages, posts] = ['pages.json', 'posts.json'].map((fileName) => {
  const rawJson = fs.readFileSync(path.join(processedDir, fileName), 'utf8');
  return JSON.parse(rawJson);
});
|
||||
|
||||
console.log('=== Verification of HTML Entity Decoding ===\n');

/**
 * Print a short report for one item and flag leftover entity-like content
 * in its excerpt.
 *
 * @param {string} label - Heading for the first log line ("Page" or "Post").
 * @param {object} item - Record from the processed JSON; only `title`,
 *   `path` and `excerptHtml` are read.
 * @returns {void}
 */
function reportEntityCheck(label, item) {
  console.log(`\n${label}: ${item.title}`);
  console.log(`Path: ${item.path}`);

  // A record without a string excerpt cannot be scanned. Report it and move
  // on instead of letting `.substring` throw and abort the remaining checks.
  if (typeof item.excerptHtml !== 'string') {
    console.log('⚠️ No excerptHtml to check');
    return;
  }

  const html = item.excerptHtml;
  console.log(`Excerpt preview: ${html.substring(0, 150)}...`);

  // NOTE(review): this class matches the literal curly-quote and dash
  // characters, not entity text such as `&rdquo;` — confirm that is intended.
  const hasEntities = /[”“‘’–—]/.test(html);
  // Numeric character references of the form `&#NNNN;`.
  const hasNumericEntities = /&#\d+;/.test(html);

  if (hasEntities || hasNumericEntities) {
    console.log('❌ Still contains HTML entities!');
    if (hasEntities) console.log(' - Found smart quotes/dashes');
    if (hasNumericEntities) console.log(' - Found numeric entities');
  } else {
    console.log('✅ Clean - no HTML entities found');
  }
}

// Only the first three records of each collection are spot-checked.
console.log('📄 PAGES:');
pages.slice(0, 3).forEach((page) => reportEntityCheck('Page', page));

console.log('\n📝 POSTS:');
posts.slice(0, 3).forEach((post) => reportEntityCheck('Post', post));
|
||||
|
||||
// Scan pages and posts together for excerpts that still contain raw
// `[vc_row`, `[vc_column` or `[nectar` shortcode openers.
console.log('\n🔍 SHORTCODE CHECK:');
const allPages = pages.concat(posts);

const shortcodesFound = [];
for (const entry of allPages) {
  if (/\[vc_row|\[vc_column|\[nectar/.test(entry.excerptHtml)) {
    shortcodesFound.push(entry);
  }
}
console.log(`Pages/posts with shortcodes in excerpt: ${shortcodesFound.length}`);

if (shortcodesFound.length === 0) {
  console.log('✅ No shortcodes found in excerpts');
} else {
  console.log('\nSample of items with shortcodes:');
  // Show at most two offenders, truncated to the first 100 characters.
  for (const { title, excerptHtml } of shortcodesFound.slice(0, 2)) {
    console.log(`- ${title}: ${excerptHtml.substring(0, 100)}...`);
  }
}
|
||||
|
||||
// Count excerpts that contain at least one of the converted-shortcode
// wrapper elements, then print the overall totals.
console.log('\n📊 HTML STRUCTURE CHECK:');
const convertedMarkers = [
  '<div class="vc-row"',
  '<div class="vc-column"',
  '<div class="nectar',
];
const withProperHTML = allPages.filter(
  (item) =>
    // Records without a string excerpt simply do not count as converted;
    // calling `.includes` on them would throw and abort the summary.
    typeof item.excerptHtml === 'string' &&
    convertedMarkers.some((marker) => item.excerptHtml.includes(marker))
);
console.log(`Items with converted shortcode HTML: ${withProperHTML.length}`);

console.log('\n=== Summary ===');
console.log(`Total items checked: ${allPages.length}`);
console.log(`Items with proper HTML structure: ${withProperHTML.length}`);
console.log(`Items with remaining shortcodes: ${shortcodesFound.length}`);
|
||||
|
||||
// Print the full excerpt of the first page whose excerpt mentions `vc-row`,
// as a concrete example of the processed output. Only `pages` is searched.
console.log('\n=== SAMPLE PROCESSED EXCERPTS ===');
const sample = pages.find(
  // Skip records without a string excerpt instead of throwing on `.includes`.
  (p) => typeof p.excerptHtml === 'string' && p.excerptHtml.includes('vc-row')
);
if (sample) {
  console.log(`\nTitle: ${sample.title}`);
  console.log(`Excerpt: ${sample.excerptHtml}`);
}
|
||||
Reference in New Issue
Block a user