migration wip
This commit is contained in:
58
scripts/debug-entities.js
Normal file
58
scripts/debug-entities.js
Normal file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Debug what entities are actually in the raw data.
//
// Prints a sample excerpt, lists the numeric HTML entities and typographic
// Unicode characters it contains, and shows the result of two replacement
// strategies side by side.
//
// NOTE(review): in this copy of the script the sample below already contains
// literal typographic characters rather than `&#NNNN;` entities, so the
// numeric-entity branch never fires. Confirm against the raw export whether
// the sample was meant to hold entities.

const rawExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none”…</p>';

console.log('=== Raw Data Analysis ===');
console.log('Original excerpt:');
console.log(rawExcerpt);
console.log('\n=== Entity Analysis ===');

// Check for numeric entities
const numericEntities = rawExcerpt.match(/&#\d+;/g);
console.log('Numeric entities found:', numericEntities);

// Check for Unicode characters:
// U+201D ” U+201C “ U+2018 ‘ U+2019 ’ U+2013 – U+2014 — U+2033 ″ U+2032 ′
const unicodeChars = rawExcerpt.match(/[\u201D\u201C\u2018\u2019\u2013\u2014\u2033\u2032]/g);
console.log('Unicode characters found:', unicodeChars);

// Test what each numeric entity represents
if (numericEntities) {
  console.log('\n=== Numeric Entity Decoding ===');
  for (const entity of new Set(numericEntities)) {
    // Strip the leading "&#" and trailing ";"; always parse as base 10.
    const code = Number.parseInt(entity.slice(2, -1), 10);
    // fromCodePoint handles code points above U+FFFF (fromCharCode truncates
    // them) but throws beyond U+10FFFF, so guard the upper bound.
    const char = code <= 0x10ffff ? String.fromCodePoint(code) : '\uFFFD';
    console.log(`${entity} (code ${code}) → "${char}"`);
  }
}

// Test manual decoding: curly quotes and primes become ASCII quotes, and the
// en dash becomes a hyphen. Em dash and ellipsis are left as they are.
console.log('\n=== Manual Decoding Test ===');
const decoded = rawExcerpt
  .replace(/[\u201D\u201C\u2033]/g, '"')
  .replace(/[\u2018\u2019\u2032]/g, "'")
  .replace(/\u2013/g, '-');

console.log('After manual decoding:');
console.log(decoded);

// Test the current function approach: same as above but without the prime
// characters (U+2033 / U+2032), so values such as 0.3″ keep their prime.
console.log('\n=== Current Function Test ===');
const processed = rawExcerpt
  .replace(/[\u201D\u201C]/g, '"')
  .replace(/\u2013/g, '-')
  .replace(/[\u2018\u2019]/g, "'");

console.log("After current function (which won't work):");
console.log(processed);
|
||||
563
scripts/process-data-fixed.js
Normal file
563
scripts/process-data-fixed.js
Normal file
@@ -0,0 +1,563 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* WordPress → Next.js Data Processing Pipeline
|
||||
* Transforms raw WordPress data into Next.js compatible format
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Data locations, resolved relative to the directory containing this script.
const DATA_DIR = path.join(__dirname, '..', 'data');
const RAW_DIR = path.join(DATA_DIR, 'raw');
const PROCESSED_DIR = path.join(DATA_DIR, 'processed');

// Create processed directory. With `recursive: true`, mkdirSync does not fail
// when the directory already exists, so no separate existence check (and no
// check-then-create race) is needed.
fs.mkdirSync(PROCESSED_DIR, { recursive: true });
|
||||
|
||||
// Find latest export
|
||||
/**
 * Locate the most recent export directory inside RAW_DIR.
 *
 * "Most recent" means the sub-directory whose name sorts last in default
 * string order (presumably the export folders are named by timestamp —
 * confirm against the export script).
 *
 * @returns {string} Path of the chosen export directory.
 * @throws {Error} When RAW_DIR contains no sub-directories.
 */
function getLatestExportDir() {
  const exportDirNames = fs
    .readdirSync(RAW_DIR)
    .filter(name => fs.statSync(path.join(RAW_DIR, name)).isDirectory())
    .sort();

  // Without this guard path.join() would be handed `undefined` and fail with
  // an unrelated-looking argument-type error.
  if (exportDirNames.length === 0) {
    throw new Error(`No export directories found in ${RAW_DIR}`);
  }

  return path.join(RAW_DIR, exportDirNames[exportDirNames.length - 1]);
}
|
||||
|
||||
// HTML sanitization - preserve content but clean dangerous elements
|
||||
/**
 * Clean WordPress HTML for use outside WordPress.
 *
 * Removes <script> elements and inline `on*` event-handler attributes, turns
 * the WPBakery layout shortcodes into plain <div> wrappers, unwraps or removes
 * Nectar shortcodes, deletes every other `[...]` shortcode, drops empty
 * <p>/<div> elements and collapses whitespace runs to single spaces.
 *
 * This is regex-based, best-effort filtering, not a full HTML sanitizer.
 *
 * @param {string} html - Raw HTML, possibly containing shortcodes.
 * @returns {string} Cleaned HTML; '' when `html` is empty, null or undefined.
 */
function sanitizeHTML(html) {
  if (!html) return '';

  let sanitized = html;

  // Remove script tags and inline handlers (security). Handlers are matched
  // whether their value is double-quoted, single-quoted or unquoted.
  sanitized = sanitized.replace(/<script\b[^>]*>.*?<\/script\s*>/gis, '');
  sanitized = sanitized.replace(/\son\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // vc_column_text has to be converted before vc_column: the names share a
  // prefix, and the `\b` below only stops vc_column from matching it because
  // `_` counts as a word character.
  sanitized = sanitized.replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-text">');
  sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');

  // Replace vc_row/vc_column (and their *_inner variants) with divs to
  // preserve structure. The closing *_inner tags must become </div> as well,
  // otherwise every inner row/column leaves an unclosed <div> behind.
  sanitized = sanitized.replace(/\[vc_row(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-row">');
  sanitized = sanitized.replace(/\[\/vc_row(?:_inner)?\]/gi, '</div>');
  sanitized = sanitized.replace(/\[vc_column(?:_inner)?\b[^\]]*\]/gi, '<div class="vc-column">');
  sanitized = sanitized.replace(/\[\/vc_column(?:_inner)?\]/gi, '</div>');

  // Nectar shortcodes: keep the text wrapped by [nectar_cta]...[/nectar_cta],
  // drop every other nectar tag.
  sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
  sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');

  // Remove all remaining shortcodes
  sanitized = sanitized.replace(/\[.*?\]/g, '');

  // Remove empty paragraphs and divs. Repeat until stable so wrappers that
  // only contained other empty wrappers are removed too.
  let previous;
  do {
    previous = sanitized;
    sanitized = sanitized
      .replace(/<p[^>]*>\s*<\/p>/gi, '')
      .replace(/<div[^>]*>\s*<\/div>/gi, '');
  } while (sanitized !== previous);

  // Normalize whitespace but preserve HTML structure
  return sanitized.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Process excerpts specifically to handle shortcodes comprehensively
|
||||
/**
 * Convert an excerpt containing WPBakery / Nectar shortcodes into plain HTML.
 *
 * Steps, in order:
 *  1. Decode numeric (`&#8221;`, `&#x201D;`) and a fixed set of named HTML
 *     entities. Entities for `&`, `<` and `>` are left encoded so decoding
 *     cannot introduce new markup.
 *  2. Normalize typographic quotes, primes and the en dash to ASCII, because
 *     the shortcode attribute patterns below expect `"`-delimited values.
 *  3. Rewrite known shortcodes to HTML elements.
 *  4. Delete any remaining `[...]` shortcode, drop empty <p>/<div> elements
 *     and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt HTML.
 * @returns {string} Processed excerpt; '' when the input is empty/null/undefined.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities decoded to the character they stand for.
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', '\u2018'],
    ['rsquo', '\u2019'],
    ['sbquo', '\u201A'],
    ['ldquo', '\u201C'],
    ['rdquo', '\u201D'],
    ['bdquo', '\u201E'],
    ['ndash', '\u2013'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC']
  ]);

  // Code points for & < > : these stay encoded.
  const markupCodePoints = new Set([0x26, 0x3c, 0x3e]);

  const decodeNumericEntity = (entity, hex, dec) => {
    const codePoint = hex ? parseInt(hex, 16) : parseInt(dec, 10);
    if (codePoint === 0 || codePoint > 0x10ffff || markupCodePoints.has(codePoint)) {
      return entity;
    }
    return String.fromCodePoint(codePoint);
  };

  const decodeNamedEntity = (entity, name) =>
    (namedEntities.has(name) ? namedEntities.get(name) : entity);

  // Builds an opening <div> whose class list is `base` plus every class whose
  // marker substring occurs in the shortcode's attribute text.
  const openDiv = (base, attrs, markers) => {
    const classes = [base];
    for (const [marker, className] of markers) {
      if (attrs.includes(marker)) classes.push(className);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  const rowMarkers = [
    ['full_width_background', 'full-width-bg'],
    ['in_container', 'in-container'],
    ['full_width_content', 'full-width-content']
  ];
  const columnMarkers = [
    ['1/1', 'col-1-1'],
    ['1/2', 'col-1-2'],
    ['1/3', 'col-1-3'],
    ['2/3', 'col-2-3'],
    ['1/4', 'col-1-4'],
    ['3/4', 'col-3-4'],
    ['5/12', 'col-5-12'],
    ['7/12', 'col-7-12']
  ];

  let processed = excerptHtml
    .replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, decodeNumericEntity)
    .replace(/&([a-z]+);/gi, decodeNamedEntity)
    // Double quotes and double prime: “ ” „ ‟ ″  ->  "
    // („ is an opening quote, e.g. in German text; it must not become a comma.)
    .replace(/[\u201C\u201D\u201E\u201F\u2033]/g, '"')
    // Single quotes and prime: ‘ ’ ‚ ′  ->  '
    .replace(/[\u2018\u2019\u201A\u2032]/g, "'")
    // Single high-reversed-9 quote ‛  ->  `
    .replace(/\u201B/g, '`')
    // En dash  ->  hyphen (em dash, ellipsis, bullet and euro are kept as-is)
    .replace(/\u2013/g, '-');

  // The `\b` after vc_row / vc_column / nectar_icon_list keeps those rules
  // from swallowing the longer names that share the prefix (vc_row_inner,
  // vc_column_text, vc_column_inner, nectar_icon_list_item): `_` is a word
  // character, so there is no word boundary before it.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row\b([^\]]*)\]/gi, (match, attrs) => openDiv('vc-row', attrs, rowMarkers))
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with classes
    .replace(/\[vc_column\b([^\]]*)\]/gi, (match, attrs) => openDiv('vc-column', attrs, columnMarkers))
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text\b([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // nectar_cta - convert to link (expects link_text before url)
    .replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')

    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')

    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')

    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')

    // nectar_icon_list_item - convert to li (expects header before text)
    .replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')

    // nectar_btn - convert to link (expects text before url)
    .replace(/\[nectar_btn([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')

    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')

    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner([^\]]*)\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider([^\]]*)\]/gi, '<hr class="divider" />')

    // NOTE(review): the three placeholder texts below are themselves written
    // in square brackets, so the catch-all shortcode removal further down
    // deletes them and the emptied <div> is then dropped by the cleanup. In
    // effect these shortcodes vanish from the excerpt. Confirm whether the
    // placeholders were meant to survive.
    .replace(/\[vc_gallery([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Drop empty paragraphs and divs left behind by the removals
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Extract excerpt from content
|
||||
/**
 * Build a plain-text excerpt from HTML content.
 *
 * HTML tags are stripped; the remaining text is cut to `maxLength` characters
 * and suffixed with '...' when it was longer.
 *
 * @param {string} content - HTML content; null/undefined/'' yields ''.
 * @param {number} [maxLength=200] - Maximum number of characters kept before the '...' suffix.
 * @returns {string} The excerpt text.
 */
function generateExcerpt(content, maxLength = 200) {
  // Records without content would otherwise crash on `.replace`.
  if (!content) return '';

  const text = String(content).replace(/<[^>]*>/g, '');
  return text.length <= maxLength ? text : `${text.substring(0, maxLength)}...`;
}
|
||||
|
||||
// Process pages
|
||||
/**
 * Convert raw English and German page records into the processed page list.
 *
 * English records come first in the result, followed by German records.
 * `translationMapping.pages` is looked up by slug; a hit links the record to
 * the id stored under the other locale's key (`de` for English records, `en`
 * for German ones).
 *
 * @param {Array<Object>} pagesEN - Raw English pages.
 * @param {Array<Object>} pagesDE - Raw German pages.
 * @param {Object} translationMapping - Mapping object; a missing `pages` table is treated as empty.
 * @returns {Array<Object>} Processed page records.
 */
function processPages(pagesEN, pagesDE, translationMapping) {
  // A missing mapping (e.g. the mapping file failed to load) must not crash
  // the run; it only means no record gets a translation link.
  const pageTranslations = (translationMapping && translationMapping.pages) || {};

  // Own-property lookup so slugs such as "constructor" do not match inherited keys.
  const findTranslation = slug =>
    (Object.prototype.hasOwnProperty.call(pageTranslations, slug) ? pageTranslations[slug] : undefined);

  const toRecord = (page, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(page.slug);
    const titleHtml = page.titleHtml;
    const contentHtml = sanitizeHTML(page.contentHtml);

    return {
      id: page.id,
      translationKey: page.slug,
      locale,
      slug: page.slug,
      path: locale === 'en' ? `/${page.slug}` : `/de/${page.slug}`,
      title: (titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml,
      contentHtml,
      // The fallback excerpt is built from the sanitized content so that
      // shortcode text does not leak into it.
      excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(contentHtml),
      featuredImage: page.featuredImage,
      updatedAt: page.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...pagesEN.map(page => toRecord(page, 'en')),
    ...pagesDE.map(page => toRecord(page, 'de'))
  ];
}
|
||||
|
||||
// Process posts
|
||||
/**
 * Convert raw English and German post records into the processed post list.
 *
 * English records come first in the result, followed by German records.
 * `translationMapping.posts` is looked up by slug; a hit links the record to
 * the id stored under the other locale's key.
 *
 * @param {Array<Object>} postsEN - Raw English posts.
 * @param {Array<Object>} postsDE - Raw German posts.
 * @param {Object} translationMapping - Mapping object; a missing `posts` table is treated as empty.
 * @returns {Array<Object>} Processed post records.
 */
function processPosts(postsEN, postsDE, translationMapping) {
  // A missing mapping must not crash the run; it only means no translation links.
  const postTranslations = (translationMapping && translationMapping.posts) || {};

  // Own-property lookup so slugs such as "constructor" do not match inherited keys.
  const findTranslation = slug =>
    (Object.prototype.hasOwnProperty.call(postTranslations, slug) ? postTranslations[slug] : undefined);

  const toRecord = (post, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(post.slug);
    const titleHtml = post.titleHtml;
    const contentHtml = sanitizeHTML(post.contentHtml);

    return {
      id: post.id,
      translationKey: post.slug,
      locale,
      slug: post.slug,
      path: locale === 'en' ? `/blog/${post.slug}` : `/de/blog/${post.slug}`,
      title: (titleHtml || '').replace(/<[^>]*>/g, ''),
      titleHtml,
      contentHtml,
      // The fallback excerpt is built from the sanitized content so that
      // shortcode text does not leak into it.
      excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(contentHtml),
      featuredImage: post.featuredImage,
      datePublished: post.datePublished,
      updatedAt: post.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...postsEN.map(post => toRecord(post, 'en')),
    ...postsDE.map(post => toRecord(post, 'de'))
  ];
}
|
||||
|
||||
// Process products
|
||||
/**
 * Convert raw English and German product records into the processed list.
 *
 * English records come first in the result, followed by German records. Only
 * `descriptionHtml` is passed through sanitizeHTML; every other field is
 * copied as-is. `translationMapping.products` is looked up by slug; a hit
 * links the record to the id stored under the other locale's key.
 *
 * @param {Array<Object>} productsEN - Raw English products.
 * @param {Array<Object>} productsDE - Raw German products.
 * @param {Object} translationMapping - Mapping object; a missing `products` table is treated as empty.
 * @returns {Array<Object>} Processed product records.
 */
function processProducts(productsEN, productsDE, translationMapping) {
  // A missing mapping must not crash the run; it only means no translation links.
  const productTranslations = (translationMapping && translationMapping.products) || {};

  // Own-property lookup so slugs such as "constructor" do not match inherited keys.
  const findTranslation = slug =>
    (Object.prototype.hasOwnProperty.call(productTranslations, slug) ? productTranslations[slug] : undefined);

  const toRecord = (product, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(product.slug);

    return {
      id: product.id,
      translationKey: product.slug,
      locale,
      slug: product.slug,
      path: locale === 'en' ? `/product/${product.slug}` : `/de/product/${product.slug}`,
      name: product.name,
      shortDescriptionHtml: product.shortDescriptionHtml,
      descriptionHtml: sanitizeHTML(product.descriptionHtml),
      images: product.images,
      featuredImage: product.featuredImage,
      sku: product.sku,
      regularPrice: product.regularPrice,
      salePrice: product.salePrice,
      currency: product.currency,
      stockStatus: product.stockStatus,
      categories: product.categories,
      attributes: product.attributes,
      variations: product.variations,
      updatedAt: product.updatedAt,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...productsEN.map(product => toRecord(product, 'en')),
    ...productsDE.map(product => toRecord(product, 'de'))
  ];
}
|
||||
|
||||
// Process product categories
|
||||
/**
 * Convert raw English and German product-category records into the processed
 * list.
 *
 * English records come first in the result, followed by German records.
 * `translationMapping.productCategories` is looked up by slug; a hit links the
 * record to the id stored under the other locale's key.
 *
 * @param {Array<Object>} categoriesEN - Raw English categories.
 * @param {Array<Object>} categoriesDE - Raw German categories.
 * @param {Object} translationMapping - Mapping object; a missing `productCategories` table is treated as empty.
 * @returns {Array<Object>} Processed category records.
 */
function processProductCategories(categoriesEN, categoriesDE, translationMapping) {
  // A missing mapping must not crash the run; it only means no translation links.
  const categoryTranslations = (translationMapping && translationMapping.productCategories) || {};

  // Own-property lookup so slugs such as "constructor" do not match inherited keys.
  const findTranslation = slug =>
    (Object.prototype.hasOwnProperty.call(categoryTranslations, slug) ? categoryTranslations[slug] : undefined);

  const toRecord = (category, locale) => {
    const otherLocale = locale === 'en' ? 'de' : 'en';
    const match = findTranslation(category.slug);

    return {
      id: category.id,
      translationKey: category.slug,
      locale,
      slug: category.slug,
      name: category.name,
      path: locale === 'en'
        ? `/product-category/${category.slug}`
        : `/de/product-category/${category.slug}`,
      description: category.description,
      count: category.count,
      translation: match ? { locale: otherLocale, id: match[otherLocale] } : null
    };
  };

  return [
    ...categoriesEN.map(category => toRecord(category, 'en')),
    ...categoriesDE.map(category => toRecord(category, 'de'))
  ];
}
|
||||
|
||||
// Process media manifest
|
||||
/**
 * Build the processed media manifest from the raw media list.
 *
 * Each entry keeps id, filename, url, alt, width and height, gains a
 * `localPath` of `/media/<filename>`, and has `mime_type` renamed to
 * `mimeType`.
 *
 * @param {Array<Object>} media - Raw media items.
 * @returns {Array<Object>} Processed media entries, in the same order.
 */
function processMedia(media) {
  return media.map(({ id, filename, url, alt, width, height, mime_type: mimeType }) => ({
    id,
    filename,
    url,
    localPath: `/media/${filename}`,
    alt,
    width,
    height,
    mimeType
  }));
}
|
||||
|
||||
// Generate asset map for URL replacement
|
||||
/**
 * Generate the asset map used for URL replacement: original media URL ->
 * local `/media/<filename>` path.
 *
 * Items lacking a URL are skipped. Items lacking a filename are skipped too,
 * since they would otherwise map to the non-existent path `/media/undefined`.
 *
 * @param {Array<Object>} media - Raw media items.
 * @returns {Object<string, string>} Map of remote URL to local path.
 */
function generateAssetMap(media) {
  const map = {};
  for (const { url, filename } of media) {
    if (url && filename) {
      map[url] = `/media/${filename}`;
    }
  }
  return map;
}
|
||||
|
||||
// Main processing function
|
||||
/**
 * Main processing function.
 *
 * Loads the raw JSON files from the latest export directory, runs each
 * content type through its processor, writes the combined
 * `wordpress-data.json` plus one file per content type into PROCESSED_DIR,
 * and prints a summary to the console.
 *
 * A file that cannot be read or parsed is reported on stderr and replaced by
 * a fallback value so the remaining content can still be processed.
 */
function main() {
  const exportDir = getLatestExportDir();
  console.log('🔄 Processing WordPress Data for Next.js');
  console.log('========================================\n');

  // Load raw data. `fallback` is what a failed load yields: a list for the
  // content files, an object for the object-shaped files.
  const loadJSON = (file, fallback = []) => {
    try {
      return JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
    } catch (e) {
      console.error(`❌ Failed to load ${file}:`, e.message);
      return fallback;
    }
  };

  // The processors index into these four tables directly, so make sure each
  // one exists even when the mapping file is missing or incomplete.
  const translationMapping = {
    pages: {},
    posts: {},
    products: {},
    productCategories: {},
    ...loadJSON('translation-mapping-improved.json', {})
  };
  const pagesEN = loadJSON('pages.en.json');
  const pagesDE = loadJSON('pages.de.json');
  const postsEN = loadJSON('posts.en.json');
  const postsDE = loadJSON('posts.de.json');
  const productsEN = loadJSON('products.en.json');
  const productsDE = loadJSON('products.de.json');
  const categoriesEN = loadJSON('product-categories.en.json');
  const categoriesDE = loadJSON('product-categories.de.json');
  const media = loadJSON('media.json');
  const redirects = loadJSON('redirects.json');
  const siteInfo = loadJSON('site-info.json', {});

  console.log('📊 Processing content types...\n');

  // Process each content type
  const pages = processPages(pagesEN, pagesDE, translationMapping);
  const posts = processPosts(postsEN, postsDE, translationMapping);
  const products = processProducts(productsEN, productsDE, translationMapping);
  const categories = processProductCategories(categoriesEN, categoriesDE, translationMapping);
  const processedMedia = processMedia(media);
  const assetMap = generateAssetMap(media);

  // Create processed data structure
  const processedData = {
    site: {
      title: siteInfo.siteTitle,
      description: siteInfo.siteDescription,
      baseUrl: siteInfo.baseUrl,
      defaultLocale: siteInfo.defaultLocale || 'en',
      locales: ['en', 'de']
    },
    content: {
      pages,
      posts,
      products,
      categories
    },
    assets: {
      media: processedMedia,
      map: assetMap
    },
    redirects,
    exportDate: new Date().toISOString()
  };

  // Save processed data
  const outputPath = path.join(PROCESSED_DIR, 'wordpress-data.json');
  fs.writeFileSync(outputPath, JSON.stringify(processedData, null, 2));

  // Save individual files for easier access
  const individualFiles = [
    ['pages.json', pages],
    ['posts.json', posts],
    ['products.json', products],
    ['categories.json', categories],
    ['media.json', processedMedia],
    ['asset-map.json', assetMap]
  ];
  const individualPaths = individualFiles.map(([fileName, data]) => {
    const filePath = path.join(PROCESSED_DIR, fileName);
    fs.writeFileSync(filePath, JSON.stringify(data, null, 2));
    return filePath;
  });

  // Summary
  console.log('✅ Data Processing Complete\n');
  console.log('📦 Processed Content:');
  console.log(` Pages: ${pages.length} (with translations)`);
  console.log(` Posts: ${posts.length} (with translations)`);
  console.log(` Products: ${products.length} (with translations)`);
  console.log(` Categories: ${categories.length} (with translations)`);
  console.log(` Media: ${processedMedia.length} files`);
  console.log(` Redirects: ${redirects.length} rules\n`);

  console.log('📁 Output Files:');
  console.log(` ${outputPath}`);
  individualPaths.forEach((filePath, index) => {
    // The last entry carries the blank line that separates the sections.
    const suffix = index === individualPaths.length - 1 ? '\n' : '';
    console.log(` ${filePath}${suffix}`);
  });

  // Sample data
  if (pages.length > 0) {
    const [samplePage] = pages;
    console.log('📄 Sample Page:');
    console.log(` Title: ${samplePage.title}`);
    console.log(` Path: ${samplePage.path}`);
    console.log(` Locale: ${samplePage.locale}`);
    console.log(` Translation: ${samplePage.translation ? 'Yes' : 'No'}\n`);
  }

  if (posts.length > 0) {
    const [samplePost] = posts;
    console.log('📝 Sample Post:');
    console.log(` Title: ${samplePost.title}`);
    console.log(` Path: ${samplePost.path}`);
    console.log(` Locale: ${samplePost.locale}`);
    console.log(` Date: ${samplePost.datePublished}\n`);
  }

  console.log('💡 Next: Ready for Next.js project setup!');
}

// Run only when executed directly, not when loaded via require().
if (require.main === module) {
  main();
}
|
||||
174
scripts/process-data.js
Executable file → Normal file
174
scripts/process-data.js
Executable file → Normal file
@@ -47,6 +47,13 @@ function sanitizeHTML(html) {
|
||||
// Remove other shortcodes but keep text content
|
||||
sanitized = sanitized.replace(/\[vc_column_text.*?\]/gi, '<div class="vc-text">');
|
||||
sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');
|
||||
|
||||
// Handle Nectar shortcodes - remove them but keep any text content
|
||||
// [nectar_cta] blocks often contain text we want to preserve
|
||||
sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
|
||||
sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');
|
||||
|
||||
// Remove all remaining shortcodes
|
||||
sanitized = sanitized.replace(/\[.*?\]/g, '');
|
||||
|
||||
// Remove empty paragraphs and divs
|
||||
@@ -59,6 +66,165 @@ function sanitizeHTML(html) {
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
// Process excerpts specifically to handle shortcodes comprehensively
|
||||
/**
 * Convert an excerpt containing WPBakery / Nectar shortcodes into plain HTML.
 *
 * Steps, in order:
 *  1. Decode numeric (`&#8221;`, `&#x201D;`) and a fixed set of named HTML
 *     entities. Entities for `&`, `<` and `>` are left encoded so decoding
 *     cannot introduce new markup.
 *  2. Normalize typographic quotes, primes and the en dash to ASCII, because
 *     the shortcode attribute patterns below expect `"`-delimited values.
 *  3. Rewrite known shortcodes to HTML elements, including a vc_row /
 *     vc_column / vc_column_text tag that was cut off at the end of the
 *     excerpt (no closing bracket).
 *  4. Delete any remaining `[...]` shortcode, drop empty <p>/<div> elements
 *     and collapse whitespace.
 *
 * NOTE(review): a truncated trailing shortcode is replaced by an opening
 * <div> and swallows everything up to the end of the string (including e.g.
 * a closing </p>), so the result can contain unclosed elements.
 *
 * @param {string} excerptHtml - Raw excerpt HTML.
 * @returns {string} Processed excerpt; '' when the input is empty/null/undefined.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities decoded to the character they stand for.
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', '\u2018'],
    ['rsquo', '\u2019'],
    ['sbquo', '\u201A'],
    ['ldquo', '\u201C'],
    ['rdquo', '\u201D'],
    ['bdquo', '\u201E'],
    ['ndash', '\u2013'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC']
  ]);

  // Code points for & < > : these stay encoded.
  const markupCodePoints = new Set([0x26, 0x3c, 0x3e]);

  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF.
  const decodeNumericEntity = (entity, hex, dec) => {
    const codePoint = hex ? parseInt(hex, 16) : parseInt(dec, 10);
    if (codePoint === 0 || codePoint > 0x10ffff || markupCodePoints.has(codePoint)) {
      return entity;
    }
    return String.fromCodePoint(codePoint);
  };

  const decodeNamedEntity = (entity, name) =>
    (namedEntities.has(name) ? namedEntities.get(name) : entity);

  // Builds an opening <div> whose class list is `base` plus every class whose
  // marker substring occurs in the shortcode's attribute text.
  const openDiv = (base, attrs, markers) => {
    const classes = [base];
    for (const [marker, className] of markers) {
      if (attrs.includes(marker)) classes.push(className);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  const rowMarkers = [
    ['full_width_background', 'full-width-bg'],
    ['in_container', 'in-container'],
    ['full_width_content', 'full-width-content']
  ];
  const columnMarkers = [
    ['1/1', 'col-1-1'],
    ['1/2', 'col-1-2'],
    ['1/3', 'col-1-3'],
    ['2/3', 'col-2-3'],
    ['1/4', 'col-1-4'],
    ['3/4', 'col-3-4'],
    ['5/12', 'col-5-12'],
    ['7/12', 'col-7-12']
  ];
  const openRow = (match, attrs) => openDiv('vc-row', attrs, rowMarkers);
  const openColumn = (match, attrs) => openDiv('vc-column', attrs, columnMarkers);

  let processed = excerptHtml
    .replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, decodeNumericEntity)
    .replace(/&([a-z]+);/gi, decodeNamedEntity)
    // Double quotes and double prime: “ ” „ ‟ ″  ->  "
    // („ is an opening quote, e.g. in German text; it must not become a comma.)
    .replace(/[\u201C\u201D\u201E\u201F\u2033]/g, '"')
    // Single quotes and prime: ‘ ’ ‚ ′  ->  '
    .replace(/[\u2018\u2019\u201A\u2032]/g, "'")
    // Single high-reversed-9 quote ‛  ->  `
    .replace(/\u201B/g, '`')
    // En dash  ->  hyphen (em dash, ellipsis, bullet and euro are kept as-is)
    .replace(/\u2013/g, '-');

  // The `\b` after vc_row / vc_column / nectar_icon_list keeps those rules
  // from swallowing the longer names that share the prefix (vc_row_inner,
  // vc_column_text, vc_column_inner, nectar_icon_list_item): `_` is a word
  // character, so there is no word boundary before it.
  processed = processed
    // vc_row - convert to div with classes (complete, then truncated)
    .replace(/\[vc_row\b([^\]]*)\]/gi, openRow)
    .replace(/\[vc_row\b([^\]]*)$/gi, openRow)
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - convert to div with classes (complete, then truncated)
    .replace(/\[vc_column\b([^\]]*)\]/gi, openColumn)
    .replace(/\[vc_column\b([^\]]*)$/gi, openColumn)
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div (truncated, then complete)
    .replace(/\[vc_column_text\b([^\]]*)$/gi, '<div class="vc-column-text">')
    .replace(/\[vc_column_text\b([^\]]*)\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // nectar_cta - convert to link (expects link_text before url)
    .replace(/\[nectar_cta([^\]]*)link_text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-cta">$2</a>')

    // nectar_highlighted_text - convert to span
    .replace(/\[nectar_highlighted_text([^\]]*)\](.*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$2</span>')

    // nectar_responsive_text - convert to span
    .replace(/\[nectar_responsive_text([^\]]*)\](.*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$2</span>')

    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b([^\]]*)\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')

    // nectar_icon_list_item - convert to li (expects header before text)
    .replace(/\[nectar_icon_list_item([^\]]*)header="([^"]*)"(.*?)text="([^"]*)"(.*?)\]/gi,
      '<li><strong>$2</strong>: $4</li>')

    // nectar_btn - convert to link (expects text before url)
    .replace(/\[nectar_btn([^\]]*)text="([^"]*)"(.*?)url="([^"]*)"(.*?)\]/gi,
      '<a href="$4" class="nectar-btn">$2</a>')

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading([^\]]*)text_content="([^"]*)"(.*?)\]/gi,
      '<h2 class="split-line-heading">$2</h2>')

    // vc_row_inner - convert to div
    .replace(/\[vc_row_inner([^\]]*)\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')

    // vc_column_inner - convert to div
    .replace(/\[vc_column_inner([^\]]*)\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider([^\]]*)\]/gi, '<hr class="divider" />')

    // NOTE(review): the three placeholder texts below are themselves written
    // in square brackets, so the catch-all shortcode removal further down
    // deletes them and the emptied <div> is then dropped by the cleanup. In
    // effect these shortcodes vanish from the excerpt. Confirm whether the
    // placeholders were meant to survive.
    .replace(/\[vc_gallery([^\]]*)\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\](.*?)\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap([^\]]*)\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Drop empty paragraphs and divs left behind by the removals
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Extract excerpt from content
|
||||
function generateExcerpt(content, maxLength = 200) {
|
||||
const text = content.replace(/<[^>]*>/g, '');
|
||||
@@ -84,7 +250,7 @@ function processPages(pagesEN, pagesDE, translationMapping) {
|
||||
title: page.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: page.titleHtml,
|
||||
contentHtml: sanitizeHTML(page.contentHtml),
|
||||
excerptHtml: page.excerptHtml || generateExcerpt(page.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
|
||||
featuredImage: page.featuredImage,
|
||||
updatedAt: page.updatedAt,
|
||||
translation: deMatch ? { locale: 'de', id: deMatch.de } : null
|
||||
@@ -105,7 +271,7 @@ function processPages(pagesEN, pagesDE, translationMapping) {
|
||||
title: page.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: page.titleHtml,
|
||||
contentHtml: sanitizeHTML(page.contentHtml),
|
||||
excerptHtml: page.excerptHtml || generateExcerpt(page.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
|
||||
featuredImage: page.featuredImage,
|
||||
updatedAt: page.updatedAt,
|
||||
translation: enMatch ? { locale: 'en', id: enMatch.en } : null
|
||||
@@ -132,7 +298,7 @@ function processPosts(postsEN, postsDE, translationMapping) {
|
||||
title: post.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: post.titleHtml,
|
||||
contentHtml: sanitizeHTML(post.contentHtml),
|
||||
excerptHtml: post.excerptHtml || generateExcerpt(post.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),
|
||||
featuredImage: post.featuredImage,
|
||||
datePublished: post.datePublished,
|
||||
updatedAt: post.updatedAt,
|
||||
@@ -153,7 +319,7 @@ function processPosts(postsEN, postsDE, translationMapping) {
|
||||
title: post.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: post.titleHtml,
|
||||
contentHtml: sanitizeHTML(post.contentHtml),
|
||||
excerptHtml: post.excerptHtml || generateExcerpt(post.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),
|
||||
featuredImage: post.featuredImage,
|
||||
datePublished: post.datePublished,
|
||||
updatedAt: post.updatedAt,
|
||||
|
||||
132
scripts/test-entity-decoding.js
Normal file
132
scripts/test-entity-decoding.js
Normal file
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test script to verify HTML entity decoding works correctly
|
||||
|
||||
// Fixture: a sample excerpt wrapped in <p>…</p> that contains nested
// [vc_row] / [vc_column] / [vc_column_text] shortcodes around an <h1> and a <p>.
// The shortcode attribute values are delimited by typographic characters
// (” and ″) rather than ASCII double quotes, which is what the converter
// below has to normalize before it can recognise the attributes.
const testExcerpt = '<p>[vc_row type=”in_container” full_screen_row_position=”middle” column_margin=”default” column_direction=”default” column_direction_tablet=”default” column_direction_phone=”default” scene_position=”center” text_color=”dark” text_align=”left” row_border_radius=”none” row_border_radius_applies=”bg” overflow=”visible” overlay_strength=”0.3″ gradient_direction=”left_to_right” shape_divider_position=”bottom” bg_image_animation=”none”][vc_column column_padding=”no-extra-padding” column_padding_tablet=”inherit” column_padding_phone=”inherit” column_padding_position=”all” column_element_direction_desktop=”default” column_element_spacing=”default” desktop_text_alignment=”default” tablet_text_alignment=”default” phone_text_alignment=”default” background_color_opacity=”1″ background_hover_color_opacity=”1″ column_backdrop_filter=”none” column_shadow=”none” column_border_radius=”none” column_link_target=”_self” column_position=”default” gradient_direction=”left_to_right” overlay_strength=”0.3″ width=”1/1″ tablet_width_inherit=”default” animation_type=”default” bg_image_animation=”none” border_type=”simple” column_border_width=”none” column_border_style=”solid”][vc_column_text css=”” text_direction=”default”]\n<h1 class=\"p1\">Liefer- und Zahlungsbedingungen</h1>\n<p class=\"p1\">Stand November 2024</p>\n[/vc_column_text][/vc_column][/vc_row]</p>';
|
||||
|
||||
// Process excerpts specifically to handle shortcodes comprehensively
|
||||
/**
 * Convert a WordPress excerpt that still contains WPBakery shortcodes into
 * plain HTML.
 *
 * Steps, in order:
 *   1. Decode numeric entities (decimal and hex) and a small table of named
 *      entities in a single pass, so nothing is decoded twice. Named entities
 *      outside the table (e.g. `&amp;`, `&lt;`) are left untouched.
 *   2. Replace typographic quotes, primes and en dashes with ASCII so that
 *      shortcode attributes read `name="value"`.
 *   3. Turn vc_row / vc_column / vc_column_text into <div> wrappers.
 *   4. Drop every other complete `[...]` shortcode, remove empty <p>/<div>
 *      pairs and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt markup; any falsy value yields ''.
 * @returns {string} Single-line HTML with shortcodes converted or removed.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities this converter understands, mapped to their replacement.
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC'],
  ]);

  // Typographic characters that are folded to ASCII. Em dash, ellipsis,
  // bullet and euro sign are deliberately absent: they are kept as they are.
  const asciiEquivalents = new Map([
    ['\u2018', "'"], // left single quote
    ['\u2019', "'"], // right single quote
    ['\u201A', "'"], // single low-9 quote (a quote, not a comma)
    ['\u201B', '`'], // single high-reversed-9 quote
    ['\u201C', '"'], // left double quote
    ['\u201D', '"'], // right double quote
    ['\u201E', '"'], // double low-9 quote (a quote, not a comma)
    ['\u201F', '"'], // double high-reversed-9 quote
    ['\u2013', '-'], // en dash
    ['\u2032', "'"], // prime
    ['\u2033', '"'], // double prime
  ]);

  // Column widths that get a dedicated `col-a-b` class.
  const columnWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

  // Replacer for `&#123;`, `&#x1F;` and `&name;`.
  const decodeEntity = (entity, dec, hex, name) => {
    if (name !== undefined) {
      return namedEntities.has(name) ? namedEntities.get(name) : entity;
    }
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // fromCodePoint throws above U+10FFFF; leave such references as written.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  };

  const rowDiv = (shortcode, attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    return `<div class="${classes.join(' ')}">`;
  };

  const columnDiv = (shortcode, attrs) => {
    const classes = ['vc-column'];
    // Read the `width` attribute itself: a plain substring test for "1/1"
    // would also fire for widths such as "1/12" or "11/12".
    const width = /(?:^|\s)width=["']?(\d+\/\d+)/.exec(attrs);
    if (width !== null && columnWidths.includes(width[1])) {
      classes.push(`col-${width[1].replace('/', '-')}`);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  let processed = excerptHtml
    .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g, decodeEntity)
    .replace(/[\u2018-\u201F\u2013\u2032\u2033]/g, (ch) => asciiEquivalents.get(ch));

  // `\b` keeps each rule on its own tag: without it `[vc_column` would also
  // claim `[vc_column_text …]` and `[vc_row` would claim `[vc_row_inner …]`.
  processed = processed
    .replace(/\[vc_row\b([^\]]*)\]/gi, rowDiv)
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column\b([^\]]*)\]/gi, columnDiv)
    .replace(/\[\/vc_column\]/gi, '</div>')
    .replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up wrappers that ended up empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Run the converter on the sample excerpt and print the before/after pair.
console.log('=== HTML Entity Decoding Test ===\n');
console.log('Original excerpt:');
console.log(testExcerpt);
console.log('\n--- After processing ---\n');
const result = processExcerptShortcodes(testExcerpt);
console.log(result);

// Test specific entity decoding
console.log('\n=== Specific Entity Tests ===');
const entityTests = [
  { input: '”', expected: '"', name: 'Right double quote' },
  { input: '“', expected: '"', name: 'Left double quote' },
  { input: '–', expected: '-', name: 'En dash' },
  { input: '—', expected: '—', name: 'Em dash' },
  { input: '‘', expected: "'", name: 'Left single quote' },
  { input: '’', expected: "'", name: 'Right single quote' },
  { input: 'type=”in_container”', expected: 'type="in_container"', name: 'Full attribute' }
];

entityTests.forEach((test) => {
  // Exercise the real converter: a private copy of the replacement chain
  // would keep passing even if processExcerptShortcodes itself regressed.
  const processed = processExcerptShortcodes(test.input);
  const passed = processed === test.expected;
  // Make a failing case visible to whoever invoked the script.
  if (!passed) process.exitCode = 1;
  console.log(`${test.name}: ${passed ? '✅' : '❌'} "${test.input}" → "${processed}" (expected: "${test.expected}")`);
});
|
||||
125
scripts/test-final-function.js
Normal file
125
scripts/test-final-function.js
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test the final function with actual raw data
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Load the actual raw data
|
||||
// Load the actual raw data. The path is resolved against this script's own
// directory (like verify-output.js does) instead of the current working
// directory, so the script can be started from anywhere.
const rawDataPath = path.join(__dirname, '..', 'data', 'raw', '2025-12-27T21-26-12-521Z', 'pages.en.json');
const rawData = JSON.parse(fs.readFileSync(rawDataPath, 'utf8'));
// Fall back to an empty excerpt when the export has no first page or that
// page has no excerpt, rather than throwing on `.substring` below.
const testExcerpt = (rawData[0] && rawData[0].excerptHtml) || '';

console.log('=== Testing Final Function ===');
console.log('Raw excerpt (first 200 chars):');
console.log(testExcerpt.substring(0, 200));
console.log('');
|
||||
|
||||
// The function from process-data.js
|
||||
/**
 * Convert a WordPress excerpt that still contains WPBakery shortcodes into
 * plain HTML.
 *
 * Steps, in order:
 *   1. Decode numeric entities (decimal and hex) and a small table of named
 *      entities in a single pass, so nothing is decoded twice. Named entities
 *      outside the table (e.g. `&amp;`, `&lt;`) are left untouched.
 *   2. Replace typographic quotes, primes and en dashes with ASCII so that
 *      shortcode attributes read `name="value"`.
 *   3. Turn vc_row / vc_column / vc_column_text into <div> wrappers.
 *   4. Drop every other complete `[...]` shortcode, remove empty <p>/<div>
 *      pairs and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt markup; any falsy value yields ''.
 * @returns {string} Single-line HTML with shortcodes converted or removed.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities this converter understands, mapped to their replacement.
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC'],
  ]);

  // Typographic characters that are folded to ASCII. Em dash, ellipsis,
  // bullet and euro sign are deliberately absent: they are kept as they are.
  const asciiEquivalents = new Map([
    ['\u2018', "'"], // left single quote
    ['\u2019', "'"], // right single quote
    ['\u201A', "'"], // single low-9 quote (a quote, not a comma)
    ['\u201B', '`'], // single high-reversed-9 quote
    ['\u201C', '"'], // left double quote
    ['\u201D', '"'], // right double quote
    ['\u201E', '"'], // double low-9 quote (a quote, not a comma)
    ['\u201F', '"'], // double high-reversed-9 quote
    ['\u2013', '-'], // en dash
    ['\u2032', "'"], // prime
    ['\u2033', '"'], // double prime
  ]);

  // Column widths that get a dedicated `col-a-b` class.
  const columnWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

  // Replacer for `&#123;`, `&#x1F;` and `&name;`.
  const decodeEntity = (entity, dec, hex, name) => {
    if (name !== undefined) {
      return namedEntities.has(name) ? namedEntities.get(name) : entity;
    }
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // String.fromCharCode would truncate anything above U+FFFF, and
    // fromCodePoint throws above U+10FFFF; leave such references as written.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  };

  const rowDiv = (shortcode, attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    return `<div class="${classes.join(' ')}">`;
  };

  const columnDiv = (shortcode, attrs) => {
    const classes = ['vc-column'];
    // Read the `width` attribute itself: a plain substring test for "1/1"
    // would also fire for widths such as "1/12" or "11/12".
    const width = /(?:^|\s)width=["']?(\d+\/\d+)/.exec(attrs);
    if (width !== null && columnWidths.includes(width[1])) {
      classes.push(`col-${width[1].replace('/', '-')}`);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  let processed = excerptHtml
    .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g, decodeEntity)
    .replace(/[\u2018-\u201F\u2013\u2032\u2033]/g, (ch) => asciiEquivalents.get(ch));

  // `\b` keeps each rule on its own tag: without it `[vc_column` would also
  // claim `[vc_column_text …]` and `[vc_row` would claim `[vc_row_inner …]`.
  processed = processed
    .replace(/\[vc_row\b([^\]]*)\]/gi, rowDiv)
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column\b([^\]]*)\]/gi, columnDiv)
    .replace(/\[\/vc_column\]/gi, '</div>')
    .replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Clean up wrappers that ended up empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
const result = processExcerptShortcodes(testExcerpt);

console.log('After processing:');
console.log(result);
console.log('');

// Check for entities
// NOTE(review): the em dash is part of this character class, yet the
// converter in this file maps an em dash to itself, so an excerpt that
// legitimately contains one is reported as "has entities" — confirm intent.
const hasEntities = /[”“‘’–—]/.test(result);
const hasNumericEntities = /&#\d+;/.test(result);
const hasShortcodes = /\[vc_row|\[vc_column/.test(result);

// Match on the class-name prefix: converted rows/columns usually carry extra
// classes (`vc-row in-container`, `vc-column col-1-1`), which a check for
// `class="vc-row"` including the closing quote would never find.
const hasRowHtml = result.includes('<div class="vc-row');
const hasColumnHtml = result.includes('<div class="vc-column');

console.log('=== Verification ===');
console.log('Has Unicode entities:', hasEntities);
console.log('Has numeric entities:', hasNumericEntities);
console.log('Has shortcodes:', hasShortcodes);
console.log('Has proper HTML:', hasRowHtml || hasColumnHtml);
console.log('');

if (!hasEntities && !hasNumericEntities && !hasShortcodes && hasRowHtml) {
  console.log('✅ SUCCESS: Function works correctly!');
} else {
  console.log('❌ Issues found');
  // Surface the failure to the caller as well as on the console.
  process.exitCode = 1;
}
|
||||
151
scripts/test-function.js
Normal file
151
scripts/test-function.js
Normal file
@@ -0,0 +1,151 @@
|
||||
/**
 * Convert a WordPress excerpt that still contains WPBakery / Nectar
 * shortcodes into plain HTML.
 *
 * Steps, in order:
 *   1. Decode numeric entities (decimal and hex) and a small table of named
 *      entities in a single pass, so nothing is decoded twice. Named entities
 *      outside the table (e.g. `&amp;`, `&lt;`) are left untouched.
 *   2. Replace typographic quotes, primes and en dashes with ASCII so that
 *      shortcode attributes read `name="value"`.
 *   3. Convert the known shortcodes to HTML (see the rules below). A
 *      vc_column or vc_column_text that was cut off at the end of the excerpt
 *      still produces its opening <div>.
 *   4. Drop every other shortcode (complete, or truncated at the very end),
 *      remove empty <p>/<div> pairs and collapse whitespace.
 *
 * @param {string} excerptHtml - Raw excerpt markup; any falsy value yields ''.
 * @returns {string} Single-line HTML with shortcodes converted or removed.
 */
function processExcerptShortcodes(excerptHtml) {
  if (!excerptHtml) return '';

  // Named entities this converter understands, mapped to their replacement.
  const namedEntities = new Map([
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC'],
  ]);

  // Typographic characters that are folded to ASCII. Em dash, ellipsis,
  // bullet and euro sign are deliberately absent: they are kept as they are.
  const asciiEquivalents = new Map([
    ['\u2018', "'"], // left single quote
    ['\u2019', "'"], // right single quote
    ['\u201A', "'"], // single low-9 quote (a quote, not a comma)
    ['\u201B', '`'], // single high-reversed-9 quote
    ['\u201C', '"'], // left double quote
    ['\u201D', '"'], // right double quote
    ['\u201E', '"'], // double low-9 quote (a quote, not a comma)
    ['\u201F', '"'], // double high-reversed-9 quote
    ['\u2013', '-'], // en dash
    ['\u2032', "'"], // prime
    ['\u2033', '"'], // double prime
  ]);

  // Column widths that get a dedicated `col-a-b` class.
  const columnWidths = ['1/1', '1/2', '1/3', '2/3', '1/4', '3/4', '5/12', '7/12'];

  // Bracketed placeholder texts emitted below; the generic "remove remaining
  // shortcodes" pass must not mistake them for shortcodes and erase them.
  const placeholders = new Set(['[Gallery]', '[JavaScript]', '[Google Map]']);

  // Replacer for `&#123;`, `&#x1F;` and `&name;`.
  const decodeEntity = (entity, dec, hex, name) => {
    if (name !== undefined) {
      return namedEntities.has(name) ? namedEntities.get(name) : entity;
    }
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // String.fromCharCode would truncate anything above U+FFFF, and
    // fromCodePoint throws above U+10FFFF; leave such references as written.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  };

  // Value of `name="…"` inside a shortcode's attribute text, or null.
  // Anchoring on start/whitespace stops `text` from matching `link_text`.
  const attrValue = (attrs, name) => {
    const found = new RegExp(`(?:^|\\s)${name}="([^"]*)"`, 'i').exec(attrs);
    return found === null ? null : found[1];
  };

  const rowDiv = (shortcode, attrs) => {
    const classes = ['vc-row'];
    if (attrs.includes('full_width_background')) classes.push('full-width-bg');
    if (attrs.includes('in_container')) classes.push('in-container');
    if (attrs.includes('full_width_content')) classes.push('full-width-content');
    return `<div class="${classes.join(' ')}">`;
  };

  const columnDiv = (shortcode, attrs) => {
    const classes = ['vc-column'];
    // Read the `width` attribute itself: a plain substring test for "1/1"
    // would also fire for widths such as "1/12" or "11/12".
    const width = /(?:^|\s)width=["']?(\d+\/\d+)/.exec(attrs);
    if (width !== null && columnWidths.includes(width[1])) {
      classes.push(`col-${width[1].replace('/', '-')}`);
    }
    return `<div class="${classes.join(' ')}">`;
  };

  // Builds a replacer that renders a link from `<labelAttr>` and `url`.
  // When either attribute is missing the shortcode is returned unchanged and
  // is removed later together with all other unknown shortcodes.
  const linkShortcode = (cssClass, labelAttr) => (shortcode, attrs) => {
    const label = attrValue(attrs, labelAttr);
    const url = attrValue(attrs, 'url');
    if (label === null || url === null) return shortcode;
    return `<a href="${url}" class="${cssClass}">${label}</a>`;
  };

  let processed = excerptHtml
    .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g, decodeEntity)
    .replace(/[\u2018-\u201F\u2013\u2032\u2033]/g, (ch) => asciiEquivalents.get(ch));

  // `\b` keeps each rule on its own tag: without it `[vc_column` would also
  // claim `[vc_column_text …]`, `[vc_row` would claim `[vc_row_inner …]` and
  // `[nectar_icon_list` would claim `[nectar_icon_list_item …]`.
  processed = processed
    // vc_row - convert to div with classes
    .replace(/\[vc_row\b([^\]]*)\]/gi, rowDiv)
    .replace(/\[\/vc_row\]/gi, '</div>')

    // vc_column - complete, then truncated at the end of the excerpt
    .replace(/\[vc_column\b([^\]]*)\]/gi, columnDiv)
    .replace(/\[vc_column\b([^\]]*)$/gi, columnDiv)
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - complete, then truncated at the end of the excerpt
    .replace(/\[vc_column_text\b[^\]]*\]/gi, '<div class="vc-column-text">')
    .replace(/\[vc_column_text\b[^\]]*$/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // nectar_cta - convert to link
    .replace(/\[nectar_cta\b([^\]]*)\]/gi, linkShortcode('nectar-cta', 'link_text'))

    // nectar_highlighted_text / nectar_responsive_text - convert to span.
    // [\s\S] lets the wrapped content span several lines (whitespace is only
    // collapsed at the very end).
    .replace(/\[nectar_highlighted_text\b[^\]]*\]([\s\S]*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$1</span>')
    .replace(/\[nectar_responsive_text\b[^\]]*\]([\s\S]*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$1</span>')

    // nectar_icon_list - convert to ul
    .replace(/\[nectar_icon_list\b[^\]]*\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')

    // nectar_icon_list_item - convert to li
    .replace(/\[nectar_icon_list_item\b([^\]]*)\]/gi, (shortcode, attrs) => {
      const header = attrValue(attrs, 'header');
      const text = attrValue(attrs, 'text');
      if (header === null || text === null) return shortcode;
      return `<li><strong>${header}</strong>: ${text}</li>`;
    })

    // nectar_btn - convert to link
    .replace(/\[nectar_btn\b([^\]]*)\]/gi, linkShortcode('nectar-btn', 'text'))

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading\b([^\]]*)\]/gi, (shortcode, attrs) => {
      const heading = attrValue(attrs, 'text_content');
      return heading === null ? shortcode : `<h2 class="split-line-heading">${heading}</h2>`;
    })

    // vc_row_inner / vc_column_inner - convert to div
    .replace(/\[vc_row_inner\b[^\]]*\]/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')
    .replace(/\[vc_column_inner\b[^\]]*\]/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // divider - convert to hr
    .replace(/\[divider\b[^\]]*\]/gi, '<hr class="divider" />')

    // vc_gallery - placeholder
    .replace(/\[vc_gallery\b[^\]]*\]/gi, '<div class="vc-gallery">[Gallery]</div>')

    // vc_raw_js - replace the whole element, payload included, by a placeholder
    .replace(/\[vc_raw_js\b[^\]]*\][\s\S]*?\[\/vc_raw_js\]/gi, '<div class="vc-raw-js">[JavaScript]</div>')

    // nectar_gmap - placeholder
    .replace(/\[nectar_gmap\b[^\]]*\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  processed = processed
    // Remove any remaining shortcodes, but keep the placeholders written above
    .replace(/\[.*?\]/g, (shortcode) => (placeholders.has(shortcode) ? shortcode : ''))
    // ...and a shortcode that was cut off before its closing bracket
    .replace(/\[\/?[a-z_][^\]]*$/i, '');

  // Clean up wrappers that ended up empty
  processed = processed.replace(/<p[^>]*>\s*<\/p>/gi, '');
  processed = processed.replace(/<div[^>]*>\s*<\/div>/gi, '');

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Extract excerpt from content
|
||||
|
||||
|
||||
// Export the converter as this module's value so other scripts can load it with require().
module.exports = processExcerptShortcodes;
|
||||
68
scripts/test-numeric-entities.js
Normal file
68
scripts/test-numeric-entities.js
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Test numeric entity decoding
|
||||
|
||||
// Sample attribute with its closing quotes written as numeric entities
// (&#8221; is the right double quotation mark, U+201D).
const testString = 'type=&#8221;in_container&#8221;';

console.log('Original:', testString);

// Method 1: replace literal typographic characters.
// Has no effect on entity-encoded input, which is the point of the comparison.
let method1 = testString
  .replace(/\u201D/g, '"') // right double quote
  .replace(/\u201C/g, '"') // left double quote
  .replace(/\u2018/g, "'") // left single quote
  .replace(/\u2019/g, "'") // right single quote
  .replace(/\u2013/g, '-') // en dash
  .replace(/\u2014/g, '\u2014'); // em dash (kept)

console.log('Method 1 (Unicode chars):', method1);

// Method 2: replace a fixed list of numeric entities.
let method2 = testString
  .replace(/&#8221;/g, '"')
  .replace(/&#8220;/g, '"')
  .replace(/&#8216;/g, "'")
  .replace(/&#8217;/g, "'")
  .replace(/&#8211;/g, '-')
  .replace(/&#8212;/g, '\u2014')
  .replace(/&#8230;/g, '\u2026')
  .replace(/&#8243;/g, '"')
  .replace(/&#8242;/g, "'");

console.log('Method 2 (Numeric entities):', method2);

// Method 3: Using a function to decode all numeric entities

/**
 * Decode every numeric character reference (`&#8221;` or `&#x201D;`) in `str`.
 *
 * @param {string} str - Text that may contain numeric entities.
 * @returns {string} The text with each valid reference replaced by its
 *   character; references above U+10FFFF are left as written.
 */
function decodeHTMLEntities(str) {
  return str.replace(/&#(?:(\d+)|[xX]([0-9a-fA-F]+));/g, (entity, dec, hex) => {
    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF,
    // but throws beyond the Unicode range, hence the guard.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}

let method3 = decodeHTMLEntities(testString);
console.log('Method 3 (All numeric):', method3);

// Method 4: Combined approach

/**
 * Decode numeric entities, then fold typographic quotes, primes and en
 * dashes to ASCII. Em dashes and ellipses are kept.
 *
 * @param {string} str - Text that may contain entities and/or typographic characters.
 * @returns {string} The normalized text.
 */
function comprehensiveEntityDecode(str) {
  return decodeHTMLEntities(str)
    .replace(/[\u201C\u201D\u2033]/g, '"') // double quotes, double prime
    .replace(/[\u2018\u2019\u2032]/g, "'") // single quotes, prime
    .replace(/\u2013/g, '-'); // en dash
}

let method4 = comprehensiveEntityDecode(testString);
console.log('Method 4 (Combined):', method4);

// Test with the actual excerpt
const actualExcerpt = '<p>[vc_row type=&#8221;in_container&#8221; full_screen_row_position=&#8221;middle&#8221; column_margin=&#8221;default&#8221;]';
console.log('\n=== Real Test ===');
console.log('Original:', actualExcerpt);
console.log('Decoded:', comprehensiveEntityDecode(actualExcerpt));
|
||||
88
scripts/verify-output.js
Normal file
88
scripts/verify-output.js
Normal file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Load the processed data
|
||||
// Load the processed data
const processedDir = path.join(__dirname, '..', 'data', 'processed');
const pages = JSON.parse(fs.readFileSync(path.join(processedDir, 'pages.json'), 'utf8'));
const posts = JSON.parse(fs.readFileSync(path.join(processedDir, 'posts.json'), 'utf8'));

console.log('=== Verification of HTML Entity Decoding ===\n');

/**
 * Print title, path and an excerpt preview for one item, followed by a
 * verdict on whether the excerpt still contains typographic quotes/dashes or
 * numeric entities.
 *
 * @param {string} label - 'Page' or 'Post', used as the line prefix.
 * @param {{title: string, path: string, excerptHtml: string}} item - Processed entry.
 */
function reportExcerpt(label, item) {
  console.log(`\n${label}: ${item.title}`);
  console.log(`Path: ${item.path}`);
  console.log(`Excerpt preview: ${item.excerptHtml.substring(0, 150)}...`);

  // Check for problematic entities
  const hasEntities = /[”“‘’–—]/.test(item.excerptHtml);
  const hasNumericEntities = /&#\d+;/.test(item.excerptHtml);

  if (!hasEntities && !hasNumericEntities) {
    console.log('✅ Clean - no HTML entities found');
    return;
  }
  console.log('❌ Still contains HTML entities!');
  if (hasEntities) console.log(' - Found smart quotes/dashes');
  if (hasNumericEntities) console.log(' - Found numeric entities');
}

// Check pages
console.log('📄 PAGES:');
pages.slice(0, 3).forEach((page) => reportExcerpt('Page', page));

// Check posts
console.log('\n📝 POSTS:');
posts.slice(0, 3).forEach((post) => reportExcerpt('Post', post));

// Check for shortcode patterns
console.log('\n🔍 SHORTCODE CHECK:');
const allPages = [...pages, ...posts];
const shortcodesFound = allPages.filter((item) => /\[vc_row|\[vc_column|\[nectar/.test(item.excerptHtml));
console.log(`Pages/posts with shortcodes in excerpt: ${shortcodesFound.length}`);

if (shortcodesFound.length > 0) {
  console.log('\nSample of items with shortcodes:');
  shortcodesFound.slice(0, 2).forEach((item) => {
    console.log(`- ${item.title}: ${item.excerptHtml.substring(0, 100)}...`);
  });
} else {
  console.log('✅ No shortcodes found in excerpts');
}

// Check for proper HTML structure.
// All three probes match on the class-name prefix: a converted element may
// carry further classes after the first one (e.g. `vc-row in-container`), so
// probing for `class="vc-row"` with the closing quote would miss it.
console.log('\n📊 HTML STRUCTURE CHECK:');
const convertedMarkers = ['<div class="vc-row', '<div class="vc-column', '<div class="nectar'];
const withProperHTML = allPages.filter((item) =>
  convertedMarkers.some((marker) => item.excerptHtml.includes(marker))
);
console.log(`Items with converted shortcode HTML: ${withProperHTML.length}`);

console.log('\n=== Summary ===');
console.log(`Total items checked: ${allPages.length}`);
console.log(`Items with proper HTML structure: ${withProperHTML.length}`);
console.log(`Items with remaining shortcodes: ${shortcodesFound.length}`);

// Sample the actual content to show it works
console.log('\n=== SAMPLE PROCESSED EXCERPTS ===');
const sample = pages.find((p) => p.excerptHtml.includes('vc-row'));
if (sample) {
  console.log(`\nTitle: ${sample.title}`);
  console.log(`Excerpt: ${sample.excerptHtml}`);
}
|
||||
Reference in New Issue
Block a user