migration wip
This commit is contained in:
174
scripts/process-data.js
Executable file → Normal file
174
scripts/process-data.js
Executable file → Normal file
@@ -47,6 +47,13 @@ function sanitizeHTML(html) {
|
||||
// Remove other shortcodes but keep text content
|
||||
sanitized = sanitized.replace(/\[vc_column_text.*?\]/gi, '<div class="vc-text">');
|
||||
sanitized = sanitized.replace(/\[\/vc_column_text\]/gi, '</div>');
|
||||
|
||||
// Handle Nectar shortcodes - remove them but keep any text content
|
||||
// [nectar_cta] blocks often contain text we want to preserve
|
||||
sanitized = sanitized.replace(/\[nectar_cta.*?\]([\s\S]*?)\[\/nectar_cta\]/gi, '$1');
|
||||
sanitized = sanitized.replace(/\[nectar.*?\]/gi, '');
|
||||
|
||||
// Remove all remaining shortcodes
|
||||
sanitized = sanitized.replace(/\[.*?\]/g, '');
|
||||
|
||||
// Remove empty paragraphs and divs
|
||||
@@ -59,6 +66,165 @@ function sanitizeHTML(html) {
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
// Process excerpts specifically to handle shortcodes comprehensively

// Code points for & < > stay entity-encoded: the processed excerpt is still
// HTML, so decoding them could turn escaped text into live markup.
const EXCERPT_HTML_SIGNIFICANT = new Set([0x26, 0x3c, 0x3e]);

// Named entities decoded by the excerpt processor, already mapped to the
// normalised character used in the output.
const EXCERPT_NAMED_ENTITIES = new Map([
  ['quot', '"'],
  ['apos', "'"],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['ndash', '-'],
  ['mdash', '\u2014'], // em dash is kept as a real em dash
  ['hellip', '\u2026'], // ellipsis is kept
  ['bull', '\u2022'], // bullet is kept
  ['euro', '\u20AC'], // euro sign is kept
]);

// Typographic characters normalised to ASCII so that shortcode attributes such
// as width=”1/2″ become width="1/2" and can be matched by the rules below.
const EXCERPT_TYPOGRAPHIC_ASCII = new Map([
  ['\u2013', '-'], // en dash
  ['\u2018', "'"], // left single quote
  ['\u2019', "'"], // right single quote
  ['\u201A', ','], // single low quote
  ['\u201B', '`'], // single high reversed quote
  ['\u201C', '"'], // left double quote
  ['\u201D', '"'], // right double quote
  ['\u201E', '"'], // low double quote (opening quote in German text)
  ['\u201F', '"'], // high reversed double quote
  ['\u2032', "'"], // prime (feet)
  ['\u2033', '"'], // double prime (inches)
]);

// [attribute substring, CSS class] pairs for vc_row.
const EXCERPT_ROW_CLASSES = [
  ['full_width_background', 'full-width-bg'],
  ['in_container', 'in-container'],
  ['full_width_content', 'full-width-content'],
];

// [attribute substring, CSS class] pairs for vc_column.
const EXCERPT_COLUMN_CLASSES = [
  ['1/1', 'col-1-1'],
  ['1/2', 'col-1-2'],
  ['1/3', 'col-1-3'],
  ['2/3', 'col-2-3'],
  ['1/4', 'col-1-4'],
  ['3/4', 'col-3-4'],
  ['5/12', 'col-5-12'],
  ['7/12', 'col-7-12'],
];

// Decode one numeric character reference; returns the entity unchanged when
// the code point is invalid or must stay escaped in HTML.
function decodeExcerptCodePoint(entity, codePoint) {
  if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
    return entity;
  }
  if (EXCERPT_HTML_SIGNIFICANT.has(codePoint)) {
    return entity;
  }
  // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF.
  return String.fromCodePoint(codePoint);
}

// Decode decimal, hexadecimal and the supported named entities, then
// normalise typographic quotes/dashes to their ASCII equivalents.
function decodeExcerptEntities(html) {
  return html
    .replace(/&#(\d+);/g, (entity, dec) =>
      decodeExcerptCodePoint(entity, Number.parseInt(dec, 10)))
    .replace(/&#x([0-9a-f]+);/gi, (entity, hex) =>
      decodeExcerptCodePoint(entity, Number.parseInt(hex, 16)))
    .replace(/&([a-z]+);/g, (entity, name) =>
      (EXCERPT_NAMED_ENTITIES.has(name) ? EXCERPT_NAMED_ENTITIES.get(name) : entity))
    .replace(/[\u2013\u2018-\u201F\u2032\u2033]/g, (ch) => EXCERPT_TYPOGRAPHIC_ASCII.get(ch));
}

// Build the opening <div> for a container shortcode, adding one class for
// every attribute substring from classTable that occurs in attrs.
function excerptContainerDiv(baseClass, attrs, classTable) {
  const classes = [baseClass];
  for (const [needle, className] of classTable) {
    if (attrs.includes(needle)) classes.push(className);
  }
  return `<div class="${classes.join(' ')}">`;
}

/**
 * Convert the shortcodes found in an excerpt into plain HTML.
 *
 * Steps: decode entities, translate the known WPBakery / Nectar shortcodes,
 * strip every remaining shortcode, drop empty <p>/<div> elements and collapse
 * whitespace. Container shortcodes cut off at the very end of the excerpt
 * (no closing bracket) are still converted; any other unterminated shortcode
 * at the end is removed.
 *
 * @param {string} excerptHtml - Raw excerpt HTML, possibly entity-encoded.
 * @returns {string} Processed HTML; '' for empty or non-string input.
 */
function processExcerptShortcodes(excerptHtml) {
  if (typeof excerptHtml !== 'string' || excerptHtml === '') return '';

  // Every rule uses \b after the shortcode name so that e.g. the vc_row rule
  // cannot swallow vc_row_inner, and [^\]]* instead of .* so that a match can
  // never extend past the shortcode's own closing bracket.
  let processed = decodeExcerptEntities(excerptHtml)
    // vc_row / vc_column - div with layout classes ("]|$" also accepts a
    // shortcode truncated at the end of the excerpt)
    .replace(/\[vc_row\b([^\]]*)(?:\]|$)/gi, (match, attrs) =>
      excerptContainerDiv('vc-row', attrs, EXCERPT_ROW_CLASSES))
    .replace(/\[\/vc_row\]/gi, '</div>')
    .replace(/\[vc_column\b([^\]]*)(?:\]|$)/gi, (match, attrs) =>
      excerptContainerDiv('vc-column', attrs, EXCERPT_COLUMN_CLASSES))
    .replace(/\[\/vc_column\]/gi, '</div>')

    // vc_column_text - convert to div
    .replace(/\[vc_column_text\b[^\]]*(?:\]|$)/gi, '<div class="vc-column-text">')
    .replace(/\[\/vc_column_text\]/gi, '</div>')

    // vc_row_inner / vc_column_inner - convert to div
    .replace(/\[vc_row_inner\b[^\]]*(?:\]|$)/gi, '<div class="vc-row-inner">')
    .replace(/\[\/vc_row_inner\]/gi, '</div>')
    .replace(/\[vc_column_inner\b[^\]]*(?:\]|$)/gi, '<div class="vc-column-inner">')
    .replace(/\[\/vc_column_inner\]/gi, '</div>')

    // nectar_cta / nectar_btn - convert to link
    .replace(/\[nectar_cta\b[^\]]*?link_text="([^"]*)"[^\]]*?url="([^"]*)"[^\]]*\]/gi,
      '<a href="$2" class="nectar-cta">$1</a>')
    .replace(/\[nectar_btn\b[^\]]*?text="([^"]*)"[^\]]*?url="([^"]*)"[^\]]*\]/gi,
      '<a href="$2" class="nectar-btn">$1</a>')

    // nectar_highlighted_text / nectar_responsive_text - convert to span
    // ([\s\S] so the wrapped text may contain line breaks)
    .replace(/\[nectar_highlighted_text\b[^\]]*\]([\s\S]*?)\[\/nectar_highlighted_text\]/gi,
      '<span class="nectar-highlighted">$1</span>')
    .replace(/\[nectar_responsive_text\b[^\]]*\]([\s\S]*?)\[\/nectar_responsive_text\]/gi,
      '<span class="nectar-responsive">$1</span>')

    // nectar_icon_list - convert to ul, its items to li
    .replace(/\[nectar_icon_list\b[^\]]*\]/gi, '<ul class="nectar-icon-list">')
    .replace(/\[\/nectar_icon_list\]/gi, '</ul>')
    .replace(/\[nectar_icon_list_item\b[^\]]*?header="([^"]*)"[^\]]*?text="([^"]*)"[^\]]*\]/gi,
      '<li><strong>$1</strong>: $2</li>')

    // split_line_heading - convert to heading
    .replace(/\[split_line_heading\b[^\]]*?text_content="([^"]*)"[^\]]*\]/gi,
      '<h2 class="split-line-heading">$1</h2>')

    // divider - convert to hr
    .replace(/\[divider\b[^\]]*\]/gi, '<hr class="divider" />')

    // vc_gallery / vc_raw_js / nectar_gmap - placeholder divs.
    // NOTE(review): the bracketed labels are removed by the catch-all rule
    // below and the emptied divs by the cleanup, so these shortcodes (and the
    // vc_raw_js payload) currently vanish from the excerpt. Kept as-is;
    // confirm whether visible placeholders are actually wanted.
    .replace(/\[vc_gallery\b[^\]]*\]/gi, '<div class="vc-gallery">[Gallery]</div>')
    .replace(/\[vc_raw_js\b[^\]]*\][\s\S]*?\[\/vc_raw_js\]/gi,
      '<div class="vc-raw-js">[JavaScript]</div>')
    .replace(/\[nectar_gmap\b[^\]]*\]/gi, '<div class="nectar-gmap">[Google Map]</div>');

  // Remove any remaining shortcodes
  processed = processed.replace(/\[.*?\]/g, '');

  // Remove an unterminated shortcode left dangling by excerpt truncation
  processed = processed.replace(/\[\/?[a-z][\w-]*(?:\s[^\]]*)?$/i, '');

  // Remove empty paragraphs and divs; repeat because removing an inner empty
  // element can leave its wrapper empty as well.
  let before;
  do {
    before = processed;
    processed = processed
      .replace(/<p[^>]*>\s*<\/p>/gi, '')
      .replace(/<div[^>]*>\s*<\/div>/gi, '');
  } while (processed !== before);

  // Normalize whitespace
  return processed.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
// Extract excerpt from content
|
||||
function generateExcerpt(content, maxLength = 200) {
|
||||
const text = content.replace(/<[^>]*>/g, '');
|
||||
@@ -84,7 +250,7 @@ function processPages(pagesEN, pagesDE, translationMapping) {
|
||||
title: page.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: page.titleHtml,
|
||||
contentHtml: sanitizeHTML(page.contentHtml),
|
||||
excerptHtml: page.excerptHtml || generateExcerpt(page.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
|
||||
featuredImage: page.featuredImage,
|
||||
updatedAt: page.updatedAt,
|
||||
translation: deMatch ? { locale: 'de', id: deMatch.de } : null
|
||||
@@ -105,7 +271,7 @@ function processPages(pagesEN, pagesDE, translationMapping) {
|
||||
title: page.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: page.titleHtml,
|
||||
contentHtml: sanitizeHTML(page.contentHtml),
|
||||
excerptHtml: page.excerptHtml || generateExcerpt(page.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
|
||||
featuredImage: page.featuredImage,
|
||||
updatedAt: page.updatedAt,
|
||||
translation: enMatch ? { locale: 'en', id: enMatch.en } : null
|
||||
@@ -132,7 +298,7 @@ function processPosts(postsEN, postsDE, translationMapping) {
|
||||
title: post.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: post.titleHtml,
|
||||
contentHtml: sanitizeHTML(post.contentHtml),
|
||||
excerptHtml: post.excerptHtml || generateExcerpt(post.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),
|
||||
featuredImage: post.featuredImage,
|
||||
datePublished: post.datePublished,
|
||||
updatedAt: post.updatedAt,
|
||||
@@ -153,7 +319,7 @@ function processPosts(postsEN, postsDE, translationMapping) {
|
||||
title: post.titleHtml.replace(/<[^>]*>/g, ''),
|
||||
titleHtml: post.titleHtml,
|
||||
contentHtml: sanitizeHTML(post.contentHtml),
|
||||
excerptHtml: post.excerptHtml || generateExcerpt(post.contentHtml),
|
||||
excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),
|
||||
featuredImage: post.featuredImage,
|
||||
datePublished: post.datePublished,
|
||||
updatedAt: post.updatedAt,
|
||||
|
||||
Reference in New Issue
Block a user