migration wip

This commit is contained in:
2025-12-30 12:10:13 +01:00
parent 89dbf8af87
commit 65a7e9f24a
203 changed files with 192475 additions and 1562 deletions

View File

@@ -0,0 +1,75 @@
const pages = require('../data/processed/pages.json');
const cheerio = require('cheerio');
// Detailed structural analysis of the home page (slug: corporate-3-landing-2).
// For every `.vc-row` the script logs the number of direct `.vc-column`
// children, the row classes, which heading levels / paragraphs / images /
// nested rows are present, and a short per-column summary.
const homePage = pages.find(p => p.slug === 'corporate-3-landing-2');
console.log('=== HOME PAGE (corporate-3-landing-2) DETAILED ANALYSIS ===\n');

if (!homePage) {
  // `find` returns undefined when the slug is absent; report that instead of
  // crashing with a TypeError on `.contentHtml`.
  console.log('Page not found: corporate-3-landing-2\n');
} else {
  // Default to an empty document when the page has no content HTML
  const $ = cheerio.load(homePage.contentHtml || '');

  // Analyze each vc-row
  $('.vc-row').each((i, row) => {
    const $row = $(row);
    const $cols = $row.find('> .vc-column');
    const colCount = $cols.length;

    console.log(`Row ${i + 1}:`);
    console.log(` Columns: ${colCount}`);
    console.log(` Classes: ${$row.attr('class')}`);

    // Headings h1-h4 are reported with their text truncated to 50 characters
    for (const tag of ['h1', 'h2', 'h3', 'h4']) {
      const $headings = $row.find(tag);
      if ($headings.length > 0) {
        console.log(` Has ${tag.toUpperCase()}: ${$headings.text().substring(0, 50)}...`);
      }
    }

    // h6 text is printed in full
    const $h6 = $row.find('h6');
    if ($h6.length > 0) console.log(` Has H6: ${$h6.text()}`);

    const paragraphCount = $row.find('p').length;
    if (paragraphCount > 0) console.log(` Has P: ${paragraphCount} paragraphs`);

    const hasImg = $row.find('img').length > 0;
    if (hasImg) console.log(` Has Images: ${hasImg}`);

    const nestedRowCount = $row.find('.vc-row').length;
    if (nestedRowCount) console.log(` Has Nested Rows: ${nestedRowCount}`);

    // Check column structure
    $cols.each((j, col) => {
      const $col = $(col);
      const colClasses = $col.attr('class') || '';
      const colH3 = $col.find('h3').text().trim();
      const colH4 = $col.find('h4').text().trim();
      const colH6 = $col.find('h6').text().trim();
      const colP = $col.find('p').text().trim().substring(0, 30);

      console.log(` Column ${j + 1}: ${colClasses}`);
      if (colH3) console.log(` H3: ${colH3}`);
      if (colH4) console.log(` H4: ${colH4}`);
      if (colH6) console.log(` H6: ${colH6}`);
      if (colP) console.log(` P: ${colP}...`);
    });

    console.log('');
  });
}

// Also check team page for testimonials: list every row whose text contains
// a German-style quotation mark or the words "Expertise" / "Experience".
console.log('\n=== TEAM PAGE TESTIMONIALS ANALYSIS ===\n');
const teamPage = pages.find(p => p.slug === 'team');

if (!teamPage) {
  // Same guard as for the home page: a missing slug must not crash the script
  console.log('Page not found: team\n');
} else {
  const $team = cheerio.load(teamPage.contentHtml || '');

  $team('.vc-row').each((i, row) => {
    const text = $team(row).text();
    const hasQuotes = text.includes('„') || text.includes('“');

    if (hasQuotes || text.includes('Expertise') || text.includes('Experience')) {
      console.log(`Row ${i + 1}:`);
      console.log(` Content: ${text.substring(0, 100)}...`);
      console.log(` Has quotes: ${hasQuotes}`);
      console.log('');
    }
  });
}

View File

@@ -0,0 +1,122 @@
const pages = require('../data/processed/pages.json');
const cheerio = require('cheerio');
// Analyze each page: load its content HTML and record, per page, a set of
// counts/flags describing which layout patterns occur in the markup.
const analysis = pages.map((page) => {
  const html = page.contentHtml || '';
  const $ = cheerio.load(html);

  // Number of elements matching `selector` for which `test(element)` is true
  const countMatching = (selector, test) => $(selector).filter((_, el) => test(el)).length;
  const textOf = (el) => $(el).text();
  const attrOf = (el, name) => $(el).attr(name) || '';
  const includesAny = (haystack, needles) => needles.some((needle) => haystack.includes(needle));

  // Key order matters: the reports below print entries in insertion order
  const patterns = {
    vcRows: $('.vc-row').length,
    vcColumns: $('.vc-column').length,
    hasHero: $('.vc-row h1, .vc-row h2').length > 0,
    hasCards: $('.vc-row .vc-column h3, .vc-row .vc-column h4').length >= 2,
    hasNumberedFeatures: $('.vc-row h6').length > 0,
    hasForm: $('.frm_forms').length > 0 || $('form').length > 0,
    hasGrid: $('.vc-row > .vc-column').length >= 2,
    hasImages: $('img').length,
    hasLinks: $('a').length,
    hasTables: $('table').length,
    hasLists: $('ul, ol').length,
    hasTestimonials: countMatching('.vc-row', (el) =>
      includesAny(textOf(el), ['„', '“', 'Meet the team'])),
    hasAnimations: countMatching('.vc-row', (el) =>
      includesAny(attrOf(el, 'class'), ['nectar', 'animation', 'fade'])),
    hasSpecialColumns: countMatching('.vc-row > .vc-column', (el) =>
      includesAny(attrOf(el, 'class'), ['vc_col-md-', 'vc_col-lg-'])),
    hasNestedRows: $('.vc-row .vc-row').length,
    hasBackgrounds: countMatching('.vc-row', (el) =>
      attrOf(el, 'style').includes('background') ||
      includesAny(attrOf(el, 'class'), ['bg-', 'full-width'])),
    hasQuotes: countMatching('blockquote, h2', (el) =>
      includesAny(textOf(el), ['„', '“', 'Expertise', 'Experience'])),
    hasPDFs: $('a[href$=".pdf"]').length,
    hasContactInfo: countMatching('.vc-row', (el) =>
      includesAny(textOf(el), ['@', 'Raiffeisenstraße', 'KLZ Cables']))
  };

  return {
    slug: page.slug,
    locale: page.locale,
    translationKey: page.translationKey,
    title: page.title,
    patterns,
    // First 200 characters of the markup, kept as a preview
    rawHtml: html.substring(0, 200) + '...'
  };
});
// Print detailed analysis: one section per page listing every pattern whose
// value is greater than zero (boolean `true` counts as greater than zero).
console.log('=== DETAILED PAGE ANALYSIS ===\n');
for (const { locale, slug, title, translationKey, patterns } of analysis) {
  console.log(`📄 ${locale.toUpperCase()}: ${slug} (${title})`);
  console.log(` Translation Key: ${translationKey}`);
  console.log(' Patterns Found:');

  for (const [name, count] of Object.entries(patterns)) {
    if (count > 0) {
      console.log(` - ${name}: ${count}`);
    }
  }

  console.log('');
}
// Summary by translation key: group the analysed pages by `translationKey`
// and print, for each key in sorted order, one line per locale with the
// patterns that were found.
console.log('=== SUMMARY BY TRANSLATION KEY ===\n');
const byKey = {};
for (const entry of analysis) {
  const group = byKey[entry.translationKey] || (byKey[entry.translationKey] = []);
  group.push(entry);
}

for (const key of Object.keys(byKey).sort()) {
  console.log(`${key}:`);

  for (const member of byKey[key]) {
    const summary = Object.entries(member.patterns)
      .filter(([, count]) => count > 0)
      .map(([name, count]) => `${name}=${count}`)
      .join(', ');
    console.log(` ${member.locale}: ${member.slug} [${summary}]`);
  }

  console.log('');
}
// Priority analysis: for a fixed list of important slugs per category, print
// the patterns of the first analysed page with that slug. Slugs with no
// analysed page are skipped without output.
console.log('=== PRIORITY PAGES ANALYSIS ===\n');
const priority = {
  'Home': ['corporate-3-landing-2', 'start'],
  'Contact': ['contact', 'kontakt'],
  'About/Legal/Privacy': ['legal-notice', 'impressum', 'privacy-policy', 'datenschutz', 'terms', 'agbs'],
  'Team': ['team'],
  'Products': ['products', 'produkte'],
  'Blog': ['blog'],
  'Thanks': ['thanks', 'danke']
};

for (const [category, slugs] of Object.entries(priority)) {
  console.log(`${category}:`);

  for (const slug of slugs) {
    const match = analysis.find((entry) => entry.slug === slug);
    if (!match) continue;

    const summary = Object.entries(match.patterns)
      .filter(([, count]) => count > 0)
      .map(([name, count]) => `${name}=${count}`)
      .join(', ');
    console.log(` ${match.locale}/${match.slug}: ${summary || 'No patterns'}`);
  }

  console.log('');
}

View File

@@ -27,6 +27,46 @@ function getLatestExportDir() {
return path.join(RAW_DIR, dirs[0]);
}
/**
 * Decode HTML entities in text and normalise typographic punctuation.
 *
 * Decoding happens in a single pass, so the output of one replacement is
 * never decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * - Numeric entities (`&#8220;`, `&#x201C;`) become the corresponding
 *   character, including code points above U+FFFF.
 * - The named entities listed in NAMED_ENTITIES are decoded; unknown names
 *   and out-of-range numeric entities are left untouched.
 * - Curly single/double quotes become straight ASCII quotes and the en dash
 *   becomes a hyphen, whether they came from an entity or were literal.
 *   Em dash, ellipsis, bullet and euro sign are kept as-is.
 *
 * All special characters are written as `\u` escapes so the function does
 * not depend on how this source file is encoded or rendered.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} Decoded text; an empty string for falsy input.
 */
function decodeHTMLEntities(text) {
  if (!text) return '';

  const NAMED_ENTITIES = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['lsquo', "'"],
    ['rsquo', "'"],
    ['ldquo', '"'],
    ['rdquo', '"'],
    ['ndash', '-'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['bull', '\u2022'],
    ['euro', '\u20AC']
  ]);

  // Literal typographic characters that are flattened to ASCII
  const TYPOGRAPHIC = new Map([
    ['\u2018', "'"], // left single quote
    ['\u2019', "'"], // right single quote
    ['\u201C', '"'], // left double quote
    ['\u201D', '"'], // right double quote
    ['\u2013', '-'] // en dash
  ]);

  // String.fromCodePoint throws a RangeError outside 0..0x10FFFF, so invalid
  // references are returned unchanged instead.
  const fromCodePoint = (codePoint, original) =>
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : original;

  return text
    .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g, (match, dec, hex, name) => {
      if (dec !== undefined) return fromCodePoint(Number.parseInt(dec, 10), match);
      if (hex !== undefined) return fromCodePoint(Number.parseInt(hex, 16), match);
      return NAMED_ENTITIES.has(name) ? NAMED_ENTITIES.get(name) : match;
    })
    .replace(/[\u2018\u2019\u201C\u201D\u2013]/g, (ch) => TYPOGRAPHIC.get(ch));
}
// HTML sanitization - preserve content but clean dangerous elements
function sanitizeHTML(html) {
if (!html) return '';
@@ -73,40 +113,7 @@ function processExcerptShortcodes(excerptHtml) {
let processed = excerptHtml;
// First, decode HTML entities to regular characters
// Handle both numeric entities (e.g. &#8221;) and named entities (e.g. &quot;)
processed = processed
// Decode numeric HTML entities first
.replace(/&#(\d+);/g, (match, dec) => String.fromCharCode(dec))
// Then handle any remaining Unicode characters
.replace(/”/g, '"') // ” - Right double quote
.replace(/“/g, '"') // “ - Left double quote
.replace(/„/g, ',') // „ - Low double quote
.replace(/‟/g, '"') // ‟ - High double quote
.replace(//g, "'") // - Left single quote
.replace(//g, "'") // - Right single quote
.replace(//g, '-') // - En dash
.replace(/—/g, '—') // — - Em dash
.replace(/…/g, '…') // … - Ellipsis
.replace(/″/g, '"') // ″ - Inches/Prime
.replace(//g, "'") // - Feet/Prime
.replace(//g, ',') // - Single low quote
.replace(//g, '`') // - Single high reversed quote
.replace(/•/g, '•') // • - Bullet
.replace(/€/g, '€') // € - Euro
// Named HTML entities
.replace(/"/g, '"')
.replace(/'/g, "'")
.replace(//g, "'")
.replace(//g, "'")
.replace(/“/g, '"')
.replace(/”/g, '"')
.replace(//g, '-')
.replace(/—/g, '—')
.replace(/…/g, '…')
.replace(/•/g, '•')
.replace(/€/g, '€');
processed = decodeHTMLEntities(processed);
// Process WPBakery shortcodes with HTML entities
processed = processed
@@ -241,13 +248,17 @@ function processPages(pagesEN, pagesDE, translationMapping) {
const translationKey = page.slug;
const deMatch = translationMapping.pages[translationKey];
// Extract title and decode HTML entities
const rawTitle = page.titleHtml.replace(/<[^>]*>/g, '');
const decodedTitle = decodeHTMLEntities(rawTitle);
processed.push({
id: page.id,
translationKey: translationKey,
locale: 'en',
slug: page.slug,
path: `/${page.slug}`,
title: page.titleHtml.replace(/<[^>]*>/g, ''),
title: decodedTitle,
titleHtml: page.titleHtml,
contentHtml: sanitizeHTML(page.contentHtml),
excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
@@ -262,13 +273,17 @@ function processPages(pagesEN, pagesDE, translationMapping) {
const translationKey = page.slug;
const enMatch = translationMapping.pages[translationKey];
// Extract title and decode HTML entities
const rawTitle = page.titleHtml.replace(/<[^>]*>/g, '');
const decodedTitle = decodeHTMLEntities(rawTitle);
processed.push({
id: page.id,
translationKey: translationKey,
locale: 'de',
slug: page.slug,
path: `/de/${page.slug}`,
title: page.titleHtml.replace(/<[^>]*>/g, ''),
title: decodedTitle,
titleHtml: page.titleHtml,
contentHtml: sanitizeHTML(page.contentHtml),
excerptHtml: processExcerptShortcodes(page.excerptHtml) || generateExcerpt(page.contentHtml),
@@ -289,13 +304,17 @@ function processPosts(postsEN, postsDE, translationMapping) {
const translationKey = post.slug;
const deMatch = translationMapping.posts[translationKey];
// Extract title and decode HTML entities
const rawTitle = post.titleHtml.replace(/<[^>]*>/g, '');
const decodedTitle = decodeHTMLEntities(rawTitle);
processed.push({
id: post.id,
translationKey: translationKey,
locale: 'en',
slug: post.slug,
path: `/blog/${post.slug}`,
title: post.titleHtml.replace(/<[^>]*>/g, ''),
title: decodedTitle,
titleHtml: post.titleHtml,
contentHtml: sanitizeHTML(post.contentHtml),
excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),
@@ -310,13 +329,17 @@ function processPosts(postsEN, postsDE, translationMapping) {
const translationKey = post.slug;
const enMatch = translationMapping.posts[translationKey];
// Extract title and decode HTML entities
const rawTitle = post.titleHtml.replace(/<[^>]*>/g, '');
const decodedTitle = decodeHTMLEntities(rawTitle);
processed.push({
id: post.id,
translationKey: translationKey,
locale: 'de',
slug: post.slug,
path: `/de/blog/${post.slug}`,
title: post.titleHtml.replace(/<[^>]*>/g, ''),
title: decodedTitle,
titleHtml: post.titleHtml,
contentHtml: sanitizeHTML(post.contentHtml),
excerptHtml: processExcerptShortcodes(post.excerptHtml) || generateExcerpt(post.contentHtml),

View File

@@ -31,6 +31,7 @@ if (!BASE_URL || !CONSUMER_KEY || !CONSUMER_SECRET) {
const TIMESTAMP = new Date().toISOString().replace(/[:.]/g, '-');
const OUTPUT_DIR = path.join(__dirname, '..', 'data', 'raw', TIMESTAMP);
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PUBLIC_DIR = path.join(__dirname, '..', 'public');
// Create output directories
if (!fs.existsSync(OUTPUT_DIR)) {
@@ -192,6 +193,36 @@ async function downloadMedia(url, filename) {
});
}
/**
 * Download a favicon-style asset over HTTPS into PUBLIC_DIR.
 *
 * If the target file already exists the download is skipped. The file on
 * disk is only created once the server has answered with status 200, and a
 * partially written file is removed when the transfer fails. Without that, a
 * failed attempt would leave an empty file that the next call reports as
 * "already exists".
 *
 * @param {string} url - HTTPS URL of the asset.
 * @param {string} filename - File name to write inside PUBLIC_DIR.
 * @returns {Promise<string>} Absolute path of the existing or downloaded file.
 * @throws {Error} Rejects on a non-200 response or on a network/file error.
 */
async function downloadFavicon(url, filename) {
  const filePath = path.join(PUBLIC_DIR, filename);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`✅ Favicon already exists: ${filename}`);
    return filePath;
  }

  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the response so the socket is released
        res.resume();
        reject(new Error(`Failed to download favicon: ${res.statusCode}`));
        return;
      }

      // Only now is it known that there is a body worth writing
      const file = fs.createWriteStream(filePath);

      // Close the stream and delete the partial file before reporting failure
      const abort = (err) => {
        file.destroy();
        fs.unlink(filePath, () => reject(err));
      };

      res.on('error', abort);
      file.on('error', abort);
      file.on('finish', () => {
        console.log(`✅ Downloaded favicon: ${filename}`);
        resolve(filePath);
      });

      res.pipe(file);
    }).on('error', reject);
  });
}
// Data Processing Functions
function extractFeaturedImage(item) {
if (item.featured_media) {
@@ -545,6 +576,109 @@ async function exportSiteInfo() {
return siteInfo;
}
/**
 * Export the site logo, favicon and apple-touch icon.
 *
 * Strategy:
 *  1. Read `/wp-json/wp/v2/settings` and download the media referenced by
 *     `custom_logo` and `site_icon`.
 *  2. If no logo was obtained, take the first media item whose title, slug
 *     or source URL contains "logo".
 *  3. If no favicon was obtained, try a list of common favicon URLs.
 *  4. Write the resulting paths to `assets.json` in OUTPUT_DIR.
 *
 * A failing settings request no longer aborts the export: it is logged and
 * the fallbacks in steps 2 and 3 still run. Any other error is logged and
 * the assets collected so far are returned.
 *
 * @returns {Promise<{logo: ?string, favicon: ?string, appleTouchIcon: ?string}>}
 *   Public paths of the exported assets; `null` for assets not found.
 */
async function exportLogoAndFavicon() {
  console.log('\n📊 EXPORTING LOGO AND FAVICON');

  const assets = {
    logo: null,
    favicon: null,
    appleTouchIcon: null
  };

  try {
    // Get site settings which may include logo and icon IDs. The request is
    // isolated so that a failure here still lets the fallbacks run.
    let settings = {};
    try {
      settings = (await makeRequest(`${BASE_URL}/wp-json/wp/v2/settings`, buildWordPressAuth())) || {};
    } catch (error) {
      console.log(`⚠️ Could not read site settings (${error.message}), using fallbacks...`);
    }

    // Try to get custom_logo
    if (settings.custom_logo) {
      console.log(`📥 Found custom_logo ID: ${settings.custom_logo}`);
      const logoMedia = await fetchMedia(settings.custom_logo);
      if (logoMedia && logoMedia.source_url) {
        const logoFilename = 'logo.webp';
        await downloadMedia(logoMedia.source_url, logoFilename);
        assets.logo = `/media/${logoFilename}`;
        console.log(`✅ Logo downloaded: ${logoFilename}`);
      }
    }

    // Try to get site_icon
    if (settings.site_icon) {
      console.log(`📥 Found site_icon ID: ${settings.site_icon}`);
      const iconMedia = await fetchMedia(settings.site_icon);
      if (iconMedia && iconMedia.source_url) {
        // Download as favicon.ico
        const faviconFilename = 'favicon.ico';
        await downloadFavicon(iconMedia.source_url, faviconFilename);
        assets.favicon = `/favicon.ico`;
        console.log(`✅ Favicon downloaded: ${faviconFilename}`);

        // Also create apple-touch-icon.png (same file, different name)
        const appleTouchFilename = 'apple-touch-icon.png';
        await downloadFavicon(iconMedia.source_url, appleTouchFilename);
        assets.appleTouchIcon = `/apple-touch-icon.png`;
        console.log(`✅ Apple touch icon downloaded: ${appleTouchFilename}`);
      }
    }

    // If no logo found in settings, try to find it in media
    if (!assets.logo) {
      console.log('⚠️ No logo found in settings, searching media...');

      // Try to find logo by filename pattern; only items that actually have a
      // source URL can be downloaded.
      const allMedia = await fetchWithPagination('media', { per_page: 100 });
      const logoMedia = allMedia.find(m =>
        m.source_url && (
          m.title?.rendered?.toLowerCase().includes('logo') ||
          m.slug?.toLowerCase().includes('logo') ||
          m.source_url.toLowerCase().includes('logo')
        )
      );

      if (logoMedia) {
        const logoFilename = 'logo.webp';
        await downloadMedia(logoMedia.source_url, logoFilename);
        assets.logo = `/media/${logoFilename}`;
        console.log(`✅ Logo found and downloaded: ${logoFilename}`);
      }
    }

    // If no favicon found, try to download from common locations
    if (!assets.favicon) {
      console.log('⚠️ No favicon found in settings, trying common locations...');
      const faviconUrls = [
        `${BASE_URL}/favicon.ico`,
        `${BASE_URL}/wp-content/uploads/favicon.ico`
      ];

      for (const url of faviconUrls) {
        try {
          await downloadFavicon(url, 'favicon.ico');
          assets.favicon = '/favicon.ico';
          console.log(`✅ Favicon downloaded from: ${url}`);

          // Also create apple-touch-icon
          await downloadFavicon(url, 'apple-touch-icon.png');
          assets.appleTouchIcon = '/apple-touch-icon.png';
          break;
        } catch (e) {
          // Best effort: record why this location failed, then try the next
          console.log(`⚠️ Favicon not available at ${url}: ${e.message}`);
        }
      }
    }

    // Save asset manifest
    fs.writeFileSync(
      path.join(OUTPUT_DIR, 'assets.json'),
      JSON.stringify(assets, null, 2)
    );

    console.log('✅ Logo and favicon export complete');
  } catch (error) {
    console.error('❌ Error exporting logo/favicon:', error.message);
  }

  return assets;
}
async function generateTranslationMapping() {
console.log('\n📊 GENERATING TRANSLATION MAPPING');
@@ -666,6 +800,7 @@ async function main() {
await exportProductCategories();
await exportMenus();
await exportMedia();
await exportLogoAndFavicon();
// Step 2: Generate mappings and redirects
await generateTranslationMapping();
@@ -675,6 +810,7 @@ async function main() {
console.log('=====================================');
console.log(`📁 Data directory: data/raw/${TIMESTAMP}`);
console.log(`🖼️ Media directory: public/media/`);
console.log(`🎨 Logo/Favicon: public/`);
console.log('');
console.log('Next steps:');
console.log('1. Review exported data for completeness');
@@ -701,6 +837,7 @@ module.exports = {
exportMenus,
exportMedia,
exportSiteInfo,
exportLogoAndFavicon,
generateTranslationMapping,
generateRedirects
};