#!/usr/bin/env node

/**
 * Improved Translation Mapping Script
 * Creates translation pairs by analyzing content similarity and patterns
 *
 * NOTE(review): the leading symbols in several log strings below look like
 * mis-encoded emoji. They are kept exactly as found; confirm the intended
 * characters and the file encoding before changing them.
 */

const fs = require('fs');
const path = require('path');

const DATA_DIR = path.join(__dirname, '..', 'data', 'raw');

/**
 * Find the latest export directory.
 *
 * "Latest" means the sub-directory of DATA_DIR whose name sorts last with the
 * default string sort (presumably the names are timestamps; verify against
 * the exporter).
 *
 * @returns {string} Path of the chosen export directory.
 * @throws {Error} If DATA_DIR contains no sub-directory.
 */
function getLatestExportDir() {
  const dirs = fs
    .readdirSync(DATA_DIR)
    .filter(name => fs.statSync(path.join(DATA_DIR, name)).isDirectory())
    .sort();

  // Without this guard path.join(DATA_DIR, undefined) fails with an opaque
  // "path argument must be of type string" TypeError.
  if (dirs.length === 0) {
    throw new Error(`No export directories found in ${DATA_DIR}`);
  }

  return path.join(DATA_DIR, dirs[dirs.length - 1]);
}

/**
 * Levenshtein edit distance between two strings, counted in UTF-16 code units.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Minimum number of insertions, deletions and substitutions.
 */
function editDistance(a, b) {
  // previous[j] is the distance between the first i-1 units of `a` and the
  // first j units of `b`; only two rows of the table are kept alive.
  let previous = [];
  for (let j = 0; j <= b.length; j++) {
    previous.push(j);
  }

  for (let i = 1; i <= a.length; i++) {
    const current = [i];
    for (let j = 1; j <= b.length; j++) {
      const substitution = previous[j - 1] + (a.charAt(i - 1) === b.charAt(j - 1) ? 0 : 1);
      current[j] = Math.min(substitution, previous[j] + 1, current[j - 1] + 1);
    }
    previous = current;
  }

  return previous[b.length];
}

/**
 * Simple text similarity based on the Levenshtein distance.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Value in [0, 1]; 1.0 for identical strings (including two
 *   empty strings), 0 when every character has to change.
 */
function similarity(str1, str2) {
  const longerLength = Math.max(str1.length, str2.length);
  if (longerLength === 0) return 1.0;

  return (longerLength - editDistance(str1, str2)) / longerLength;
}

/**
 * Extract keywords from a slug.
 *
 * Words longer than three characters are kept, sorted and re-joined with '-'.
 * When the slug has no such word, all of its words are used instead; otherwise
 * every short slug would reduce to '' and compare as a perfect match against
 * every other short slug.
 *
 * @param {string} slug Dash-separated slug.
 * @returns {string} Sorted, dash-joined keywords.
 */
function extractKeywords(slug) {
  const words = slug.split('-');
  const longWords = words.filter(word => word.length > 3);
  const keywords = longWords.length > 0 ? longWords : words;

  return keywords.sort().join('-');
}

/**
 * Strip anything that looks like an HTML tag and lower-case the rest.
 *
 * @param {string} html
 * @returns {string}
 */
function normalizeHtml(html) {
  return html.replace(/<[^>]*>/g, '').toLowerCase();
}

/**
 * Pre-compute the comparison features of one content item so they are derived
 * once per item instead of once per EN/DE combination.
 *
 * @param {{id: *, slug: string, titleHtml?: string, contentHtml?: string}} item
 * @returns {{item: Object, keywords: string, title: (string|null), preview: (string|null)}}
 *   `title` / `preview` are null when the corresponding HTML field is falsy.
 */
function describeItem(item) {
  return {
    item,
    keywords: extractKeywords(item.slug),
    title: item.titleHtml ? normalizeHtml(item.titleHtml) : null,
    // The preview is cut to 200 characters of raw HTML *before* tags are
    // stripped, so a tag truncated by the cut stays in the compared text.
    preview: item.contentHtml ? normalizeHtml(item.contentHtml.substring(0, 200)) : null
  };
}

/**
 * Weighted similarity of two described items:
 * 40% slug keywords, 40% title, 20% content preview.
 * A title/preview that is missing on either side contributes 0.
 *
 * @param {Object} en Result of describeItem for the EN item.
 * @param {Object} de Result of describeItem for the DE item.
 * @returns {number} Combined score in [0, 1].
 */
function scorePair(en, de) {
  const keywordScore = similarity(en.keywords, de.keywords);
  const titleScore = en.title !== null && de.title !== null ? similarity(en.title, de.title) : 0;
  const contentScore =
    en.preview !== null && de.preview !== null ? similarity(en.preview, de.preview) : 0;

  return (keywordScore * 0.4) + (titleScore * 0.4) + (contentScore * 0.2);
}

/**
 * Find translation pairs using multiple strategies.
 *
 * Matching is greedy: EN items are processed in input order, each one takes
 * the best-scoring DE item that is still free and scores strictly above
 * `threshold`, and a DE item (identified by `id`) is used at most once.
 *
 * @param {Array<Object>} itemsEN English items (`id`, `slug`, optional `titleHtml` / `contentHtml`).
 * @param {Array<Object>} itemsDE German items with the same fields.
 * @param {number} [threshold=0.6] Minimum combined score (exclusive).
 * @returns {Array<{translationKey: string, en: *, de: *, score: number, enSlug: string, deSlug: string}>}
 */
function findTranslationPairs(itemsEN, itemsDE, threshold = 0.6) {
  const pairs = [];
  if (itemsEN.length === 0 || itemsDE.length === 0) return pairs;

  const usedDE = new Set();
  const candidatesDE = itemsDE.map(describeItem);

  itemsEN.forEach(enItem => {
    const en = describeItem(enItem);
    let bestMatch = null;
    let bestScore = 0;

    candidatesDE.forEach(de => {
      if (usedDE.has(de.item.id)) return;

      const combinedScore = scorePair(en, de);
      if (combinedScore > bestScore && combinedScore > threshold) {
        bestScore = combinedScore;
        bestMatch = de.item;
      }
    });

    if (bestMatch) {
      usedDE.add(bestMatch.id);
      pairs.push({
        translationKey: `${enItem.slug}`,
        en: enItem.id,
        de: bestMatch.id,
        score: bestScore,
        enSlug: enItem.slug,
        deSlug: bestMatch.slug
      });
    }
  });

  return pairs;
}

/**
 * Load one JSON export file as an array of items.
 *
 * A missing file is treated as "no content of this type". Any other failure
 * (unreadable file, malformed JSON, JSON that is not an array) is reported on
 * stderr and also yields an empty list, so the run continues.
 *
 * @param {string} exportDir Directory that holds the export files.
 * @param {string} file File name inside `exportDir`.
 * @returns {Array<Object>}
 */
function loadItems(exportDir, file) {
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(path.join(exportDir, file), 'utf8'));
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.warn(`Warning: could not load ${file}: ${err.message}`);
    }
    return [];
  }

  if (!Array.isArray(parsed)) {
    console.warn(`Warning: ${file} does not contain a JSON array; ignoring it`);
    return [];
  }

  return parsed;
}

/**
 * Convert a list of pairs into the `{ [translationKey]: { en, de, score } }`
 * shape written to the mapping file.
 *
 * @param {Array<Object>} pairs Result of findTranslationPairs.
 * @returns {Object}
 */
function toMappingSection(pairs) {
  const section = {};
  pairs.forEach(pair => {
    section[pair.translationKey] = { en: pair.en, de: pair.de, score: pair.score };
  });
  return section;
}

/**
 * Count the items whose id does not occur in `pairs` for the given language.
 *
 * Ids are only compared within one content type, so an id that happens to be
 * shared by, say, a page and a category cannot hide an unmatched item.
 *
 * @param {Array<Object>} items Items of one content type and language.
 * @param {Array<Object>} pairs Pairs found for that content type.
 * @param {'en'|'de'} lang Which side of the pairs to look at.
 * @returns {number}
 */
function countUnmatched(items, pairs, lang) {
  const matchedIds = new Set(pairs.map(pair => pair[lang]));
  return items.filter(item => !matchedIds.has(item.id)).length;
}

/**
 * Print up to three example pairs under the given heading.
 * Prints nothing when `pairs` is empty.
 *
 * @param {string} heading
 * @param {Array<Object>} pairs
 */
function printSamplePairs(heading, pairs) {
  if (pairs.length === 0) return;

  console.log(heading);
  pairs.slice(0, 3).forEach(pair => {
    console.log(` ${pair.enSlug} (${pair.score.toFixed(2)})`);
    console.log(` ↔ ${pair.deSlug}`);
    console.log('');
  });
}

/**
 * Main function: loads the EN/DE exports from the latest export directory,
 * pairs them per content type, writes `translation-mapping-improved.json`
 * into that directory and prints a summary.
 */
function main() {
  const exportDir = getLatestExportDir();

  console.log('šŸ”§ Improving Translation Mapping');
  console.log('================================\n');

  // Load data
  const pagesEN = loadItems(exportDir, 'pages.en.json');
  const pagesDE = loadItems(exportDir, 'pages.de.json');
  const postsEN = loadItems(exportDir, 'posts.en.json');
  const postsDE = loadItems(exportDir, 'posts.de.json');
  const productsEN = loadItems(exportDir, 'products.en.json');
  const productsDE = loadItems(exportDir, 'products.de.json');
  const categoriesEN = loadItems(exportDir, 'product-categories.en.json');
  const categoriesDE = loadItems(exportDir, 'product-categories.de.json');

  console.log('šŸ“Š Content loaded:');
  console.log(` Pages: ${pagesEN.length} EN, ${pagesDE.length} DE`);
  console.log(` Posts: ${postsEN.length} EN, ${postsDE.length} DE`);
  console.log(` Products: ${productsEN.length} EN, ${productsDE.length} DE`);
  console.log(` Categories: ${categoriesEN.length} EN, ${categoriesDE.length} DE\n`);

  // Find pairs
  console.log('šŸ” Finding translation pairs...\n');

  const pagePairs = findTranslationPairs(pagesEN, pagesDE, 0.5);
  const postPairs = findTranslationPairs(postsEN, postsDE, 0.5);
  const productPairs = findTranslationPairs(productsEN, productsDE, 0.6);
  const categoryPairs = findTranslationPairs(categoriesEN, categoriesDE, 0.5);

  // Build mapping
  const mapping = {
    pages: toMappingSection(pagePairs),
    posts: toMappingSection(postPairs),
    products: toMappingSection(productPairs),
    productCategories: toMappingSection(categoryPairs)
  };

  // Save improved mapping
  const outputFile = path.join(exportDir, 'translation-mapping-improved.json');
  fs.writeFileSync(outputFile, JSON.stringify(mapping, null, 2));

  // Summary
  console.log('āœ… Translation Mapping Complete\n');
  console.log('Pairs found:');
  console.log(` Pages: ${pagePairs.length}`);
  console.log(` Posts: ${postPairs.length}`);
  console.log(` Products: ${productPairs.length}`);
  console.log(` Categories: ${categoryPairs.length}`);
  console.log(` Total: ${pagePairs.length + postPairs.length + productPairs.length + categoryPairs.length}\n`);

  // Show some examples
  printSamplePairs('šŸ“ Sample Post Pairs:', postPairs);
  printSamplePairs('šŸ“¦ Sample Product Pairs:', productPairs);

  // Show unmatched items
  const unmatchedEN = {
    pages: countUnmatched(pagesEN, pagePairs, 'en'),
    posts: countUnmatched(postsEN, postPairs, 'en'),
    products: countUnmatched(productsEN, productPairs, 'en'),
    categories: countUnmatched(categoriesEN, categoryPairs, 'en')
  };

  const unmatchedDE = {
    pages: countUnmatched(pagesDE, pagePairs, 'de'),
    posts: countUnmatched(postsDE, postPairs, 'de'),
    products: countUnmatched(productsDE, productPairs, 'de'),
    categories: countUnmatched(categoriesDE, categoryPairs, 'de')
  };

  console.log('šŸ” Unmatched Items (may need manual review):');
  console.log(` EN: ${unmatchedEN.pages} pages, ${unmatchedEN.posts} posts, ${unmatchedEN.products} products, ${unmatchedEN.categories} categories`);
  console.log(` DE: ${unmatchedDE.pages} pages, ${unmatchedDE.posts} posts, ${unmatchedDE.products} products, ${unmatchedDE.categories} categories`);

  console.log('\nšŸ’¾ File saved:', outputFile);
  console.log('\nšŸ’” Next steps:');
  console.log(' 1. Review the improved mapping for accuracy');
  console.log(' 2. Manually add any missing pairs');
  console.log(' 3. Use this mapping for Next.js i18n implementation');
}

if (require.main === module) {
  main();
}