#!/usr/bin/env tsx
/**
 * Manual Translation Mapping Generator
 * Creates translationKey mappings for posts that couldn't be auto-detected
 *
 * Reads the raw EN/DE exports of posts, products and pages from `data/raw/`,
 * pairs each English entry with its German counterpart and writes the result
 * to `data/manual-translation-mapping.json`.
 */

import { readFileSync, writeFileSync } from 'fs';
import { join } from 'path';

/** Fields of a raw post record that this script relies on. */
interface Post {
  id: number;
  slug: string;
  title: { rendered: string };
  date: string;
  lang: string;
  pll_translation_id?: number;
  pll_master_post_id?: number;
}

/**
 * Minimal product shape: only the fields read by this script.
 * NOTE(review): `id` is assumed to be numeric like `Post.id` — confirm against the export.
 */
interface Product {
  id: number;
  sku?: string;
}

/**
 * Minimal page shape: only the fields read by this script.
 * NOTE(review): `id` is assumed to be numeric like `Post.id` — confirm against the export.
 */
interface Page {
  id: number;
  pll_translation_id?: number;
}

/** A matched pair of ids: `[en_id, de_id]`. */
type TranslationPair = [number, number];

interface TranslationMapping {
  posts: Record<string, TranslationPair>; // translationKey -> [en_id, de_id]
  products: Record<string, TranslationPair>;
  pages: Record<string, TranslationPair>;
}

interface RawData {
  posts: {
    en: Post[];
    de: Post[];
  };
  products: {
    en: Product[];
    de: Product[];
  };
  pages: {
    en: Page[];
    de: Page[];
  };
}

/** Directory holding the raw `<type>.<lang>.json` exports. */
const RAW_DATA_DIR = join('data', 'raw');

// Weights of the individual signals in the content-based post match.
const TITLE_WEIGHT = 0.6;
const SLUG_WEIGHT = 0.3;
const DATE_WEIGHT = 0.1;

/** A content-based match is accepted only when its score is strictly above this value. */
const MATCH_THRESHOLD = 0.7;

/**
 * Lower-cases `text`, turns hyphens/underscores into word separators (so that
 * slugs such as `summer-sale` yield individual words), removes every character
 * that is not a Unicode letter, digit or whitespace, and collapses whitespace.
 */
function normalizeForComparison(text: string): string {
  return text
    .toLowerCase()
    .replace(/[-_]+/g, ' ')
    // Unicode-aware: an ASCII-only \w would delete umlauts and ß from German text.
    .replace(/[^\p{L}\p{N}\s]/gu, '')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Simple text similarity function.
 *
 * Computes the Jaccard index of the two word sets (shared distinct words
 * divided by all distinct words) after normalization.
 *
 * @returns A score in `[0, 1]`; `1` when both texts normalize to the same string.
 */
function calculateSimilarity(text1: string, text2: string): number {
  const s1 = normalizeForComparison(text1);
  const s2 = normalizeForComparison(text2);

  if (s1 === s2) return 1.0;

  const words1 = new Set(s1.split(' ').filter(Boolean));
  const words2 = new Set(s2.split(' ').filter(Boolean));
  const union = new Set([...words1, ...words2]);
  if (union.size === 0) return 0;

  // Count distinct shared words only, so repeated words cannot push the score above 1.
  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) shared++;
  }

  return shared / union.size;
}

/**
 * Generate translation key from title.
 *
 * Produces a lower-case, hyphen-separated key. Characters outside
 * `[A-Za-z0-9_]`, whitespace and `-` are dropped, runs of hyphens are
 * collapsed, and the key never starts or ends with a hyphen.
 * May return an empty string (e.g. for a title made only of punctuation).
 */
function generateKeyFromTitle(title: string): string {
  return title
    .toLowerCase()
    .replace(/[^\w\s-]/g, '')
    // Trim before whitespace becomes hyphens; afterwards there is nothing left to trim.
    .trim()
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}

/**
 * Pairs English posts with German posts.
 *
 * Pass 1 uses Polylang metadata: a shared `pll_translation_id`, or an English
 * post whose `pll_master_post_id` equals a German post id. Keys have the form
 * `post-<id>`.
 *
 * Pass 2 handles the remaining posts with a weighted similarity score of
 * title, slug and date. Each English post takes the best still-available
 * German post whose score is above `MATCH_THRESHOLD`; keys are derived from
 * the English title.
 *
 * @param postsEn English posts.
 * @param postsDe German posts.
 * @returns Map of translation key to `[en_id, de_id]`.
 */
function findPostTranslations(
  postsEn: Post[],
  postsDe: Post[]
): TranslationMapping['posts'] {
  const mapping: TranslationMapping['posts'] = {};

  // Tracked per language so an id of one language can never hide a post of the other.
  const matchedEnIds = new Set<number>();
  const matchedDeIds = new Set<number>();

  const recordPair = (key: string, enPost: Post, dePost: Post): void => {
    mapping[key] = [enPost.id, dePost.id];
    matchedEnIds.add(enPost.id);
    matchedDeIds.add(dePost.id);
  };

  // First pass: try to match by Polylang metadata
  const deById = new Map<number, Post>(
    postsDe.map((post): [number, Post] => [post.id, post])
  );
  const deByTranslationId = new Map<number, Post>();
  for (const dePost of postsDe) {
    if (dePost.pll_translation_id) {
      deByTranslationId.set(dePost.pll_translation_id, dePost);
    }
  }

  for (const enPost of postsEn) {
    // Try by pll_translation_id
    const translationId = enPost.pll_translation_id;
    const sibling = translationId ? deByTranslationId.get(translationId) : undefined;
    if (sibling) {
      recordPair(`post-${translationId}`, enPost, sibling);
      continue;
    }

    // Try by pll_master_post_id
    const masterId = enPost.pll_master_post_id;
    const master = masterId ? deById.get(masterId) : undefined;
    if (master) {
      recordPair(`post-${masterId}`, enPost, master);
    }
  }

  // Second pass: content-based matching for remaining unmatched posts
  const unmatchedEn = postsEn.filter(post => !matchedEnIds.has(post.id));
  const unmatchedDe = postsDe.filter(post => !matchedDeIds.has(post.id));

  for (const enPost of unmatchedEn) {
    let best: { index: number; post: Post; score: number } | null = null;

    for (const [index, dePost] of unmatchedDe.entries()) {
      const titleScore = calculateSimilarity(enPost.title.rendered, dePost.title.rendered);
      const slugScore = calculateSimilarity(enPost.slug, dePost.slug);
      const dateScore = enPost.date === dePost.date ? 1.0 : 0.0;

      // Weighted average
      const score =
        titleScore * TITLE_WEIGHT + slugScore * SLUG_WEIGHT + dateScore * DATE_WEIGHT;

      if (score > MATCH_THRESHOLD && (best === null || score > best.score)) {
        best = { index, post: dePost, score };
      }
    }

    if (best === null) continue;

    // Fall back to the id when the title yields no key, and never overwrite
    // a pair that was stored earlier under the same key.
    let key = generateKeyFromTitle(enPost.title.rendered) || `post-${enPost.id}`;
    if (Object.prototype.hasOwnProperty.call(mapping, key)) {
      key = `${key}-${enPost.id}`;
    }

    recordPair(key, enPost, best.post);
    // A German post can be claimed only once.
    unmatchedDe.splice(best.index, 1);
  }

  return mapping;
}

/**
 * Pairs English and German products that share the same non-empty SKU.
 *
 * @returns Map of `product-<sku>` to `[en_id, de_id]`.
 */
function findProductTranslations(
  productsEn: Product[],
  productsDe: Product[]
): TranslationMapping['products'] {
  const mapping: TranslationMapping['products'] = {};

  // Use SKU as primary key if available
  const deBySku = new Map<string, Product>();
  for (const deProduct of productsDe) {
    if (deProduct.sku) deBySku.set(deProduct.sku, deProduct);
  }

  for (const enProduct of productsEn) {
    const deProduct = enProduct.sku ? deBySku.get(enProduct.sku) : undefined;
    if (deProduct) {
      mapping[`product-${enProduct.sku}`] = [enProduct.id, deProduct.id];
    }
  }

  return mapping;
}

/**
 * Pairs English and German pages that share the same `pll_translation_id`.
 *
 * @returns Map of `page-<pll_translation_id>` to `[en_id, de_id]`.
 */
function findPageTranslations(
  pagesEn: Page[],
  pagesDe: Page[]
): TranslationMapping['pages'] {
  const mapping: TranslationMapping['pages'] = {};

  // Pages should have better Polylang metadata
  const deByTranslationId = new Map<number, Page>();
  for (const dePage of pagesDe) {
    if (dePage.pll_translation_id) {
      deByTranslationId.set(dePage.pll_translation_id, dePage);
    }
  }

  for (const enPage of pagesEn) {
    const translationId = enPage.pll_translation_id;
    const dePage = translationId ? deByTranslationId.get(translationId) : undefined;
    if (dePage) {
      mapping[`page-${translationId}`] = [enPage.id, dePage.id];
    }
  }

  return mapping;
}

/**
 * Reads `path` as UTF-8 JSON and checks that the top-level value is an array.
 * Element shapes are not validated.
 *
 * @throws Error when the file cannot be read or parsed, or is not a JSON array.
 */
function readJsonArray<T>(path: string): T[] {
  const parsed: unknown = JSON.parse(readFileSync(path, 'utf8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected a JSON array in ${path}`);
  }
  return parsed as T[];
}

/**
 * Loads the raw exports, builds all three mappings, writes them to
 * `data/manual-translation-mapping.json` and prints a short summary.
 *
 * @throws Error when an input file is missing or invalid, or the output cannot be written.
 */
function main(): void {
  console.log('🔍 Creating manual translation mapping...\n');

  // Read raw data
  const rawData: RawData = {
    posts: {
      en: readJsonArray<Post>(join(RAW_DATA_DIR, 'posts.en.json')),
      de: readJsonArray<Post>(join(RAW_DATA_DIR, 'posts.de.json'))
    },
    products: {
      en: readJsonArray<Product>(join(RAW_DATA_DIR, 'products.en.json')),
      de: readJsonArray<Product>(join(RAW_DATA_DIR, 'products.de.json'))
    },
    pages: {
      en: readJsonArray<Page>(join(RAW_DATA_DIR, 'pages.en.json')),
      de: readJsonArray<Page>(join(RAW_DATA_DIR, 'pages.de.json'))
    }
  };

  console.log('📊 Raw data loaded:');
  console.log(` - Posts: ${rawData.posts.en.length} EN, ${rawData.posts.de.length} DE`);
  console.log(` - Products: ${rawData.products.en.length} EN, ${rawData.products.de.length} DE`);
  console.log(` - Pages: ${rawData.pages.en.length} EN, ${rawData.pages.de.length} DE`);
  console.log('');

  // Generate mappings
  const mapping: TranslationMapping = {
    posts: findPostTranslations(rawData.posts.en, rawData.posts.de),
    products: findProductTranslations(rawData.products.en, rawData.products.de),
    pages: findPageTranslations(rawData.pages.en, rawData.pages.de)
  };

  // Save mapping
  const outputPath = 'data/manual-translation-mapping.json';
  writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  console.log('✅ Manual translation mapping created:\n');
  console.log(`Posts: ${Object.keys(mapping.posts).length} pairs`);
  console.log(`Products: ${Object.keys(mapping.products).length} pairs`);
  console.log(`Pages: ${Object.keys(mapping.pages).length} pairs`);
  console.log(`\nSaved to: ${outputPath}`);

  // Show some examples
  const examples = Object.entries(mapping.posts).slice(0, 3);
  if (examples.length > 0) {
    console.log('\n📝 Post mapping examples:');
    for (const [key, [enId, deId]] of examples) {
      const enPost = rawData.posts.en.find(post => post.id === enId);
      const dePost = rawData.posts.de.find(post => post.id === deId);
      console.log(` ${key}:`);
      console.log(` EN: [${enId}] ${enPost?.title.rendered}`);
      console.log(` DE: [${deId}] ${dePost?.title.rendered}`);
    }
  }
}

try {
  main();
} catch (error: unknown) {
  // Report the failure in one line and signal it through the exit code.
  const message = error instanceof Error ? error.message : String(error);
  console.error(`Failed to create manual translation mapping: ${message}`);
  process.exitCode = 1;
}