Files
klz-cables.com/scripts/create-manual-mapping.ts
2025-12-28 23:28:31 +01:00

230 lines
7.0 KiB
TypeScript

#!/usr/bin/env tsx
/**
* Manual Translation Mapping Generator
* Creates translationKey mappings for posts that couldn't be auto-detected
*/
import { readFileSync, writeFileSync } from 'fs';
import { join } from 'path';
/**
 * Shape of one post record as read from the raw JSON export.
 * Only the fields this script touches are declared.
 */
interface Post {
  /** Numeric post id; used as the value stored in the mapping. */
  id: number;
  /** URL slug; one of the signals used for similarity matching. */
  slug: string;
  /** Title wrapper; `rendered` holds the text that is compared and slugified. */
  title: {
    rendered: string;
  };
  /** Publication date string; compared for exact equality only. */
  date: string;
  /** Language code of the post (presumably 'en' or 'de' — confirm against the export). */
  lang: string;
  /** Polylang translation id; posts sharing a value are treated as translations. */
  pll_translation_id?: number;
  /** Polylang master post id; looked up among the German post ids. */
  pll_master_post_id?: number;
}
/**
 * Result written to the mapping file: for each content type, a dictionary
 * from translation key to the ids of the paired records, in the order
 * `[englishId, germanId]`.
 *
 * Ids are numbers: every producer in this file stores the record's numeric
 * `id` unchanged, and consumers compare them to `Post.id` with `===`.
 */
interface TranslationMapping {
  /** translationKey -> [en_id, de_id] for blog posts. */
  posts: Record<string, number[]>;
  /** translationKey -> [en_id, de_id] for products. */
  products: Record<string, number[]>;
  /** translationKey -> [en_id, de_id] for pages. */
  pages: Record<string, number[]>;
}
/**
 * Everything loaded from the raw JSON files, grouped by content type and
 * then by language (`en` / `de`).
 */
interface RawData {
  /** Blog posts per language. */
  posts: Record<'en' | 'de', Post[]>;
  /** Products per language; left untyped because only `id` and `sku` are read. */
  products: Record<'en' | 'de', any[]>;
  /** Pages per language; left untyped because only id and Polylang fields are read. */
  pages: Record<'en' | 'de', any[]>;
}
/**
 * Scores how alike two strings are as the Jaccard index of their word sets.
 *
 * Normalization: lower-case, treat hyphens and underscores as word
 * separators (so slugs such as `high-voltage-cable` are compared word by
 * word instead of as one opaque token), drop remaining punctuation, collapse
 * whitespace. Letters and digits of any script are kept, so umlauts and `ß`
 * in German titles are not stripped.
 *
 * @param text1 - First string to compare.
 * @param text2 - Second string to compare.
 * @returns A score in [0, 1]: 1 when the normalized strings are identical
 *   (including both empty), 0 when they share no word.
 */
function calculateSimilarity(text1: string, text2: string): number {
  const normalize = (str: string): string =>
    str
      .toLowerCase()
      .replace(/[-_]+/g, ' ')
      .replace(/[^\p{L}\p{N}\s]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();

  const s1 = normalize(text1);
  const s2 = normalize(text2);
  if (s1 === s2) return 1.0;

  // Compare distinct words only; counting repeats on one side while
  // de-duplicating the union would let the ratio exceed 1.
  const words1 = new Set(s1.split(' ').filter(Boolean));
  const words2 = new Set(s2.split(' ').filter(Boolean));

  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) shared++;
  }

  // s1 !== s2 here, so at least one side has a word and the union is non-empty.
  const unionSize = words1.size + words2.size - shared;
  return shared / unionSize;
}
/**
 * Derives a translation key (a lowercase, hyphen-separated slug) from a title.
 *
 * Characters other than ASCII word characters, whitespace and hyphens are
 * removed, whitespace runs become a single hyphen, repeated hyphens are
 * collapsed, and leading/trailing hyphens are stripped so that surrounding
 * whitespace or punctuation in the title never leaks into the key.
 *
 * @param title - Human-readable title.
 * @returns The slugified key; may be empty if the title has no ASCII word characters.
 */
function generateKeyFromTitle(title: string): string {
  return title
    .trim()
    .toLowerCase()
    .replace(/[^\w\s-]/g, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    // Trimming must happen on hyphens: whitespace no longer exists at this point.
    .replace(/^-+|-+$/g, '');
}
/**
 * Pairs English posts with their German translations.
 *
 * Matching runs in two passes:
 *  1. Polylang metadata: an English post is paired with the German post that
 *     shares its `pll_translation_id`, or failing that with the German post
 *     whose id equals its `pll_master_post_id`.
 *  2. Content similarity: every post still unpaired is compared by title,
 *     slug and date; the best-scoring German candidate above the threshold
 *     is taken and removed from the pool so it cannot be paired twice.
 *
 * @param postsEn - English posts.
 * @param postsDe - German posts.
 * @returns Dictionary from translation key to `[englishId, germanId]`
 *   (ids are stored as the posts' numeric ids).
 */
function findPostTranslations(
  postsEn: Post[],
  postsDe: Post[]
): TranslationMapping['posts'] {
  // Relative weight of each signal in the similarity score (sums to 1).
  const TITLE_WEIGHT = 0.6;
  const SLUG_WEIGHT = 0.3;
  const DATE_WEIGHT = 0.1;
  // A candidate must score strictly above this to count as a translation.
  const MIN_SCORE = 0.7;

  const mapping: TranslationMapping['posts'] = {};
  // Tracked per language so an id on one side never hides a post on the other.
  const matchedEnIds = new Set<number>();
  const matchedDeIds = new Set<number>();

  const pair = (key: string, enPost: Post, dePost: Post): void => {
    mapping[key] = [enPost.id, dePost.id];
    matchedEnIds.add(enPost.id);
    matchedDeIds.add(dePost.id);
  };

  // First pass: match by Polylang metadata.
  const deById = new Map<number, Post>();
  const deByTranslationId = new Map<number, Post>();
  for (const dePost of postsDe) {
    deById.set(dePost.id, dePost);
    // Posts without a translation id must not be indexed under `undefined`.
    if (dePost.pll_translation_id) {
      deByTranslationId.set(dePost.pll_translation_id, dePost);
    }
  }

  for (const enPost of postsEn) {
    const byTranslationId = enPost.pll_translation_id
      ? deByTranslationId.get(enPost.pll_translation_id)
      : undefined;
    if (byTranslationId) {
      pair(`post-${enPost.pll_translation_id}`, enPost, byTranslationId);
      continue;
    }

    const byMasterId = enPost.pll_master_post_id
      ? deById.get(enPost.pll_master_post_id)
      : undefined;
    if (byMasterId) {
      pair(`post-${enPost.pll_master_post_id}`, enPost, byMasterId);
    }
  }

  // Second pass: content-based matching for the posts still unpaired.
  const unmatchedEn = postsEn.filter(p => !matchedEnIds.has(p.id));
  const unmatchedDe = postsDe.filter(p => !matchedDeIds.has(p.id));

  for (const enPost of unmatchedEn) {
    let bestMatch: { post: Post; score: number } | null = null;

    for (const dePost of unmatchedDe) {
      const titleScore = calculateSimilarity(enPost.title.rendered, dePost.title.rendered);
      const slugScore = calculateSimilarity(enPost.slug, dePost.slug);
      const dateScore = enPost.date === dePost.date ? 1.0 : 0.0;
      const score =
        titleScore * TITLE_WEIGHT + slugScore * SLUG_WEIGHT + dateScore * DATE_WEIGHT;

      if (score > MIN_SCORE && (!bestMatch || score > bestMatch.score)) {
        bestMatch = { post: dePost, score };
      }
    }

    if (bestMatch) {
      const baseKey = generateKeyFromTitle(enPost.title.rendered);
      // Two titles can slugify to the same (or an empty) key; suffix the post
      // id in that case so an earlier pair is never silently overwritten.
      const needsSuffix =
        baseKey === '' || Object.prototype.hasOwnProperty.call(mapping, baseKey);
      const key = needsSuffix ? `${baseKey || 'untitled'}-${enPost.id}` : baseKey;

      pair(key, enPost, bestMatch.post);
      // Remove the chosen candidate so it cannot be matched a second time.
      unmatchedDe.splice(unmatchedDe.indexOf(bestMatch.post), 1);
    }
  }

  return mapping;
}
/**
 * Pairs English and German products that carry the same SKU.
 *
 * English products without a SKU, or whose SKU has no German counterpart,
 * are left out. When several German products share a SKU the last one wins.
 *
 * @param productsEn - English product records (read: `id`, `sku`).
 * @param productsDe - German product records (read: `id`, `sku`).
 * @returns Dictionary from `product-<sku>` to `[englishId, germanId]`.
 */
function findProductTranslations(
  productsEn: any[],
  productsDe: any[]
): TranslationMapping['products'] {
  // Index the German side by SKU for constant-time lookup.
  const germanBySku = new Map<any, any>();
  for (const germanProduct of productsDe) {
    germanBySku.set(germanProduct.sku, germanProduct);
  }

  const pairs: TranslationMapping['products'] = {};
  productsEn.forEach(englishProduct => {
    const sku = englishProduct.sku;
    // Guard clauses: a missing SKU or no German match means no pair.
    if (!sku) return;
    if (!germanBySku.has(sku)) return;

    const germanProduct = germanBySku.get(sku);
    pairs[`product-${sku}`] = [englishProduct.id, germanProduct.id];
  });
  return pairs;
}
/**
 * Pairs English pages with their German translations using Polylang metadata.
 *
 * An English page is paired with the German page that shares its
 * `pll_translation_id`; failing that, with the German page whose id equals
 * its `pll_master_post_id` (the same fallback `findPostTranslations` uses).
 * Pages with neither link are left out; no content-based matching is done.
 *
 * @param pagesEn - English page records (read: `id`, `pll_translation_id`, `pll_master_post_id`).
 * @param pagesDe - German page records (read: `id`, `pll_translation_id`).
 * @returns Dictionary from `page-<id>` to `[englishId, germanId]`.
 */
function findPageTranslations(
  pagesEn: any[],
  pagesDe: any[]
): TranslationMapping['pages'] {
  const mapping: TranslationMapping['pages'] = {};

  const deById = new Map<unknown, any>();
  const deByTranslationId = new Map<unknown, any>();
  for (const dePage of pagesDe) {
    deById.set(dePage.id, dePage);
    // Pages without a translation id must not be indexed under `undefined`.
    if (dePage.pll_translation_id) {
      deByTranslationId.set(dePage.pll_translation_id, dePage);
    }
  }

  for (const enPage of pagesEn) {
    const translationId = enPage.pll_translation_id;
    if (translationId && deByTranslationId.has(translationId)) {
      mapping[`page-${translationId}`] = [enPage.id, deByTranslationId.get(translationId).id];
      continue;
    }

    // Fallback: the English page names its German master page directly.
    const masterId = enPage.pll_master_post_id;
    if (masterId && deById.has(masterId)) {
      mapping[`page-${masterId}`] = [enPage.id, deById.get(masterId).id];
    }
  }

  return mapping;
}
/**
 * Script entry point.
 *
 * Loads the six raw JSON exports (posts, products and pages in EN and DE),
 * builds the translation mapping for each content type, writes the result
 * to `data/manual-translation-mapping.json`, and prints a summary plus up to
 * three example post pairs. All paths are relative to the current working
 * directory. Read, parse and write errors propagate to the caller.
 */
function main() {
  console.log('🔍 Creating manual translation mapping...\n');

  // Read and parse one raw export file.
  const loadJson = (file: string) => JSON.parse(readFileSync(file, 'utf8'));

  const rawData: RawData = {
    posts: {
      en: loadJson('data/raw/posts.en.json'),
      de: loadJson('data/raw/posts.de.json')
    },
    products: {
      en: loadJson('data/raw/products.en.json'),
      de: loadJson('data/raw/products.de.json')
    },
    pages: {
      en: loadJson('data/raw/pages.en.json'),
      de: loadJson('data/raw/pages.de.json')
    }
  };
  const { posts, products, pages } = rawData;

  console.log('📊 Raw data loaded:');
  console.log(` - Posts: ${posts.en.length} EN, ${posts.de.length} DE`);
  console.log(` - Products: ${products.en.length} EN, ${products.de.length} DE`);
  console.log(` - Pages: ${pages.en.length} EN, ${pages.de.length} DE`);
  console.log('');

  // Build the mapping for each content type.
  const mapping: TranslationMapping = {
    posts: findPostTranslations(posts.en, posts.de),
    products: findProductTranslations(products.en, products.de),
    pages: findPageTranslations(pages.en, pages.de)
  };

  // Persist the result as pretty-printed JSON.
  const outputPath = 'data/manual-translation-mapping.json';
  writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  const pairCount = (group: Record<string, unknown>) => Object.keys(group).length;
  console.log('✅ Manual translation mapping created:\n');
  console.log(`Posts: ${pairCount(mapping.posts)} pairs`);
  console.log(`Products: ${pairCount(mapping.products)} pairs`);
  console.log(`Pages: ${pairCount(mapping.pages)} pairs`);
  console.log(`\nSaved to: ${outputPath}`);

  // Print up to three post pairs as a sanity check; skip when there are none.
  const samples = Object.entries(mapping.posts).slice(0, 3);
  if (samples.length === 0) {
    return;
  }

  console.log('\n📝 Post mapping examples:');
  for (const [key, ids] of samples) {
    const enPost = posts.en.find(p => p.id === ids[0]);
    const dePost = posts.de.find(p => p.id === ids[1]);
    console.log(` ${key}:`);
    console.log(` EN: [${ids[0]}] ${enPost?.title.rendered}`);
    console.log(` DE: [${ids[1]}] ${dePost?.title.rendered}`);
  }
}
// Run the generator as soon as the module is evaluated.
main();