230 lines
7.0 KiB
TypeScript
230 lines
7.0 KiB
TypeScript
#!/usr/bin/env tsx
|
|
|
|
/**
 * Manual Translation Mapping Generator
 *
 * Creates translationKey mappings for posts that couldn't be auto-detected.
 */
|
|
|
|
import { readFileSync, writeFileSync } from 'fs';
|
|
import { join } from 'path';
|
|
|
|
/**
 * Shape of a post record as read by the translation-matching code.
 * Only the fields this script touches are declared.
 */
interface Post {
  /** Numeric identifier; used as the value stored in the mapping. */
  id: number;
  /** URL slug; compared between languages during content matching. */
  slug: string;
  /** Title wrapper; `rendered` is the text compared during content matching. */
  title: { rendered: string };
  /** Date string; only compared for exact equality. */
  date: string;
  /** Language marker (presumably 'en' / 'de' — not read by this script). */
  lang: string;
  /** Polylang translation-group id, when present. */
  pll_translation_id?: number;
  /** Polylang master post id, when present. */
  pll_master_post_id?: number;
}
|
|
|
|
/**
 * Result of the matching run: for each content type, a dictionary from
 * translation key to the `[enId, deId]` pair it links.
 *
 * Ids are numeric (they are copied straight from `Post.id` and compared with
 * `===` against it), so the pair is typed as numbers rather than strings.
 */
interface TranslationMapping {
  /** translationKey -> [en_id, de_id] */
  posts: Record<string, [number, number]>;
  /** translationKey -> [en_id, de_id] */
  products: Record<string, [number, number]>;
  /** translationKey -> [en_id, de_id] */
  pages: Record<string, [number, number]>;
}
|
|
|
|
/**
 * Raw input for the script: one English and one German list per content type.
 * Products and pages are left untyped; only posts have a declared shape.
 */
interface RawData {
  /** Posts, split by language. */
  posts: { en: Post[]; de: Post[] };
  /** Products, split by language. */
  products: { en: any[]; de: any[] };
  /** Pages, split by language. */
  pages: { en: any[]; de: any[] };
}
|
|
|
|
/**
 * Scores how similar two strings are as the Jaccard index of their word sets.
 *
 * Each input is lower-cased, stripped of every character that is not a letter,
 * combining mark, digit, underscore or whitespace, and has its whitespace
 * collapsed before being split into words.
 *
 * @param text1 - First string to compare.
 * @param text2 - Second string to compare.
 * @returns 1.0 when the normalized strings are identical (including both
 *   empty); otherwise `shared distinct words / all distinct words`, which is
 *   always within [0, 1].
 */
function calculateSimilarity(text1: string, text2: string): number {
  const normalize = (str: string): string =>
    str
      .toLowerCase()
      // Unicode-aware on purpose: ASCII-only \w would delete letters such as
      // ä, ö, ü and ß, making different German words look identical.
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();

  const s1 = normalize(text1);
  const s2 = normalize(text2);

  if (s1 === s2) return 1.0;

  // Sets keep repeated words from inflating the score above 1; the filter
  // drops the single '' token that splitting an empty string produces.
  const toWordSet = (s: string): Set<string> =>
    new Set(s.split(' ').filter(word => word.length > 0));

  const words1 = toWordSet(s1);
  const words2 = toWordSet(s2);

  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) shared++;
  }

  const unionSize = words1.size + words2.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
|
|
/**
 * Derives a hyphen-separated translation key from a title.
 *
 * The title is lower-cased, stripped of everything except ASCII word
 * characters, whitespace and hyphens, then whitespace runs become single
 * hyphens and any leading/trailing hyphens are removed.
 *
 * @param title - Title text to convert.
 * @returns The key; an empty string when the title has no usable characters.
 */
function generateKeyFromTitle(title: string): string {
  return title
    .toLowerCase()
    .replace(/[^\w\s-]/g, '')
    // Trim BEFORE whitespace becomes hyphens; trimming afterwards is a no-op
    // and leaves keys such as "-hello-world-" for padded titles.
    .trim()
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    // Titles that start or end with punctuation/hyphens must not leak an edge hyphen.
    .replace(/^-|-$/g, '');
}
|
|
|
|
/**
 * Pairs English posts with their German counterparts.
 *
 * Pass 1 links posts through Polylang metadata: first `pll_translation_id`,
 * then `pll_master_post_id`. Pass 2 scores every still-unpaired EN/DE
 * combination on title, slug and date and keeps the best candidate whose
 * weighted score exceeds the threshold; a German post is used at most once
 * in pass 2.
 *
 * @param postsEn - English posts.
 * @param postsDe - German posts.
 * @returns Dictionary from translation key to `[enId, deId]`.
 */
function findPostTranslations(
  postsEn: Post[],
  postsDe: Post[]
): TranslationMapping['posts'] {
  const titleWeight = 0.6;
  const slugWeight = 0.3;
  const dateWeight = 0.1;
  const minScore = 0.7;

  const mapping: TranslationMapping['posts'] = {};
  const matchedEnIds = new Set<number>();
  const matchedDeIds = new Set<number>();

  // Records a pair without ever overwriting an earlier one: duplicate titles,
  // reused Polylang ids or an empty generated key would otherwise silently
  // drop a match. Colliding/empty keys get the EN id appended.
  const addPair = (baseKey: string, enPost: Post, dePost: Post): void => {
    const taken = Object.prototype.hasOwnProperty.call(mapping, baseKey);
    const key =
      baseKey === '' || taken ? `${baseKey || 'post'}-${enPost.id}` : baseKey;
    mapping[key] = [enPost.id, dePost.id];
    matchedEnIds.add(enPost.id);
    matchedDeIds.add(dePost.id);
  };

  // First pass: try to match by Polylang metadata.
  const deById = new Map<number, Post>();
  const deByTranslationId = new Map<number, Post>();
  for (const dePost of postsDe) {
    deById.set(dePost.id, dePost);
    // Posts without a translation id must not be indexed under `undefined`.
    if (dePost.pll_translation_id) {
      deByTranslationId.set(dePost.pll_translation_id, dePost);
    }
  }

  for (const enPost of postsEn) {
    const byTranslationId = enPost.pll_translation_id
      ? deByTranslationId.get(enPost.pll_translation_id)
      : undefined;
    if (byTranslationId) {
      addPair(`post-${enPost.pll_translation_id}`, enPost, byTranslationId);
      continue;
    }

    const byMasterId = enPost.pll_master_post_id
      ? deById.get(enPost.pll_master_post_id)
      : undefined;
    if (byMasterId) {
      addPair(`post-${enPost.pll_master_post_id}`, enPost, byMasterId);
    }
  }

  // Second pass: content-based matching for remaining unmatched posts.
  // (Set membership is tested with `has`; `includes` does not exist on Set.)
  const unmatchedEn = postsEn.filter(p => !matchedEnIds.has(p.id));
  const unmatchedDe = postsDe.filter(p => !matchedDeIds.has(p.id));

  for (const enPost of unmatchedEn) {
    let bestMatch: { post: Post; score: number } | null = null;

    for (const dePost of unmatchedDe) {
      const titleScore = calculateSimilarity(enPost.title.rendered, dePost.title.rendered);
      // NOTE(review): calculateSimilarity strips '-' during normalization, so a
      // hyphenated slug collapses to a single word and this score is
      // effectively all-or-nothing — confirm that is intended.
      const slugScore = calculateSimilarity(enPost.slug, dePost.slug);
      const dateScore = enPost.date === dePost.date ? 1.0 : 0.0;

      // Weighted average
      const score =
        (titleScore * titleWeight) + (slugScore * slugWeight) + (dateScore * dateWeight);

      if (score > minScore && (!bestMatch || score > bestMatch.score)) {
        bestMatch = { post: dePost, score };
      }
    }

    if (bestMatch) {
      addPair(generateKeyFromTitle(enPost.title.rendered), enPost, bestMatch.post);
      // Remove the claimed German post so later English posts cannot reuse it.
      unmatchedDe.splice(unmatchedDe.indexOf(bestMatch.post), 1);
    }
  }

  return mapping;
}
|
|
|
|
/**
 * Pairs English and German products that share the same SKU.
 *
 * English products without a truthy `sku`, or whose SKU has no German
 * counterpart, are skipped. When several German products share a SKU the
 * last one in `productsDe` is used.
 *
 * @param productsEn - English products.
 * @param productsDe - German products.
 * @returns Dictionary from `product-<sku>` to `[enId, deId]`.
 */
function findProductTranslations(
  productsEn: any[],
  productsDe: any[]
): TranslationMapping['products'] {
  // Index the German side by SKU; later entries replace earlier ones.
  const germanBySku = new Map<any, any>();
  for (const german of productsDe) {
    germanBySku.set(german.sku, german);
  }

  const pairs: TranslationMapping['products'] = {};

  productsEn.forEach(english => {
    const sku = english.sku;
    if (!sku || !germanBySku.has(sku)) {
      return;
    }
    const german = germanBySku.get(sku)!;
    pairs[`product-${sku}`] = [english.id, german.id];
  });

  return pairs;
}
|
|
|
|
/**
 * Pairs English and German pages that share a Polylang `pll_translation_id`.
 *
 * English pages without a truthy `pll_translation_id`, or whose id has no
 * German counterpart, are skipped. When several German pages share an id the
 * last one in `pagesDe` is used.
 *
 * @param pagesEn - English pages.
 * @param pagesDe - German pages.
 * @returns Dictionary from `page-<pll_translation_id>` to `[enId, deId]`.
 */
function findPageTranslations(
  pagesEn: any[],
  pagesDe: any[]
): TranslationMapping['pages'] {
  const mapping: TranslationMapping['pages'] = {};

  // Pages should have better Polylang metadata, so only the translation id
  // is consulted (the previously built id index was never read and is gone).
  const deByTranslationId = new Map(pagesDe.map(p => [p.pll_translation_id, p]));

  for (const enPage of pagesEn) {
    if (enPage.pll_translation_id && deByTranslationId.has(enPage.pll_translation_id)) {
      const dePage = deByTranslationId.get(enPage.pll_translation_id)!;
      const key = `page-${enPage.pll_translation_id}`;
      mapping[key] = [enPage.id, dePage.id];
    }
  }

  return mapping;
}
|
|
|
|
/**
 * Script entry point.
 *
 * Loads the six raw EN/DE JSON files, builds the post, product and page
 * mappings, writes them to `data/manual-translation-mapping.json`, and prints
 * a summary followed by up to three example post pairs.
 */
function main() {
  console.log('🔍 Creating manual translation mapping...\n');

  // Read raw data
  const loadJson = (file: string) => JSON.parse(readFileSync(file, 'utf8'));

  const rawData: RawData = {
    posts: {
      en: loadJson('data/raw/posts.en.json'),
      de: loadJson('data/raw/posts.de.json')
    },
    products: {
      en: loadJson('data/raw/products.en.json'),
      de: loadJson('data/raw/products.de.json')
    },
    pages: {
      en: loadJson('data/raw/pages.en.json'),
      de: loadJson('data/raw/pages.de.json')
    }
  };
  const { posts, products, pages } = rawData;

  console.log('📊 Raw data loaded:');
  console.log(` - Posts: ${posts.en.length} EN, ${posts.de.length} DE`);
  console.log(` - Products: ${products.en.length} EN, ${products.de.length} DE`);
  console.log(` - Pages: ${pages.en.length} EN, ${pages.de.length} DE`);
  console.log('');

  // Generate mappings
  const mapping: TranslationMapping = {
    posts: findPostTranslations(posts.en, posts.de),
    products: findProductTranslations(products.en, products.de),
    pages: findPageTranslations(pages.en, pages.de)
  };

  // Save mapping
  const outputPath = 'data/manual-translation-mapping.json';
  writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  console.log('✅ Manual translation mapping created:\n');
  console.log(`Posts: ${Object.keys(mapping.posts).length} pairs`);
  console.log(`Products: ${Object.keys(mapping.products).length} pairs`);
  console.log(`Pages: ${Object.keys(mapping.pages).length} pairs`);
  console.log(`\nSaved to: ${outputPath}`);

  // Show some examples (at most the first three post pairs)
  const examples = Object.entries(mapping.posts).slice(0, 3);
  if (examples.length > 0) {
    console.log('\n📝 Post mapping examples:');
    for (const [key, ids] of examples) {
      const enPost = posts.en.find(p => p.id === ids[0]);
      const dePost = posts.de.find(p => p.id === ids[1]);
      console.log(` ${key}:`);
      console.log(` EN: [${ids[0]}] ${enPost?.title.rendered}`);
      console.log(` DE: [${ids[1]}] ${dePost?.title.rendered}`);
    }
  }
}
|
|
|
|
// Run the generator when the script is executed.
main();