initial migration
This commit is contained in:
230
scripts/create-manual-mapping.ts
Normal file
230
scripts/create-manual-mapping.ts
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env tsx
|
||||
|
||||
/**
|
||||
* Manual Translation Mapping Generator
|
||||
* Creates translationKey mappings for posts that couldn't be auto-detected
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
/**
 * Shape of a single entry in the raw post exports (`data/raw/posts.*.json`)
 * as far as this script reads it.
 */
interface Post {
  /** Numeric post ID; this is the value stored in the translation mapping. */
  id: number;

  /** URL slug; compared across languages during content-based matching. */
  slug: string;

  /** Title wrapper; only the `rendered` string is read by this script. */
  title: {
    rendered: string;
  };

  /** Date string; only ever compared for exact equality. */
  date: string;

  /** Presumably the language code of the post; not read by the matching logic here. */
  lang: string;

  /** Polylang translation-group ID, when the export carries one. */
  pll_translation_id?: number;

  /** Polylang master post ID, when the export carries one. */
  pll_master_post_id?: number;
}
|
||||
|
||||
/**
 * Structure written to the mapping file: for each content type, a
 * translationKey mapped to the IDs of the paired entries.
 *
 * IDs are numeric (they come straight from the `id` field of the raw
 * records), so the value type is `number[]`; declaring them as strings made
 * every assignment and every `p.id === ids[n]` comparison a type error.
 */
interface TranslationMapping {
  posts: Record<string, number[]>; // translationKey -> [en_id, de_id]
  products: Record<string, number[]>; // translationKey -> [en_id, de_id]
  pages: Record<string, number[]>; // translationKey -> [en_id, de_id]
}
|
||||
|
||||
/**
 * All raw exports held in memory, grouped by content type and then by
 * language (`en` / `de`).
 */
interface RawData {
  /** Blog posts per language. */
  posts: Record<'en' | 'de', Post[]>;

  /** Products per language; their shape is not declared in this file. */
  products: Record<'en' | 'de', any[]>;

  /** Pages per language; their shape is not declared in this file. */
  pages: Record<'en' | 'de', any[]>;
}
|
||||
|
||||
/**
 * Word-overlap similarity between two strings, in the range [0, 1].
 *
 * Both inputs are lower-cased, stripped of punctuation and
 * whitespace-collapsed. If the normalized strings are equal the result is 1;
 * otherwise it is the Jaccard index of the two sets of distinct words
 * (shared words / all distinct words).
 *
 * @param text1 First string to compare.
 * @param text2 Second string to compare.
 * @returns 1 for identical normalized text, 0 for no shared words.
 */
function calculateSimilarity(text1: string, text2: string): number {
  const normalize = (str: string): string =>
    str
      // Compose characters first so "u" + combining diaeresis equals "ü".
      .normalize('NFC')
      .toLowerCase()
      // Unicode-aware filter: ASCII-only `\w` would delete ä/ö/ü/ß and
      // mangle German words (e.g. "Größe" -> "gre").
      .replace(/[^\p{L}\p{N}_\s]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();

  const s1 = normalize(text1);
  const s2 = normalize(text2);

  if (s1 === s2) return 1.0;

  // Compare *distinct* words; counting repeated words in the intersection
  // could push the ratio above 1.
  const words1 = new Set(s1.split(' ').filter(Boolean));
  const words2 = new Set(s2.split(' ').filter(Boolean));

  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) shared++;
  }

  // s1 !== s2 here, so at least one side has a word and the union is non-empty.
  const unionSize = words1.size + words2.size - shared;
  return shared / unionSize;
}
|
||||
|
||||
/**
 * Builds a hyphen-separated, lower-case translation key from a title.
 *
 * Characters other than ASCII word characters, whitespace and hyphens are
 * dropped, whitespace runs become a single hyphen, and repeated hyphens are
 * collapsed.
 *
 * @param title Title text to convert.
 * @returns The key, without leading or trailing hyphens. May be empty when
 *   the title contains no word characters.
 */
function generateKeyFromTitle(title: string): string {
  return title
    .toLowerCase()
    .replace(/[^\w\s-]/g, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    // A trailing `.trim()` is a no-op at this point because edge whitespace
    // has already been turned into hyphens; strip those hyphens instead.
    .replace(/^-+|-+$/g, '');
}
|
||||
|
||||
/**
 * Pairs English posts with their German translations.
 *
 * Pass 1 uses Polylang metadata: a shared `pll_translation_id`, or an English
 * post whose `pll_master_post_id` is the ID of a German post. Keys are
 * `post-<id>`.
 *
 * Pass 2 handles the posts left over: each unmatched English post is scored
 * against every unmatched German post (title 60 %, slug 30 %, identical date
 * 10 %) and paired with the best candidate scoring above 0.7. Keys are
 * derived from the English title.
 *
 * @param postsEn English posts.
 * @param postsDe German posts.
 * @returns translationKey -> [English ID, German ID].
 */
function findPostTranslations(
  postsEn: Post[],
  postsDe: Post[]
): TranslationMapping['posts'] {
  const mapping: TranslationMapping['posts'] = {};

  // First pass: try to match by Polylang metadata.
  // When several German posts share an ID, the last one wins.
  const deById = new Map<number, Post>();
  const deByTranslationId = new Map<number, Post>();
  for (const dePost of postsDe) {
    deById.set(dePost.id, dePost);
    if (dePost.pll_translation_id != null) {
      deByTranslationId.set(dePost.pll_translation_id, dePost);
    }
  }

  for (const enPost of postsEn) {
    // Try by pll_translation_id (a missing or zero ID counts as "no metadata").
    const translationId = enPost.pll_translation_id;
    const byTranslation = translationId ? deByTranslationId.get(translationId) : undefined;
    if (byTranslation) {
      mapping[`post-${translationId}`] = [enPost.id, byTranslation.id];
      continue;
    }

    // Try by pll_master_post_id.
    const masterId = enPost.pll_master_post_id;
    const byMaster = masterId ? deById.get(masterId) : undefined;
    if (byMaster) {
      mapping[`post-${masterId}`] = [enPost.id, byMaster.id];
    }
  }

  // Second pass: content-based matching for remaining unmatched posts.
  // English and German IDs are tracked separately, and looked up with
  // Set#has (a Set has no `includes` method).
  const matchedPairs = Object.values(mapping);
  const matchedEnIds = new Set(matchedPairs.map(pair => pair[0]));
  const matchedDeIds = new Set(matchedPairs.map(pair => pair[1]));

  const unmatchedEn = postsEn.filter(p => !matchedEnIds.has(p.id));
  const unmatchedDe = postsDe.filter(p => !matchedDeIds.has(p.id));

  for (const enPost of unmatchedEn) {
    let bestMatch: { post: Post; score: number } | null = null;

    for (const dePost of unmatchedDe) {
      const titleScore = calculateSimilarity(enPost.title.rendered, dePost.title.rendered);
      const slugScore = calculateSimilarity(enPost.slug, dePost.slug);
      const dateScore = enPost.date === dePost.date ? 1.0 : 0.0;

      // Weighted average
      const score = (titleScore * 0.6) + (slugScore * 0.3) + (dateScore * 0.1);

      if (score > 0.7 && (!bestMatch || score > bestMatch.score)) {
        bestMatch = { post: dePost, score };
      }
    }

    if (bestMatch) {
      const titleKey = generateKeyFromTitle(enPost.title.rendered);
      // Two posts can produce the same title key; suffix the English ID
      // rather than silently overwriting the earlier pair.
      const key = Object.prototype.hasOwnProperty.call(mapping, titleKey)
        ? `${titleKey}-${enPost.id}`
        : titleKey;
      mapping[key] = [enPost.id, bestMatch.post.id];

      // A German post may only be paired once.
      unmatchedDe.splice(unmatchedDe.indexOf(bestMatch.post), 1);
    }
  }

  return mapping;
}
|
||||
|
||||
/**
 * Pairs English and German products that carry the same SKU.
 *
 * English products with a falsy SKU, or with a SKU no German product has,
 * are left out. Keys are `product-<sku>`.
 *
 * @param productsEn English products.
 * @param productsDe German products.
 * @returns translationKey -> [English ID, German ID].
 */
function findProductTranslations(
  productsEn: any[],
  productsDe: any[]
): TranslationMapping['products'] {
  // Index the German side by SKU; a later duplicate SKU replaces an earlier one.
  const germanBySku = new Map();
  for (const candidate of productsDe) {
    germanBySku.set(candidate.sku, candidate);
  }

  const pairs: TranslationMapping['products'] = {};

  productsEn.forEach(english => {
    const sku = english.sku;
    if (!sku || !germanBySku.has(sku)) {
      return;
    }
    pairs[`product-${sku}`] = [english.id, germanBySku.get(sku).id];
  });

  return pairs;
}
|
||||
|
||||
/**
 * Pairs English and German pages using Polylang metadata only (no
 * content-based matching).
 *
 * A pair is formed when both pages share a `pll_translation_id`. Failing
 * that, an English page whose `pll_master_post_id` is the ID of a German
 * page is paired with it, mirroring the post matcher. The fallback never
 * replaces an existing key. Keys are `page-<id>`.
 *
 * NOTE(review): whether page exports carry `pll_master_post_id` is not
 * visible in this file; when the field is absent the fallback does nothing.
 *
 * @param pagesEn English pages.
 * @param pagesDe German pages.
 * @returns translationKey -> [English ID, German ID].
 */
function findPageTranslations(
  pagesEn: any[],
  pagesDe: any[]
): TranslationMapping['pages'] {
  const mapping: TranslationMapping['pages'] = {};

  // Pages should have better Polylang metadata
  const deById = new Map<unknown, any>();
  const deByTranslationId = new Map<unknown, any>();
  for (const dePage of pagesDe) {
    deById.set(dePage.id, dePage);
    deByTranslationId.set(dePage.pll_translation_id, dePage);
  }

  for (const enPage of pagesEn) {
    // Primary: shared translation-group ID (falsy IDs count as "no metadata").
    if (enPage.pll_translation_id && deByTranslationId.has(enPage.pll_translation_id)) {
      const dePage = deByTranslationId.get(enPage.pll_translation_id);
      mapping[`page-${enPage.pll_translation_id}`] = [enPage.id, dePage.id];
      continue;
    }

    // Fallback: master page ID pointing at a German page.
    if (enPage.pll_master_post_id && deById.has(enPage.pll_master_post_id)) {
      const key = `page-${enPage.pll_master_post_id}`;
      // Never displace a pair under the same key.
      if (!Object.prototype.hasOwnProperty.call(mapping, key)) {
        mapping[key] = [enPage.id, deById.get(enPage.pll_master_post_id).id];
      }
    }
  }

  return mapping;
}
|
||||
|
||||
/**
 * Script entry point.
 *
 * Loads the six raw export files under `data/raw/`, builds the post, product
 * and page mappings, writes them as pretty-printed JSON to
 * `data/manual-translation-mapping.json`, and prints a summary plus up to
 * three example post pairs. File and JSON parse errors are not caught here.
 */
function main() {
  console.log('🔍 Creating manual translation mapping...\n');

  // Read raw data (paths are relative to the current working directory).
  const loadJson = (file: string) => JSON.parse(readFileSync(file, 'utf8'));

  const postsEn = loadJson('data/raw/posts.en.json');
  const postsDe = loadJson('data/raw/posts.de.json');
  const productsEn = loadJson('data/raw/products.en.json');
  const productsDe = loadJson('data/raw/products.de.json');
  const pagesEn = loadJson('data/raw/pages.en.json');
  const pagesDe = loadJson('data/raw/pages.de.json');

  const rawData: RawData = {
    posts: { en: postsEn, de: postsDe },
    products: { en: productsEn, de: productsDe },
    pages: { en: pagesEn, de: pagesDe }
  };

  console.log('📊 Raw data loaded:');
  console.log(` - Posts: ${rawData.posts.en.length} EN, ${rawData.posts.de.length} DE`);
  console.log(` - Products: ${rawData.products.en.length} EN, ${rawData.products.de.length} DE`);
  console.log(` - Pages: ${rawData.pages.en.length} EN, ${rawData.pages.de.length} DE`);
  console.log('');

  // Generate mappings
  const posts = findPostTranslations(rawData.posts.en, rawData.posts.de);
  const products = findProductTranslations(rawData.products.en, rawData.products.de);
  const pages = findPageTranslations(rawData.pages.en, rawData.pages.de);
  const mapping: TranslationMapping = { posts, products, pages };

  // Save mapping
  const outputPath = 'data/manual-translation-mapping.json';
  writeFileSync(outputPath, JSON.stringify(mapping, null, 2));

  console.log('✅ Manual translation mapping created:\n');
  console.log(`Posts: ${Object.keys(mapping.posts).length} pairs`);
  console.log(`Products: ${Object.keys(mapping.products).length} pairs`);
  console.log(`Pages: ${Object.keys(mapping.pages).length} pairs`);
  console.log(`\nSaved to: ${outputPath}`);

  // Show some examples
  if (Object.keys(mapping.posts).length === 0) {
    return;
  }

  console.log('\n📝 Post mapping examples:');
  for (const [key, ids] of Object.entries(mapping.posts).slice(0, 3)) {
    const enPost = rawData.posts.en.find(p => p.id === ids[0]);
    const dePost = rawData.posts.de.find(p => p.id === ids[1]);
    console.log(` ${key}:`);
    console.log(` EN: [${ids[0]}] ${enPost?.title.rendered}`);
    console.log(` DE: [${ids[1]}] ${dePost?.title.rendered}`);
  }
}
|
||||
|
||||
// Run the generator as soon as the script is loaded.
main();
|
||||
Reference in New Issue
Block a user