Files
klz-cables.com/scripts/generate-pdf-datasheets.ts
2026-01-23 13:10:08 +01:00

295 lines
9.6 KiB
TypeScript

#!/usr/bin/env ts-node
/**
* PDF Datasheet Generator (React-PDF)
*
* Renders PDFs via `@react-pdf/renderer`.
*
* Source of truth:
* - All technical data + cross-section tables: Excel files in `data/excel/`
* - Product description text: MDX files in `data/products/{en,de}/*.mdx`
*/
import * as fs from 'fs';
import * as path from 'path';
import * as XLSX from 'xlsx';
const matter = require('gray-matter') as (src: string) => { data: unknown; content: string };
import type { ProductData } from './pdf/model/types';
import { generateDatasheetPdfBuffer } from './pdf/react-pdf/generate-datasheet-pdf';
import { generateFileName, normalizeValue, stripHtml } from './pdf/model/utils';
// Script-wide configuration.
const CONFIG = {
// Destination directory for the generated PDF files.
outputDir: path.join(process.cwd(), 'public/datasheets'),
// Number of products rendered per chunk (bounds memory and log batching).
chunkSize: 10,
} as const;
// Excel workbooks acting as the source of truth for product designations.
// Order matters: earlier files win when the same normalized part-number key
// appears in several workbooks (see loadAllExcelDesignations).
const EXCEL_FILES = [
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx'),
path.join(process.cwd(), 'data/excel/low-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
] as const;
// Product metadata parsed from one MDX file (frontmatter + body).
type MdxProduct = {
// File name without the .mdx extension; used as the canonical product slug.
slug: string;
title: string;
sku: string;
categories: string[];
images: string[];
// Inner HTML of the MDX body with the <ProductTabs> wrapper stripped.
descriptionHtml: string;
};
type MdxIndex = Map<string, MdxProduct>; // key: normalized designation/title
/**
 * Create the PDF output directory if it does not already exist.
 *
 * `mkdirSync` with `recursive: true` is a no-op for an existing directory,
 * so the previous `existsSync` pre-check was redundant (and introduced a
 * check-then-act race against concurrent runs).
 */
function ensureOutputDir(): void {
  fs.mkdirSync(CONFIG.outputDir, { recursive: true });
}
/**
 * Collapse a designation/title into a comparison key: uppercase the text,
 * drop a trailing "-<digits>" suffix, then remove every non-alphanumeric
 * character so Excel and MDX spellings of the same product line up.
 */
function normalizeExcelKey(value: string): string {
  const upper = String(value || '').toUpperCase();
  const withoutSuffix = upper.replace(/-\d+$/g, '');
  return withoutSuffix.replace(/[^A-Z0-9]+/g, '');
}
/**
 * Strip the surrounding wrapper from an MDX product body so only the inner
 * description markup remains. MDX product files are wrapped like:
 *
 *   <ProductTabs technicalData={...}>
 *     <section>...</section>
 *   </ProductTabs>
 *
 * For the PDF we only want the inner content.
 */
function extractDescriptionHtmlFromMdxBody(body: string): string {
  let text = String(body || '').trim();
  if (!text) return '';
  // Remove the opening <ProductTabs ...> tag (non-greedy up to its '>')...
  text = text.replace(/^\s*<ProductTabs[\s\S]*?>\s*/i, '');
  // ...and the matching closing tag at the end of the body.
  text = text.replace(/\s*<\/ProductTabs>\s*$/i, '');
  return text.trim();
}
function buildMdxIndex(locale: 'en' | 'de'): MdxIndex {
const dir = path.join(process.cwd(), 'data/products', locale);
const idx: MdxIndex = new Map();
if (!fs.existsSync(dir)) return idx;
const files = fs
.readdirSync(dir)
.filter(f => f.endsWith('.mdx'))
.sort();
for (const file of files) {
const filePath = path.join(dir, file);
const raw = fs.readFileSync(filePath, 'utf8');
const parsed = matter(raw);
const data = (parsed.data || {}) as any;
const title = normalizeValue(String(data.title || ''));
if (!title) continue;
const sku = normalizeValue(String(data.sku || ''));
const categories = Array.isArray(data.categories) ? data.categories.map((c: any) => normalizeValue(String(c))).filter(Boolean) : [];
const images = Array.isArray(data.images) ? data.images.map((i: any) => normalizeValue(String(i))).filter(Boolean) : [];
const descriptionHtml = extractDescriptionHtmlFromMdxBody(parsed.content);
const slug = path.basename(file, '.mdx');
idx.set(normalizeExcelKey(title), { slug, title, sku, categories, images, descriptionHtml });
}
return idx;
}
/**
 * Return the first column key in `headerRow` whose normalized cell text
 * matches `pattern`, or null when no column matches.
 */
function findKeyByHeaderValue(headerRow: Record<string, unknown>, pattern: RegExp): string | null {
  for (const [columnKey, cell] of Object.entries(headerRow || {})) {
    const cellText = normalizeValue(String(cell ?? ''));
    if (cellText && pattern.test(cellText)) {
      return columnKey;
    }
  }
  return null;
}
/**
 * Read the first worksheet of an Excel file as an array of row objects.
 * Returns [] when the file, the workbook's first sheet name, or the sheet
 * itself is missing.
 */
function readExcelRows(filePath: string): Array<Record<string, unknown>> {
  if (!fs.existsSync(filePath)) return [];
  const workbook = XLSX.readFile(filePath, { cellDates: false, cellNF: false, cellText: false });
  const [firstSheetName] = workbook.SheetNames;
  const firstSheet = firstSheetName ? workbook.Sheets[firstSheetName] : undefined;
  if (!firstSheet) return [];
  // defval keeps empty cells as '' so column keys stay stable across rows.
  return XLSX.utils.sheet_to_json(firstSheet, {
    defval: '',
    raw: false,
    blankrows: false,
  }) as Array<Record<string, unknown>>;
}
/**
 * Extract part-number designations from one Excel file, keyed by
 * normalizeExcelKey(). The first occurrence of each key wins.
 */
function readDesignationsFromExcelFile(filePath: string): Map<string, string> {
  const rows = readExcelRows(filePath);
  const designations = new Map<string, string>();
  if (!rows.length) return designations;
  // Legacy sheets expose a literal "Part Number" column key. The new MV sheet
  // uses __EMPTY* keys and stores the human-readable headers in row 0 values.
  const firstRow = rows[0] || {};
  let columnKey: string;
  if (Object.prototype.hasOwnProperty.call(firstRow, 'Part Number')) {
    columnKey = 'Part Number';
  } else {
    columnKey = findKeyByHeaderValue(firstRow, /^part\s*number$/i) || '__EMPTY';
  }
  for (const row of rows) {
    const designation = normalizeValue(String(row?.[columnKey] ?? ''));
    // Skip blanks plus the header/units rows that surface as data rows.
    if (!designation || designation === 'Units' || designation === 'Part Number') continue;
    const key = normalizeExcelKey(designation);
    if (!key || designations.has(key)) continue;
    // Keep first-seen designation string (stable filenames from MDX slug).
    designations.set(key, designation);
  }
  return designations;
}
/**
 * Merge designations from every configured Excel workbook.
 * Files earlier in EXCEL_FILES take precedence on duplicate keys.
 */
function loadAllExcelDesignations(): Map<string, string> {
  const merged = new Map<string, string>();
  for (const excelPath of EXCEL_FILES) {
    for (const [key, designation] of readDesignationsFromExcelFile(excelPath)) {
      if (!merged.has(key)) {
        merged.set(key, designation);
      }
    }
  }
  return merged;
}
/**
 * Build the ProductData list for one locale. Designations come from the
 * Excel workbooks; only the description text, images, SKU, categories and
 * slug come from a matching MDX file (matched via the normalized key).
 * Everything else is Excel-driven later during model building
 * (technicalItems + voltage tables).
 */
async function loadProductsFromExcelAndMdx(locale: 'en' | 'de'): Promise<ProductData[]> {
  const mdxIndex = buildMdxIndex(locale);
  const excelDesignations = loadAllExcelDesignations();
  const products: ProductData[] = [];
  let nextId = 1;
  for (const [key, designation] of excelDesignations) {
    const mdx = mdxIndex.get(key) || null;
    const title = mdx?.title || designation;
    // When no MDX file matched, derive a slug from the title.
    const fallbackSlug = title
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/-+/g, '-')
      .replace(/^-|-$/g, '');
    const slug = mdx?.slug || fallbackSlug;
    products.push({
      id: nextId++,
      name: title,
      shortDescriptionHtml: '',
      descriptionHtml: mdx?.descriptionHtml || '',
      images: mdx?.images || [],
      featuredImage: (mdx?.images && mdx.images[0]) || null,
      sku: mdx?.sku || title,
      slug,
      translationKey: slug,
      locale,
      categories: (mdx?.categories || []).map(name => ({ name })),
      attributes: [],
    });
  }
  // Deterministic order: by slug, then name.
  products.sort((a, b) => (a.slug || '').localeCompare(b.slug || '') || a.name.localeCompare(b.name));
  // Drop products whose name renders to nothing after HTML stripping.
  return products.filter(p => stripHtml(p.name));
}
/**
 * Render one chunk of products to PDF files in the output directory.
 * Products are processed sequentially; a failure is logged per product and
 * never aborts the rest of the chunk.
 */
async function processChunk(products: ProductData[], chunkIndex: number, totalChunks: number): Promise<void> {
  console.log(`\nProcessing chunk ${chunkIndex + 1}/${totalChunks} (${products.length} products)...`);
  for (const product of products) {
    const locale = (product.locale || 'en') as 'en' | 'de';
    try {
      const pdfBuffer = await generateDatasheetPdfBuffer({ product, locale });
      const fileName = generateFileName(product, locale);
      fs.writeFileSync(path.join(CONFIG.outputDir, fileName), pdfBuffer);
      console.log(`${locale.toUpperCase()}: ${fileName}`);
      // Short pause between renders to keep the event loop responsive.
      await new Promise<void>(resolve => setTimeout(resolve, 25));
    } catch (error) {
      console.error(`✗ Failed to process product ${product.id}:`, error);
    }
  }
}
/**
 * Top-level pipeline: load products for the requested locale(s), apply the
 * developer-convenience filters, then render PDFs in fixed-size chunks.
 *
 * Env vars:
 * - PDF_LOCALE: 'en' or 'de' to restrict generation to a single locale.
 * - PDF_MATCH:  case-insensitive substring filter on slug/translationKey/sku/name.
 * - PDF_LIMIT:  cap the product count — applied AFTER the match filter so the
 *               limit operates within the filtered set.
 */
async function processProductsInChunks(): Promise<void> {
  console.log('Starting PDF generation (React-PDF)');
  ensureOutputDir();
  const requestedLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
  const locales: Array<'en' | 'de'> =
    requestedLocale === 'de' || requestedLocale === 'en' ? [requestedLocale] : ['en', 'de'];
  const allProducts: ProductData[] = [];
  for (const locale of locales) {
    allProducts.push(...(await loadProductsFromExcelAndMdx(locale)));
  }
  if (!allProducts.length) {
    console.log('No products found');
    return;
  }
  let products = allProducts;
  const matchNeedle = normalizeValue(String(process.env.PDF_MATCH || '')).toLowerCase();
  if (matchNeedle) {
    products = products.filter(p => {
      const haystack = [p.slug, p.translationKey, p.sku, p.name]
        .filter(Boolean)
        .join(' ')
        .toLowerCase();
      return haystack.includes(matchNeedle);
    });
  }
  const limit = Number(process.env.PDF_LIMIT || '0');
  if (Number.isFinite(limit) && limit > 0) {
    products = products.slice(0, limit);
  }
  const enProducts = products.filter(p => (p.locale || 'en') === 'en');
  const deProducts = products.filter(p => (p.locale || 'en') === 'de');
  console.log(`Found ${enProducts.length} EN + ${deProducts.length} DE products`);
  const totalChunks = Math.ceil(products.length / CONFIG.chunkSize);
  for (let chunkIndex = 0; chunkIndex < totalChunks; chunkIndex++) {
    const chunk = products.slice(chunkIndex * CONFIG.chunkSize, (chunkIndex + 1) * CONFIG.chunkSize);
    await processChunk(chunk, chunkIndex, totalChunks);
  }
  console.log('\n✅ PDF generation completed!');
  // NOTE(review): these counts reflect attempted products; per-product render
  // failures are swallowed inside processChunk — confirm if exact counts matter.
  console.log(`Generated ${enProducts.length} EN + ${deProducts.length} DE PDFs`);
  console.log(`Output: ${CONFIG.outputDir}`);
}
/**
 * Entry point: run the pipeline, report total wall-clock time on success,
 * and exit with a non-zero status on any fatal error.
 */
async function main(): Promise<void> {
  const startedAt = Date.now();
  try {
    await processProductsInChunks();
    const seconds = ((Date.now() - startedAt) / 1000).toFixed(2);
    console.log(`\nTime: ${seconds}s`);
  } catch (error) {
    console.error('Fatal error:', error);
    process.exit(1);
  }
}
// Run immediately when executed as a script. main() already catches and
// exits on errors internally, so this .catch is a defensive backstop only.
main().catch(console.error);
export { main as generatePDFDatasheets };