pdf sheets from new excel
This commit is contained in:
@@ -2,48 +2,212 @@
|
||||
/**
 * PDF Datasheet Generator (React-PDF)
 *
 * Renders PDFs via `@react-pdf/renderer`.
 *
 * Source of truth:
 * - All technical data + cross-section tables: Excel files in `data/excel/`
 * - Product description text: MDX files in `data/products/{en,de}/*.mdx`
 */

import * as fs from 'fs';
import * as path from 'path';

import * as XLSX from 'xlsx';
const matter = require('gray-matter') as (src: string) => { data: unknown; content: string };

import type { ProductData } from './pdf/model/types';
import { generateDatasheetPdfBuffer } from './pdf/react-pdf/generate-datasheet-pdf';
import { generateFileName, normalizeValue, stripHtml } from './pdf/model/utils';

const CONFIG = {
|
||||
productsFile: path.join(process.cwd(), 'data/processed/products.json'),
|
||||
outputDir: path.join(process.cwd(), 'public/datasheets'),
|
||||
chunkSize: 10,
|
||||
} as const;
|
||||
|
||||
const EXCEL_FILES = [
|
||||
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
|
||||
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
|
||||
path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx'),
|
||||
path.join(process.cwd(), 'data/excel/low-voltage-KM.xlsx'),
|
||||
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
|
||||
] as const;
|
||||
|
||||
type MdxProduct = {
|
||||
slug: string;
|
||||
title: string;
|
||||
sku: string;
|
||||
categories: string[];
|
||||
images: string[];
|
||||
descriptionHtml: string;
|
||||
};
|
||||
|
||||
type MdxIndex = Map<string, MdxProduct>; // key: normalized designation/title
|
||||
|
||||
function ensureOutputDir(): void {
|
||||
if (!fs.existsSync(CONFIG.outputDir)) {
|
||||
fs.mkdirSync(CONFIG.outputDir, { recursive: true });
|
||||
}
|
||||
}
|
||||
|
||||
async function readProductsStream(): Promise<ProductData[]> {
|
||||
console.log('Reading products.json...');
|
||||
return new Promise((resolve, reject) => {
|
||||
const stream = fs.createReadStream(CONFIG.productsFile, { encoding: 'utf8' });
|
||||
let data = '';
|
||||
stream.on('data', chunk => {
|
||||
data += chunk;
|
||||
function normalizeExcelKey(value: string): string {
|
||||
return String(value || '')
|
||||
.toUpperCase()
|
||||
.replace(/-\d+$/g, '')
|
||||
.replace(/[^A-Z0-9]+/g, '');
|
||||
}
|
||||
|
||||
function extractDescriptionHtmlFromMdxBody(body: string): string {
|
||||
const content = String(body || '').trim();
|
||||
if (!content) return '';
|
||||
|
||||
// MDX product files are wrapped like:
|
||||
// <ProductTabs technicalData={...}>
|
||||
// <section>...</section>
|
||||
// </ProductTabs>
|
||||
// For PDF, we only want the inner description content.
|
||||
const withoutOpen = content.replace(/^\s*<ProductTabs[\s\S]*?>\s*/i, '');
|
||||
const withoutClose = withoutOpen.replace(/\s*<\/ProductTabs>\s*$/i, '');
|
||||
return withoutClose.trim();
|
||||
}
|
||||
|
||||
function buildMdxIndex(locale: 'en' | 'de'): MdxIndex {
|
||||
const dir = path.join(process.cwd(), 'data/products', locale);
|
||||
const idx: MdxIndex = new Map();
|
||||
if (!fs.existsSync(dir)) return idx;
|
||||
|
||||
const files = fs
|
||||
.readdirSync(dir)
|
||||
.filter(f => f.endsWith('.mdx'))
|
||||
.sort();
|
||||
|
||||
for (const file of files) {
|
||||
const filePath = path.join(dir, file);
|
||||
const raw = fs.readFileSync(filePath, 'utf8');
|
||||
const parsed = matter(raw);
|
||||
const data = (parsed.data || {}) as any;
|
||||
|
||||
const title = normalizeValue(String(data.title || ''));
|
||||
if (!title) continue;
|
||||
|
||||
const sku = normalizeValue(String(data.sku || ''));
|
||||
const categories = Array.isArray(data.categories) ? data.categories.map((c: any) => normalizeValue(String(c))).filter(Boolean) : [];
|
||||
const images = Array.isArray(data.images) ? data.images.map((i: any) => normalizeValue(String(i))).filter(Boolean) : [];
|
||||
|
||||
const descriptionHtml = extractDescriptionHtmlFromMdxBody(parsed.content);
|
||||
|
||||
const slug = path.basename(file, '.mdx');
|
||||
idx.set(normalizeExcelKey(title), { slug, title, sku, categories, images, descriptionHtml });
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
function findKeyByHeaderValue(headerRow: Record<string, unknown>, pattern: RegExp): string | null {
|
||||
for (const [k, v] of Object.entries(headerRow || {})) {
|
||||
const text = normalizeValue(String(v ?? ''));
|
||||
if (!text) continue;
|
||||
if (pattern.test(text)) return k;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function readExcelRows(filePath: string): Array<Record<string, unknown>> {
|
||||
if (!fs.existsSync(filePath)) return [];
|
||||
const workbook = XLSX.readFile(filePath, { cellDates: false, cellNF: false, cellText: false });
|
||||
const sheetName = workbook.SheetNames[0];
|
||||
if (!sheetName) return [];
|
||||
const sheet = workbook.Sheets[sheetName];
|
||||
if (!sheet) return [];
|
||||
|
||||
return XLSX.utils.sheet_to_json(sheet, {
|
||||
defval: '',
|
||||
raw: false,
|
||||
blankrows: false,
|
||||
}) as Array<Record<string, unknown>>;
|
||||
}
|
||||
|
||||
function readDesignationsFromExcelFile(filePath: string): Map<string, string> {
|
||||
const rows = readExcelRows(filePath);
|
||||
if (!rows.length) return new Map();
|
||||
|
||||
// Legacy sheets use "Part Number" as a column key.
|
||||
// The new MV sheet uses __EMPTY* keys and stores the human headers in row 0 values.
|
||||
const headerRow = rows[0] || {};
|
||||
const partNumberKey =
|
||||
(Object.prototype.hasOwnProperty.call(headerRow, 'Part Number') ? 'Part Number' : null) ||
|
||||
findKeyByHeaderValue(headerRow, /^part\s*number$/i) ||
|
||||
'__EMPTY';
|
||||
|
||||
const out = new Map<string, string>();
|
||||
for (const r of rows) {
|
||||
const pn = normalizeValue(String(r?.[partNumberKey] ?? ''));
|
||||
if (!pn || pn === 'Units' || pn === 'Part Number') continue;
|
||||
|
||||
const key = normalizeExcelKey(pn);
|
||||
if (!key) continue;
|
||||
|
||||
// Keep first-seen designation string (stable filenames from MDX slug).
|
||||
if (!out.has(key)) out.set(key, pn);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
function loadAllExcelDesignations(): Map<string, string> {
|
||||
const out = new Map<string, string>();
|
||||
for (const filePath of EXCEL_FILES) {
|
||||
const m = readDesignationsFromExcelFile(filePath);
|
||||
Array.from(m.entries()).forEach(([k, v]) => {
|
||||
if (!out.has(k)) out.set(k, v);
|
||||
});
|
||||
stream.on('end', () => {
|
||||
try {
|
||||
const products = JSON.parse(data) as ProductData[];
|
||||
console.log(`Loaded ${products.length} products`);
|
||||
resolve(products);
|
||||
} catch (error) {
|
||||
reject(new Error(`Failed to parse JSON: ${error}`));
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
async function loadProductsFromExcelAndMdx(locale: 'en' | 'de'): Promise<ProductData[]> {
|
||||
const mdxIndex = buildMdxIndex(locale);
|
||||
const excelDesignations = loadAllExcelDesignations();
|
||||
|
||||
const products: ProductData[] = [];
|
||||
let id = 1;
|
||||
|
||||
Array.from(excelDesignations.entries()).forEach(([key, designation]) => {
|
||||
const mdx = mdxIndex.get(key) || null;
|
||||
|
||||
const title = mdx?.title || designation;
|
||||
const slug =
|
||||
mdx?.slug ||
|
||||
title
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '-')
|
||||
.replace(/-+/g, '-')
|
||||
.replace(/^-|-$/g, '');
|
||||
|
||||
// Only the product description comes from MDX. Everything else is Excel-driven
|
||||
// during model building (technicalItems + voltage tables).
|
||||
const descriptionHtml = mdx?.descriptionHtml || '';
|
||||
|
||||
products.push({
|
||||
id: id++,
|
||||
name: title,
|
||||
shortDescriptionHtml: '',
|
||||
descriptionHtml,
|
||||
images: mdx?.images || [],
|
||||
featuredImage: (mdx?.images && mdx.images[0]) || null,
|
||||
sku: mdx?.sku || title,
|
||||
slug,
|
||||
translationKey: slug,
|
||||
locale,
|
||||
categories: (mdx?.categories || []).map(name => ({ name })),
|
||||
attributes: [],
|
||||
});
|
||||
stream.on('error', error => reject(new Error(`Failed to read file: ${error}`)));
|
||||
});
|
||||
|
||||
// Deterministic order: by slug, then name.
|
||||
products.sort((a, b) => (a.slug || '').localeCompare(b.slug || '') || a.name.localeCompare(b.name));
|
||||
|
||||
// Drop products that have no readable name.
|
||||
return products.filter(p => stripHtml(p.name));
|
||||
}
|
||||
|
||||
async function processChunk(products: ProductData[], chunkIndex: number, totalChunks: number): Promise<void> {
|
||||
@@ -67,21 +231,24 @@ async function processProductsInChunks(): Promise<void> {
|
||||
console.log('Starting PDF generation (React-PDF)');
|
||||
ensureOutputDir();
|
||||
|
||||
const allProducts = await readProductsStream();
|
||||
const onlyLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
|
||||
const locales: Array<'en' | 'de'> = onlyLocale === 'de' || onlyLocale === 'en' ? [onlyLocale] : ['en', 'de'];
|
||||
|
||||
const allProducts: ProductData[] = [];
|
||||
for (const locale of locales) {
|
||||
const products = await loadProductsFromExcelAndMdx(locale);
|
||||
allProducts.push(...products);
|
||||
}
|
||||
|
||||
if (allProducts.length === 0) {
|
||||
console.log('No products found');
|
||||
return;
|
||||
}
|
||||
|
||||
// Dev convenience: generate only one locale / one product subset.
|
||||
// Dev convenience: generate only one product subset.
|
||||
// IMPORTANT: apply filters BEFORE PDF_LIMIT so the limit works within the filtered set.
|
||||
let products = allProducts;
|
||||
|
||||
const onlyLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
|
||||
if (onlyLocale === 'de' || onlyLocale === 'en') {
|
||||
products = products.filter(p => (p.locale || 'en') === onlyLocale);
|
||||
}
|
||||
|
||||
const match = normalizeValue(String(process.env.PDF_MATCH || '')).toLowerCase();
|
||||
if (match) {
|
||||
products = products.filter(p => {
|
||||
|
||||
Reference in New Issue
Block a user