pdf sheets from new excel

This commit is contained in:
2026-01-23 13:10:08 +01:00
parent 899b3c7ed4
commit e5e2b646a0
53 changed files with 447 additions and 53 deletions

View File

@@ -2,48 +2,212 @@
/**
* PDF Datasheet Generator (React-PDF)
*
* Uses the same Excel-driven data model as the legacy generator, but renders
* PDFs via `@react-pdf/renderer` for maintainable layout and pagination.
* Renders PDFs via `@react-pdf/renderer`.
*
* Source of truth:
* - All technical data + cross-section tables: Excel files in `data/excel/`
* - Product description text: MDX files in `data/products/{en,de}/*.mdx`
*/
import * as fs from 'fs';
import * as path from 'path';
import * as XLSX from 'xlsx';
const matter = require('gray-matter') as (src: string) => { data: unknown; content: string };
import type { ProductData } from './pdf/model/types';
import { generateDatasheetPdfBuffer } from './pdf/react-pdf/generate-datasheet-pdf';
import { generateFileName, normalizeValue } from './pdf/model/utils';
import { generateFileName, normalizeValue, stripHtml } from './pdf/model/utils';
/**
 * Filesystem locations and batching settings used by the datasheet generator.
 * Both paths are resolved against the current working directory.
 */
const CONFIG = {
// Path of the processed products JSON file.
productsFile: path.join(process.cwd(), 'data/processed/products.json'),
// Directory that ensureOutputDir() creates before PDFs are generated.
outputDir: path.join(process.cwd(), 'public/datasheets'),
// NOTE(review): presumably the number of products handled per chunk — confirm against the chunking code.
chunkSize: 10,
} as const;
/**
 * Excel workbooks scanned for product designations by loadAllExcelDesignations().
 * Order matters: when the same normalized designation key occurs in more than
 * one workbook, the entry from the file listed first is kept.
 */
const EXCEL_FILES = [
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx'),
path.join(process.cwd(), 'data/excel/low-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
] as const;
/**
 * Product metadata extracted from one MDX file by buildMdxIndex().
 */
type MdxProduct = {
// File name without the `.mdx` extension.
slug: string;
// Front-matter `title`, normalized; files without a title are not indexed.
title: string;
// Front-matter `sku`, normalized ('' when absent).
sku: string;
// Front-matter `categories`, normalized with empty entries removed ([] when not an array).
categories: string[];
// Front-matter `images`, normalized with empty entries removed ([] when not an array).
images: string[];
// MDX body with the surrounding <ProductTabs> wrapper stripped.
descriptionHtml: string;
};
type MdxIndex = Map<string, MdxProduct>; // key: normalized designation/title
/**
 * Makes sure the datasheet output directory (`CONFIG.outputDir`) exists.
 *
 * `recursive: true` creates any missing parent directories and does nothing when
 * the directory is already there, so no separate existence check is needed and
 * there is no gap between "check" and "create" in which another process could
 * create the directory.
 *
 * @throws If the path exists but is not a directory, or cannot be created.
 */
function ensureOutputDir(): void {
  fs.mkdirSync(CONFIG.outputDir, { recursive: true });
}
async function readProductsStream(): Promise<ProductData[]> {
console.log('Reading products.json...');
return new Promise((resolve, reject) => {
const stream = fs.createReadStream(CONFIG.productsFile, { encoding: 'utf8' });
let data = '';
stream.on('data', chunk => {
data += chunk;
/**
 * Turns a designation/title into a lookup key that is insensitive to case,
 * punctuation and whitespace.
 *
 * Steps: upper-case the input, drop one trailing `-<digits>` suffix, then
 * remove every character that is not A-Z or 0-9.
 *
 * @param value Designation or title; falsy input yields ''.
 * @returns The normalized key (possibly '').
 */
function normalizeExcelKey(value: string): string {
  const upperCased = String(value || '').toUpperCase();
  const withoutNumericSuffix = upperCased.replace(/-\d+$/g, '');
  return withoutNumericSuffix.replace(/[^A-Z0-9]+/g, '');
}
/**
 * Finds the index of the `>` that closes a JSX opening tag.
 *
 * A `>` is only treated as the end of the tag when it is outside every
 * `{...}` expression and outside every quoted string, so characters such as
 * the `>` in `"x > y"` or in an arrow function (`=>`) inside an attribute do
 * not end the tag early.
 *
 * @param src  Text that contains the opening tag.
 * @param from Index at which scanning starts (just after the tag name).
 * @returns Index of the closing `>`, or -1 when none is found.
 */
function findOpeningTagEnd(src: string, from: number): number {
  let braceDepth = 0;
  let openQuote = '';
  for (let i = from; i < src.length; i++) {
    const ch = src.charAt(i);
    if (openQuote) {
      // Backslash escapes only exist inside JS expressions, not in plain JSX attribute strings.
      if (ch === '\\' && braceDepth > 0) i++;
      else if (ch === openQuote) openQuote = '';
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') openQuote = ch;
    else if (ch === '{') braceDepth++;
    else if (ch === '}') braceDepth = Math.max(0, braceDepth - 1);
    else if (ch === '>' && braceDepth === 0) return i;
  }
  return -1;
}

/**
 * Extracts the inner description markup from an MDX product body.
 *
 * MDX product files are wrapped like:
 *   <ProductTabs technicalData={...}>
 *     <section>...</section>
 *   </ProductTabs>
 * For the PDF only the inner content is wanted.
 *
 * The opening tag is located with a brace/quote-aware scan because the
 * `technicalData` expression may itself contain `>`; a plain "up to the first
 * `>`" match would cut the tag short and leak attribute text into the result.
 * If the scan cannot find the end of the tag (malformed input), the previous
 * first-`>` regex is used as a fallback.
 *
 * @param body MDX body (front matter already removed).
 * @returns The trimmed inner content, or '' for empty input. Bodies without a
 *          <ProductTabs> wrapper are returned trimmed and otherwise unchanged.
 */
function extractDescriptionHtmlFromMdxBody(body: string): string {
  const content = String(body || '').trim();
  if (!content) return '';

  let inner = content;
  const opening = /^<ProductTabs/i.exec(content);
  if (opening) {
    const tagEnd = findOpeningTagEnd(content, opening[0].length);
    inner = tagEnd >= 0 ? content.slice(tagEnd + 1) : content.replace(/^\s*<ProductTabs[\s\S]*?>\s*/i, '');
  }

  return inner.replace(/\s*<\/ProductTabs>\s*$/i, '').trim();
}
/**
 * Indexes the MDX product files of one locale by normalized title.
 *
 * Reads every `*.mdx` file in `data/products/<locale>` (sorted by file name),
 * parses its front matter and stores the product under
 * `normalizeExcelKey(title)`. Files without a title are skipped. If two files
 * normalize to the same key, the one processed later replaces the earlier one.
 *
 * @param locale Locale folder to read.
 * @returns The index; empty when the locale folder does not exist.
 */
function buildMdxIndex(locale: 'en' | 'de'): MdxIndex {
  const index: MdxIndex = new Map();
  const localeDir = path.join(process.cwd(), 'data/products', locale);
  if (!fs.existsSync(localeDir)) return index;

  // Front-matter lists may be missing or malformed; anything that is not an array becomes [].
  const cleanList = (input: unknown): string[] =>
    Array.isArray(input) ? input.map((entry: any) => normalizeValue(String(entry))).filter(Boolean) : [];

  const mdxFiles = fs.readdirSync(localeDir).filter(name => name.endsWith('.mdx'));
  mdxFiles.sort();

  for (const fileName of mdxFiles) {
    const source = fs.readFileSync(path.join(localeDir, fileName), 'utf8');
    const { data: frontMatter, content } = matter(source);
    const meta = (frontMatter || {}) as any;

    const title = normalizeValue(String(meta.title || ''));
    if (!title) continue;

    index.set(normalizeExcelKey(title), {
      slug: path.basename(fileName, '.mdx'),
      title,
      sku: normalizeValue(String(meta.sku || '')),
      categories: cleanList(meta.categories),
      images: cleanList(meta.images),
      descriptionHtml: extractDescriptionHtmlFromMdxBody(content),
    });
  }

  return index;
}
/**
 * Looks up the column key whose header cell text matches a pattern.
 *
 * Cells are normalized before matching and empty cells are ignored.
 *
 * @param headerRow Row whose values hold the human-readable column headers.
 * @param pattern   Expression tested against each normalized cell text.
 * @returns The key of the first matching cell, or null when nothing matches.
 */
function findKeyByHeaderValue(headerRow: Record<string, unknown>, pattern: RegExp): string | null {
  const match = Object.entries(headerRow || {}).find(([, cell]) => {
    const text = normalizeValue(String(cell ?? ''));
    return Boolean(text) && pattern.test(text);
  });
  return match ? match[0] : null;
}
/**
 * Reads the first worksheet of an Excel workbook as an array of row objects.
 *
 * Empty cells are filled with '' (`defval`), values are returned as formatted
 * text (`raw: false`) and blank rows are skipped.
 *
 * @param filePath Path of the workbook.
 * @returns The rows, or [] when the file is missing or has no first sheet.
 */
function readExcelRows(filePath: string): Array<Record<string, unknown>> {
  if (!fs.existsSync(filePath)) return [];

  const book = XLSX.readFile(filePath, { cellDates: false, cellNF: false, cellText: false });
  const [firstSheetName] = book.SheetNames;
  const firstSheet = firstSheetName ? book.Sheets[firstSheetName] : undefined;
  if (!firstSheet) return [];

  const toJsonOptions = { defval: '', raw: false, blankrows: false };
  return XLSX.utils.sheet_to_json(firstSheet, toJsonOptions) as Array<Record<string, unknown>>;
}
/**
 * Collects the product designations listed in one workbook.
 *
 * The part-number column is located in this order:
 *   1. a column literally keyed 'Part Number' (legacy sheets),
 *   2. a column whose first-row value reads "part number" (new MV sheet, which
 *      uses __EMPTY* keys and keeps the human headers in row 0),
 *   3. the '__EMPTY' column.
 * Empty cells and the 'Units' / 'Part Number' marker rows are ignored.
 *
 * @param filePath Path of the workbook.
 * @returns Map from normalized key to the first designation string seen for it.
 */
function readDesignationsFromExcelFile(filePath: string): Map<string, string> {
  const designations = new Map<string, string>();
  const rows = readExcelRows(filePath);
  if (rows.length === 0) return designations;

  const firstRow = rows[0] || {};
  let partNumberKey: string;
  if (Object.prototype.hasOwnProperty.call(firstRow, 'Part Number')) {
    partNumberKey = 'Part Number';
  } else {
    partNumberKey = findKeyByHeaderValue(firstRow, /^part\s*number$/i) || '__EMPTY';
  }

  for (const row of rows) {
    const partNumber = normalizeValue(String(row?.[partNumberKey] ?? ''));
    const isMarkerRow = !partNumber || partNumber === 'Units' || partNumber === 'Part Number';
    if (isMarkerRow) continue;

    const key = normalizeExcelKey(partNumber);
    // Keep the first spelling seen so generated file names stay stable.
    if (key && !designations.has(key)) designations.set(key, partNumber);
  }

  return designations;
}
/**
 * Collects product designations from every workbook in EXCEL_FILES.
 *
 * @returns Map from normalized designation key to the designation string read
 *          from the sheet. When a key occurs in several workbooks, the entry
 *          from the file listed first in EXCEL_FILES is kept.
 */
function loadAllExcelDesignations(): Map<string, string> {
const out = new Map<string, string>();
for (const filePath of EXCEL_FILES) {
const m = readDesignationsFromExcelFile(filePath);
Array.from(m.entries()).forEach(([k, v]) => {
// First file wins: never overwrite a key that an earlier workbook already provided.
if (!out.has(k)) out.set(k, v);
});
// NOTE(review): the stream.on('end', ...) statement below uses `stream`, `data`, `resolve` and `reject`,
// none of which are declared in this function — it looks like a leftover from readProductsStream(); confirm and remove.
stream.on('end', () => {
try {
const products = JSON.parse(data) as ProductData[];
console.log(`Loaded ${products.length} products`);
resolve(products);
} catch (error) {
reject(new Error(`Failed to parse JSON: ${error}`));
}
}
return out;
}
/**
 * Builds the product list for one locale from the Excel designations, enriched
 * with MDX metadata where a matching MDX file exists.
 *
 * One product is created per Excel designation key. When no MDX entry is found
 * for the key, the designation itself is used as title and SKU, the slug is
 * derived from the title, and description, images and categories stay empty.
 *
 * @param locale Locale whose MDX files are used.
 * @returns Products sorted by slug, then name, without entries whose name is
 *          empty after HTML stripping. Ids are assigned sequentially (starting
 *          at 1) before sorting.
 */
async function loadProductsFromExcelAndMdx(locale: 'en' | 'de'): Promise<ProductData[]> {
const mdxIndex = buildMdxIndex(locale);
const excelDesignations = loadAllExcelDesignations();
const products: ProductData[] = [];
let id = 1;
Array.from(excelDesignations.entries()).forEach(([key, designation]) => {
const mdx = mdxIndex.get(key) || null;
const title = mdx?.title || designation;
// Fallback slug: lower-case, runs of non-alphanumerics become '-', no leading/trailing '-'.
const slug =
mdx?.slug ||
title
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/-+/g, '-')
.replace(/^-|-$/g, '');
// Only the product description comes from MDX. Everything else is Excel-driven
// during model building (technicalItems + voltage tables).
const descriptionHtml = mdx?.descriptionHtml || '';
products.push({
id: id++,
name: title,
shortDescriptionHtml: '',
descriptionHtml,
images: mdx?.images || [],
featuredImage: (mdx?.images && mdx.images[0]) || null,
sku: mdx?.sku || title,
slug,
translationKey: slug,
locale,
categories: (mdx?.categories || []).map(name => ({ name })),
attributes: [],
});
// NOTE(review): `stream` and `reject` are not declared in this function — the next line looks like a
// leftover from readProductsStream(); confirm and remove.
stream.on('error', error => reject(new Error(`Failed to read file: ${error}`)));
});
// Deterministic order: by slug, then name.
products.sort((a, b) => (a.slug || '').localeCompare(b.slug || '') || a.name.localeCompare(b.name));
// Drop products that have no readable name.
return products.filter(p => stripHtml(p.name));
}
async function processChunk(products: ProductData[], chunkIndex: number, totalChunks: number): Promise<void> {
@@ -67,21 +231,24 @@ async function processProductsInChunks(): Promise<void> {
console.log('Starting PDF generation (React-PDF)');
ensureOutputDir();
const allProducts = await readProductsStream();
const onlyLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
const locales: Array<'en' | 'de'> = onlyLocale === 'de' || onlyLocale === 'en' ? [onlyLocale] : ['en', 'de'];
const allProducts: ProductData[] = [];
for (const locale of locales) {
const products = await loadProductsFromExcelAndMdx(locale);
allProducts.push(...products);
}
if (allProducts.length === 0) {
console.log('No products found');
return;
}
// Dev convenience: generate only one locale / one product subset.
// Dev convenience: generate only one product subset.
// IMPORTANT: apply filters BEFORE PDF_LIMIT so the limit works within the filtered set.
let products = allProducts;
const onlyLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
if (onlyLocale === 'de' || onlyLocale === 'en') {
products = products.filter(p => (p.locale || 'en') === onlyLocale);
}
const match = normalizeValue(String(process.env.PDF_MATCH || '')).toLowerCase();
if (match) {
products = products.filter(p => {

View File

@@ -2,8 +2,8 @@ import * as fs from 'fs';
import * as path from 'path';
import type { DatasheetModel, DatasheetVoltageTable, KeyValueItem, ProductData } from './types';
import type { ExcelMatch } from './excel-index';
import { findExcelForProduct } from './excel-index';
import type { ExcelMatch, MediumVoltageCrossSectionExcelMatch } from './excel-index';
import { findExcelForProduct, findMediumVoltageCrossSectionExcelForProduct } from './excel-index';
import { getLabels, getProductUrl, normalizeValue, stripHtml } from './utils';
type ExcelRow = Record<string, unknown>;
@@ -590,6 +590,129 @@ function buildExcelModel(args: { product: ProductData; locale: 'en' | 'de' }): B
return { ok: true, technicalItems, voltageTables };
}
/**
 * Tells whether a product belongs to the medium-voltage range.
 *
 * The slug, path, translation key and all category names are joined with
 * spaces and searched (case-insensitively) for "medium voltage",
 * "medium-voltage", "mediumvoltage" or "mittelspannung".
 *
 * @param product Product to classify.
 * @returns true when any of the markers is found.
 */
function isMediumVoltageProduct(product: ProductData): boolean {
  const fragments: unknown[] = [product.slug, product.path, product.translationKey];
  for (const category of product.categories || []) {
    fragments.push(category.name);
  }
  const haystack = fragments.filter(Boolean).join(' ');
  return /medium[-\s]?voltage|mittelspannung/i.test(haystack);
}
// One abbreviated sheet column: `colKey` is the column's key in the row objects,
// `unit` its normalized unit text as produced by extractAbbrevColumnsFromMediumVoltageHeader().
type AbbrevColumn = { colKey: string; unit: string };
/**
 * Decides whether a column key looks like a compact abbreviation header.
 *
 * Examples from the MV sheet: "LD mm", "RI Ohm", "G kg", "SBL 30", "SBE 20",
 * "BK", "BR", "LF". The check is permissive but rejects sentence-like keys.
 *
 * A key qualifies when, after normalization, it is non-empty, is not an
 * `__EMPTY*` placeholder, has at most 12 characters, contains no run of four
 * or more lowercase letters, and contains at least one of A-Z or Ø.
 *
 * @param key Column key to test.
 * @returns true when the key is treated as an abbreviation.
 */
function isAbbreviatedHeaderKey(key: string): boolean {
  const candidate = normalizeValue(key);
  if (!candidate) return false;

  const isPlaceholder = /^__EMPTY/i.test(candidate);
  const isTooLong = candidate.length > 12;
  const hasWordLikeRun = /[a-z]{4,}/.test(candidate);
  const hasCapital = /[A-ZØ]/.test(candidate);

  return !isPlaceholder && !isTooLong && !hasWordLikeRun && hasCapital;
}
/**
 * Picks the abbreviated data columns of the MV sheet.
 *
 * Every key of the header row is considered, except the part-number,
 * cross-section and (if present) rated-voltage columns and any key that
 * isAbbreviatedHeaderKey() rejects. Column order follows the header row.
 *
 * @param args.headerRow       Header row of the sheet.
 * @param args.units           Unit text per column key; missing entries count as ''.
 * @param args.partNumberKey   Key of the part-number column (excluded).
 * @param args.crossSectionKey Key of the cross-section column (excluded).
 * @param args.ratedVoltageKey Key of the rated-voltage column, or null (excluded when set).
 * @returns The selected columns with their normalized units.
 */
function extractAbbrevColumnsFromMediumVoltageHeader(args: {
  headerRow: Record<string, unknown>;
  units: Record<string, string>;
  partNumberKey: string;
  crossSectionKey: string;
  ratedVoltageKey: string | null;
}): AbbrevColumn[] {
  const excludedKeys = new Set<string>([args.partNumberKey, args.crossSectionKey]);
  if (args.ratedVoltageKey) excludedKeys.add(args.ratedVoltageKey);

  return Object.keys(args.headerRow || {})
    .filter(colKey => Boolean(colKey) && !excludedKeys.has(colKey) && isAbbreviatedHeaderKey(colKey))
    .map(colKey => ({ colKey, unit: normalizeUnit(args.units[colKey] || '') }));
}
/**
 * Builds the cross-section tables of a medium-voltage product from the new MV
 * workbook, one table per rated voltage.
 *
 * Rows without a cross-section value are ignored. The remaining rows are
 * grouped by their normalized voltage label; rows without one are grouped
 * under a localized "voltage unknown" label. Groups are ordered by
 * parseVoltageSortKey(), ties by label. Each table exposes the abbreviated
 * sheet columns, using the column key itself as the header label.
 *
 * @param args.product Product whose rows are looked up in the MV workbook.
 * @param args.locale  Locale used for labels and cell formatting.
 * @returns `ok: false` when the product has no rows, the sheet has no
 *          cross-section column, or no abbreviated columns exist; otherwise
 *          `ok: true` with the tables (technicalItems is always empty).
 */
function buildMediumVoltageCrossSectionTableFromNewExcel(args: {
  product: ProductData;
  locale: 'en' | 'de';
}): BuildExcelModelResult {
  const notAvailable = (): BuildExcelModelResult => ({ ok: false, technicalItems: [], voltageTables: [] });

  const mv = findMediumVoltageCrossSectionExcelForProduct(args.product) as MediumVoltageCrossSectionExcelMatch | null;
  if (!mv || !mv.rows.length) return notAvailable();
  if (!mv.crossSectionKey) return notAvailable();

  const abbrevCols = extractAbbrevColumnsFromMediumVoltageHeader({
    headerRow: mv.headerRow,
    units: mv.units,
    partNumberKey: mv.partNumberKey,
    crossSectionKey: mv.crossSectionKey,
    ratedVoltageKey: mv.ratedVoltageKey,
  });
  if (!abbrevCols.length) return notAvailable();

  const isGerman = args.locale === 'de';
  const voltageColumn = mv.ratedVoltageKey;

  // Normalized text of one cell; missing rows or cells read as ''.
  const readCell = (rowIndex: number, columnKey: string): string =>
    normalizeValue(String((mv.rows[rowIndex] as Record<string, unknown>)?.[columnKey] ?? ''));

  // Group source-row indices by voltage label, preserving sheet order inside each group.
  const rowIndicesByVoltage = new Map<string, number[]>();
  mv.rows.forEach((_row, rowIndex) => {
    if (!readCell(rowIndex, mv.crossSectionKey)) return;

    const rawVoltage = voltageColumn ? readCell(rowIndex, voltageColumn) : '';
    const label = normalizeVoltageLabel(rawVoltage || '');
    const groupKey = label || (isGerman ? 'Spannung unbekannt' : 'Voltage unknown');

    const group = rowIndicesByVoltage.get(groupKey);
    if (group) group.push(rowIndex);
    else rowIndicesByVoltage.set(groupKey, [rowIndex]);
  });

  const orderedVoltageKeys = Array.from(rowIndicesByVoltage.keys()).sort((a, b) => {
    const rankA = parseVoltageSortKey(a);
    const rankB = parseVoltageSortKey(b);
    return rankA !== rankB ? rankA - rankB : a.localeCompare(b);
  });

  const voltageTables: VoltageTableModel[] = [];
  for (const voltageLabel of orderedVoltageKeys) {
    const indices = rowIndicesByVoltage.get(voltageLabel) || [];
    if (indices.length === 0) continue;

    const crossSections = indices.map(rowIndex => readCell(rowIndex, mv.crossSectionKey));

    const metaItems: KeyValueItem[] = [];
    if (voltageColumn) {
      // The voltage shown in the meta block is taken from the group's first row.
      const firstRowVoltage = readCell(indices[0], voltageColumn);
      metaItems.push({
        label: isGerman ? 'Spannung' : 'Voltage',
        value: normalizeVoltageLabel(firstRowVoltage || ''),
      });
    }

    const columns = abbrevCols.map(({ colKey, unit }) => ({
      key: colKey,
      // Use the abbreviated title from the first row as the table header.
      label: normalizeValue(colKey),
      // `rowIndex` is relative to this table; map it back to the source row first.
      get: (rowIndex: number) => compactCellForDenseTable(readCell(indices[rowIndex], colKey), unit, args.locale),
    }));

    voltageTables.push({ voltageLabel, metaItems, crossSections, columns });
  }

  return { ok: true, technicalItems: [], voltageTables };
}
export function buildDatasheetModel(args: { product: ProductData; locale: 'en' | 'de' }): DatasheetModel {
const labels = getLabels(args.locale);
const categoriesLine = (args.product.categories || []).map(c => stripHtml(c.name)).join(' • ');
@@ -597,22 +720,33 @@ export function buildDatasheetModel(args: { product: ProductData; locale: 'en' |
const heroSrc = resolveMediaToLocalPath(args.product.featuredImage || args.product.images?.[0] || null);
const productUrl = getProductUrl(args.product);
// Technical data MUST stay sourced from the existing Excel index (legacy sheets).
const excelModel = buildExcelModel({ product: args.product, locale: args.locale });
const voltageTables: DatasheetVoltageTable[] = excelModel.ok
? excelModel.voltageTables.map(t => {
const columns = t.columns.map(c => ({ key: c.key, label: c.label }));
const rows = t.crossSections.map((configuration, rowIndex) => ({
configuration,
cells: t.columns.map(c => compactNumericForLocale(c.get(rowIndex), args.locale)),
}));
return {
voltageLabel: t.voltageLabel,
metaItems: t.metaItems,
columns,
rows,
};
})
: [];
// Cross-section tables: for medium voltage only, prefer the new MV sheet (abbrev columns in header row).
const crossSectionModel = isMediumVoltageProduct(args.product)
? buildMediumVoltageCrossSectionTableFromNewExcel({ product: args.product, locale: args.locale })
: { ok: false, technicalItems: [], voltageTables: [] };
const voltageTablesSrc = crossSectionModel.ok
? crossSectionModel.voltageTables
: excelModel.ok
? excelModel.voltageTables
: [];
const voltageTables: DatasheetVoltageTable[] = voltageTablesSrc.map(t => {
const columns = t.columns.map(c => ({ key: c.key, label: c.label }));
const rows = t.crossSections.map((configuration, rowIndex) => ({
configuration,
cells: t.columns.map(c => compactNumericForLocale(c.get(rowIndex), args.locale)),
}));
return {
voltageLabel: t.voltageLabel,
metaItems: t.metaItems,
columns,
rows,
};
});
return {
locale: args.locale,

View File

@@ -8,6 +8,15 @@ import { normalizeValue } from './utils';
type ExcelRow = Record<string, unknown>;
export type ExcelMatch = { rows: ExcelRow[]; units: Record<string, string> };
/**
 * Result of looking up one product in the medium-voltage cross-section sheet,
 * as returned by findMediumVoltageCrossSectionExcelForProduct().
 */
export type MediumVoltageCrossSectionExcelMatch = {
// First row returned by the sheet loader; its values hold the human-readable headers.
headerRow: ExcelRow;
// Sheet rows whose part number matched the product (never empty in a returned match).
rows: ExcelRow[];
// Unit text per column key, taken from the row whose part-number cell reads 'Units'.
units: Record<string, string>;
// Key of the part-number column ('__EMPTY' when no header cell matched).
partNumberKey: string;
// Key of the cross-section column ('' when no header cell matched).
crossSectionKey: string;
// Key of the rated-voltage column, or null when no header cell matched.
ratedVoltageKey: string | null;
};
const EXCEL_SOURCE_FILES = [
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
@@ -15,7 +24,21 @@ const EXCEL_SOURCE_FILES = [
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
];
// Medium-voltage cross-section table (new format with multi-row header).
// IMPORTANT: this must NOT be used for the technical data table.
const MV_CROSS_SECTION_FILE = path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx');
/**
 * Parsed form of MV_CROSS_SECTION_FILE, built once by getMediumVoltageCrossSectionIndex().
 */
type MediumVoltageCrossSectionIndex = {
// First row returned by the sheet loader ({} when the sheet has no rows).
headerRow: ExcelRow;
// Unit text per column key, taken from the row whose part-number cell reads 'Units'.
units: Record<string, string>;
// Key of the part-number column ('__EMPTY' when no header cell matched).
partNumberKey: string;
// Key of the cross-section column ('' when no header cell matched).
crossSectionKey: string;
// Key of the rated-voltage column, or null when no header cell matched.
ratedVoltageKey: string | null;
// Data rows grouped by normalizeExcelKey(part number), in sheet order.
rowsByDesignation: Map<string, ExcelRow[]>;
};
// Cache for getExcelIndex(); null until the index has been built.
let EXCEL_INDEX: Map<string, ExcelMatch> | null = null;
// Cache for getMediumVoltageCrossSectionIndex(); null until the index has been built.
let MV_CROSS_SECTION_INDEX: MediumVoltageCrossSectionIndex | null = null;
export function normalizeExcelKey(value: string): string {
return String(value || '')
@@ -40,6 +63,53 @@ function loadExcelRows(filePath: string): ExcelRow[] {
}
}
/**
 * Returns the key of the first header cell whose normalized text matches
 * `pattern`. Empty cells are skipped.
 *
 * @param headerRow Row whose values hold the human-readable column headers.
 * @param pattern   Expression tested against each normalized cell text.
 * @returns The matching column key, or null when no cell matches.
 */
function findKeyByHeaderValue(headerRow: ExcelRow, pattern: RegExp): string | null {
  const match = Object.entries(headerRow || {}).find(([, cell]) => {
    const headerText = normalizeValue(String(cell ?? ''));
    return headerText ? pattern.test(headerText) : false;
  });
  return match ? match[0] : null;
}
/**
 * Returns the parsed medium-voltage cross-section sheet, building it on first
 * use and reusing the cached result afterwards.
 *
 * The first loaded row is treated as the header row: the part-number,
 * cross-section and rated-voltage columns are located by matching its cell
 * texts. Units come from the row whose part-number cell reads 'Units'. All
 * other rows with a usable part number are grouped by normalized designation.
 * A missing workbook yields an index without rows.
 *
 * @returns The (cached) index.
 */
function getMediumVoltageCrossSectionIndex(): MediumVoltageCrossSectionIndex {
  if (MV_CROSS_SECTION_INDEX) return MV_CROSS_SECTION_INDEX;

  const sheetRows = fs.existsSync(MV_CROSS_SECTION_FILE) ? loadExcelRows(MV_CROSS_SECTION_FILE) : [];
  const headerRow = (sheetRows[0] || {}) as ExcelRow;

  const partNumberKey = findKeyByHeaderValue(headerRow, /^part\s*number$/i) || '__EMPTY';
  const crossSectionKey = findKeyByHeaderValue(headerRow, /querschnitt|cross.?section/i) || '';
  const ratedVoltageKey = findKeyByHeaderValue(headerRow, /rated voltage|voltage rating|nennspannung/i) || null;

  // Normalized part-number cell of a row ('' when missing).
  const partNumberOf = (row: ExcelRow): string => normalizeValue(String((row as ExcelRow)?.[partNumberKey] ?? ''));

  const units: Record<string, string> = {};
  const unitsRow = sheetRows.find(row => partNumberOf(row) === 'Units') || null;
  if (unitsRow) {
    for (const [columnKey, rawUnit] of Object.entries(unitsRow)) {
      // The part-number cell of the units row only carries the 'Units' marker.
      if (columnKey === partNumberKey) continue;
      const unit = normalizeValue(String(rawUnit ?? ''));
      if (unit) units[columnKey] = unit;
    }
  }

  const rowsByDesignation = new Map<string, ExcelRow[]>();
  for (const row of sheetRows) {
    if (row === headerRow) continue;

    const partNumber = partNumberOf(row);
    if (!partNumber || partNumber === 'Units' || partNumber === 'Part Number') continue;

    const key = normalizeExcelKey(partNumber);
    if (!key) continue;

    const bucket = rowsByDesignation.get(key);
    if (bucket) bucket.push(row);
    else rowsByDesignation.set(key, [row]);
  }

  const built: MediumVoltageCrossSectionIndex = {
    headerRow,
    units,
    partNumberKey,
    crossSectionKey,
    ratedVoltageKey,
    rowsByDesignation,
  };
  MV_CROSS_SECTION_INDEX = built;
  return built;
}
export function getExcelIndex(): Map<string, ExcelMatch> {
if (EXCEL_INDEX) return EXCEL_INDEX;
const idx = new Map<string, ExcelMatch>();
@@ -93,3 +163,30 @@ export function findExcelForProduct(product: ProductData): ExcelMatch | null {
}
return null;
}
/**
 * Finds the medium-voltage cross-section rows that belong to a product.
 *
 * Lookup candidates are tried in this order: name, slug without a trailing
 * `-<digits>` suffix, SKU, translation key. Each is normalized with
 * normalizeExcelKey() and the first candidate that has rows wins.
 *
 * @param product Product to look up.
 * @returns The matching rows together with the sheet's header/unit/column
 *          information, or null when no candidate has rows.
 */
export function findMediumVoltageCrossSectionExcelForProduct(product: ProductData): MediumVoltageCrossSectionExcelMatch | null {
  const { rowsByDesignation, headerRow, units, partNumberKey, crossSectionKey, ratedVoltageKey } =
    getMediumVoltageCrossSectionIndex();

  const slugWithoutSuffix = product.slug ? product.slug.replace(/-\d+$/g, '') : '';
  const candidates = [product.name, slugWithoutSuffix, product.sku, product.translationKey].filter(Boolean) as string[];

  for (const candidate of candidates) {
    const rows = rowsByDesignation.get(normalizeExcelKey(candidate));
    if (rows && rows.length) {
      return { headerRow, rows, units, partNumberKey, crossSectionKey, ratedVoltageKey };
    }
  }

  return null;
}

View File

@@ -62,14 +62,8 @@ export function DatasheetDocument(props: { model: DatasheetModel; assets: Assets
<Footer locale={model.locale} siteUrl={CONFIG.siteUrl} />
{model.voltageTables.map((t: DatasheetVoltageTable) => (
<View key={t.voltageLabel}>
<Section
title={`${model.labels.crossSection}${t.voltageLabel}`}
// Prevent orphaned voltage headings at page bottom; let the rest flow.
minPresenceAhead={140}
>
{t.metaItems.length ? <KeyValueGrid items={t.metaItems} /> : null}
</Section>
<View key={t.voltageLabel} style={{ marginBottom: 14 }}>
<Text style={styles.sectionTitle}>{`${model.labels.crossSection}${t.voltageLabel}`}</Text>
<DenseTable table={{ columns: t.columns, rows: t.rows }} firstColLabel={firstColLabel} />
</View>