pdf sheets from new excel

This commit is contained in:
2026-01-23 13:10:08 +01:00
parent 899b3c7ed4
commit e5e2b646a0
53 changed files with 447 additions and 53 deletions

1
.tmp-mv170126-err.txt Normal file
View File

@@ -0,0 +1 @@
Sheet 1

1
.tmp-mv170126-out.txt Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -2,48 +2,212 @@
/**
* PDF Datasheet Generator (React-PDF)
*
* Uses the same Excel-driven data model as the legacy generator, but renders
* PDFs via `@react-pdf/renderer` for maintainable layout and pagination.
* Renders PDFs via `@react-pdf/renderer`.
*
* Source of truth:
* - All technical data + cross-section tables: Excel files in `data/excel/`
* - Product description text: MDX files in `data/products/{en,de}/*.mdx`
*/
import * as fs from 'fs';
import * as path from 'path';
import * as XLSX from 'xlsx';
const matter = require('gray-matter') as (src: string) => { data: unknown; content: string };
import type { ProductData } from './pdf/model/types';
import { generateDatasheetPdfBuffer } from './pdf/react-pdf/generate-datasheet-pdf';
import { generateFileName, normalizeValue } from './pdf/model/utils';
import { generateFileName, normalizeValue, stripHtml } from './pdf/model/utils';
const CONFIG = {
productsFile: path.join(process.cwd(), 'data/processed/products.json'),
outputDir: path.join(process.cwd(), 'public/datasheets'),
chunkSize: 10,
} as const;
const EXCEL_FILES = [
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx'),
path.join(process.cwd(), 'data/excel/low-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
] as const;
type MdxProduct = {
slug: string;
title: string;
sku: string;
categories: string[];
images: string[];
descriptionHtml: string;
};
type MdxIndex = Map<string, MdxProduct>; // key: normalized designation/title
function ensureOutputDir(): void {
if (!fs.existsSync(CONFIG.outputDir)) {
fs.mkdirSync(CONFIG.outputDir, { recursive: true });
}
}
async function readProductsStream(): Promise<ProductData[]> {
console.log('Reading products.json...');
return new Promise((resolve, reject) => {
const stream = fs.createReadStream(CONFIG.productsFile, { encoding: 'utf8' });
let data = '';
stream.on('data', chunk => {
data += chunk;
function normalizeExcelKey(value: string): string {
return String(value || '')
.toUpperCase()
.replace(/-\d+$/g, '')
.replace(/[^A-Z0-9]+/g, '');
}
function extractDescriptionHtmlFromMdxBody(body: string): string {
const content = String(body || '').trim();
if (!content) return '';
// MDX product files are wrapped like:
// <ProductTabs technicalData={...}>
// <section>...</section>
// </ProductTabs>
// For PDF, we only want the inner description content.
const withoutOpen = content.replace(/^\s*<ProductTabs[\s\S]*?>\s*/i, '');
const withoutClose = withoutOpen.replace(/\s*<\/ProductTabs>\s*$/i, '');
return withoutClose.trim();
}
function buildMdxIndex(locale: 'en' | 'de'): MdxIndex {
const dir = path.join(process.cwd(), 'data/products', locale);
const idx: MdxIndex = new Map();
if (!fs.existsSync(dir)) return idx;
const files = fs
.readdirSync(dir)
.filter(f => f.endsWith('.mdx'))
.sort();
for (const file of files) {
const filePath = path.join(dir, file);
const raw = fs.readFileSync(filePath, 'utf8');
const parsed = matter(raw);
const data = (parsed.data || {}) as any;
const title = normalizeValue(String(data.title || ''));
if (!title) continue;
const sku = normalizeValue(String(data.sku || ''));
const categories = Array.isArray(data.categories) ? data.categories.map((c: any) => normalizeValue(String(c))).filter(Boolean) : [];
const images = Array.isArray(data.images) ? data.images.map((i: any) => normalizeValue(String(i))).filter(Boolean) : [];
const descriptionHtml = extractDescriptionHtmlFromMdxBody(parsed.content);
const slug = path.basename(file, '.mdx');
idx.set(normalizeExcelKey(title), { slug, title, sku, categories, images, descriptionHtml });
}
return idx;
}
function findKeyByHeaderValue(headerRow: Record<string, unknown>, pattern: RegExp): string | null {
for (const [k, v] of Object.entries(headerRow || {})) {
const text = normalizeValue(String(v ?? ''));
if (!text) continue;
if (pattern.test(text)) return k;
}
return null;
}
function readExcelRows(filePath: string): Array<Record<string, unknown>> {
if (!fs.existsSync(filePath)) return [];
const workbook = XLSX.readFile(filePath, { cellDates: false, cellNF: false, cellText: false });
const sheetName = workbook.SheetNames[0];
if (!sheetName) return [];
const sheet = workbook.Sheets[sheetName];
if (!sheet) return [];
return XLSX.utils.sheet_to_json(sheet, {
defval: '',
raw: false,
blankrows: false,
}) as Array<Record<string, unknown>>;
}
function readDesignationsFromExcelFile(filePath: string): Map<string, string> {
const rows = readExcelRows(filePath);
if (!rows.length) return new Map();
// Legacy sheets use "Part Number" as a column key.
// The new MV sheet uses __EMPTY* keys and stores the human headers in row 0 values.
const headerRow = rows[0] || {};
const partNumberKey =
(Object.prototype.hasOwnProperty.call(headerRow, 'Part Number') ? 'Part Number' : null) ||
findKeyByHeaderValue(headerRow, /^part\s*number$/i) ||
'__EMPTY';
const out = new Map<string, string>();
for (const r of rows) {
const pn = normalizeValue(String(r?.[partNumberKey] ?? ''));
if (!pn || pn === 'Units' || pn === 'Part Number') continue;
const key = normalizeExcelKey(pn);
if (!key) continue;
// Keep first-seen designation string (stable filenames from MDX slug).
if (!out.has(key)) out.set(key, pn);
}
return out;
}
function loadAllExcelDesignations(): Map<string, string> {
const out = new Map<string, string>();
for (const filePath of EXCEL_FILES) {
const m = readDesignationsFromExcelFile(filePath);
Array.from(m.entries()).forEach(([k, v]) => {
if (!out.has(k)) out.set(k, v);
});
stream.on('end', () => {
try {
const products = JSON.parse(data) as ProductData[];
console.log(`Loaded ${products.length} products`);
resolve(products);
} catch (error) {
reject(new Error(`Failed to parse JSON: ${error}`));
}
}
return out;
}
async function loadProductsFromExcelAndMdx(locale: 'en' | 'de'): Promise<ProductData[]> {
const mdxIndex = buildMdxIndex(locale);
const excelDesignations = loadAllExcelDesignations();
const products: ProductData[] = [];
let id = 1;
Array.from(excelDesignations.entries()).forEach(([key, designation]) => {
const mdx = mdxIndex.get(key) || null;
const title = mdx?.title || designation;
const slug =
mdx?.slug ||
title
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/-+/g, '-')
.replace(/^-|-$/g, '');
// Only the product description comes from MDX. Everything else is Excel-driven
// during model building (technicalItems + voltage tables).
const descriptionHtml = mdx?.descriptionHtml || '';
products.push({
id: id++,
name: title,
shortDescriptionHtml: '',
descriptionHtml,
images: mdx?.images || [],
featuredImage: (mdx?.images && mdx.images[0]) || null,
sku: mdx?.sku || title,
slug,
translationKey: slug,
locale,
categories: (mdx?.categories || []).map(name => ({ name })),
attributes: [],
});
stream.on('error', error => reject(new Error(`Failed to read file: ${error}`)));
});
// Deterministic order: by slug, then name.
products.sort((a, b) => (a.slug || '').localeCompare(b.slug || '') || a.name.localeCompare(b.name));
// Drop products that have no readable name.
return products.filter(p => stripHtml(p.name));
}
async function processChunk(products: ProductData[], chunkIndex: number, totalChunks: number): Promise<void> {
@@ -67,21 +231,24 @@ async function processProductsInChunks(): Promise<void> {
console.log('Starting PDF generation (React-PDF)');
ensureOutputDir();
const allProducts = await readProductsStream();
const onlyLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
const locales: Array<'en' | 'de'> = onlyLocale === 'de' || onlyLocale === 'en' ? [onlyLocale] : ['en', 'de'];
const allProducts: ProductData[] = [];
for (const locale of locales) {
const products = await loadProductsFromExcelAndMdx(locale);
allProducts.push(...products);
}
if (allProducts.length === 0) {
console.log('No products found');
return;
}
// Dev convenience: generate only one locale / one product subset.
// Dev convenience: generate only one product subset.
// IMPORTANT: apply filters BEFORE PDF_LIMIT so the limit works within the filtered set.
let products = allProducts;
const onlyLocale = normalizeValue(String(process.env.PDF_LOCALE || '')).toLowerCase();
if (onlyLocale === 'de' || onlyLocale === 'en') {
products = products.filter(p => (p.locale || 'en') === onlyLocale);
}
const match = normalizeValue(String(process.env.PDF_MATCH || '')).toLowerCase();
if (match) {
products = products.filter(p => {

View File

@@ -2,8 +2,8 @@ import * as fs from 'fs';
import * as path from 'path';
import type { DatasheetModel, DatasheetVoltageTable, KeyValueItem, ProductData } from './types';
import type { ExcelMatch } from './excel-index';
import { findExcelForProduct } from './excel-index';
import type { ExcelMatch, MediumVoltageCrossSectionExcelMatch } from './excel-index';
import { findExcelForProduct, findMediumVoltageCrossSectionExcelForProduct } from './excel-index';
import { getLabels, getProductUrl, normalizeValue, stripHtml } from './utils';
type ExcelRow = Record<string, unknown>;
@@ -590,6 +590,129 @@ function buildExcelModel(args: { product: ProductData; locale: 'en' | 'de' }): B
return { ok: true, technicalItems, voltageTables };
}
function isMediumVoltageProduct(product: ProductData): boolean {
const hay = [product.slug, product.path, product.translationKey, ...(product.categories || []).map(c => c.name)]
.filter(Boolean)
.join(' ');
return /medium[-\s]?voltage|mittelspannung/i.test(hay);
}
type AbbrevColumn = { colKey: string; unit: string };
function isAbbreviatedHeaderKey(key: string): boolean {
const k = normalizeValue(key);
if (!k) return false;
if (/^__EMPTY/i.test(k)) return false;
// Examples from the MV sheet: "LD mm", "RI Ohm", "G kg", "SBL 30", "SBE 20", "BK", "BR", "LF".
// Keep this permissive but focused on compact, non-sentence identifiers.
if (k.length > 12) return false;
if (/[a-z]{4,}/.test(k)) return false;
if (!/[A-ZØ]/.test(k)) return false;
return true;
}
function extractAbbrevColumnsFromMediumVoltageHeader(args: {
headerRow: Record<string, unknown>;
units: Record<string, string>;
partNumberKey: string;
crossSectionKey: string;
ratedVoltageKey: string | null;
}): AbbrevColumn[] {
const out: AbbrevColumn[] = [];
for (const colKey of Object.keys(args.headerRow || {})) {
if (!colKey) continue;
if (colKey === args.partNumberKey) continue;
if (colKey === args.crossSectionKey) continue;
if (args.ratedVoltageKey && colKey === args.ratedVoltageKey) continue;
if (!isAbbreviatedHeaderKey(colKey)) continue;
const unit = normalizeUnit(args.units[colKey] || '');
out.push({ colKey, unit });
}
return out;
}
function buildMediumVoltageCrossSectionTableFromNewExcel(args: {
product: ProductData;
locale: 'en' | 'de';
}): BuildExcelModelResult {
const mv = findMediumVoltageCrossSectionExcelForProduct(args.product) as MediumVoltageCrossSectionExcelMatch | null;
if (!mv || !mv.rows.length) return { ok: false, technicalItems: [], voltageTables: [] };
if (!mv.crossSectionKey) return { ok: false, technicalItems: [], voltageTables: [] };
const abbrevCols = extractAbbrevColumnsFromMediumVoltageHeader({
headerRow: mv.headerRow,
units: mv.units,
partNumberKey: mv.partNumberKey,
crossSectionKey: mv.crossSectionKey,
ratedVoltageKey: mv.ratedVoltageKey,
});
if (!abbrevCols.length) return { ok: false, technicalItems: [], voltageTables: [] };
const byVoltage = new Map<string, number[]>();
for (let i = 0; i < mv.rows.length; i++) {
const cs = normalizeValue(String((mv.rows[i] as Record<string, unknown>)?.[mv.crossSectionKey] ?? ''));
if (!cs) continue;
const rawV = mv.ratedVoltageKey
? normalizeValue(String((mv.rows[i] as Record<string, unknown>)?.[mv.ratedVoltageKey] ?? ''))
: '';
const voltageLabel = normalizeVoltageLabel(rawV || '');
const key = voltageLabel || (args.locale === 'de' ? 'Spannung unbekannt' : 'Voltage unknown');
const arr = byVoltage.get(key) ?? [];
arr.push(i);
byVoltage.set(key, arr);
}
const voltageKeysSorted = Array.from(byVoltage.keys()).sort((a, b) => {
const na = parseVoltageSortKey(a);
const nb = parseVoltageSortKey(b);
if (na !== nb) return na - nb;
return a.localeCompare(b);
});
const voltageTables: VoltageTableModel[] = [];
for (const vKey of voltageKeysSorted) {
const indices = byVoltage.get(vKey) || [];
if (!indices.length) continue;
const crossSections = indices.map(idx =>
normalizeValue(String((mv.rows[idx] as Record<string, unknown>)?.[mv.crossSectionKey] ?? '')),
);
const metaItems: KeyValueItem[] = [];
if (mv.ratedVoltageKey) {
const rawV = normalizeValue(String((mv.rows[indices[0]] as Record<string, unknown>)?.[mv.ratedVoltageKey] ?? ''));
metaItems.push({
label: args.locale === 'de' ? 'Spannung' : 'Voltage',
value: normalizeVoltageLabel(rawV || ''),
});
}
const columns = abbrevCols.map(col => {
return {
key: col.colKey,
// Use the abbreviated title from the first row as the table header.
label: normalizeValue(col.colKey),
get: (rowIndex: number) => {
const srcRowIndex = indices[rowIndex];
const raw = normalizeValue(String((mv.rows[srcRowIndex] as Record<string, unknown>)?.[col.colKey] ?? ''));
return compactCellForDenseTable(raw, col.unit, args.locale);
},
};
});
voltageTables.push({ voltageLabel: vKey, metaItems, crossSections, columns });
}
return { ok: true, technicalItems: [], voltageTables };
}
export function buildDatasheetModel(args: { product: ProductData; locale: 'en' | 'de' }): DatasheetModel {
const labels = getLabels(args.locale);
const categoriesLine = (args.product.categories || []).map(c => stripHtml(c.name)).join(' • ');
@@ -597,22 +720,33 @@ export function buildDatasheetModel(args: { product: ProductData; locale: 'en' |
const heroSrc = resolveMediaToLocalPath(args.product.featuredImage || args.product.images?.[0] || null);
const productUrl = getProductUrl(args.product);
// Technical data MUST stay sourced from the existing Excel index (legacy sheets).
const excelModel = buildExcelModel({ product: args.product, locale: args.locale });
const voltageTables: DatasheetVoltageTable[] = excelModel.ok
? excelModel.voltageTables.map(t => {
const columns = t.columns.map(c => ({ key: c.key, label: c.label }));
const rows = t.crossSections.map((configuration, rowIndex) => ({
configuration,
cells: t.columns.map(c => compactNumericForLocale(c.get(rowIndex), args.locale)),
}));
return {
voltageLabel: t.voltageLabel,
metaItems: t.metaItems,
columns,
rows,
};
})
: [];
// Cross-section tables: for medium voltage only, prefer the new MV sheet (abbrev columns in header row).
const crossSectionModel = isMediumVoltageProduct(args.product)
? buildMediumVoltageCrossSectionTableFromNewExcel({ product: args.product, locale: args.locale })
: { ok: false, technicalItems: [], voltageTables: [] };
const voltageTablesSrc = crossSectionModel.ok
? crossSectionModel.voltageTables
: excelModel.ok
? excelModel.voltageTables
: [];
const voltageTables: DatasheetVoltageTable[] = voltageTablesSrc.map(t => {
const columns = t.columns.map(c => ({ key: c.key, label: c.label }));
const rows = t.crossSections.map((configuration, rowIndex) => ({
configuration,
cells: t.columns.map(c => compactNumericForLocale(c.get(rowIndex), args.locale)),
}));
return {
voltageLabel: t.voltageLabel,
metaItems: t.metaItems,
columns,
rows,
};
});
return {
locale: args.locale,

View File

@@ -8,6 +8,15 @@ import { normalizeValue } from './utils';
type ExcelRow = Record<string, unknown>;
export type ExcelMatch = { rows: ExcelRow[]; units: Record<string, string> };
export type MediumVoltageCrossSectionExcelMatch = {
headerRow: ExcelRow;
rows: ExcelRow[];
units: Record<string, string>;
partNumberKey: string;
crossSectionKey: string;
ratedVoltageKey: string | null;
};
const EXCEL_SOURCE_FILES = [
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
@@ -15,7 +24,21 @@ const EXCEL_SOURCE_FILES = [
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
];
// Medium-voltage cross-section table (new format with multi-row header).
// IMPORTANT: this must NOT be used for the technical data table.
const MV_CROSS_SECTION_FILE = path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx');
type MediumVoltageCrossSectionIndex = {
headerRow: ExcelRow;
units: Record<string, string>;
partNumberKey: string;
crossSectionKey: string;
ratedVoltageKey: string | null;
rowsByDesignation: Map<string, ExcelRow[]>;
};
let EXCEL_INDEX: Map<string, ExcelMatch> | null = null;
let MV_CROSS_SECTION_INDEX: MediumVoltageCrossSectionIndex | null = null;
export function normalizeExcelKey(value: string): string {
return String(value || '')
@@ -40,6 +63,53 @@ function loadExcelRows(filePath: string): ExcelRow[] {
}
}
function findKeyByHeaderValue(headerRow: ExcelRow, pattern: RegExp): string | null {
for (const [k, v] of Object.entries(headerRow || {})) {
const text = normalizeValue(String(v ?? ''));
if (!text) continue;
if (pattern.test(text)) return k;
}
return null;
}
function getMediumVoltageCrossSectionIndex(): MediumVoltageCrossSectionIndex {
if (MV_CROSS_SECTION_INDEX) return MV_CROSS_SECTION_INDEX;
const rows = fs.existsSync(MV_CROSS_SECTION_FILE) ? loadExcelRows(MV_CROSS_SECTION_FILE) : [];
const headerRow = (rows[0] || {}) as ExcelRow;
const partNumberKey = findKeyByHeaderValue(headerRow, /^part\s*number$/i) || '__EMPTY';
const crossSectionKey = findKeyByHeaderValue(headerRow, /querschnitt|cross.?section/i) || '';
const ratedVoltageKey = findKeyByHeaderValue(headerRow, /rated voltage|voltage rating|nennspannung/i) || null;
const unitsRow = rows.find(r => normalizeValue(String((r as ExcelRow)?.[partNumberKey] ?? '')) === 'Units') || null;
const units: Record<string, string> = {};
if (unitsRow) {
for (const [k, v] of Object.entries(unitsRow)) {
if (k === partNumberKey) continue;
const unit = normalizeValue(String(v ?? ''));
if (unit) units[k] = unit;
}
}
const rowsByDesignation = new Map<string, ExcelRow[]>();
for (const r of rows) {
if (r === headerRow) continue;
const pn = normalizeValue(String((r as ExcelRow)?.[partNumberKey] ?? ''));
if (!pn || pn === 'Units' || pn === 'Part Number') continue;
const key = normalizeExcelKey(pn);
if (!key) continue;
const cur = rowsByDesignation.get(key) || [];
cur.push(r);
rowsByDesignation.set(key, cur);
}
MV_CROSS_SECTION_INDEX = { headerRow, units, partNumberKey, crossSectionKey, ratedVoltageKey, rowsByDesignation };
return MV_CROSS_SECTION_INDEX;
}
export function getExcelIndex(): Map<string, ExcelMatch> {
if (EXCEL_INDEX) return EXCEL_INDEX;
const idx = new Map<string, ExcelMatch>();
@@ -93,3 +163,30 @@ export function findExcelForProduct(product: ProductData): ExcelMatch | null {
}
return null;
}
export function findMediumVoltageCrossSectionExcelForProduct(product: ProductData): MediumVoltageCrossSectionExcelMatch | null {
const idx = getMediumVoltageCrossSectionIndex();
const candidates = [
product.name,
product.slug ? product.slug.replace(/-\d+$/g, '') : '',
product.sku,
product.translationKey,
].filter(Boolean) as string[];
for (const c of candidates) {
const key = normalizeExcelKey(c);
const rows = idx.rowsByDesignation.get(key) || [];
if (rows.length) {
return {
headerRow: idx.headerRow,
rows,
units: idx.units,
partNumberKey: idx.partNumberKey,
crossSectionKey: idx.crossSectionKey,
ratedVoltageKey: idx.ratedVoltageKey,
};
}
}
return null;
}

View File

@@ -62,14 +62,8 @@ export function DatasheetDocument(props: { model: DatasheetModel; assets: Assets
<Footer locale={model.locale} siteUrl={CONFIG.siteUrl} />
{model.voltageTables.map((t: DatasheetVoltageTable) => (
<View key={t.voltageLabel}>
<Section
title={`${model.labels.crossSection}${t.voltageLabel}`}
// Prevent orphaned voltage headings at page bottom; let the rest flow.
minPresenceAhead={140}
>
{t.metaItems.length ? <KeyValueGrid items={t.metaItems} /> : null}
</Section>
<View key={t.voltageLabel} style={{ marginBottom: 14 }}>
<Text style={styles.sectionTitle}>{`${model.labels.crossSection}${t.voltageLabel}`}</Text>
<DenseTable table={{ columns: t.columns, rows: t.rows }} firstColLabel={firstColLabel} />
</View>

File diff suppressed because one or more lines are too long