This commit is contained in:
2026-01-07 00:31:46 +01:00
parent a7b6aa85f8
commit ce26783d45
55 changed files with 481 additions and 3 deletions

View File

@@ -6,6 +6,7 @@
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';
import { PDFDocument, rgb, StandardFonts, PDFFont, PDFPage, PDFImage } from 'pdf-lib';
// Holder for an optional image-processing function taking (input, options); starts out as null.
// NOTE(review): presumably assigned lazily with the `sharp` module elsewhere in the file — confirm.
let sharpFn: ((input?: any, options?: any) => any) | null = null;
@@ -28,6 +29,13 @@ const CONFIG = {
/** Absolute path of the processed asset-map JSON, resolved against the current working directory. */
const ASSET_MAP_FILE = path.join(process.cwd(), 'data/processed/asset-map.json');

/** Absolute path of the `public` directory, resolved against the current working directory. */
const PUBLIC_DIR = path.join(process.cwd(), 'public');

/** Absolute paths of the source spreadsheets, in the order they are scanned when building the Excel index. */
const EXCEL_SOURCE_FILES = [
  'data/source/high-voltage.xlsx',
  'data/source/medium-voltage-KM.xlsx',
  'data/source/low-voltage-KM.xlsx',
  'data/source/solar-cables.xlsx',
].map(relativePath => path.join(process.cwd(), relativePath));

/** String-to-string lookup table; this is the return type of readAssetMap(). */
type AssetMap = Record<string, string>;
function readAssetMap(): AssetMap {
@@ -60,6 +68,451 @@ interface ProductData {
}>;
}
/** One parsed spreadsheet row: column header -> cell value (cell value types are not constrained here). */
type ExcelRow = Record<string, any>;
/**
 * Everything indexed under one normalized part-number key:
 * - `rows`: all spreadsheet rows whose "Part Number" normalizes to that key
 * - `units`: column header -> unit string, taken from the sheet's "Units" row
 */
type ExcelMatch = { rows: ExcelRow[]; units: Record<string, string> };
/** Memoized result of getExcelIndex(); stays null until the index has been built once. */
let EXCEL_INDEX: Map<string, ExcelMatch> | null = null;
/**
 * Canonicalizes a product name, slug or Excel "Part Number" into a lookup key.
 *
 * Steps, in order: upper-case, drop one trailing "-<digits>" suffix, then remove
 * every character that is not A-Z or 0-9.
 *
 *   "NA2XS(FL)2Y" -> "NA2XSFL2Y"
 *   "na2xsfl2y-3" -> "NA2XSFL2Y"
 *
 * Falsy input yields an empty string.
 */
function normalizeExcelKey(value: string): string {
  const upperCased = String(value || '').toUpperCase();
  // The numeric suffix must be removed before punctuation is stripped,
  // otherwise the hyphen that marks it would already be gone.
  const withoutNumericSuffix = upperCased.replace(/-\d+$/g, '');
  return withoutNumericSuffix.replace(/[^A-Z0-9]+/g, '');
}
/**
 * Reads one spreadsheet by shelling out to `xlsx-cli` and returns its rows as plain objects.
 *
 * We intentionally avoid adding a heavy xlsx parser dependency and run `xlsx-cli` through
 * `npx` instead. `xlsx-cli -j` prints the sheet name on the first line and the JSON array
 * after it, so parsing starts at the first `[` in the output.
 *
 * @param filePath Path of the workbook; it is interpolated into a double-quoted shell argument.
 * @returns The parsed rows, or an empty array when the output contains no JSON array or
 *          cannot be parsed.
 * @throws Whatever `execSync` throws when the command itself fails (non-zero exit, spawn error,
 *         output larger than `maxBuffer`).
 */
function loadExcelRows(filePath: string): ExcelRow[] {
  const out = execSync(`npx -y xlsx-cli -j "${filePath}"`, {
    encoding: 'utf8',
    stdio: ['ignore', 'pipe', 'ignore'],
    // The default stdout limit is 1 MiB; a full-sheet JSON dump can exceed it,
    // which would abort the command with ENOBUFS.
    maxBuffer: 64 * 1024 * 1024,
  });

  const trimmed = out.trim();
  const jsonStart = trimmed.indexOf('[');
  if (jsonStart < 0) return [];

  try {
    const parsed: unknown = JSON.parse(trimmed.slice(jsonStart));
    // Callers iterate the result as a row list, so anything but an array counts as "no rows".
    if (!Array.isArray(parsed)) return [];
    return parsed as ExcelRow[];
  } catch (err) {
    // Enrichment is best-effort, but dropping an entire source file should not go unnoticed.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[excel] could not parse xlsx-cli output for ${filePath}: ${reason}`);
    return [];
  }
}
/**
 * Returns the part-number index over all source spreadsheets, building it on first use.
 *
 * For every file in EXCEL_SOURCE_FILES that exists on disk:
 * - the row whose "Part Number" is the literal "Units" supplies the per-column unit strings;
 * - every other row with a non-empty "Part Number" is stored under its normalized key.
 *
 * Rows from different files that share a key are appended to the same entry; the entry keeps
 * whichever units table has more columns. The finished map is cached in EXCEL_INDEX.
 */
function getExcelIndex(): Map<string, ExcelMatch> {
  if (EXCEL_INDEX) return EXCEL_INDEX;

  const index = new Map<string, ExcelMatch>();

  for (const sourceFile of EXCEL_SOURCE_FILES) {
    if (!fs.existsSync(sourceFile)) continue;

    const sheetRows = loadExcelRows(sourceFile);
    if (process.env.PDF_DEBUG_EXCEL === '1') {
      console.log(`[excel] loaded ${sheetRows.length} rows from ${path.relative(process.cwd(), sourceFile)}`);
    }

    // Collect "column -> unit" from the dedicated Units row, if the sheet has one.
    const units: Record<string, string> = {};
    const unitsRow = sheetRows.find(row => row && row['Part Number'] === 'Units');
    if (unitsRow) {
      Object.entries(unitsRow).forEach(([column, rawUnit]) => {
        if (column === 'Part Number') return;
        const unit = normalizeValue(String(rawUnit ?? ''));
        if (unit) units[column] = unit;
      });
    }

    for (const row of sheetRows) {
      const partNumber = row?.['Part Number'];
      if (!partNumber || partNumber === 'Units') continue;

      const key = normalizeExcelKey(String(partNumber));
      if (!key) continue;

      const existing = index.get(key);
      if (!existing) {
        index.set(key, { rows: [row], units });
        continue;
      }

      existing.rows.push(row);
      // Prefer the richer units table when the same key shows up in several sheets.
      const currentUnitCount = Object.keys(existing.units).length;
      if (currentUnitCount < Object.keys(units).length) existing.units = units;
    }
  }

  EXCEL_INDEX = index;
  return index;
}
/**
 * Looks up the Excel entry for a product.
 *
 * Candidate identifiers are tried in this order: name, slug (without a trailing "-<digits>"),
 * sku, translationKey. Empty candidates are skipped. The first candidate whose normalized key
 * is present in the index with at least one row wins.
 *
 * @returns The matching index entry, or null when no candidate matches.
 */
function findExcelForProduct(product: ProductData): ExcelMatch | null {
  const index = getExcelIndex();

  const slugBase = product.slug ? product.slug.replace(/-\d+$/g, '') : '';
  const candidates = [product.name, slugBase, product.sku, product.translationKey].filter(Boolean) as string[];

  if (process.env.PDF_DEBUG_EXCEL === '1') {
    const keys = candidates.map(c => normalizeExcelKey(c));
    console.log(`[excel] lookup product=${product.id} ${product.locale ?? ''} slug=${product.slug ?? ''} name=${stripHtml(product.name)} keys=${keys.join(',')}`);
  }

  const hit = candidates
    .map(candidate => index.get(normalizeExcelKey(candidate)))
    .find(entry => entry && entry.rows.length);
  return hit ?? null;
}
/** Convenience wrapper around findExcelForProduct(): the matched rows, or an empty array when nothing matches. */
function findExcelRowsForProduct(product: ProductData): ExcelRow[] {
  const hit = findExcelForProduct(product);
  if (!hit) return [];
  return hit.rows || [];
}
/**
 * Picks the column of `row` that best matches a prioritized list of header patterns.
 *
 * Patterns are tried in the given order; for each pattern the first matching column
 * (in the row's own key order) is returned. Earlier patterns therefore outrank earlier columns.
 *
 * @returns The matching column name, or null when no pattern matches (or `row` is nullish).
 */
function guessColumnKey(row: ExcelRow, patterns: RegExp[]): string | null {
  const columnNames = Object.keys(row || {});
  for (const pattern of patterns) {
    for (const column of columnNames) {
      if (!pattern.test(String(column))) continue;
      if (column) return column;
      // The first match for this pattern was the empty-string column: treat the
      // pattern as unmatched and move on to the next one.
      break;
    }
  }
  return null;
}
/**
 * Tells whether the product already carries a usable attribute whose name matches `nameRe`.
 *
 * Every name-matching attribute is considered, not just the first one: a product can hold
 * several attributes matching the same pattern (e.g. an empty one from an export plus an
 * enriched one), and looking only at the first would report "missing" and lead callers to
 * append yet another duplicate.
 *
 * @param product     Product whose `attributes` are inspected (may be undefined).
 * @param nameRe      Pattern tested against each attribute name.
 * @param expectedLen When given, an attribute only counts if it has exactly this many options;
 *                    otherwise any attribute with at least one option counts.
 */
function hasAttr(product: ProductData, nameRe: RegExp, expectedLen?: number): boolean {
  return (product.attributes || []).some(attr => {
    if (!nameRe.test(attr.name)) return false;
    const optionCount = (attr.options || []).length;
    return typeof expectedLen === 'number' ? optionCount === expectedLen : optionCount > 0;
  });
}
/**
 * Appends a per-row attribute (one option per cross-section row) unless the product already
 * has a name-matching attribute with exactly `expectedLen` options.
 *
 * Nothing is added when `options` contains no non-empty value.
 */
function pushRowAttrIfMissing(args: {
  product: ProductData;
  name: string;
  options: string[];
  expectedLen: number;
  existsRe: RegExp;
}): void {
  const hasAnyValue = args.options.some(Boolean);
  if (!hasAnyValue) return;

  const alreadyPresent = hasAttr(args.product, args.existsRe, args.expectedLen);
  if (alreadyPresent) return;

  const target = args.product;
  if (!target.attributes) target.attributes = [];
  target.attributes.push({ name: args.name, options: args.options });
}
/**
 * Appends an attribute unless the product already has a name-matching attribute
 * with at least one option. Nothing is added when `options` contains no non-empty value.
 */
function pushAttrIfMissing(args: { product: ProductData; name: string; options: string[]; existsRe: RegExp }): void {
  const hasAnyValue = args.options.some(Boolean);
  if (!hasAnyValue) return;

  const alreadyPresent = hasAttr(args.product, args.existsRe);
  if (alreadyPresent) return;

  const target = args.product;
  if (!target.attributes) target.attributes = [];
  target.attributes.push({ name: args.name, options: args.options });
}
/**
 * Normalizes every option, drops empty results and removes case-insensitive duplicates.
 * The first spelling of each value is kept and the original order is preserved.
 */
function getUniqueNonEmpty(options: string[]): string[] {
  const seenLowerCased = new Set<string>();
  return options
    .map(normalizeValue)
    .filter(Boolean)
    .filter(value => {
      const folded = value.toLowerCase();
      if (seenLowerCased.has(folded)) return false;
      seenLowerCased.add(folded);
      return true;
    });
}
/**
 * Adds cross-section/configuration data from the source spreadsheets when the product has none.
 *
 * Does nothing if the product already has a non-empty configuration or cross-section attribute,
 * if no Excel rows match the product, or if the first matched row has no recognizable
 * cross-section column. Otherwise it mutates `product.attributes` in place and adds:
 * - one configuration attribute with an option per Excel row ("cross" or "cross - voltage");
 * - per-row outer diameter, weight and DC resistance (when those columns exist);
 * - a set of technical columns (voltages, temperatures, thicknesses, max. resistance) stored
 *   as the list of distinct values, each only if no equivalent attribute exists yet.
 *
 * Column detection looks at the first matched row only.
 *
 * @param product Product to enrich.
 * @param locale  Selects German or English attribute names.
 */
function ensureExcelCrossSectionAttributes(product: ProductData, locale: 'en' | 'de'): void {
  const hasCross = (product.attributes || []).some(
    a =>
      /configuration|konfiguration|aufbau|bezeichnung|number of cores and cross-section|querschnitt|cross.?section|mm²|mm2/i.test(a.name) &&
      (a.options?.length || 0) > 0,
  );
  if (hasCross) return;

  const rows = findExcelRowsForProduct(product);
  if (!rows.length) {
    if (process.env.PDF_DEBUG_EXCEL === '1') {
      console.log(`[excel] no rows found for product ${product.id} (${product.slug ?? stripHtml(product.name)})`);
    }
    return;
  }

  // Find the cross-section column.
  const csKey = guessColumnKey(rows[0], [
    /number of cores and cross-section/i,
    /cross.?section/i,
    /ross section conductor/i,
  ]);
  if (!csKey) {
    if (process.env.PDF_DEBUG_EXCEL === '1') {
      console.log(`[excel] rows found but no cross-section column for product ${product.id}; available keys: ${Object.keys(rows[0] || {}).slice(0, 30).join(', ')}`);
    }
    return;
  }

  const voltageKey = guessColumnKey(rows[0], [/rated voltage/i, /voltage rating/i, /spannungs/i, /nennspannung/i]);
  const outerKey = guessColumnKey(rows[0], [/outer diameter\b/i, /outer diameter.*approx/i, /outer diameter of cable/i, /außen/i]);
  const weightKey = guessColumnKey(rows[0], [/weight\b/i, /gewicht/i, /cable weight/i]);
  const dcResKey = guessColumnKey(rows[0], [/dc resistance/i, /resistance conductor/i, /leiterwiderstand/i]);

  // Additional technical columns that are often missing from WP exports.
  // We add them as either constant attributes (if identical across all rows)
  // or as small multi-value arrays (if they vary), so TECHNICAL DATA can render them.
  // The rated-voltage column is the same one already located for the configuration text.
  const ratedVoltKey = voltageKey;
  const testVoltKey = guessColumnKey(rows[0], [/test voltage/i, /prüfspannung/i]);
  const tempRangeKey = guessColumnKey(rows[0], [/operating temperature range/i, /temperature range/i, /temperaturbereich/i]);
  const minLayKey = guessColumnKey(rows[0], [/minimal temperature for laying/i]);
  const minStoreKey = guessColumnKey(rows[0], [/minimal storage temperature/i]);
  const maxOpKey = guessColumnKey(rows[0], [/maximal operating conductor temperature/i, /max\. operating/i]);
  const maxScKey = guessColumnKey(rows[0], [/maximal short-circuit temperature/i, /short\s*circuit\s*temperature/i]);
  const insThkKey = guessColumnKey(rows[0], [/nominal insulation thickness/i, /insulation thickness/i]);
  const sheathThkKey = guessColumnKey(rows[0], [/nominal sheath thickness/i, /minimum sheath thickness/i]);
  const maxResKey = guessColumnKey(rows[0], [/maximum resistance of conductor/i]);

  const cfgName = locale === 'de' ? 'Anzahl der Adern und Querschnitt' : 'Number of cores and cross-section';
  const cfgOptions = rows
    .map(r => {
      const cs = normalizeValue(String(r?.[csKey] ?? ''));
      const v = voltageKey ? normalizeValue(String(r?.[voltageKey] ?? '')) : '';
      if (!cs) return '';
      if (!v) return cs;
      // Keep the existing config separator used by splitConfig(): "cross - voltage".
      // Add unit only if not already present. The unit may be glued to the number ("0.6/1kV")
      // or be plain volts ("1500 V"); both must count as "has a unit", otherwise the result
      // would read "0.6/1kV kV" or "1500 V kV".
      const vHasUnit = /(?:^|[^a-z])k?v\b/i.test(v);
      const vText = vHasUnit ? v : `${v} kV`;
      return `${cs} - ${vText}`;
    })
    .filter(Boolean);
  if (!cfgOptions.length) return;

  // Attach the array to the product BEFORE anything is pushed. pushAttrIfMissing() writes to
  // product.attributes directly; with a detached local array (product.attributes initially
  // undefined) its additions would land in a different array and be lost.
  product.attributes = product.attributes || [];
  const attrs = product.attributes;
  attrs.push({ name: cfgName, options: cfgOptions });

  const pushRowAttr = (name: string, key: string | null, unit?: string) => {
    if (!key) return;
    const options = rows
      .map(r => normalizeValue(String(r?.[key] ?? '')))
      .map(v => (unit && v && looksNumeric(v) ? `${v} ${unit}` : v));
    if (options.filter(Boolean).length === 0) return;
    attrs.push({ name, options });
  };

  // These names are chosen so existing PDF regexes can detect them.
  pushRowAttr(locale === 'de' ? 'Außen-Ø' : 'Outer diameter', outerKey, 'mm');
  pushRowAttr(locale === 'de' ? 'Gewicht' : 'Weight', weightKey, 'kg/km');
  pushRowAttr(locale === 'de' ? 'DC-Leiterwiderstand (20°C)' : 'DC resistance at 20 °C', dcResKey, 'Ω/km');

  const colValues = (key: string | null) => rows.map(r => normalizeValue(String(r?.[key ?? ''] ?? '')));

  const addConstOrSmallList = (args: { name: string; existsRe: RegExp; key: string | null }) => {
    if (!args.key) return;
    const uniq = getUniqueNonEmpty(colValues(args.key));
    if (!uniq.length) return;
    // A single shared value ends up as a one-element list; varying values are stored as the
    // unique set (TECHNICAL DATA will compact it).
    pushAttrIfMissing({ product, name: args.name, options: uniq, existsRe: args.existsRe });
  };

  addConstOrSmallList({
    name: locale === 'de' ? 'Nennspannung' : 'Rated voltage',
    existsRe: /rated\s*voltage|voltage\s*rating|nennspannung|spannungsbereich/i,
    key: ratedVoltKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Prüfspannung' : 'Test voltage',
    existsRe: /test\s*voltage|prüfspannung/i,
    key: testVoltKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Temperaturbereich' : 'Operating temperature range',
    existsRe: /operating\s*temperature\s*range|temperature\s*range|temperaturbereich/i,
    key: tempRangeKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Min. Verlegetemperatur' : 'Minimal temperature for laying',
    existsRe: /minimal\s*temperature\s*for\s*laying/i,
    key: minLayKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Min. Lagertemperatur' : 'Minimal storage temperature',
    existsRe: /minimal\s*storage\s*temperature/i,
    key: minStoreKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Max. Betriebstemperatur' : 'Maximal operating conductor temperature',
    existsRe: /maximal\s*operating\s*conductor\s*temperature|max\.?\s*operating/i,
    key: maxOpKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Kurzschlusstemperatur (max.)' : 'Maximal short-circuit temperature',
    existsRe: /maximal\s*short-?circuit\s*temperature|short\s*circuit\s*temperature|kurzschlusstemperatur/i,
    key: maxScKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Isolationsdicke (nom.)' : 'Nominal insulation thickness',
    existsRe: /nominal\s*insulation\s*thickness|insulation\s*thickness/i,
    key: insThkKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Manteldicke (nom.)' : 'Nominal sheath thickness',
    existsRe: /nominal\s*sheath\s*thickness|minimum\s*sheath\s*thickness|manteldicke/i,
    key: sheathThkKey,
  });
  addConstOrSmallList({
    name: locale === 'de' ? 'Max. Leiterwiderstand' : 'Maximum resistance of conductor',
    existsRe: /maximum\s*resistance\s*of\s*conductor|max\.?\s*resistance|leiterwiderstand/i,
    key: maxResKey,
  });

  if (process.env.PDF_DEBUG_EXCEL === '1') {
    console.log(`[excel] enriched product ${product.id} (${product.slug ?? stripHtml(product.name)}) with ${cfgOptions.length} configurations from excel`);
  }
}
/**
 * Fills in per-row technical attributes (diameter, weight, resistances, inductances, current
 * ratings, short-circuit currents, bending radius) from the matched Excel rows.
 *
 * Runs only when the product has a configuration or cross-section attribute whose option count
 * equals the number of matched Excel rows; otherwise rows could be mapped to the wrong
 * cross-section. Each attribute is added only if the product has no name-matching attribute
 * with exactly that many options. Numeric-looking cells get the listed unit appended.
 *
 * @param product Product to enrich; `product.attributes` is mutated in place.
 * @param locale  Selects German or English attribute names.
 */
function ensureExcelRowSpecificAttributes(product: ProductData, locale: 'en' | 'de'): void {
  const rows = findExcelRowsForProduct(product);
  if (rows.length === 0) return;

  const crossSectionAttr =
    findAttr(product, /configuration|konfiguration|aufbau|bezeichnung/i) ||
    findAttr(product, /number of cores and cross-section|querschnitt|cross.?section|mm²|mm2/i);
  if (!crossSectionAttr || !crossSectionAttr.options?.length) return;

  const rowCount = crossSectionAttr.options.length;
  // Only enrich row-specific columns when row counts match (avoid wrong mapping).
  if (rows.length !== rowCount) return;

  const sample = rows[0] || {};
  const german = locale === 'de';

  // One entry per attribute, in the order they are appended.
  // Labels are ones already recognized by the existing PDF regexes.
  const specs: Array<{ de: string; en: string; unit: string; columns: RegExp[]; existsRe: RegExp }> = [
    {
      de: 'Außen-Ø',
      en: 'Outer diameter',
      unit: 'mm',
      columns: [/outer diameter \(approx\.?\)/i, /outer diameter of cable/i, /outer diameter\b/i, /diameter over screen/i],
      existsRe: /outer\s*diameter|außen\s*durchmesser|außen-?ø/i,
    },
    {
      de: 'Gewicht',
      en: 'Weight',
      unit: 'kg/km',
      columns: [/weight \(approx\.?\)/i, /cable weight/i, /\bweight\b/i],
      existsRe: /\bweight\b|gewicht/i,
    },
    {
      de: 'DC-Leiterwiderstand (20°C)',
      en: 'DC resistance at 20 °C',
      unit: 'Ω/km',
      columns: [/dc resistance at 20/i, /maximum resistance of conductor/i, /resistance conductor/i],
      existsRe: /dc\s*resistance|max(?:imum)?\s*resistance|resistance\s+conductor|leiterwiderstand/i,
    },
    {
      de: 'Kapazität (ca.)',
      en: 'Capacitance (approx.)',
      unit: 'μF/km',
      columns: [/capacitance/i],
      existsRe: /capacitance|kapazit/i,
    },
    {
      de: 'Induktivität, trefoil (ca.)',
      en: 'Inductance, trefoil (approx.)',
      unit: 'mH/km',
      columns: [/inductance,?\s*trefoil/i],
      existsRe: /inductance,?\s*trefoil/i,
    },
    {
      de: 'Induktivität in Luft, flach (ca.)',
      en: 'Inductance in air, flat (approx.)',
      unit: 'mH/km',
      columns: [/inductance in air,?\s*flat/i],
      existsRe: /inductance\s+in\s+air,?\s*flat/i,
    },
    {
      de: 'Induktivität im Erdreich, flach (ca.)',
      en: 'Inductance in ground, flat (approx.)',
      unit: 'mH/km',
      columns: [/inductance in ground,?\s*flat/i],
      existsRe: /inductance\s+in\s+ground,?\s*flat/i,
    },
    {
      de: 'Strombelastbarkeit in Luft, trefoil',
      en: 'Current ratings in air, trefoil',
      unit: 'A',
      columns: [/current ratings in air,?\s*trefoil/i],
      existsRe: /current\s+ratings\s+in\s+air,?\s*trefoil/i,
    },
    {
      de: 'Strombelastbarkeit in Luft, flach',
      en: 'Current ratings in air, flat',
      unit: 'A',
      columns: [/current ratings in air,?\s*flat/i],
      existsRe: /current\s+ratings\s+in\s+air,?\s*flat/i,
    },
    {
      de: 'Strombelastbarkeit im Erdreich, trefoil',
      en: 'Current ratings in ground, trefoil',
      unit: 'A',
      columns: [/current ratings in ground,?\s*trefoil/i],
      existsRe: /current\s+ratings\s+in\s+ground,?\s*trefoil/i,
    },
    {
      de: 'Strombelastbarkeit im Erdreich, flach',
      en: 'Current ratings in ground, flat',
      unit: 'A',
      columns: [/current ratings in ground,?\s*flat/i],
      existsRe: /current\s+ratings\s+in\s+ground,?\s*flat/i,
    },
    {
      de: 'Kurzschlussstrom Leiter',
      en: 'Conductor shortcircuit current',
      unit: 'kA',
      columns: [/conductor shortcircuit current/i],
      existsRe: /conductor\s+shortcircuit\s+current/i,
    },
    {
      de: 'Kurzschlussstrom Schirm',
      en: 'Screen shortcircuit current',
      unit: 'kA',
      columns: [/screen shortcircuit current/i],
      existsRe: /screen\s+shortcircuit\s+current/i,
    },
    {
      de: 'Biegeradius (min.)',
      en: 'Bending radius (min.)',
      unit: 'mm',
      columns: [/bending radius/i, /min\. bending radius/i],
      existsRe: /bending\s*radius|biegeradius/i,
    },
  ];

  for (const spec of specs) {
    const column = guessColumnKey(sample, spec.columns);
    // A missing column reads the '' key, which yields only empty cells;
    // pushRowAttrIfMissing() then skips the attribute.
    const cells = rows.map(r => normalizeValue(String(r?.[column ?? ''] ?? '')));
    const options = cells.map(cell => (cell && looksNumeric(cell) ? `${cell} ${spec.unit}` : cell));
    pushRowAttrIfMissing({
      product,
      name: german ? spec.de : spec.en,
      options,
      expectedLen: rowCount,
      existsRe: spec.existsRe,
    });
  }
}
function getProductUrl(product: ProductData): string | null {
if (!product.path) return null;
return `https://klz-cables.com${product.path}`;
@@ -1065,7 +1518,9 @@ function summarizeOptions(options: string[] | undefined, maxItems: number = 3):
const uniq = Array.from(new Set(vals));
if (uniq.length === 1) return uniq[0];
if (uniq.length <= maxItems) return uniq.join(' / ');
return `${uniq.slice(0, maxItems).join(' / ')} (+${uniq.length - maxItems})`;
// UX: avoid showing internal counts like "+8" in customer-facing PDFs.
// Indicate truncation with an ellipsis.
return `${uniq.slice(0, maxItems).join(' / ')} / ...`;
}
function parseNumericOption(value: string): number | null {
@@ -1086,11 +1541,13 @@ function summarizeNumericRange(options: string[] | undefined): { ok: boolean; te
const vals = (options || []).map(parseNumericOption).filter((n): n is number => n !== null);
if (vals.length < 3) return { ok: false, text: '' };
const uniq = Array.from(new Set(vals));
if (uniq.length < 2) return { ok: false, text: '' };
// If there are only a few distinct values, listing is clearer than a range.
if (uniq.length < 4) return { ok: false, text: '' };
uniq.sort((a, b) => a - b);
const min = uniq[0];
const max = uniq[uniq.length - 1];
return { ok: true, text: `${formatNumber(min)}${formatNumber(max)} (n=${uniq.length})` };
// UX: don't show internal counts like "n=…" in customer-facing datasheets.
return { ok: true, text: `${formatNumber(min)}${formatNumber(max)}` };
}
function summarizeSmartOptions(label: string, options: string[] | undefined): string {
@@ -1482,6 +1939,21 @@ async function generatePDF(product: ProductData, locale: 'en' | 'de'): Promise<B
}
}
// === EXCEL SOURCE ENRICHMENT (cross-section + key row-specific attrs) ===
// Some products have cross-section data on the website but not in the WP export.
// When missing, we enrich from the source Excel sheets under `data/source/*`.
ensureExcelCrossSectionAttributes(product, locale);
// Even when cross-sections exist, the WP export can miss row-specific technical columns.
// We add a best-effort set of numeric per-row attributes from Excel (only when row counts match).
ensureExcelRowSpecificAttributes(product, locale);
if (process.env.PDF_DEBUG_EXCEL === '1') {
const hasAnyCfg = (product.attributes || []).some(a =>
/configuration|konfiguration|aufbau|bezeichnung|number of cores and cross-section|querschnitt|cross.?section|mm²|mm2/i.test(a.name),
);
console.log(`[excel] after enrichment: product ${product.id} cfgAttrPresent=${hasAnyCfg}`);
}
// === TECHNICAL DATA (shared across all cross-sections) ===
const configAttr = findAttr(product, /configuration|konfiguration|aufbau|bezeichnung/i);
const crossSectionAttr =
@@ -1490,6 +1962,12 @@ async function generatePDF(product: ProductData, locale: 'en' | 'de'): Promise<B
const rowCount = crossSectionAttr?.options?.length || 0;
const hasCrossSectionData = Boolean(crossSectionAttr && rowCount > 0);
if (process.env.PDF_DEBUG_EXCEL === '1') {
console.log(
`[excel] crossSectionAttr=${crossSectionAttr ? normalizeValue(crossSectionAttr.name) : 'none'} rows=${rowCount} hasCross=${hasCrossSectionData}`,
);
}
// Compact mode approach:
// - show constant (non-row) attributes as key/value grid
// - show only a small configuration sample + total count