feat: unify pdf datasheet architecture and regenerate all products
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 8s
Build & Deploy / 🧪 QA (push) Successful in 2m18s
Build & Deploy / 🏗️ Build (push) Successful in 3m43s
Build & Deploy / 🚀 Deploy (push) Successful in 19s
Build & Deploy / 🧪 Post-Deploy Verification (push) Failing after 4m54s
Build & Deploy / 🔔 Notify (push) Successful in 1s

This commit is contained in:
2026-03-06 23:42:46 +01:00
parent b80136894c
commit 20051244d9
62 changed files with 2026 additions and 49 deletions

View File

@@ -0,0 +1,894 @@
import * as fs from 'fs';
import * as path from 'path';
import type { DatasheetModel, DatasheetVoltageTable, KeyValueItem, ProductData } from './types';
import type { ExcelMatch, MediumVoltageCrossSectionExcelMatch } from './excel-index';
import { findExcelForProduct, findMediumVoltageCrossSectionExcelForProduct } from './excel-index';
import { getLabels, getProductUrl, normalizeValue, stripHtml } from './utils';
// One spreadsheet row as a plain object; cell values are untyped at this layer.
type ExcelRow = Record<string, unknown>;
// Intermediate model of one per-voltage table, before its cells are materialized into strings.
type VoltageTableModel = {
// Heading of the table: the voltage group its rows were bucketed under.
voltageLabel: string;
// Key/value facts that accompany the table.
metaItems: KeyValueItem[];
// One entry per table row; the position matches the rowIndex passed to each column's `get`.
crossSections: string[];
// Column descriptors; `get(rowIndex)` returns the display string of that row's cell.
columns: Array<{ key: string; label: string; get: (rowIndex: number) => string }>;
};
// Result of a model builder; every `ok: false` result in this file carries empty lists.
type BuildExcelModelResult = {
ok: boolean;
technicalItems: KeyValueItem[];
voltageTables: VoltageTableModel[];
};
// Lookup from a media URL/path to a replacement path (consumed by resolveMediaToLocalPath).
type AssetMap = Record<string, string>;
/** Location of the optional asset map, resolved against the current working directory. */
const ASSET_MAP_FILE = path.join(process.cwd(), 'data/processed/asset-map.json');

/**
 * Loads the asset map from ASSET_MAP_FILE.
 *
 * Loading is best-effort: a missing file, unreadable file or invalid JSON yields an
 * empty map. The parsed document is validated instead of being cast blindly, so a
 * JSON array, `null`, a scalar, or entries whose value is not a string can no longer
 * leak into the map and break string operations in its consumers.
 *
 * @returns A map containing only the string-valued entries of the file, or `{}`.
 */
function readAssetMap(): AssetMap {
  let parsed: unknown;
  try {
    if (!fs.existsSync(ASSET_MAP_FILE)) return {};
    parsed = JSON.parse(fs.readFileSync(ASSET_MAP_FILE, 'utf8'));
  } catch {
    // Deliberate best-effort: the asset map is optional.
    return {};
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return {};

  const map: AssetMap = {};
  for (const [source, target] of Object.entries(parsed)) {
    if (typeof target === 'string') map[source] = target;
  }
  return map;
}

/** Asset map loaded once at module initialization. */
const ASSET_MAP: AssetMap = readAssetMap();
/**
 * Normalizes a unit string for display.
 *
 * A bare "c" / "°c" (any case) becomes "°C"; otherwise the omega sign is spelled
 * out as "Ohm" and the micro sign (U+00B5 / U+03BC) is replaced by "u".
 *
 * @param unitRaw Unit text as found in the sheet.
 * @returns The normalized unit, or '' when the input normalizes to nothing.
 */
function normalizeUnit(unitRaw: string): string {
  const unit = normalizeValue(unitRaw);
  if (!unit) return '';

  const isCelsius = /^c$/i.test(unit) || /^°c$/i.test(unit);
  if (isCelsius) return '°C';

  const ohmSpelledOut = unit.replace(/Ω/gi, 'Ohm');
  return ohmSpelledOut.replace(/[\u00B5\u03BC]/g, 'u');
}
/**
 * Builds a display label from an Excel header and an optional unit.
 *
 * The header is whitespace-compacted and "(approx)" / "(approx.)" is unified to
 * "(approx.)". The unit is appended as " (unit)" unless the label already contains
 * that parenthesized unit (case-insensitive).
 *
 * The unit is escaped before it is embedded in the lookup RegExp. The previous
 * escape pattern was double-escaped and therefore never matched, so units with
 * RegExp metacharacters (e.g. "(", "*", "+") could throw or match incorrectly.
 *
 * @param key  Header text.
 * @param unit Optional unit text.
 * @returns The label, or '' when the header normalizes to nothing.
 */
function formatExcelHeaderLabel(key: string, unit?: string): string {
  const k = normalizeValue(key);
  if (!k) return '';
  const u = normalizeValue(unit || '');
  const compact = k
    .replace(/\s*\(approx\.?\)\s*/gi, ' (approx.) ')
    .replace(/\s+/g, ' ')
    .trim();
  if (!u) return compact;

  // Escape every RegExp metacharacter so the unit is matched literally.
  const escapedUnit = u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const alreadyHasUnit = new RegExp(`\\(${escapedUnit}\\)`, 'i').test(compact);
  return alreadyHasUnit ? compact : `${compact} (${u})`;
}
/**
 * Normalizes a voltage cell into a display label.
 *
 * - Empty input yields ''.
 * - If the text contains the word "kv" (any case), its first occurrence is rewritten to "kV".
 * - A purely numeric text (optionally "a/b") without letters gets " kV" appended.
 * - Anything else is returned whitespace-compacted.
 */
function normalizeVoltageLabel(raw: string): string {
  const value = normalizeValue(raw);
  if (!value) return '';

  const cleaned = value.replace(/\s+/g, ' ');
  if (/\bkv\b/i.test(cleaned)) {
    return cleaned.replace(/\bkv\b/i, 'kV');
  }

  const hasNumber = /\d+(?:[.,]\d+)?(?:\s*\/\s*\d+(?:[.,]\d+)?)?/.test(cleaned);
  const hasLetters = /[a-z]/i.test(cleaned);
  return hasNumber && !hasLetters ? `${cleaned} kV` : cleaned;
}
/**
 * Derives a numeric sort key from a voltage label: the last number found in the
 * normalized label (decimal commas are read as dots).
 *
 * @returns The last finite number, or +Infinity when the label has none, so such
 *          labels sort last.
 */
function parseVoltageSortKey(voltageLabel: string): number {
  const label = normalizeVoltageLabel(voltageLabel).replace(/,/g, '.');
  const tokens = label.match(/\d+(?:\.\d+)?/g) ?? [];

  let lastFinite = Number.POSITIVE_INFINITY;
  let found = false;
  for (const token of tokens) {
    const parsed = Number(token);
    if (Number.isFinite(parsed)) {
      lastFinite = parsed;
      found = true;
    }
  }
  return found ? lastFinite : Number.POSITIVE_INFINITY;
}
/**
 * Compacts a cell value for display and applies the locale's decimal separator.
 *
 * - Bending-radius style values ("15xD (Single core); 12xD (Multi core)") collapse
 *   to their distinct factors: "15/12xD".
 * - Values without digits are returned as normalized.
 * - Otherwise the value is split on range separators (hyphen or en dash); each
 *   purely numeric part loses trailing decimal zeros ("12.50" -> "12.5",
 *   "3.0" -> "3") and gets "," as decimal separator for `de`, "." for `en`.
 *   Non-numeric parts are passed through unchanged.
 *
 * Fix: the separator pattern used to contain an empty alternative, which split the
 * text into single characters so no part was ever recognized as a decimal number
 * and neither the zero-stripping nor the locale conversion took effect. The
 * separators are now matched explicitly (U+2013 written as an escape so it survives
 * re-encoding). A leading "+" is accepted on numeric parts and kept.
 *
 * @param value  Raw cell text.
 * @param locale Target locale.
 * @returns The compacted text; never truncated with ellipses.
 */
function compactNumericForLocale(value: string, locale: 'en' | 'de'): string {
  const v = normalizeValue(value);
  if (!v) return '';

  // Compact common bending-radius style: keep semantics, reduce width.
  if (/\d+xD/i.test(v)) {
    const factors = new Set(Array.from(v.matchAll(/(\d+)xD/gi), (m) => m[1]));
    if (factors.size) return `${Array.from(factors).join('/')}xD`;
  }

  if (!/\d/.test(v)) return v;

  const trimmed = v.replace(/\s+/g, ' ').trim();
  // The capture group keeps the separators in the result so they can be re-joined.
  const parts = trimmed.split(/(\u2013|-)/);
  const out = parts.map((part) => {
    if (part === '\u2013' || part === '-') return part;
    const s = part.trim();
    // A minus sign never reaches this point: it has been split off as a separator.
    if (!/^\+?\d+(?:[.,]\d+)?$/.test(s)) return part;
    const compact = s
      .replace(/,/g, '.')
      .replace(/\.0+$/, '')
      .replace(/(\.\d*?)0+$/, '$1')
      .replace(/\.$/, '');
    return locale === 'de' ? compact.replace(/\./g, ',') : compact;
  });
  return out.join('');
}
/**
 * Prepares a cell value for a dense table whose column header already carries the unit.
 *
 * When a unit is known, occurrences of that unit and of a fixed list of common unit
 * spellings are removed from the value. Range separators are then unified to a plain
 * hyphen without surrounding spaces, spaces around "/" are removed, and the result is
 * passed through compactNumericForLocale.
 *
 * Fix: the first separator replacement used a pattern that matched the empty string,
 * which inserted "-" between every character of the value. It now targets the en
 * dash (U+2013, written as an escape so it survives re-encoding).
 *
 * NOTE(review): several unit patterns rely on `\b` next to non-word characters
 * ("Ω", "°"), so they only match in some contexts — confirm whether that is intended.
 *
 * @param value  Raw cell text.
 * @param unit   Unit of the column, if any.
 * @param locale Target locale for number formatting.
 * @returns The compacted cell text, or '' for an empty value.
 */
function compactCellForDenseTable(
  value: string,
  unit: string | undefined,
  locale: 'en' | 'de',
): string {
  let v = normalizeValue(value);
  if (!v) return '';
  const u = normalizeValue(unit || '');
  if (u) {
    // Strip the column's own unit (matched literally) ...
    const esc = u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    v = v.replace(new RegExp(`\\s*${esc}\\b`, 'ig'), '').trim();
    // ... and common unit spellings that may differ from the declared unit.
    v = v
      .replace(/\bkg\s*\/\s*km\b/gi, '')
      .replace(/\bohm\s*\/\s*km\b/gi, '')
      .replace(/\bΩ\s*\/\s*km\b/gi, '')
      .replace(/\bu\s*f\s*\/\s*km\b/gi, '')
      .replace(/\bmh\s*\/\s*km\b/gi, '')
      .replace(/\bkA\b/gi, '')
      .replace(/\bmm\b/gi, '')
      .replace(/\bkv\b/gi, '')
      .replace(/\b°?c\b/gi, '')
      .replace(/\s+/g, ' ')
      .trim();
  }
  v = v
    .replace(/\s*\u2013\s*/g, '-')
    .replace(/\s*-\s*/g, '-')
    .replace(/\s*\/\s*/g, '/')
    .replace(/\s+/g, ' ')
    .trim();
  return compactNumericForLocale(v, locale);
}
/**
 * Resolves a media reference to a root-relative path where possible.
 *
 * Already rooted paths are returned unchanged and "media/..." gets a leading "/".
 * Anything else is looked up in ASSET_MAP; a mapped value is rooted the same way
 * (a leading "public/" is dropped). Unmapped references are returned as given.
 *
 * @returns The resolved path, or null for an empty/absent reference.
 */
function resolveMediaToLocalPath(urlOrPath: string | null | undefined): string | null {
  if (!urlOrPath) return null;
  if (urlOrPath.startsWith('/')) return urlOrPath;
  if (/^media\//i.test(urlOrPath)) return `/${urlOrPath}`;

  const mapped = ASSET_MAP[urlOrPath];
  if (!mapped) return urlOrPath;
  if (mapped.startsWith('/')) return mapped;

  const withoutPublic = mapped.replace(/^public\//i, '');
  if (withoutPublic !== mapped) return `/${withoutPublic}`;

  return /^media\//i.test(mapped) ? `/${mapped}` : mapped;
}
/**
 * Finds the first header of `row` that matches one of `patterns`.
 *
 * Patterns are tried in order; for each pattern the headers are scanned in object
 * key order. A set of veto rules rejects look-alike headers: when the pattern itself
 * matches a probe word (e.g. "conductor"), only headers passing the associated check
 * are eligible.
 *
 * @returns The matching header, or null when no pattern matches any header.
 */
function guessColumnKey(row: ExcelRow, patterns: RegExp[]): string | null {
  // Evaluated in this order, short-circuiting on the first rule that rejects.
  const vetoRules: Array<{ probe: string; rejects: (header: string) => boolean }> = [
    { probe: 'conductor', rejects: (h) => /ross section conductor/i.test(h) },
    { probe: 'insulation thickness', rejects: (h) => /Diameter over insulation/i.test(h) },
    { probe: 'conductor', rejects: (h) => !/^conductor$/i.test(h) },
    { probe: 'insulation', rejects: (h) => !/^insulation$/i.test(h) },
    { probe: 'sheath', rejects: (h) => !/^sheath$/i.test(h) },
    { probe: 'norm', rejects: (h) => !/^norm$/i.test(h) },
  ];

  const headers = Object.keys(row || {});
  for (const pattern of patterns) {
    const hit = headers.find((candidate) => {
      const header = String(candidate);
      const vetoed = vetoRules.some((rule) => pattern.test(rule.probe) && rule.rejects(header));
      return vetoed ? false : pattern.test(header);
    });
    if (hit) return hit;
  }
  return null;
}
/**
 * Produces the full label of a technical-data row from its Excel header.
 *
 * For `en` the normalized header is returned as is. For `de` known English terms
 * are replaced by their German counterparts; the replacements are applied in list
 * order, so more specific phrases must precede the phrases they contain.
 *
 * @param args.key      Internal column key (not used for the label text).
 * @param args.excelKey Header text from the sheet.
 * @param args.locale   Target locale.
 */
function technicalFullLabel(args: { key: string; excelKey: string; locale: 'en' | 'de' }): string {
  const header = normalizeValue(args.excelKey);
  if (args.locale === 'en') return header;
  if (!header) return '';

  const germanTerms: Array<[RegExp, string]> = [
    [/\(approx\.?\)/gi, '(ca.)'],
    [/\bcapacitance\b/gi, 'Kapazität'],
    [/\binductance\b/gi, 'Induktivität'],
    [/\breactance\b/gi, 'Reaktanz'],
    [/\btest voltage\b/gi, 'Prüfspannung'],
    [/\brated voltage\b/gi, 'Nennspannung'],
    [/\boperating temperature range\b/gi, 'Temperaturbereich'],
    [/\bminimum sheath thickness\b/gi, 'Manteldicke (min.)'],
    [/\bsheath thickness\b/gi, 'Manteldicke'],
    [/\bnominal insulation thickness\b/gi, 'Isolationsdicke (nom.)'],
    [/\binsulation thickness\b/gi, 'Isolationsdicke'],
    [/\bdc resistance at 20\s*°?c\b/gi, 'DC-Leiterwiderstand (20 °C)'],
    [/\bouter diameter(?: of cable)?\b/gi, 'Außen-Ø'],
    [/\bbending radius\b/gi, 'Biegeradius'],
    [/\bpackaging\b/gi, 'Verpackung'],
    [/\bce\s*-?conformity\b/gi, 'CE-Konformität'],
  ];

  let label = header;
  for (const [english, german] of germanTerms) {
    label = label.replace(english, german);
  }
  return label;
}
/**
 * Returns the full label of a meta item.
 *
 * Known internal keys have fixed English and German labels; any other key falls
 * back to the formatted Excel header.
 *
 * @param args.key      Internal column key.
 * @param args.excelKey Header text from the sheet (used for the fallback).
 * @param args.locale   Target locale.
 */
function metaFullLabel(args: { key: string; excelKey: string; locale: 'en' | 'de' }): string {
  // [internal key, English label, German label]
  const knownLabels: Array<[string, string, string]> = [
    ['test_volt', 'Test voltage', 'Prüfspannung'],
    ['temp_range', 'Operating temperature range', 'Temperaturbereich'],
    ['max_op_temp', 'Conductor temperature (max.)', 'Leitertemperatur (max.)'],
    ['max_sc_temp', 'Short-circuit temperature (max.)', 'Kurzschlusstemperatur (max.)'],
    ['min_lay_temp', 'Minimum laying temperature', 'Minimale Verlegetemperatur'],
    ['min_store_temp', 'Minimum storage temperature', 'Minimale Lagertemperatur'],
    ['cpr', 'CPR class', 'CPR-Klasse'],
    ['flame', 'Flame retardant', 'Flammhemmend'],
  ];

  const wanted = normalizeValue(args.key);
  const entry = knownLabels.find(([id]) => id === wanted);
  if (!entry) return formatExcelHeaderLabel(args.excelKey);

  const [, english, german] = entry;
  return args.locale === 'de' ? german : english;
}
/**
 * Returns the abbreviated column header used in dense tables.
 *
 * Most keys map to a short symbol followed by the unit in square brackets. The
 * conductor and shape columns are localized words without a unit. Unknown keys are
 * returned unchanged (without a unit suffix).
 *
 * @param args.key    Internal column key.
 * @param args.locale Target locale (only affects the conductor/shape columns).
 * @param args.unit   Optional unit shown in brackets.
 */
function denseAbbrevLabel(args: { key: string; locale: 'en' | 'de'; unit?: string }): string {
  const unitSafe = normalizeUnit(args.unit || '')
    .replace(/Ω/gi, 'Ohm')
    .replace(/[\u00B5\u03BC]/g, 'u');
  const suffix = unitSafe ? ` [${unitSafe}]` : '';
  const { key, locale } = args;

  if (key === 'Cond') return locale === 'de' ? 'Leiter' : 'Cond.';
  if (key === 'shape') return locale === 'de' ? 'Form' : 'Shape';

  const symbols = new Map<string, string>([
    ['DI', 'DI'],
    ['RI', 'RI'],
    ['Wi', 'Wi'],
    ['Ibl', 'Ibl'],
    ['Ibe', 'Ibe'],
    ['Wm', 'Wm'],
    ['Rbv', 'Rbv'],
    ['Fzv', 'Fzv'],
    ['G', 'G'],
    ['Ik_cond', 'Ik'],
    ['Ik_screen', 'Ik_s'],
    ['Ø', 'Ø'],
    // Electrical. "Cap" rather than the lowercase key, which looks like an internal name.
    ['cap', 'Cap'],
    ['X', 'X'],
    ['test_volt', 'U_test'],
    ['rated_volt', 'U0/U'],
    ['temp_range', 'T'],
    ['max_op_temp', 'T_op'],
    ['max_sc_temp', 'T_sc'],
    ['min_store_temp', 'T_st'],
    ['min_lay_temp', 'T_lay'],
    ['cpr', 'CPR'],
    ['flame', 'FR'],
  ]);

  const symbol = symbols.get(key);
  if (symbol === undefined) return key || '';
  return `${symbol}${suffix}`;
}
/**
 * Joins the distinct, non-empty normalized options with " / ".
 *
 * Values are never truncated with an ellipsis; long lists are left to the layout.
 *
 * @returns '' for no options, the single value for one distinct option, otherwise
 *          the full list in first-seen order.
 */
function summarizeOptions(options: string[] | undefined): string {
  const normalized = (options || []).map(normalizeValue).filter(Boolean);
  const distinct = new Set(normalized);
  return Array.from(distinct).join(' / ');
}
/**
 * Extracts the first number from an option text (decimal commas are read as dots).
 *
 * @returns The parsed number, or null when the text contains no finite number.
 */
function parseNumericOption(value: string): number | null {
  const text = normalizeValue(value).replace(/,/g, '.');
  const found = /-?\d+(?:\.\d+)?/.exec(text);
  if (found === null) return null;

  const parsed = Number(found[0]);
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Summarizes numeric options as a "min–max" range.
 *
 * A range is only produced when the options contain at least four distinct numbers;
 * otherwise `{ ok: false, text: '' }` is returned so the caller can list the values.
 *
 * Fix: minimum and maximum used to be concatenated without a separator (1.5 and 240
 * rendered as "1.5240"). They are now joined with an en dash (U+2013, written as an
 * escape so it survives re-encoding).
 *
 * @param options Option texts; non-numeric entries are ignored.
 */
function summarizeNumericRange(options: string[] | undefined): { ok: boolean; text: string } {
  const numbers = (options || []).map(parseNumericOption).filter((n): n is number => n !== null);
  const distinct = Array.from(new Set(numbers));
  if (distinct.length < 4) return { ok: false, text: '' };

  const min = Math.min(...distinct);
  const max = Math.max(...distinct);
  return { ok: true, text: `${String(min)}\u2013${String(max)}` };
}
/**
 * Summarizes options as a numeric range when possible, otherwise as a value list.
 *
 * @param _label  Unused; kept for call-site compatibility.
 * @param options Option texts to summarize.
 */
function summarizeSmartOptions(_label: string, options: string[] | undefined): string {
  const { ok, text } = summarizeNumericRange(options);
  return ok ? text : summarizeOptions(options);
}
/**
 * Reduces a product designation to a comparison key: upper-cased, a trailing
 * "-<digits>" suffix removed, and everything except A-Z and 0-9 dropped.
 *
 * @param value Designation text; falsy input yields ''.
 */
function normalizeDesignation(value: string): string {
  const upper = String(value || '').toUpperCase();
  const withoutNumericSuffix = upper.replace(/-\d+$/g, '');
  return withoutNumericSuffix.replace(/[^A-Z0-9]+/g, '');
}
type ExcelColumnMapping = { header: string; unit: string; key: string };
type MappedExcelColumn = { excelKey: string; mapping: ExcelColumnMapping };

/**
 * Known Excel headers and the internal column they map to. A header matches an entry
 * when it contains the entry's key (case-insensitive); entries are tried in
 * declaration order and the first match wins.
 */
const EXCEL_COLUMN_MAPPING: Record<string, ExcelColumnMapping> = {
  'number of cores and cross-section': {
    header: 'Cross-section',
    unit: '',
    key: 'cross_section',
  },
  'ross section conductor': { header: 'Cross-section', unit: '', key: 'cross_section' },
  'diameter over insulation': { header: 'DI', unit: 'mm', key: 'DI' },
  'diameter over insulation (approx.)': { header: 'DI', unit: 'mm', key: 'DI' },
  'dc resistance at 20 °C': { header: 'RI', unit: 'Ohm/km', key: 'RI' },
  'dc resistance at 20°C': { header: 'RI', unit: 'Ohm/km', key: 'RI' },
  'resistance conductor': { header: 'RI', unit: 'Ohm/km', key: 'RI' },
  'maximum resistance of conductor': { header: 'RI', unit: 'Ohm/km', key: 'RI' },
  'insulation thickness': { header: 'Wi', unit: 'mm', key: 'Wi' },
  'nominal insulation thickness': { header: 'Wi', unit: 'mm', key: 'Wi' },
  'current ratings in air, trefoil': { header: 'Ibl', unit: 'A', key: 'Ibl' },
  'current ratings in air, trefoil*': { header: 'Ibl', unit: 'A', key: 'Ibl' },
  'current ratings in ground, trefoil': { header: 'Ibe', unit: 'A', key: 'Ibe' },
  'current ratings in ground, trefoil*': { header: 'Ibe', unit: 'A', key: 'Ibe' },
  'conductor shortcircuit current': { header: 'Ik', unit: 'kA', key: 'Ik_cond' },
  'screen shortcircuit current': { header: 'Ik', unit: 'kA', key: 'Ik_screen' },
  'sheath thickness': { header: 'Wm', unit: 'mm', key: 'Wm' },
  'minimum sheath thickness': { header: 'Wm', unit: 'mm', key: 'Wm' },
  'nominal sheath thickness': { header: 'Wm', unit: 'mm', key: 'Wm' },
  'bending radius': { header: 'Rbv', unit: 'mm', key: 'Rbv' },
  'bending radius (min.)': { header: 'Rbv', unit: 'mm', key: 'Rbv' },
  'outer diameter': { header: 'Ø', unit: 'mm', key: 'Ø' },
  'outer diameter (approx.)': { header: 'Ø', unit: 'mm', key: 'Ø' },
  'outer diameter of cable': { header: 'Ø', unit: 'mm', key: 'Ø' },
  'pulling force': { header: 'Fzv', unit: 'N', key: 'Fzv' },
  'max. pulling force': { header: 'Fzv', unit: 'N', key: 'Fzv' },
  'conductor aluminum': { header: 'Cond.', unit: '', key: 'Cond' },
  'conductor copper': { header: 'Cond.', unit: '', key: 'Cond' },
  weight: { header: 'G', unit: 'kg/km', key: 'G' },
  'weight (approx.)': { header: 'G', unit: 'kg/km', key: 'G' },
  'cable weight': { header: 'G', unit: 'kg/km', key: 'G' },
  'shape of conductor': { header: 'Conductor shape', unit: '', key: 'shape' },
  'operating temperature range': {
    header: 'Operating temp range',
    unit: '°C',
    key: 'temp_range',
  },
  'maximal operating conductor temperature': {
    header: 'Max operating temp',
    unit: '°C',
    key: 'max_op_temp',
  },
  'maximal short-circuit temperature': {
    header: 'Max short-circuit temp',
    unit: '°C',
    key: 'max_sc_temp',
  },
  'minimal storage temperature': {
    header: 'Min storage temp',
    unit: '°C',
    key: 'min_store_temp',
  },
  'minimal temperature for laying': {
    header: 'Min laying temp',
    unit: '°C',
    key: 'min_lay_temp',
  },
  'test voltage': { header: 'Test voltage', unit: 'kV', key: 'test_volt' },
  'rated voltage': { header: 'Rated voltage', unit: 'kV', key: 'rated_volt' },
  'cpr class': { header: 'CPR class', unit: '', key: 'cpr' },
  'flame retardant': { header: 'Flame retardant', unit: '', key: 'flame' },
  'self-extinguishing of single cable': { header: 'Flame retardant', unit: '', key: 'flame' },
  // High-value electrical/screen columns
  'capacitance (approx.)': { header: 'Capacitance', unit: 'uF/km', key: 'cap' },
  capacitance: { header: 'Capacitance', unit: 'uF/km', key: 'cap' },
  reactance: { header: 'Reactance', unit: 'Ohm/km', key: 'X' },
  'diameter over screen': { header: 'Diameter over screen', unit: 'mm', key: 'D_screen' },
  'metallic screen mm2': { header: 'Metallic screen', unit: 'mm2', key: 'S_screen' },
  'metallic screen': { header: 'Metallic screen', unit: 'mm2', key: 'S_screen' },
};

/** Lower-cased lookup list derived once from EXCEL_COLUMN_MAPPING (same order). */
const EXCEL_COLUMN_PATTERNS: Array<[string, ExcelColumnMapping]> = Object.entries(
  EXCEL_COLUMN_MAPPING,
).map(([pattern, mapping]): [string, ExcelColumnMapping] => [pattern.toLowerCase(), mapping]);

/** Meta items shown per voltage table, in display order. */
const META_KEY_PRIORITY = [
  'test_volt',
  'temp_range',
  'max_op_temp',
  'max_sc_temp',
  'min_lay_temp',
  'min_store_temp',
  'cpr',
  'flame',
];

/** Columns of the dense cross-section table, in display order. */
const DENSE_TABLE_KEY_ORDER = [
  'Cond',
  'shape',
  // Electrical properties (when present)
  'cap',
  'X',
  // Dimensions and ratings
  'DI',
  'RI',
  'Wi',
  'Ibl',
  'Ibe',
  'Ik_cond',
  'Wm',
  'Rbv',
  'Ø',
  // Screen data (when present)
  'D_screen',
  'S_screen',
  'Fzv',
  'G',
] as const;

/** Returns the data column headers of a row, i.e. all keys except the bookkeeping columns. */
function excelDataKeys(row: ExcelRow | null | undefined): string[] {
  return Object.keys(row || {}).filter((k) => Boolean(k) && k !== 'Part Number' && k !== 'Units');
}

/**
 * Builds the technical items and per-voltage tables of a product from the Excel index.
 *
 * Steps:
 * 1. Pick the widest row (most data columns) as the sample that defines the schema.
 * 2. Map the sample's headers to internal columns (first mapping per internal key wins).
 * 3. Keep only rows whose data columns equal the sample's.
 * 4. Group those rows by normalized rated voltage (a localized "unknown" bucket
 *    collects rows without one) and order the groups by voltage.
 * 5. Columns whose value is identical across all rows become technical items.
 * 6. Per voltage group, emit meta items (fixed priority order) and the dense table.
 *
 * Changes compared to the previous implementation: headers are matched against the
 * mapping keys as literal substrings instead of compiling the keys into RegExp
 * objects unescaped (keys such as "trefoil*" or "(approx.)" contain metacharacters);
 * loop-invariant lookups are computed once; null rows no longer throw.
 *
 * @returns `ok: false` with empty lists when the product has no Excel rows, no
 *          compatible rows, or no cross-section column.
 */
function buildExcelModel(args: {
  product: ProductData;
  locale: 'en' | 'de';
}): BuildExcelModelResult {
  const unavailable = (): BuildExcelModelResult => ({
    ok: false,
    technicalItems: [],
    voltageTables: [],
  });

  const match = findExcelForProduct(args.product) as ExcelMatch | null;
  if (!match || match.rows.length === 0) return unavailable();
  const units = match.units || {};
  const rows = match.rows;

  // 1. The widest row defines the schema.
  let sample: ExcelRow = rows.find((r) => r && Object.keys(r).length > 0) || {};
  let maxColumns = excelDataKeys(sample).length;
  for (const r of rows) {
    const cols = excelDataKeys(r).length;
    if (cols > maxColumns) {
      sample = r;
      maxColumns = cols;
    }
  }

  // 2. Map headers to internal columns; keep the first header per internal key.
  const deduplicated: MappedExcelColumn[] = [];
  const seenKeys = new Set<string>();
  for (const excelKey of excelDataKeys(sample)) {
    const normalized = normalizeValue(excelKey).toLowerCase();
    const hit = EXCEL_COLUMN_PATTERNS.find(([pattern]) => normalized.includes(pattern));
    if (!hit) continue;
    const mapping = hit[1];
    if (seenKeys.has(mapping.key)) continue;
    seenKeys.add(mapping.key);
    deduplicated.push({ excelKey, mapping });
  }

  // 3. Only rows with exactly the sample's data columns are comparable.
  const sampleSignature = JSON.stringify(excelDataKeys(sample).sort());
  const compatibleRows = rows.filter(
    (r) => JSON.stringify(excelDataKeys(r).sort()) === sampleSignature,
  );
  if (compatibleRows.length === 0) return unavailable();

  const csKey = guessColumnKey(sample, [
    /number of cores and cross-section/i,
    /cross.?section/i,
    /ross section conductor/i,
  ]);
  const voltageKey = guessColumnKey(sample, [
    /rated voltage/i,
    /voltage rating/i,
    /nennspannung/i,
    /spannungs/i,
  ]);
  if (!csKey) return unavailable();

  const cellText = (rowIndex: number, columnKey: string): string =>
    normalizeValue(String(compatibleRows[rowIndex]?.[columnKey] ?? ''));

  // 4. Group row indices by voltage label.
  const unknownVoltage = args.locale === 'de' ? 'Spannung unbekannt' : 'Voltage unknown';
  const byVoltage = new Map<string, number[]>();
  for (let i = 0; i < compatibleRows.length; i++) {
    if (!cellText(i, csKey)) continue;
    const rawV = voltageKey ? cellText(i, voltageKey) : '';
    const key = normalizeVoltageLabel(rawV || '') || unknownVoltage;
    const arr = byVoltage.get(key) ?? [];
    arr.push(i);
    byVoltage.set(key, arr);
  }
  const voltageKeysSorted = Array.from(byVoltage.keys()).sort((a, b) => {
    const na = parseVoltageSortKey(a);
    const nb = parseVoltageSortKey(b);
    if (na !== nb) return na - nb;
    return a.localeCompare(b);
  });

  // 5. Columns that are constant over all rows become technical items.
  const technicalItems: KeyValueItem[] = [];
  const globalConstantColumns = new Set<string>();
  for (const { excelKey, mapping } of deduplicated) {
    const values = compatibleRows
      .map((r) => normalizeValue(String(r?.[excelKey] ?? '')))
      .filter(Boolean);
    if (values.length === 0) continue;
    if (new Set(values.map((v) => v.toLowerCase())).size !== 1) continue;

    globalConstantColumns.add(excelKey);
    const unit = normalizeUnit(units[excelKey] || mapping.unit || '');
    const labelBase = technicalFullLabel({ key: mapping.key, excelKey, locale: args.locale });
    const label = formatExcelHeaderLabel(labelBase, unit);
    const value = compactCellForDenseTable(values[0], unit, args.locale);
    if (!technicalItems.some((t) => t.label === label)) {
      technicalItems.push({ label, value, unit });
    }
  }
  technicalItems.sort((a, b) => a.label.localeCompare(b.label));

  // Loop-invariant lookups for step 6.
  const metaKeyPrioritySet = new Set<string>(META_KEY_PRIORITY);
  const denseTableKeys = new Set<string>(DENSE_TABLE_KEY_ORDER);
  // If conductor material is missing in Excel, derive it from the designation:
  // NA... => Al, N... => Cu (common for this dataset).
  const designation = normalizeDesignation(
    args.product.name || args.product.slug || args.product.sku || '',
  );
  const derivedConductor = /^NA/.test(designation) ? 'Al' : /^N/.test(designation) ? 'Cu' : '';

  // 6. One table per voltage group.
  const voltageTables: VoltageTableModel[] = [];
  for (const vKey of voltageKeysSorted) {
    const indices = byVoltage.get(vKey) || [];
    if (!indices.length) continue;

    const crossSections = indices.map((idx) => cellText(idx, csKey));

    const metaItems: KeyValueItem[] = [];
    if (voltageKey) {
      metaItems.push({
        label: args.locale === 'de' ? 'Spannung' : 'Voltage',
        value: normalizeVoltageLabel(cellText(indices[0], voltageKey) || ''),
      });
    }

    const metaCandidates = new Map<string, KeyValueItem>();
    const mappedByKey = new Map<string, MappedExcelColumn>();
    for (const column of deduplicated) {
      const { excelKey, mapping } = column;
      if (excelKey === csKey || excelKey === voltageKey) continue;
      const values = indices.map((idx) => cellText(idx, excelKey)).filter(Boolean);
      if (!values.length) continue;

      if (denseTableKeys.has(mapping.key)) {
        if (!mappedByKey.has(mapping.key)) mappedByKey.set(mapping.key, column);
        continue;
      }
      // Globally constant columns are already listed as technical items, except the
      // prioritized meta keys, which are repeated next to each table.
      if (globalConstantColumns.has(excelKey) && !metaKeyPrioritySet.has(mapping.key)) continue;

      const unit = normalizeUnit(units[excelKey] || mapping.unit || '');
      const isConstant = new Set(values.map((v) => v.toLowerCase())).size === 1;
      const value = isConstant
        ? compactCellForDenseTable(values[0], unit, args.locale)
        : summarizeSmartOptions(excelKey, values);
      const label = metaFullLabel({ key: mapping.key, excelKey, locale: args.locale });
      metaCandidates.set(mapping.key, { label, value, unit });
    }
    for (const k of META_KEY_PRIORITY) {
      const item = metaCandidates.get(k);
      if (item && item.label && item.value) metaItems.push(item);
    }

    if (!mappedByKey.has('Cond')) {
      mappedByKey.set('Cond', {
        excelKey: '',
        mapping: { header: 'Cond.', unit: '', key: 'Cond' },
      });
    }

    const orderedTableColumns = DENSE_TABLE_KEY_ORDER.flatMap((k) => {
      const column = mappedByKey.get(k);
      if (!column) return [];
      const { excelKey, mapping } = column;
      const unit = normalizeUnit((excelKey ? units[excelKey] : '') || mapping.unit || '');
      return [
        {
          key: mapping.key,
          label:
            denseAbbrevLabel({ key: mapping.key, locale: args.locale, unit }) ||
            formatExcelHeaderLabel(excelKey, unit),
          get: (rowIndex: number): string => {
            if (mapping.key === 'Cond' && !excelKey) return derivedConductor;
            const raw = excelKey ? cellText(indices[rowIndex], excelKey) : '';
            return compactCellForDenseTable(raw, unit, args.locale);
          },
        },
      ];
    });

    voltageTables.push({
      voltageLabel: vKey,
      metaItems,
      crossSections,
      columns: orderedTableColumns,
    });
  }

  return { ok: true, technicalItems, voltageTables };
}
/**
 * Tells whether a product belongs to the medium-voltage range.
 *
 * The slug, path, translation key and category names are joined with spaces and
 * searched for "medium voltage" (with optional hyphen/space) or "mittelspannung",
 * case-insensitively.
 */
function isMediumVoltageProduct(product: ProductData): boolean {
  const fields = [product.slug, product.path, product.translationKey];
  for (const category of product.categories || []) {
    fields.push(category.name);
  }
  const haystack = fields.filter(Boolean).join(' ');
  return haystack.search(/medium[-\s]?voltage|mittelspannung/i) !== -1;
}
// A column of the medium-voltage sheet selected for the cross-section table:
// its header key plus its normalized unit ('' when the units map has no entry for it).
type AbbrevColumn = { colKey: string; unit: string };
/**
 * Tells whether a header key looks like a compact abbreviation rather than a sentence.
 *
 * Examples from the MV sheet: "LD mm", "RI Ohm", "G kg", "SBL 30", "SBE 20", "BK", "BR", "LF".
 * Accepted keys are non-empty, do not start with "__EMPTY", are at most 12 characters
 * long, contain no run of four or more lowercase letters, and contain at least one
 * uppercase letter or "Ø".
 */
function isAbbreviatedHeaderKey(key: string): boolean {
  const header = normalizeValue(key);
  return (
    Boolean(header) &&
    !/^__EMPTY/i.test(header) &&
    header.length <= 12 &&
    !/[a-z]{4,}/.test(header) &&
    /[A-ZØ]/.test(header)
  );
}
/**
 * Selects the abbreviated data columns of the medium-voltage header row.
 *
 * The part-number, cross-section and rated-voltage columns are excluded; every
 * remaining key that looks like an abbreviation is returned, in header order,
 * together with its normalized unit.
 */
function extractAbbrevColumnsFromMediumVoltageHeader(args: {
  headerRow: Record<string, unknown>;
  units: Record<string, string>;
  partNumberKey: string;
  crossSectionKey: string;
  ratedVoltageKey: string | null;
}): AbbrevColumn[] {
  const reserved = new Set<string>([args.partNumberKey, args.crossSectionKey]);
  if (args.ratedVoltageKey) reserved.add(args.ratedVoltageKey);

  return Object.keys(args.headerRow || {})
    .filter(
      (colKey) => Boolean(colKey) && !reserved.has(colKey) && isAbbreviatedHeaderKey(colKey),
    )
    .map((colKey) => ({ colKey, unit: normalizeUnit(args.units[colKey] || '') }));
}
/**
 * Builds the cross-section tables of a medium-voltage product from the dedicated
 * MV sheet, whose abbreviated column keys carry their description in the header row.
 *
 * Rows are grouped by normalized rated voltage (rows without one go to a localized
 * "unknown" bucket) and the groups are ordered by voltage. Each table lists one
 * column per abbreviated header. Legend items map each abbreviation to its header
 * description when the two differ.
 *
 * @returns `ok: false` with empty lists when the product has no rows in the sheet,
 *          the sheet has no cross-section column, or no abbreviated columns exist.
 *          `technicalItems` is always empty.
 */
function buildMediumVoltageCrossSectionTableFromNewExcel(args: {
  product: ProductData;
  locale: 'en' | 'de';
}): BuildExcelModelResult & { legendItems: KeyValueItem[] } {
  const unavailable = (): BuildExcelModelResult & { legendItems: KeyValueItem[] } => ({
    ok: false,
    technicalItems: [],
    voltageTables: [],
    legendItems: [],
  });

  const mv = findMediumVoltageCrossSectionExcelForProduct(
    args.product,
  ) as MediumVoltageCrossSectionExcelMatch | null;
  if (!mv || !mv.rows.length) return unavailable();
  if (!mv.crossSectionKey) return unavailable();

  const abbrevCols = extractAbbrevColumnsFromMediumVoltageHeader({
    headerRow: mv.headerRow,
    units: mv.units,
    partNumberKey: mv.partNumberKey,
    crossSectionKey: mv.crossSectionKey,
    ratedVoltageKey: mv.ratedVoltageKey,
  });
  if (!abbrevCols.length) return unavailable();

  // Normalized text of one cell; '' when the row or the cell is missing.
  const cellText = (rowIndex: number, columnKey: string): string =>
    normalizeValue(String((mv.rows[rowIndex] as Record<string, unknown>)?.[columnKey] ?? ''));

  // Legend: abbreviation -> description from the header row.
  const legendItems: KeyValueItem[] = abbrevCols
    .map((col) => ({
      label: col.colKey,
      value: normalizeValue(String(mv.headerRow[col.colKey] || '')),
    }))
    .filter((item) => Boolean(item.value) && item.value !== item.label);

  const unknownVoltage = args.locale === 'de' ? 'Spannung unbekannt' : 'Voltage unknown';
  const rowIndicesByVoltage = new Map<string, number[]>();
  for (let rowIndex = 0; rowIndex < mv.rows.length; rowIndex++) {
    if (!cellText(rowIndex, mv.crossSectionKey)) continue;
    const rawVoltage = mv.ratedVoltageKey ? cellText(rowIndex, mv.ratedVoltageKey) : '';
    const groupKey = normalizeVoltageLabel(rawVoltage || '') || unknownVoltage;
    const bucket = rowIndicesByVoltage.get(groupKey);
    if (bucket) {
      bucket.push(rowIndex);
    } else {
      rowIndicesByVoltage.set(groupKey, [rowIndex]);
    }
  }

  const orderedVoltages = Array.from(rowIndicesByVoltage.keys()).sort((left, right) => {
    const leftKey = parseVoltageSortKey(left);
    const rightKey = parseVoltageSortKey(right);
    return leftKey !== rightKey ? leftKey - rightKey : left.localeCompare(right);
  });

  const voltageTables: VoltageTableModel[] = [];
  for (const voltageLabel of orderedVoltages) {
    const indices = rowIndicesByVoltage.get(voltageLabel) || [];
    if (!indices.length) continue;

    const metaItems: KeyValueItem[] = [];
    if (mv.ratedVoltageKey) {
      metaItems.push({
        label: args.locale === 'de' ? 'Spannung' : 'Voltage',
        value: normalizeVoltageLabel(cellText(indices[0], mv.ratedVoltageKey) || ''),
      });
    }

    voltageTables.push({
      voltageLabel,
      metaItems,
      crossSections: indices.map((idx) => cellText(idx, mv.crossSectionKey)),
      columns: abbrevCols.map((col) => ({
        key: col.colKey,
        // The abbreviated key itself is used as the table header.
        label: normalizeValue(col.colKey),
        get: (rowIndex: number) =>
          compactCellForDenseTable(cellText(indices[rowIndex], col.colKey), col.unit, args.locale),
      })),
    });
  }

  return { ok: true, technicalItems: [], voltageTables, legendItems };
}
/**
 * Assembles the complete datasheet model of a product for one locale.
 *
 * Technical items always come from the legacy Excel index. Cross-section tables come
 * from the dedicated medium-voltage sheet when the product is a medium-voltage product
 * and that sheet yields data; otherwise they fall back to the legacy Excel model.
 * Table cells are materialized into strings here.
 *
 * @param args.product Product to render.
 * @param args.locale  Target locale.
 */
export function buildDatasheetModel(args: {
  product: ProductData;
  locale: 'en' | 'de';
}): DatasheetModel {
  const { product, locale } = args;

  const labels = getLabels(locale);
  const categoriesLine = (product.categories || [])
    .map((category) => stripHtml(category.name))
    .join(' • ');
  const descriptionText = stripHtml(product.shortDescriptionHtml || product.descriptionHtml || '');
  const heroSrc = resolveMediaToLocalPath(product.featuredImage || product.images?.[0] || null);
  const productUrl = getProductUrl(product);

  // Technical data MUST stay sourced from the existing Excel index (legacy sheets).
  const excelModel = buildExcelModel({ product, locale });

  // Cross-section tables: for medium voltage only, prefer the new MV sheet.
  let crossSectionModel: BuildExcelModelResult & { legendItems: KeyValueItem[] } = {
    ok: false,
    technicalItems: [],
    voltageTables: [],
    legendItems: [],
  };
  if (isMediumVoltageProduct(product)) {
    crossSectionModel = buildMediumVoltageCrossSectionTableFromNewExcel({ product, locale });
  }

  let sourceTables: VoltageTableModel[] = [];
  if (crossSectionModel.ok) {
    sourceTables = crossSectionModel.voltageTables;
  } else if (excelModel.ok) {
    sourceTables = excelModel.voltageTables;
  }

  const voltageTables: DatasheetVoltageTable[] = sourceTables.map((table) => ({
    voltageLabel: table.voltageLabel,
    metaItems: table.metaItems,
    columns: table.columns.map(({ key, label }) => ({ key, label })),
    rows: table.crossSections.map((configuration, rowIndex) => ({
      configuration,
      cells: table.columns.map((column) => compactNumericForLocale(column.get(rowIndex), locale)),
    })),
  }));

  return {
    locale,
    product: {
      id: product.id,
      name: stripHtml(product.name),
      sku: product.sku,
      categoriesLine,
      descriptionText,
      heroSrc,
      productUrl,
    },
    labels,
    technicalItems: excelModel.ok ? excelModel.technicalItems : [],
    voltageTables,
    legendItems: crossSectionModel.legendItems || [],
  };
}

View File

@@ -0,0 +1,204 @@
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';
import type { ProductData } from './types';
import { normalizeValue } from './utils';
// One spreadsheet row as parsed from the converter's JSON output; cell values are untyped.
type ExcelRow = Record<string, unknown>;
// All rows indexed under one normalized part number, plus the units read from the
// sheet's "Units" row (column header -> unit text).
export type ExcelMatch = { rows: ExcelRow[]; units: Record<string, string> };
// Lookup result for one product in the medium-voltage cross-section sheet.
// NOTE(review): the fields mirror MediumVoltageCrossSectionIndex; the function that
// fills them is not fully visible here — confirm they are copied from the index.
export type MediumVoltageCrossSectionExcelMatch = {
headerRow: ExcelRow;
rows: ExcelRow[];
units: Record<string, string>;
partNumberKey: string;
crossSectionKey: string;
ratedVoltageKey: string | null;
};
// Workbooks that feed the general Excel index (getExcelIndex). Paths are resolved
// against the current working directory; files that do not exist are skipped on load.
const EXCEL_SOURCE_FILES = [
path.join(process.cwd(), 'data/excel/high-voltage.xlsx'),
path.join(process.cwd(), 'data/excel/medium-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/low-voltage-KM.xlsx'),
path.join(process.cwd(), 'data/excel/solar-cables.xlsx'),
];
// Medium-voltage cross-section table (new format with multi-row header).
// IMPORTANT: this must NOT be used for the technical data table.
// Note that the file name contains a space.
const MV_CROSS_SECTION_FILE = path.join(process.cwd(), 'data/excel/medium-voltage-KM 170126.xlsx');
// In-memory index of the medium-voltage cross-section sheet.
type MediumVoltageCrossSectionIndex = {
// First row of the sheet; its cell values are searched to locate the key columns.
headerRow: ExcelRow;
// Column key -> unit text, taken from the row whose part-number cell is "Units".
units: Record<string, string>;
// Column key of the part number ('__EMPTY' when no header cell matches).
partNumberKey: string;
// Column key of the cross-section column ('' when not found).
crossSectionKey: string;
// Column key of the rated-voltage column (null when not found).
ratedVoltageKey: string | null;
// Data rows grouped by normalized part number (see normalizeExcelKey).
rowsByDesignation: Map<string, ExcelRow[]>;
};
// Module-level caches: each index is built on first use and then reused.
let EXCEL_INDEX: Map<string, ExcelMatch> | null = null;
let MV_CROSS_SECTION_INDEX: MediumVoltageCrossSectionIndex | null = null;
/**
 * Turns a part number or product identifier into an index key: upper-cased, a
 * trailing "-<digits>" suffix removed, and everything except A-Z and 0-9 dropped.
 *
 * @param value Identifier text; falsy input yields ''.
 */
export function normalizeExcelKey(value: string): string {
  const upper = String(value || '').toUpperCase();
  const withoutNumericSuffix = upper.replace(/-\d+$/g, '');
  return withoutNumericSuffix.replace(/[^A-Z0-9]+/g, '');
}
/**
 * Converts a workbook to JSON rows by running `xlsx-cli` through `npx`.
 *
 * Everything printed before the first "[" is ignored. If the output contains no JSON
 * array, or the JSON cannot be parsed, an empty list is returned.
 *
 * The output buffer limit is raised explicitly: with the default limit of
 * `execSync` a large workbook's JSON dump would abort the conversion with ENOBUFS.
 *
 * NOTE(review): `filePath` is interpolated into a shell command inside double quotes.
 * That is fine for the fixed paths used in this file; do not pass untrusted input.
 *
 * @param filePath Path of the workbook.
 * @returns The parsed rows, or [] when no JSON array could be read.
 * @throws When the command itself fails (non-zero exit, `npx` unavailable).
 */
function loadExcelRows(filePath: string): ExcelRow[] {
  const out = execSync(`npx -y xlsx-cli -j "${filePath}"`, {
    encoding: 'utf8',
    stdio: ['ignore', 'pipe', 'ignore'],
    maxBuffer: 256 * 1024 * 1024,
  });
  const trimmed = out.trim();
  const jsonStart = trimmed.indexOf('[');
  if (jsonStart < 0) return [];
  try {
    const parsed: unknown = JSON.parse(trimmed.slice(jsonStart));
    return Array.isArray(parsed) ? (parsed as ExcelRow[]) : [];
  } catch {
    // Best-effort: unparsable converter output is treated as "no rows".
    return [];
  }
}
/**
 * Finds the column key whose header cell text matches `pattern`.
 *
 * Cells are checked in object key order; empty cells are skipped.
 *
 * @returns The key of the first matching cell, or null when none matches.
 */
function findKeyByHeaderValue(headerRow: ExcelRow, pattern: RegExp): string | null {
  const hit = Object.entries(headerRow || {}).find(([, cell]) => {
    const text = normalizeValue(String(cell ?? ''));
    return Boolean(text) && pattern.test(text);
  });
  return hit ? hit[0] : null;
}
/**
 * Returns the index of the medium-voltage cross-section sheet, building and caching
 * it on first use.
 *
 * The first row is treated as the header row: its cell texts locate the part-number,
 * cross-section and rated-voltage columns. The row whose part-number cell reads
 * "Units" supplies the units. All other rows with a part number are grouped by their
 * normalized part number. A missing workbook yields an empty index.
 */
function getMediumVoltageCrossSectionIndex(): MediumVoltageCrossSectionIndex {
  if (MV_CROSS_SECTION_INDEX) return MV_CROSS_SECTION_INDEX;

  const rows = fs.existsSync(MV_CROSS_SECTION_FILE) ? loadExcelRows(MV_CROSS_SECTION_FILE) : [];
  const headerRow = (rows[0] || {}) as ExcelRow;

  const partNumberKey = findKeyByHeaderValue(headerRow, /^part\s*number$/i) || '__EMPTY';
  const crossSectionKey = findKeyByHeaderValue(headerRow, /querschnitt|cross.?section/i) || '';
  const ratedVoltageKey =
    findKeyByHeaderValue(headerRow, /rated voltage|voltage rating|nennspannung/i) || null;

  const partNumberOf = (row: ExcelRow): string =>
    normalizeValue(String((row as ExcelRow)?.[partNumberKey] ?? ''));

  const units: Record<string, string> = {};
  const unitsRow = rows.find((row) => partNumberOf(row) === 'Units');
  Object.entries(unitsRow ?? {}).forEach(([column, cell]) => {
    if (column === partNumberKey) return;
    const unit = normalizeValue(String(cell ?? ''));
    if (unit) units[column] = unit;
  });

  const rowsByDesignation = new Map<string, ExcelRow[]>();
  for (const row of rows) {
    if (row === headerRow) continue;
    const partNumber = partNumberOf(row);
    if (!partNumber || partNumber === 'Units' || partNumber === 'Part Number') continue;
    const designation = normalizeExcelKey(partNumber);
    if (!designation) continue;

    const bucket = rowsByDesignation.get(designation);
    if (bucket) {
      bucket.push(row);
    } else {
      rowsByDesignation.set(designation, [row]);
    }
  }

  MV_CROSS_SECTION_INDEX = {
    headerRow,
    units,
    partNumberKey,
    crossSectionKey,
    ratedVoltageKey,
    rowsByDesignation,
  };
  return MV_CROSS_SECTION_INDEX;
}
/**
 * Returns the designation -> Excel match index, building and caching it on the
 * first call.
 *
 * Every existing file in `EXCEL_SOURCE_FILES` is loaded. Within a file, the row
 * whose `Part Number` equals `Units` provides the per-column units, and every
 * other row with a part number is filed under its `normalizeExcelKey` key. When
 * a key occurs in several files the rows are concatenated and the units map with
 * the most entries is kept.
 *
 * @returns The cached index.
 */
export function getExcelIndex(): Map<string, ExcelMatch> {
  if (EXCEL_INDEX) return EXCEL_INDEX;

  const index = new Map<string, ExcelMatch>();

  for (const sourceFile of EXCEL_SOURCE_FILES) {
    if (!fs.existsSync(sourceFile)) continue;
    const sheetRows = loadExcelRows(sourceFile);

    // One units map per file, shared by every entry created from that file.
    const units: Record<string, string> = {};
    const unitsRow = sheetRows.find((row) => row && row['Part Number'] === 'Units');
    if (unitsRow) {
      Object.entries(unitsRow).forEach(([column, raw]) => {
        if (column === 'Part Number') return;
        const unit = normalizeValue(String(raw ?? ''));
        if (unit) units[column] = unit;
      });
    }
    const unitCount = Object.keys(units).length;

    sheetRows.forEach((row) => {
      const partNumber = row?.['Part Number'];
      if (!partNumber || partNumber === 'Units') return;

      const key = normalizeExcelKey(String(partNumber));
      if (!key) return;

      const existing = index.get(key);
      if (existing === undefined) {
        index.set(key, { rows: [row], units });
        return;
      }
      existing.rows.push(row);
      // Prefer whichever file described more columns with units.
      if (Object.keys(existing.units).length < unitCount) existing.units = units;
    });
  }

  EXCEL_INDEX = index;
  return index;
}
/**
 * Looks up the Excel rows that belong to a product.
 *
 * Candidate designations are tried in this order: `name`, `slug` without a
 * trailing `-<digits>`, `sku`, `translationKey`. Empty candidates are skipped.
 * The first candidate whose normalised key has at least one row wins.
 *
 * @param product Product to resolve.
 * @returns The matching index entry, or `null` when no candidate matches.
 */
export function findExcelForProduct(product: ProductData): ExcelMatch | null {
  const index = getExcelIndex();

  const slugBase = product.slug ? product.slug.replace(/-\d+$/g, '') : '';
  const designations = [product.name, slugBase, product.sku, product.translationKey].filter(
    Boolean,
  ) as string[];

  const matched = designations
    .map((designation) => index.get(normalizeExcelKey(designation)))
    .find((entry) => entry && entry.rows.length);

  return matched ?? null;
}
/**
 * Looks up the medium-voltage cross-section rows that belong to a product.
 *
 * Candidate designations are tried in this order: `name`, `slug` without a
 * trailing `-<digits>`, `sku`, `translationKey`. Empty candidates are skipped.
 * The first candidate with at least one row in the index wins.
 *
 * @param product Product to resolve.
 * @returns The matched rows together with the index's header row, units and
 *   column keys, or `null` when no candidate matches.
 */
export function findMediumVoltageCrossSectionExcelForProduct(
  product: ProductData,
): MediumVoltageCrossSectionExcelMatch | null {
  const {
    headerRow,
    units,
    partNumberKey,
    crossSectionKey,
    ratedVoltageKey,
    rowsByDesignation,
  } = getMediumVoltageCrossSectionIndex();

  const slugBase = product.slug ? product.slug.replace(/-\d+$/g, '') : '';
  const designations = [product.name, slugBase, product.sku, product.translationKey].filter(
    Boolean,
  ) as string[];

  for (const designation of designations) {
    const rows = rowsByDesignation.get(normalizeExcelKey(designation));
    if (!rows || rows.length === 0) continue;
    return { headerRow, rows, units, partNumberKey, crossSectionKey, ratedVoltageKey };
  }
  return null;
}

View File

@@ -0,0 +1,51 @@
/**
 * Raw product record consumed by the datasheet generator.
 */
export interface ProductData {
/** Numeric product id; used for the `product-<id>` fallback PDF file name. */
id: number;
/** Display name; first candidate when matching the product against the Excel indexes. */
name: string;
/** Short description; presumably HTML markup, as the name suggests (TODO confirm). */
shortDescriptionHtml: string;
/** Long description; presumably HTML markup, as the name suggests (TODO confirm). */
descriptionHtml: string;
/** Image references for the product; format not established here. */
images: string[];
/** Primary image reference, or `null` when there is none. */
featuredImage: string | null;
/** Stock-keeping unit; also tried as an Excel lookup candidate. */
sku: string;
/** URL slug; a trailing `-<digits>` is stripped before Excel lookup, and it is the preferred base for the PDF file name. */
slug?: string;
/** Site-relative path appended to the site URL to form the product URL. */
path?: string;
/** Fallback identifier used for Excel lookup and for the PDF file name when `slug` is absent. */
translationKey?: string;
/** Language of this record. */
locale?: 'en' | 'de';
/** Categories the product belongs to. */
categories: Array<{ name: string }>;
/** Named attributes, each with its list of option values. */
attributes: Array<{
name: string;
options: string[];
}>;
}
/** A labelled value with an optional unit, used for technical, meta and legend entries. */
export type KeyValueItem = { label: string; value: string; unit?: string };
/**
 * One table of a datasheet, identified by a voltage label.
 */
export type DatasheetVoltageTable = {
/** Label identifying the voltage this table belongs to. */
voltageLabel: string;
/** Key/value entries attached to the table as a whole. */
metaItems: KeyValueItem[];
/** Column definitions: stable `key` plus display `label`. */
columns: Array<{ key: string; label: string }>;
/** Table rows; `cells` presumably align index-for-index with `columns` (TODO confirm against the builder). */
rows: Array<{ configuration: string; cells: string[] }>;
};
/**
 * Complete data model for one product datasheet in one locale.
 */
export type DatasheetModel = {
/** Language the datasheet is rendered in. */
locale: 'en' | 'de';
/** Product header information. */
product: {
id: number;
name: string;
sku: string;
/** Categories flattened into a single line of text. */
categoriesLine: string;
/** Description as plain text. */
descriptionText: string;
/** Source of the hero image, or `null` when none is available. */
heroSrc: string | null;
/** Absolute URL of the product page. */
productUrl: string;
};
/** Localised section headings and captions; same keys as the objects returned by `getLabels`. */
labels: {
datasheet: string;
description: string;
technicalData: string;
crossSection: string;
sku: string;
noImage: string;
};
/** Product-level technical key/value entries. */
technicalItems: KeyValueItem[];
/** One table per voltage label. */
voltageTables: DatasheetVoltageTable[];
/** Legend entries for the datasheet. */
legendItems: KeyValueItem[];
};

View File

@@ -0,0 +1,74 @@
import * as path from 'path';
import type { ProductData } from './types';
/**
 * Static configuration shared by the datasheet utilities.
 * File-system paths are resolved against the current working directory at module load.
 */
export const CONFIG = {
// Base URL that product paths are appended to when building product links.
siteUrl: 'https://klz-cables.com',
// Absolute path of the `public` directory under the working directory.
publicDir: path.join(process.cwd(), 'public'),
// Absolute path of the processed asset-map JSON file.
assetMapFile: path.join(process.cwd(), 'data/processed/asset-map.json'),
} as const;
/**
 * Converts an HTML fragment (or any raw string) to a single line of plain text.
 *
 * Steps, in order:
 * 1. Remove everything that looks like a tag (`<...>`); tags are deleted, not
 *    replaced by a space.
 * 2. Normalise to Unicode NFC.
 * 3. Replace typographic characters with plain equivalents (non-breaking spaces,
 *    dashes, curly quotes, ellipsis, bullet, `<=`/`>=`/`~`, Ohm, micro, arrows).
 * 4. Drop non-whitespace control characters.
 * 5. Collapse every whitespace run to one space and trim.
 *
 * HTML entities such as `&amp;` are not decoded.
 *
 * @param html Input markup; falsy input yields `''`.
 * @returns The cleaned, single-spaced text.
 */
export function stripHtml(html: string): string {
  if (!html) return '';

  const withoutTags = String(html)
    .replace(/<[^>]*>/g, '')
    .normalize('NFC');

  const plainPunctuation = withoutTags
    .replace(/[\u00A0\u202F]/g, ' ')
    .replace(/[\u2013\u2014]/g, '-')
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/[\u201C\u201D]/g, '"')
    .replace(/\u2026/g, '...')
    .replace(/[\u2022]/g, '·')
    .replace(/[\u2264]/g, '<=')
    .replace(/[\u2265]/g, '>=')
    .replace(/[\u2248]/g, '~')
    .replace(/[\u03A9\u2126]/g, 'Ohm')
    .replace(/[\u00B5\u03BC]/g, 'u')
    .replace(/[\u2193]/g, 'v')
    .replace(/[\u2191]/g, '^');

  // Tab, LF, VT, FF and CR (U+0009-U+000D) are deliberately NOT stripped here:
  // they are whitespace and must survive until the collapse below, otherwise
  // "line one\nline two" would be fused into "line oneline two".
  // eslint-disable-next-line no-control-regex
  const withoutControlChars = plainPunctuation.replace(/[\u0000-\u0008\u000E-\u001F\u007F]/g, '');

  return withoutControlChars.replace(/\s+/g, ' ').trim();
}
/**
 * Normalises a raw cell or attribute value to trimmed, single-spaced plain text.
 *
 * @param value Raw value, possibly containing markup.
 * @returns `stripHtml(value)` with whitespace runs collapsed and ends trimmed.
 */
export function normalizeValue(value: string): string {
  const plainText = stripHtml(value);
  const singleSpaced = plainText.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Builds the public URL of a product.
 *
 * @param product Product whose `path` is appended to the configured site URL.
 * @returns Site URL plus `product.path`, or the bare site URL when the path is missing or empty.
 */
export function getProductUrl(product: ProductData): string {
  const { siteUrl } = CONFIG;
  return product.path ? `${siteUrl}${product.path}` : siteUrl;
}
/**
 * Derives the PDF file name for a product datasheet.
 *
 * The base is the first non-empty of `slug`, `translationKey` and
 * `product-<id>`. It is lower-cased, every character outside `a-z0-9-` becomes
 * a hyphen, hyphen runs are collapsed, and one leading/trailing hyphen is removed.
 *
 * @param product Product the datasheet belongs to.
 * @param locale Locale suffix of the file.
 * @returns `<clean-base>-<locale>.pdf`.
 */
export function generateFileName(product: ProductData, locale: 'en' | 'de'): string {
  const stem = product.slug || product.translationKey || `product-${product.id}`;

  const lowered = stem.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9-]/g, '-');
  const collapsed = hyphenated.replace(/-+/g, '-');
  const cleanStem = collapsed.replace(/^-|-$/g, '');

  return cleanStem + '-' + locale + '.pdf';
}
/**
 * Returns the fixed datasheet headings and captions for a locale.
 *
 * @param locale Language of the datasheet.
 * @returns A freshly created label object for `locale`.
 */
export function getLabels(locale: 'en' | 'de') {
  const english = {
    datasheet: 'PRODUCT DATASHEET',
    description: 'DESCRIPTION',
    technicalData: 'TECHNICAL DATA',
    crossSection: 'CROSS-SECTION DATA',
    sku: 'SKU',
    noImage: 'No image available',
  };
  const german = {
    datasheet: 'PRODUKTDATENBLATT',
    description: 'BESCHREIBUNG',
    technicalData: 'TECHNISCHE DATEN',
    crossSection: 'QUERSCHNITTSDATEN',
    sku: 'ARTIKELNUMMER',
    noImage: 'Kein Bild verfügbar',
  };

  const labelsByLocale = { en: english, de: german };
  return labelsByLocale[locale];
}