Files
klz-cables.com/test-enrichment.js
2026-01-07 11:34:56 +01:00

173 lines
5.1 KiB
JavaScript

const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
// Key helper functions copied from the PDF script
// Excel workbooks consulted when building the lookup index, each resolved
// against the current working directory.
const EXCEL_SOURCE_FILES = [
  'data/source/high-voltage.xlsx',
  'data/source/medium-voltage-KM.xlsx',
  'data/source/low-voltage-KM.xlsx',
  'data/source/solar-cables.xlsx',
].map((relativePath) => path.join(process.cwd(), relativePath));
/**
 * Reduce a name/slug/part number to a comparison key: upper-cased, with one
 * trailing "-<digits>" suffix removed and every character outside A-Z / 0-9
 * dropped.
 *
 * @param {*} value - Value to normalize; any falsy value yields an empty key.
 * @returns {string} The normalized key (possibly empty).
 */
function normalizeExcelKey(value) {
  const text = value ? String(value) : '';
  const upperCased = text.toUpperCase();
  // The suffix is stripped before punctuation so the "-" is still present to anchor on.
  const withoutNumericSuffix = upperCased.replace(/-\d+$/g, '');
  return withoutNumericSuffix.replace(/[^A-Z0-9]+/g, '');
}
/**
 * Convert a workbook to JSON by running `npx -y xlsx-cli -j` on it and parse
 * the rows from the command's standard output.
 *
 * Anything printed before the first "[" is skipped. If stdout contains no "["
 * or the remaining text is not valid JSON, an empty array is returned.
 * The command's stderr is discarded.
 *
 * @param {string} filePath - Path of the workbook passed to xlsx-cli.
 * @returns {Array} Parsed rows, or [] when nothing parseable was printed.
 */
function loadExcelRows(filePath) {
  const command = `npx -y xlsx-cli -j "${filePath}"`;
  const stdout = execSync(command, { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });

  const text = stdout.trim();
  const arrayStart = text.indexOf('[');
  if (arrayStart === -1) return [];

  let parsedRows;
  try {
    parsedRows = JSON.parse(text.slice(arrayStart));
  } catch {
    parsedRows = [];
  }
  return parsedRows;
}
/**
 * Build (once) and return the lookup index over all Excel source files.
 *
 * The index is a Map from normalized part number to `{ rows, units }`:
 * `rows` collects every data row whose "Part Number" normalizes to that key,
 * and `units` maps column name to the unit text taken from the sheet's
 * "Units" row. The result is memoized on `getExcelIndex.cached`.
 *
 * @returns {Map<string, {rows: Array, units: Object}>} The shared index.
 */
function getExcelIndex() {
  const memoized = getExcelIndex.cached;
  if (memoized) return memoized;

  const index = new Map();

  for (const sourceFile of EXCEL_SOURCE_FILES) {
    if (!fs.existsSync(sourceFile)) continue;

    const sheetRows = loadExcelRows(sourceFile);

    // Collect the non-blank unit labels from the sheet's "Units" row, if any.
    const unitsByColumn = {};
    const unitsRow = sheetRows.find((row) => row?.['Part Number'] === 'Units');
    for (const [column, rawUnit] of Object.entries(unitsRow ?? {})) {
      if (column === 'Part Number') continue;
      const label = String(rawUnit ?? '').trim();
      if (label) unitsByColumn[column] = label;
    }
    const unitCount = Object.keys(unitsByColumn).length;

    for (const row of sheetRows) {
      const partNumber = row?.['Part Number'];
      const isDataRow = Boolean(partNumber) && partNumber !== 'Units';
      const key = isDataRow ? normalizeExcelKey(String(partNumber)) : '';
      if (!key) continue;

      const entry = index.get(key);
      if (!entry) {
        index.set(key, { rows: [row], units: unitsByColumn });
        continue;
      }

      entry.rows.push(row);
      // When the same key appears again, keep whichever units map has more columns.
      if (Object.keys(entry.units).length < unitCount) {
        entry.units = unitsByColumn;
      }
    }
  }

  getExcelIndex.cached = index;
  return index;
}
function findExcelForProduct(product) {
const idx = getExcelIndex();
const candidates = [
product.name,
product.slug ? product.slug.replace(/-\d+$/g, '') : '',
product.sku,
product.translationKey,
].filter(Boolean);
for (const c of candidates) {
const key = normalizeExcelKey(c);
const match = idx.get(key);
if (match && match.rows.length) return match;
}
return null;
}
/**
 * Return the Excel rows matched for a product.
 *
 * @param {Object} product - Product record passed through to findExcelForProduct.
 * @returns {Array} The matched entry's rows, or an empty array when there is
 *   no match or the match carries no rows.
 */
function findExcelRowsForProduct(product) {
  const entry = findExcelForProduct(product);
  if (!entry) return [];
  return entry.rows || [];
}
/**
 * Pick the column name of a row that matches the first applicable pattern.
 *
 * Patterns are tried in order; for each one the row's keys are scanned in
 * their own order and the first matching key is returned. Earlier patterns
 * therefore take priority over earlier keys.
 *
 * @param {Object|null|undefined} row - Row whose keys are inspected; a
 *   missing row is treated as having no keys.
 * @param {RegExp[]} patterns - Patterns in priority order.
 * @returns {string|null} The matching key, or null when nothing matches.
 */
function guessColumnKey(row, patterns) {
  const keys = Object.keys(row || {});
  for (const re of patterns) {
    // search() always matches from the start of the string and leaves
    // re.lastIndex untouched, so a pattern carrying the `g` flag gives the
    // same answer on every call (re.test() would resume from lastIndex).
    const k = keys.find((x) => String(x).search(re) !== -1);
    if (k) return k;
  }
  return null;
}
/**
 * Turn a cell value into clean display text: anything of the form `<...>` is
 * removed, runs of whitespace collapse to a single space, and the result is
 * trimmed.
 *
 * Only null and undefined are treated as empty. Other values, including the
 * number 0, are stringified, so a zero cell yields "0" rather than "".
 *
 * @param {*} value - Raw value to normalize.
 * @returns {string} The cleaned text (possibly empty).
 */
function normalizeValue(value) {
  return String(value ?? '')
    .replace(/<[^>]*>/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Normalize a list of values and return the distinct non-empty ones.
 *
 * Values are compared case-insensitively; the first spelling encountered is
 * the one kept, and input order is preserved.
 *
 * @param {Array} options - Raw values to normalize and de-duplicate.
 * @returns {string[]} Normalized, unique, non-empty values.
 */
function getUniqueNonEmpty(options) {
  const seenLowerCase = new Set();
  const kept = [];

  options.forEach((option) => {
    const text = normalizeValue(option);
    if (!text) return;

    const folded = text.toLowerCase();
    if (!seenLowerCase.has(folded)) {
      seenLowerCase.add(folded);
      kept.push(text);
    }
  });

  return kept;
}
/**
 * Report whether a value, once normalized, reads as a plain decimal number.
 *
 * Commas are treated as decimal points; an optional leading minus sign and
 * at most one fractional part are accepted.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} True when the normalized text is an integer or decimal.
 */
function looksNumeric(value) {
  const numericShape = /^-?\d+(?:\.\d+)?$/;
  const candidate = normalizeValue(value).replace(/,/g, '.');
  return numericShape.test(candidate);
}
// Smoke-test the Excel enrichment lookup for one specific product.
// Nothing is printed when the product is not present in products.json.
const products = JSON.parse(fs.readFileSync('data/processed/products.json', 'utf8'));
const testProduct = products.find((candidate) => candidate.slug === 'na2xsfl2y-3');

if (testProduct) {
  const { id, slug, name, attributes } = testProduct;
  console.log('=== Original Product ===');
  console.log('ID:', id);
  console.log('Slug:', slug);
  console.log('Name:', name);
  console.log('Attributes:', attributes?.length || 0);

  const excelRows = findExcelRowsForProduct(testProduct);
  console.log('\n=== Excel Rows Found ===');
  console.log('Rows:', excelRows.length);

  if (excelRows.length > 0) {
    const firstRow = excelRows[0];

    // Read one column from every matched row as normalized text.
    const columnValues = (columnKey) =>
      excelRows.map((row) => normalizeValue(String(row?.[columnKey] ?? '')));

    console.log('\nFirst row columns:', Object.keys(firstRow));
    console.log('\nFirst row sample:', JSON.stringify(firstRow, null, 2).substring(0, 500));

    // Cross-section detection: patterns are listed from most to least specific.
    const crossSectionPatterns = [
      /number of cores and cross-section/i,
      /cross.?section/i,
      /ross section conductor/i,
    ];
    const crossSectionKey = guessColumnKey(firstRow, crossSectionPatterns);
    console.log('\nCross-section key:', crossSectionKey);
    if (crossSectionKey) {
      const configurations = columnValues(crossSectionKey).filter(Boolean);
      console.log('Configurations found:', configurations.length);
      console.log('Sample configs:', configurations.slice(0, 5));
    }

    // Detection of the additional descriptive columns.
    const conductorKey = guessColumnKey(firstRow, [/conductor/i]);
    const insulationKey = guessColumnKey(firstRow, [/insulation/i]);
    const sheathKey = guessColumnKey(firstRow, [/sheath/i]);
    const normKey = guessColumnKey(firstRow, [/norm|standard|iec|vde/i]);

    console.log('\nAdditional column keys:');
    console.log(' Conductor:', conductorKey);
    console.log(' Insulation:', insulationKey);
    console.log(' Sheath:', sheathKey);
    console.log(' Norm:', normKey);

    if (conductorKey) {
      const conductorValues = getUniqueNonEmpty(columnValues(conductorKey));
      console.log('\nConductor values:', conductorValues);
    }
  }
}