173 lines
5.1 KiB
JavaScript
173 lines
5.1 KiB
JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
const { execSync } = require('child_process');
|
|
|
|
// Copy the key functions from the PDF script
|
|
// Absolute paths of the Excel workbooks that are scanned for product rows.
// Each relative path is resolved against the current working directory.
const EXCEL_SOURCE_FILES = [
  'data/source/high-voltage.xlsx',
  'data/source/medium-voltage-KM.xlsx',
  'data/source/low-voltage-KM.xlsx',
  'data/source/solar-cables.xlsx',
].map((relativePath) => path.join(process.cwd(), relativePath));
|
|
|
|
/**
 * Builds the lookup key used to match product identifiers against Excel
 * part numbers.
 *
 * Steps: stringify (falsy input becomes ''), upper-case, drop one trailing
 * "-<digits>" suffix, then remove every character outside A-Z / 0-9.
 *
 * @param {*} value - Raw identifier (name, slug, SKU, part number, ...).
 * @returns {string} The normalized key; '' when nothing usable remains.
 */
function normalizeExcelKey(value) {
  const upperCased = String(value || '').toUpperCase();
  // Only a "-<digits>" run at the very end of the string is stripped.
  const withoutNumericSuffix = upperCased.replace(/-\d+$/g, '');
  // Splitting on the non-alphanumeric runs and re-joining removes them.
  return withoutNumericSuffix.split(/[^A-Z0-9]+/).join('');
}
|
|
|
|
/**
 * Converts an Excel workbook to JSON by running `npx -y xlsx-cli -j <file>`
 * and parses the command's stdout.
 *
 * Parsing starts at the first '[' in the output, so any text printed before
 * the JSON array is ignored. When there is no '[' at all, or the text cannot
 * be parsed, an empty array is returned (a warning is logged in the latter
 * case so a broken workbook is not mistaken for an empty one).
 *
 * Errors thrown by `execSync` itself (e.g. the command exiting non-zero) are
 * not caught here and propagate to the caller.
 *
 * @param {string} filePath - Path of the workbook to convert.
 * @returns {Array} Parsed JSON array (presumably one object per sheet row —
 *   confirm against xlsx-cli output), or [] when nothing could be parsed.
 */
function loadExcelRows(filePath) {
  const stdout = execSync(`npx -y xlsx-cli -j "${filePath}"`, {
    encoding: 'utf8',
    // stdin and stderr are discarded; only stdout is captured.
    stdio: ['ignore', 'pipe', 'ignore'],
    // The default limit is 1 MiB; a large workbook dumped as JSON can exceed
    // it, which would make execSync fail with ENOBUFS.
    maxBuffer: 64 * 1024 * 1024,
  });

  const trimmed = stdout.trim();
  const jsonStart = trimmed.indexOf('[');
  if (jsonStart < 0) return [];

  try {
    return JSON.parse(trimmed.slice(jsonStart));
  } catch (err) {
    // Stay best-effort (return no rows) but make the failure visible.
    console.warn(`loadExcelRows: could not parse xlsx-cli output for "${filePath}": ${err.message}`);
    return [];
  }
}
|
|
|
|
/**
 * Builds (once) and returns the index of Excel rows keyed by normalized
 * part number.
 *
 * The result is memoized on `getExcelIndex.cached`, so the workbooks are only
 * read on the first call.
 *
 * @returns {Map<string, {rows: Object[], units: Object}>} Map from
 *   `normalizeExcelKey(partNumber)` to the rows sharing that key and a
 *   column-name -> unit lookup taken from the file's "Units" row.
 */
function getExcelIndex() {
  if (getExcelIndex.cached) {
    return getExcelIndex.cached;
  }

  const index = new Map();

  EXCEL_SOURCE_FILES.forEach((sourceFile) => {
    // Missing workbooks are skipped rather than treated as an error.
    if (!fs.existsSync(sourceFile)) return;

    const sheetRows = loadExcelRows(sourceFile);

    // The row whose 'Part Number' cell is the literal 'Units' carries the
    // unit for each column instead of product data.
    const unitsRow = sheetRows.find((row) => row && row['Part Number'] === 'Units') || null;
    const units = {};
    if (unitsRow) {
      Object.entries(unitsRow).forEach(([column, rawUnit]) => {
        if (column === 'Part Number') return;
        const unit = String(rawUnit ?? '').trim();
        if (unit) units[column] = unit;
      });
    }

    sheetRows.forEach((row) => {
      const partNumber = row?.['Part Number'];
      if (!partNumber || partNumber === 'Units') return;

      const key = normalizeExcelKey(String(partNumber));
      if (!key) return;

      const entry = index.get(key);
      if (entry) {
        entry.rows.push(row);
        // Keep whichever units lookup covers more columns.
        if (Object.keys(entry.units).length < Object.keys(units).length) entry.units = units;
      } else {
        index.set(key, { rows: [row], units });
      }
    });
  });

  getExcelIndex.cached = index;
  return index;
}
|
|
|
|
/**
 * Looks up the Excel index entry for a product.
 *
 * Candidates are tried in this order: name, slug without a trailing
 * "-<digits>" suffix, sku, translationKey. Empty candidates are skipped and
 * the first one whose normalized key has at least one row wins.
 *
 * @param {Object} product - Product record; reads `name`, `slug`, `sku` and
 *   `translationKey`.
 * @returns {{rows: Object[], units: Object}|null} The matching index entry,
 *   or null when no candidate matches.
 */
function findExcelForProduct(product) {
  const index = getExcelIndex();

  const { name, slug, sku, translationKey } = product;
  const slugWithoutSuffix = slug ? slug.replace(/-\d+$/g, '') : '';
  const lookupTerms = [name, slugWithoutSuffix, sku, translationKey].filter((term) => Boolean(term));

  for (let i = 0; i < lookupTerms.length; i += 1) {
    const entry = index.get(normalizeExcelKey(lookupTerms[i]));
    if (!entry || !entry.rows.length) continue;
    return entry;
  }

  return null;
}
|
|
|
|
/**
 * Convenience wrapper around `findExcelForProduct` that returns only the rows.
 *
 * @param {Object} product - Product record to look up.
 * @returns {Object[]} The matched rows, or an empty array when there is no
 *   match.
 */
function findExcelRowsForProduct(product) {
  const entry = findExcelForProduct(product);
  if (entry && entry.rows) {
    return entry.rows;
  }
  return [];
}
|
|
|
|
/**
 * Finds the first column name of `row` that matches one of `patterns`.
 *
 * Patterns are tried in order, so an earlier pattern takes priority over a
 * later one regardless of the column order in `row`.
 *
 * @param {Object|null|undefined} row - Row object whose keys are column names.
 * @param {RegExp[]} patterns - Patterns in priority order.
 * @returns {string|null} The matching column name, or null when none matches.
 */
function guessColumnKey(row, patterns) {
  const columns = Object.keys(row || {});
  for (const re of patterns) {
    const hit = columns.find((column) => {
      // `test` on a /g or /y regex resumes from `lastIndex`; reset it so a
      // pattern object reused across calls cannot skip a matching column.
      re.lastIndex = 0;
      return re.test(column);
    });
    // Do not leak the post-match position back to the caller's regex.
    re.lastIndex = 0;
    if (hit) return hit;
  }
  return null;
}
|
|
|
|
/**
 * Turns a raw cell value into clean display text.
 *
 * Only null and undefined map to ''; every other value is stringified, so a
 * numeric 0 stays '0' instead of being dropped. Afterwards anything between
 * a '<' and the next '>' is removed, whitespace runs collapse to a single
 * space, and the result is trimmed.
 *
 * @param {*} value - Raw value.
 * @returns {string} Normalized text.
 */
function normalizeValue(value) {
  // `??` rather than `||`: 0 and false are real values, not "missing".
  return String(value ?? '')
    .replace(/<[^>]*>/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
|
|
/**
 * Normalizes every option, drops the empty ones and removes duplicates.
 *
 * Duplicates are detected case-insensitively; the first spelling encountered
 * is the one kept, and the original order is preserved.
 *
 * @param {Array} options - Raw option values.
 * @returns {string[]} Distinct, non-empty normalized values.
 */
function getUniqueNonEmpty(options) {
  // Map keeps insertion order, so values() yields first occurrences in order.
  const firstSpellingByLowercase = new Map();

  options
    .map(normalizeValue)
    .filter(Boolean)
    .forEach((text) => {
      const folded = text.toLowerCase();
      if (!firstSpellingByLowercase.has(folded)) {
        firstSpellingByLowercase.set(folded, text);
      }
    });

  return [...firstSpellingByLowercase.values()];
}
|
|
|
|
/**
 * Tells whether a value reads as a plain decimal number after normalization.
 *
 * Commas are treated as decimal points, so "1,5" counts as numeric; an
 * optional leading minus sign is allowed.
 *
 * @param {*} value - Raw value.
 * @returns {boolean} True when the normalized text is an integer or decimal.
 */
function looksNumeric(value) {
  const plainNumber = /^-?\d+(?:\.\d+)?$/;
  const candidate = normalizeValue(value).split(',').join('.');
  return plainNumber.test(candidate);
}
|
|
|
|
// Smoke-test the Excel enrichment lookup for a single product.
// The slug may be passed as the first command-line argument; without one the
// original hard-coded slug is used.
const products = JSON.parse(fs.readFileSync('data/processed/products.json', 'utf8'));
const testSlug = process.argv[2] || 'na2xsfl2y-3';
const testProduct = products.find(p => p.slug === testSlug);

if (testProduct) {
  console.log('=== Original Product ===');
  console.log('ID:', testProduct.id);
  console.log('Slug:', testProduct.slug);
  console.log('Name:', testProduct.name);
  console.log('Attributes:', testProduct.attributes?.length || 0);

  const rows = findExcelRowsForProduct(testProduct);
  console.log('\n=== Excel Rows Found ===');
  console.log('Rows:', rows.length);

  if (rows.length > 0) {
    const firstRow = rows[0];
    console.log('\nFirst row columns:', Object.keys(firstRow));
    // Only the first 500 characters are printed to keep the output readable.
    console.log('\nFirst row sample:', JSON.stringify(firstRow, null, 2).substring(0, 500));

    // Test cross-section detection
    const csKey = guessColumnKey(firstRow, [
      /number of cores and cross-section/i,
      /cross.?section/i,
      /ross section conductor/i,
    ]);
    console.log('\nCross-section key:', csKey);

    if (csKey) {
      const cfgOptions = rows
        .map(r => normalizeValue(String(r?.[csKey] ?? '')))
        .filter(Boolean);
      console.log('Configurations found:', cfgOptions.length);
      console.log('Sample configs:', cfgOptions.slice(0, 5));
    }

    // Test additional columns
    const conductorKey = guessColumnKey(firstRow, [/conductor/i]);
    const insulationKey = guessColumnKey(firstRow, [/insulation/i]);
    const sheathKey = guessColumnKey(firstRow, [/sheath/i]);
    const normKey = guessColumnKey(firstRow, [/norm|standard|iec|vde/i]);

    console.log('\nAdditional column keys:');
    console.log(' Conductor:', conductorKey);
    console.log(' Insulation:', insulationKey);
    console.log(' Sheath:', sheathKey);
    console.log(' Norm:', normKey);

    if (conductorKey) {
      const values = getUniqueNonEmpty(rows.map(r => normalizeValue(String(r?.[conductorKey] ?? ''))));
      console.log('\nConductor values:', values);
    }
  }
} else {
  // Without this the script would exit silently and look like a success.
  console.error(`Product with slug "${testSlug}" not found in data/processed/products.json`);
}