wip
This commit is contained in:
153
scripts/download-missing-assets.js
Normal file
153
scripts/download-missing-assets.js
Normal file
@@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Script to download missing videos and PDFs
|
||||
* Downloads videos referenced in processed data and PDFs linked in pages
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const https = require('https');
|
||||
const http = require('http');
|
||||
|
||||
// Configuration
// Both locations are resolved relative to the parent of this script's directory.
// MEDIA_DIR receives the downloaded files; PROCESSED_DIR is where main() looks
// for asset-map.json.
const MEDIA_DIR = path.join(__dirname, '..', 'public', 'media');
const PROCESSED_DIR = path.join(__dirname, '..', 'data', 'processed');

// Videos to download (from home pages)
// Each entry pairs the remote URL with the file name used inside MEDIA_DIR.
const VIDEOS_TO_DOWNLOAD = [
  {
    url: 'https://klz-cables.com/wp-content/uploads/2025/02/header.mp4',
    filename: 'header.mp4',
  },
  {
    url: 'https://klz-cables.com/wp-content/uploads/2025/02/header.webm',
    filename: 'header.webm',
  },
];

// PDFs to download (from terms pages)
// Same entry shape as VIDEOS_TO_DOWNLOAD.
const PDFS_TO_DOWNLOAD = [
  {
    url: 'https://klz-cables.com/wp-content/uploads/2025/01/agbs.pdf',
    filename: 'agbs.pdf',
  },
];

// Create media directory if it doesn't exist.
// With `recursive: true` mkdirSync is a no-op for an existing directory, so no
// separate existence check is needed. Note this runs at module load time, i.e.
// also when the file is merely required by another script.
fs.mkdirSync(MEDIA_DIR, { recursive: true });
|
||||
|
||||
// Download file function
/**
 * Download one remote asset into MEDIA_DIR.
 *
 * If a file named `filename` already exists in MEDIA_DIR the request is skipped
 * and the existing path is returned. Otherwise the response body is streamed to
 * `<filename>.part` and renamed to its final name only after the complete body
 * has been written, so a failed, redirected or truncated transfer never leaves
 * a file behind that a later run would mistake for a finished download.
 *
 * @param {string} url - Absolute http:// or https:// URL of the asset.
 * @param {string} filename - Name of the target file inside MEDIA_DIR.
 * @param {number} [maxRedirects=5] - Number of redirect hops still allowed.
 * @returns {Promise<string>} Resolves with the path of the stored file.
 *   Rejects with an Error for invalid URLs, unsupported protocols, non-200
 *   responses, too many redirects, and network or file-system failures.
 */
function downloadFile(url, filename, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    const filePath = path.join(MEDIA_DIR, filename);

    // Check if file already exists
    if (fs.existsSync(filePath)) {
      console.log(`✅ Already exists: ${filename}`);
      resolve(filePath);
      return;
    }

    let target;
    try {
      target = new URL(url);
    } catch (err) {
      reject(new Error(`Invalid download URL: ${url}`));
      return;
    }

    let protocol;
    if (target.protocol === 'https:') {
      protocol = https;
    } else if (target.protocol === 'http:') {
      protocol = http;
    } else {
      reject(new Error(`Unsupported protocol: ${target.protocol}`));
      return;
    }

    console.log(`📥 Downloading: ${filename} from ${url}`);

    const tempPath = `${filePath}.part`;
    let file = null;
    let settled = false;

    // Single failure path: stop writing, drop the partial file, reject once.
    const fail = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      if (file) {
        file.destroy();
      }
      // Best-effort cleanup; the temp file may not exist yet, so the unlink
      // result is deliberately ignored.
      fs.unlink(tempPath, () => reject(err));
    };

    const request = protocol.get(target, (res) => {
      const status = res.statusCode;

      if ([301, 302, 303, 307, 308].includes(status)) {
        // Handle redirects. The body is irrelevant; drain it to free the socket.
        res.resume();

        if (!res.headers.location) {
          fail(new Error(`Redirect without location: ${status}`));
          return;
        }
        if (maxRedirects <= 0) {
          fail(new Error(`Too many redirects while downloading ${filename}`));
          return;
        }

        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(res.headers.location, target).href;
        } catch (err) {
          fail(new Error(`Invalid redirect location: ${res.headers.location}`));
          return;
        }

        console.log(`🔄 Redirected to: ${nextUrl}`);
        // From here on the follow-up request owns the outcome of this promise.
        settled = true;
        downloadFile(nextUrl, filename, maxRedirects - 1).then(resolve, reject);
        return;
      }

      if (status !== 200) {
        res.resume();
        fail(new Error(`Failed to download: HTTP ${status}`));
        return;
      }

      // Only open the output once a 200 response is certain.
      file = fs.createWriteStream(tempPath);
      let flushed = false;

      file.on('error', fail);
      file.on('finish', () => {
        flushed = true;
      });
      file.on('close', () => {
        // 'close' also fires after destroy(); only a flushed stream is a success.
        if (!flushed || settled) {
          return;
        }
        fs.rename(tempPath, filePath, (renameErr) => {
          if (renameErr) {
            fail(renameErr);
            return;
          }
          settled = true;
          console.log(`✅ Downloaded: ${filename}`);
          resolve(filePath);
        });
      });

      res.on('error', fail);
      res.on('close', () => {
        // A connection dropped mid-body does not end the pipe; detect it here.
        if (!res.complete) {
          fail(new Error(`Connection closed before ${filename} was fully received`));
        }
      });

      res.pipe(file);
    });

    request.on('error', fail);
  });
}
|
||||
|
||||
// Main function
/**
 * Download every configured video and PDF, then merge the resulting
 * URL -> `/media/<filename>` mappings into PROCESSED_DIR/asset-map.json.
 *
 * Individual download failures are logged as warnings and do not abort the
 * run. asset-map.json is only updated when it already exists; if it is
 * missing a warning is printed and the mappings are not persisted.
 *
 * @returns {Promise<void>} Rejects if asset-map.json exists but cannot be
 *   read, parsed or written.
 */
async function main() {
  console.log('🔍 Downloading Missing Assets');
  console.log('==============================');
  console.log(`Output: ${MEDIA_DIR}`);
  console.log('');

  // Download videos
  console.log('🎬 Videos:');
  const videos = await downloadAssetGroup(VIDEOS_TO_DOWNLOAD, 'video');

  console.log('');

  // Download PDFs
  console.log('📄 PDFs:');
  const pdfs = await downloadAssetGroup(PDFS_TO_DOWNLOAD, 'PDF');

  const assetMap = {};
  const downloaded = [];
  for (const asset of [...videos, ...pdfs]) {
    assetMap[asset.url] = `/media/${asset.filename}`;
    downloaded.push(asset.filename);
  }

  // Update asset-map.json with new entries
  const assetMapPath = path.join(PROCESSED_DIR, 'asset-map.json');
  if (fs.existsSync(assetMapPath)) {
    let existingMap;
    try {
      existingMap = JSON.parse(fs.readFileSync(assetMapPath, 'utf8'));
    } catch (error) {
      throw new Error(`Could not read ${assetMapPath}: ${error.message}`);
    }

    // Entries from this run win over existing ones with the same URL.
    const updatedMap = { ...existingMap, ...assetMap };

    fs.writeFileSync(assetMapPath, JSON.stringify(updatedMap, null, 2));
    console.log(`\n✅ Updated asset-map.json with ${Object.keys(assetMap).length} new entries`);
  } else {
    // Make the skipped update visible instead of dropping the mappings silently.
    console.warn(`\n⚠️ asset-map.json not found at ${assetMapPath}; mappings were not saved`);
  }

  console.log('\n🎉 Asset Download Complete!');
  console.log('==============================');
  console.log(`📥 Downloaded: ${downloaded.length} files`);
  console.log(`📁 Directory: public/media/`);
  console.log('');
  console.log('Files downloaded:');
  for (const file of downloaded) {
    console.log(`  - ${file}`);
  }
}

/**
 * Download a list of assets one after another, logging (not throwing) failures.
 *
 * @param {Array<{url: string, filename: string}>} assets - Entries to fetch.
 * @param {string} label - Asset kind used in the warning message.
 * @returns {Promise<Array<{url: string, filename: string}>>} The entries whose
 *   download call resolved, in input order.
 */
async function downloadAssetGroup(assets, label) {
  const succeeded = [];
  for (const asset of assets) {
    try {
      await downloadFile(asset.url, asset.filename);
      succeeded.push(asset);
    } catch (error) {
      console.warn(`⚠️ Failed to download ${label} ${asset.filename}:`, error.message);
    }
  }
  return succeeded;
}
|
||||
|
||||
// Run if called directly
|
||||
if (require.main === module) {
|
||||
main().catch(error => {
|
||||
console.error('\n❌ Script failed:', error.message);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
downloadFile,
|
||||
main
|
||||
};
|
||||
Reference in New Issue
Block a user