This commit is contained in:
2026-02-01 19:56:53 +01:00
parent f597fc2d78
commit c9a4afe080
956 changed files with 5062 additions and 93001 deletions

View File

@@ -77,15 +77,8 @@ async function run() {
urls: [targetUrl],
directory: outputDir,
recursive: true,
maxDepth: 10,
maxDepth: 5,
// Custom filename generation to avoid "https:/" folders in the output.
// Alternatives considered:
//   - 'bySiteStructure' (even strictly configured): rejected, the user saw garbage paths.
//   - 'byType' combined with preserving structure for HTML: workable, but less precise.
// Chosen strategy: a custom plugin that forcefully strips protocol/domain
// issues and controls filenames EXACTLY how we want.
plugins: [
new PuppeteerPlugin({
launchOptions: {
@@ -95,6 +88,16 @@ async function run() {
scrollToBottom: { timeout: 10000, viewportN: 10 },
blockNavigation: false
}),
// Console reporter plugin: prints one line for every saved resource and one
// line for every resource that failed.
new class LoggerPlugin {
  /**
   * Attaches the two logging handlers.
   *
   * @param registerAction - callback used to attach a handler to a named action
   */
  apply(registerAction: any) {
    // Success handler: logs the resource URL and the filename it was saved under.
    const reportSaved = (event: any) => {
      const { resource } = event;
      console.log(` 💾 Saved: ${resource.url} -> ${resource.filename}`);
    };

    // Failure handler: logs the resource URL and the error's message via console.error.
    const reportFailure = (event: any) => {
      const { resource, error } = event;
      console.error(` ❌ Error: ${resource.url} - ${error.message}`);
    };

    registerAction('onResourceSaved', reportSaved);
    registerAction('onResourceError', reportFailure);
  }
},
new class FilenamePlugin {
apply(registerAction: any) {
registerAction('generateFilename', ({ resource }: any) => {
@@ -130,9 +133,9 @@ async function run() {
const isTargetDomain = u.hostname === domain;
const isGoogleFonts = u.hostname.includes('fonts.googleapis.com') || u.hostname.includes('fonts.gstatic.com');
// Allow assets from anywhere
const isAsset = /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json)$/i.test(u.pathname);
const isAsset = /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json|webp)$/i.test(u.pathname);
// Allow fonts/css from common CDNs if standard extension check fails
const isCommonAsset = u.pathname.includes('/css/') || u.pathname.includes('/js/') || u.pathname.includes('/static/') || u.pathname.includes('/assets/');
const isCommonAsset = u.pathname.includes('/css/') || u.pathname.includes('/js/') || u.pathname.includes('/static/') || u.pathname.includes('/assets/') || u.pathname.includes('/uploads/');
return isTargetDomain || isAsset || isCommonAsset || isGoogleFonts;
},
@@ -144,6 +147,8 @@ async function run() {
{ selector: 'source', attr: 'src' },
{ selector: 'source', attr: 'srcset' },
{ selector: 'link[rel="stylesheet"]', attr: 'href' },
{ selector: 'link[rel="preload"]', attr: 'href' },
{ selector: 'link[rel="prefetch"]', attr: 'href' },
{ selector: 'script', attr: 'src' },
{ selector: 'video', attr: 'src' },
{ selector: 'video', attr: 'poster' },
@@ -194,14 +199,43 @@ function sanitizeHtmlFiles(dir: string) {
content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/chunks\/[^"]*"[^>]*><\/script>/gi, '');
content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/[^"]*Manifest\.js"[^>]*><\/script>/gi, '');
// Convert Breeze dynamic script/styles into actual tags if possible
// match <div class="breeze-scripts-load" ...>URL</div>
content = content.replace(/<div[^>]+class="breeze-scripts-load"[^>]*>([^<]+)<\/div>/gi, (match, url) => {
if (url.endsWith('.css')) return `<link rel="stylesheet" href="${url}">`;
return `<script src="${url}"></script>`;
});
// Inject Fonts (Fix for missing dynamic fonts)
// We inject Inter as a safe default for modern Next.js/Tailwind sites if strictly missing
if (!content.includes('fonts.googleapis.com')) {
const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap">`;
const styleBlock = `<style>.body-font{font-family:'Inter',sans-serif;}.title-font{font-family:'Inter',sans-serif;}</style>`;
// We inject Inter and Montserrat as safe defaults for industrial/modern sites
// Check specifically for a stylesheet link to google fonts
const hasGoogleFontStylesheet = /<link[^>]+rel="stylesheet"[^>]+href="[^"]*fonts\.googleapis\.com/i.test(content);
if (!hasGoogleFontStylesheet) {
const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Montserrat:wght@300;400;500;600;700&display=swap">`;
const styleBlock = `<style>
:root { --main-font: 'Inter', sans-serif; --heading-font: 'Montserrat', sans-serif; }
body, .body-font, p, span, li, a { font-family: var(--main-font) !important; }
h1, h2, h3, h4, h5, h6, .title-font, .heading-font { font-family: var(--heading-font) !important; }
</style>`;
content = content.replace('</head>', `${fontLink}${styleBlock}</head>`);
}
// Force column layout on product pages
if (content.includes('class="products')) {
const layoutScript = `
<script>
document.addEventListener('DOMContentLoaded', function() {
const products = document.querySelector('.products');
if (products) {
products.classList.remove(...Array.from(products.classList).filter(c => c.startsWith('columns-')));
products.classList.add('columns-1');
products.setAttribute('data-n-desktop-columns', '1');
}
});
</script>`;
content = content.replace('</body>', `${layoutScript}</body>`);
}
fs.writeFileSync(fullPath, content);
}
}