clone
This commit is contained in:
@@ -77,15 +77,8 @@ async function run() {
|
||||
urls: [targetUrl],
|
||||
directory: outputDir,
|
||||
recursive: true,
|
||||
maxDepth: 10,
|
||||
maxDepth: 5,
|
||||
// Custom filename generation to avoid "https:/" folders
|
||||
// We use 'bySiteStructure' behavior but manually controlled via plugin
|
||||
// to forcefully strip protocol/domain issues if any.
|
||||
// Actually, let's just use 'bySiteStructure' but strictly configured?
|
||||
// No, the user saw garbage. Let's use 'byType' combined with preserving structure for HTML.
|
||||
|
||||
// BETTER STRATEGY:
|
||||
// Use a custom plugin to control filenames EXACTLY how we want.
|
||||
plugins: [
|
||||
new PuppeteerPlugin({
|
||||
launchOptions: {
|
||||
@@ -95,6 +88,16 @@ async function run() {
|
||||
scrollToBottom: { timeout: 10000, viewportN: 10 },
|
||||
blockNavigation: false
|
||||
}),
|
||||
// Inline plugin that reports per-resource outcomes on the console.
// NOTE(review): the `apply(registerAction)` shape and the action names presumably
// follow the scraper library's plugin contract — confirm against the library in use.
new class LoggerPlugin {
|
||||
// Registers both logging handlers through the supplied `registerAction` callback.
apply(registerAction: any) {
|
||||
// 'onResourceSaved': log the resource's `url` and `filename` fields.
registerAction('onResourceSaved', ({ resource }: any) => {
|
||||
console.log(` 💾 Saved: ${resource.url} -> ${resource.filename}`);
|
||||
});
|
||||
// 'onResourceError': log the resource's `url` together with `error.message` on stderr.
// NOTE(review): assumes `error` is always an object with a `message` — verify.
registerAction('onResourceError', ({ resource, error }: any) => {
|
||||
console.error(` ❌ Error: ${resource.url} - ${error.message}`);
|
||||
});
|
||||
}
|
||||
},
|
||||
new class FilenamePlugin {
|
||||
apply(registerAction: any) {
|
||||
registerAction('generateFilename', ({ resource }: any) => {
|
||||
@@ -130,9 +133,9 @@ async function run() {
|
||||
const isTargetDomain = u.hostname === domain;
|
||||
const isGoogleFonts = u.hostname.includes('fonts.googleapis.com') || u.hostname.includes('fonts.gstatic.com');
|
||||
// Allow assets from anywhere
|
||||
const isAsset = /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json)$/i.test(u.pathname);
|
||||
const isAsset = /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json|webp)$/i.test(u.pathname);
|
||||
// Allow fonts/css from common CDNs if standard extension check fails
|
||||
const isCommonAsset = u.pathname.includes('/css/') || u.pathname.includes('/js/') || u.pathname.includes('/static/') || u.pathname.includes('/assets/');
|
||||
const isCommonAsset = u.pathname.includes('/css/') || u.pathname.includes('/js/') || u.pathname.includes('/static/') || u.pathname.includes('/assets/') || u.pathname.includes('/uploads/');
|
||||
|
||||
return isTargetDomain || isAsset || isCommonAsset || isGoogleFonts;
|
||||
},
|
||||
@@ -144,6 +147,8 @@ async function run() {
|
||||
{ selector: 'source', attr: 'src' },
|
||||
{ selector: 'source', attr: 'srcset' },
|
||||
{ selector: 'link[rel="stylesheet"]', attr: 'href' },
|
||||
{ selector: 'link[rel="preload"]', attr: 'href' },
|
||||
{ selector: 'link[rel="prefetch"]', attr: 'href' },
|
||||
{ selector: 'script', attr: 'src' },
|
||||
{ selector: 'video', attr: 'src' },
|
||||
{ selector: 'video', attr: 'poster' },
|
||||
@@ -194,14 +199,43 @@ function sanitizeHtmlFiles(dir: string) {
|
||||
content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/chunks\/[^"]*"[^>]*><\/script>/gi, '');
|
||||
content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/[^"]*Manifest\.js"[^>]*><\/script>/gi, '');
|
||||
|
||||
// Convert Breeze dynamic script/styles into actual tags if possible
|
||||
// match <div class="breeze-scripts-load" ...>URL</div>
|
||||
content = content.replace(/<div[^>]+class="breeze-scripts-load"[^>]*>([^<]+)<\/div>/gi, (match, url) => {
|
||||
if (url.endsWith('.css')) return `<link rel="stylesheet" href="${url}">`;
|
||||
return `<script src="${url}"></script>`;
|
||||
});
|
||||
|
||||
// Inject Fonts (Fix for missing dynamic fonts)
|
||||
// We inject Inter as a safe default for modern Next.js/Tailwind sites if strictly missing
|
||||
if (!content.includes('fonts.googleapis.com')) {
|
||||
const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap">`;
|
||||
const styleBlock = `<style>.body-font{font-family:'Inter',sans-serif;}.title-font{font-family:'Inter',sans-serif;}</style>`;
|
||||
// We inject Inter and Montserrat as safe defaults for industrial/modern sites
|
||||
// Check specifically for a stylesheet link to google fonts
|
||||
const hasGoogleFontStylesheet = /<link[^>]+rel="stylesheet"[^>]+href="[^"]*fonts\.googleapis\.com/i.test(content);
|
||||
if (!hasGoogleFontStylesheet) {
|
||||
const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Montserrat:wght@300;400;500;600;700&display=swap">`;
|
||||
const styleBlock = `<style>
|
||||
:root { --main-font: 'Inter', sans-serif; --heading-font: 'Montserrat', sans-serif; }
|
||||
body, .body-font, p, span, li, a { font-family: var(--main-font) !important; }
|
||||
h1, h2, h3, h4, h5, h6, .title-font, .heading-font { font-family: var(--heading-font) !important; }
|
||||
</style>`;
|
||||
content = content.replace('</head>', `${fontLink}${styleBlock}</head>`);
|
||||
}
|
||||
|
||||
// Force column layout on product pages
|
||||
if (content.includes('class="products')) {
|
||||
const layoutScript = `
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const products = document.querySelector('.products');
|
||||
if (products) {
|
||||
products.classList.remove(...Array.from(products.classList).filter(c => c.startsWith('columns-')));
|
||||
products.classList.add('columns-1');
|
||||
products.setAttribute('data-n-desktop-columns', '1');
|
||||
}
|
||||
});
|
||||
</script>`;
|
||||
content = content.replace('</body>', `${layoutScript}</body>`);
|
||||
}
|
||||
|
||||
fs.writeFileSync(fullPath, content);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user