chore: stabilize apps/web (lint, build, typecheck fixes)
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 6s
Build & Deploy / 🧪 QA (push) Failing after 1m27s
Build & Deploy / 🏗️ Build (push) Failing after 1m31s
Build & Deploy / 🚀 Deploy (push) Has been skipped
Build & Deploy / 🩺 Health Check (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 6s
Build & Deploy / 🧪 QA (push) Failing after 1m27s
Build & Deploy / 🏗️ Build (push) Failing after 1m31s
Build & Deploy / 🚀 Deploy (push) Has been skipped
Build & Deploy / 🩺 Health Check (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { CheerioCrawler, RequestQueue } from "crawlee";
|
||||
import { CheerioCrawler } from "crawlee";
|
||||
import * as path from "node:path";
|
||||
import * as fs from "node:fs/promises";
|
||||
import { existsSync } from "node:fs";
|
||||
@@ -1055,7 +1055,7 @@ ${JSON.stringify({ facts, strategy, ia, positionsData }, null, 2)}
|
||||
finalState.sitemap = finalState.sitemap.sitemap;
|
||||
else {
|
||||
const entries = Object.entries(finalState.sitemap);
|
||||
if (entries.every(([_, v]) => Array.isArray(v))) {
|
||||
if (entries.every(([__, v]) => Array.isArray(v))) {
|
||||
finalState.sitemap = entries.map(([category, pages]) => ({
|
||||
category,
|
||||
pages,
|
||||
|
||||
@@ -1,322 +1,398 @@
|
||||
import { chromium, type Page } from 'playwright';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import fs from 'node:fs';
|
||||
import axios from 'axios';
|
||||
import { chromium } from "playwright";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import fs from "node:fs";
|
||||
import axios from "axios";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36';
|
||||
const USER_AGENT =
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36";
|
||||
|
||||
/**
 * Converts a URL-derived path (for example `hostname + pathname`) into a
 * string that is safe to use as a relative filesystem path.
 *
 * The input is split on "/", and inside every segment each character outside
 * `[a-z0-9._-]` (case-insensitive) is replaced with "_". Empty segments are
 * preserved, so leading, trailing and doubled slashes survive unchanged.
 *
 * Segments that are exactly "." or ".." consist only of allowed characters,
 * so a plain character filter would let them through and the result could
 * climb out of whatever directory it is later joined to. They are therefore
 * replaced with "_" / "__".
 *
 * @param rawPath Slash-separated path to sanitize.
 * @returns The sanitized path with the same number of segments as the input.
 */
function sanitizePath(rawPath: string): string {
  return rawPath
    .split("/")
    .map((segment) => {
      // Neutralize directory-navigation segments before the character filter,
      // which would otherwise keep them intact.
      if (segment === "." || segment === "..") {
        return "_".repeat(segment.length);
      }
      return segment.replace(/[^a-z0-9._-]/gi, "_");
    })
    .join("/");
}
|
||||
|
||||
/**
 * Downloads a single remote asset into `assetsDir` and returns the reference
 * the cloned page should use for it.
 *
 * The local path is derived from the URL's hostname and pathname (passed
 * through `sanitizePath`). The query string is NOT part of the local path, so
 * two URLs that differ only in their query share one file on disk.
 *
 * This is a best-effort helper: any failure (invalid URL, network error,
 * non-200 status, filesystem error) yields `null` instead of throwing, so the
 * caller can keep the original URL.
 *
 * @param url Absolute `http(s)` URL, or a protocol-relative `//host/path` URL
 *   (which is upgraded to `https:`).
 * @param assetsDir Directory under which the asset tree is written.
 * @returns `./assets/<relative path>` on success or cache hit, otherwise `null`.
 */
async function downloadFile(
  url: string,
  assetsDir: string,
): Promise<string | null> {
  if (url.startsWith("//")) url = `https:${url}`;
  if (!url.startsWith("http")) return null;

  try {
    const u = new URL(url);
    // The prefix test above also lets through schemes that merely start with
    // "http"; only real HTTP(S) URLs are fetched.
    if (u.protocol !== "http:" && u.protocol !== "https:") return null;

    // Host + path keeps assets from different origins apart on disk.
    const relPath = sanitizePath(u.hostname + u.pathname);
    const dest = path.join(assetsDir, relPath);
    const localRef = `./assets/${relPath}`;

    // Only a regular file counts as an already-downloaded asset. A directory
    // at the same location (e.g. for a URL whose path ends in "/") must not be
    // reported as a successful download.
    if (fs.existsSync(dest) && fs.statSync(dest).isFile()) return localRef;

    const res = await axios.get(url, {
      responseType: "arraybuffer",
      headers: { "User-Agent": USER_AGENT },
      timeout: 15000,
      // Never throw on HTTP status; the status is checked explicitly below.
      validateStatus: () => true,
    });

    if (res.status !== 200) return null;

    // `recursive: true` is a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(dest), { recursive: true });
    fs.writeFileSync(dest, Buffer.from(res.data));
    return localRef;
  } catch {
    return null; // Fail silently, proceed with original URL
  }
}
|
||||
|
||||
/**
 * Downloads every asset referenced by a stylesheet and rewrites the
 * references so they point at the local copies.
 *
 * References are found in `url(...)` tokens and in quoted `@import "..."`
 * statements. Each distinct reference is resolved against `cssUrl`,
 * downloaded with `downloadFile`, and replaced by the path from the
 * stylesheet's local directory to the asset's local file. References that
 * cannot be resolved or downloaded are left untouched.
 *
 * Rewriting happens in one pass over the original text and only inside the
 * matched tokens. This matters because a repeated substring replacement would
 * rewrite a URL again inside its own replacement when the stylesheet mentions
 * it more than once, and would also alter longer URLs that merely contain a
 * shorter one.
 *
 * @param cssContent Stylesheet source text.
 * @param cssUrl Absolute URL the stylesheet was loaded from.
 * @param assetsDir Directory under which assets are stored.
 * @param urlMap Filled in place: absolute asset URL -> value returned by
 *   `downloadFile`.
 * @param depth Nesting guard; content is returned unchanged when it exceeds 5.
 *   This function does not call itself, so the value is whatever the caller
 *   passes.
 * @returns The stylesheet text with downloadable references rewritten.
 */
async function processCssRecursively(
  cssContent: string,
  cssUrl: string,
  assetsDir: string,
  urlMap: Record<string, string>,
  depth = 0,
): Promise<string> {
  if (depth > 5) return cssContent;

  // Parse the stylesheet location once. If it is not a valid absolute URL,
  // no reference can be resolved against it, so nothing is rewritten.
  let cssLocation: URL;
  try {
    cssLocation = new URL(cssUrl);
  } catch {
    return cssContent;
  }
  const cssDir = path.dirname(
    sanitizePath(cssLocation.hostname + cssLocation.pathname),
  );

  // Capture both standard url(...) and @import "...".
  // Groups: 1 = opening token, 2 = the reference itself, 3 = closing token.
  const urlRegex = /(url\(["']?|@import\s+["'])([^"'\)]+)(["']?\)?)/gi;

  // Pass 1: resolve and download each distinct reference exactly once.
  const rewrites = new Map<string, string>();
  const visited = new Set<string>();
  for (const found of cssContent.matchAll(urlRegex)) {
    const originalUrl = found[2];
    if (!originalUrl || visited.has(originalUrl)) continue;
    visited.add(originalUrl);

    // Inline data, blob handles and same-document fragments (url(#id)) are
    // not downloadable files.
    if (
      originalUrl.startsWith("data:") ||
      originalUrl.startsWith("blob:") ||
      originalUrl.startsWith("#")
    ) {
      continue;
    }

    try {
      const asset = new URL(originalUrl, cssLocation);
      const local = await downloadFile(asset.href, assetsDir);
      if (!local) continue;

      // Route from the folder containing the CSS file to the asset. URLs in
      // CSS always use forward slashes, whatever the host OS separator is.
      const rel = path
        .relative(cssDir, sanitizePath(asset.hostname + asset.pathname))
        .split(path.sep)
        .join("/");

      rewrites.set(originalUrl, rel);
      urlMap[asset.href] = local;
    } catch {
      // Ignore URL resolution errors; the reference stays as it was.
    }
  }

  // Pass 2: replace strictly the URL part of each matched token.
  return cssContent.replace(
    urlRegex,
    (whole: string, opening: string, target: string, closing: string) => {
      const rel = rewrites.get(target);
      return rel === undefined ? whole : `${opening}${rel}${closing}`;
    },
  );
}
|
||||
|
||||
async function run() {
|
||||
const rawUrl = process.argv[2];
|
||||
if (!rawUrl) {
|
||||
console.error('Usage: npm run clone-page <url>');
|
||||
process.exit(1);
|
||||
const rawUrl = process.argv[2];
|
||||
if (!rawUrl) {
|
||||
console.error("Usage: npm run clone-page <url>");
|
||||
process.exit(1);
|
||||
}
|
||||
const targetUrl = rawUrl.trim();
|
||||
const urlObj = new URL(targetUrl);
|
||||
|
||||
// Setup Output Directories
|
||||
const domainSlug = urlObj.hostname.replace("www.", "");
|
||||
const domainDir = path.resolve(__dirname, `../public/showcase/${domainSlug}`);
|
||||
const assetsDir = path.join(domainDir, "assets");
|
||||
if (!fs.existsSync(assetsDir)) fs.mkdirSync(assetsDir, { recursive: true });
|
||||
|
||||
let pageSlug = urlObj.pathname.split("/").filter(Boolean).join("-");
|
||||
if (!pageSlug) pageSlug = "index";
|
||||
const htmlFilename = `${pageSlug}.html`;
|
||||
|
||||
console.log(`🚀 INDUSTRIAL CLONE: ${targetUrl}`);
|
||||
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
// Start with a standard viewport, we will resize widely later
|
||||
const context = await browser.newContext({
|
||||
userAgent: USER_AGENT,
|
||||
viewport: { width: 1920, height: 1080 },
|
||||
});
|
||||
const page = await context.newPage();
|
||||
|
||||
const urlMap: Record<string, string> = {};
|
||||
const foundAssets = new Set<string>();
|
||||
|
||||
// 1. Live Network Interception
|
||||
page.on("response", (response) => {
|
||||
const url = response.url();
|
||||
if (response.status() === 200) {
|
||||
// Capture anything that looks like a static asset
|
||||
if (
|
||||
url.match(
|
||||
/\.(css|js|png|jpg|jpeg|gif|svg|woff2?|ttf|otf|mp4|webm|webp|ico)/i,
|
||||
)
|
||||
) {
|
||||
foundAssets.add(url);
|
||||
}
|
||||
}
|
||||
const targetUrl = rawUrl.trim();
|
||||
const urlObj = new URL(targetUrl);
|
||||
});
|
||||
|
||||
// Setup Output Directories
|
||||
const domainSlug = urlObj.hostname.replace('www.', '');
|
||||
const domainDir = path.resolve(__dirname, `../public/showcase/${domainSlug}`);
|
||||
const assetsDir = path.join(domainDir, 'assets');
|
||||
if (!fs.existsSync(assetsDir)) fs.mkdirSync(assetsDir, { recursive: true });
|
||||
try {
|
||||
console.log("🌐 Loading page (Waiting for Network Idle)...");
|
||||
await page.goto(targetUrl, { waitUntil: "networkidle", timeout: 90000 });
|
||||
|
||||
let pageSlug = urlObj.pathname.split('/').filter(Boolean).join('-');
|
||||
if (!pageSlug) pageSlug = 'index';
|
||||
const htmlFilename = `${pageSlug}.html`;
|
||||
console.log(
|
||||
'🌊 Executing "Scroll Wave" to trigger all lazy loaders naturally...',
|
||||
);
|
||||
await page.evaluate(async () => {
|
||||
await new Promise((resolve) => {
|
||||
let totalHeight = 0;
|
||||
const distance = 400;
|
||||
const timer = setInterval(() => {
|
||||
const scrollHeight = document.body.scrollHeight;
|
||||
window.scrollBy(0, distance);
|
||||
totalHeight += distance;
|
||||
|
||||
console.log(`🚀 INDUSTRIAL CLONE: ${targetUrl}`);
|
||||
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
// Start with a standard viewport, we will resize widely later
|
||||
const context = await browser.newContext({ userAgent: USER_AGENT, viewport: { width: 1920, height: 1080 } });
|
||||
const page = await context.newPage();
|
||||
|
||||
const urlMap: Record<string, string> = {};
|
||||
const foundAssets = new Set<string>();
|
||||
|
||||
// 1. Live Network Interception
|
||||
page.on('response', response => {
|
||||
const url = response.url();
|
||||
if (response.status() === 200) {
|
||||
// Capture anything that looks like a static asset
|
||||
if (url.match(/\.(css|js|png|jpg|jpeg|gif|svg|woff2?|ttf|otf|mp4|webm|webp|ico)/i)) {
|
||||
foundAssets.add(url);
|
||||
}
|
||||
}
|
||||
if (totalHeight >= scrollHeight) {
|
||||
clearInterval(timer);
|
||||
window.scrollTo(0, 0); // Reset to top
|
||||
resolve(true);
|
||||
}
|
||||
}, 100);
|
||||
});
|
||||
});
|
||||
|
||||
try {
|
||||
console.log('🌐 Loading page (Waiting for Network Idle)...');
|
||||
await page.goto(targetUrl, { waitUntil: 'networkidle', timeout: 90000 });
|
||||
console.log(
|
||||
'📐 Expanding Viewport to "Giant Mode" for final asset capture...',
|
||||
);
|
||||
const fullHeight = await page.evaluate(() => document.body.scrollHeight);
|
||||
await page.setViewportSize({ width: 1920, height: fullHeight + 1000 });
|
||||
|
||||
console.log('🌊 Executing "Scroll Wave" to trigger all lazy loaders naturally...');
|
||||
await page.evaluate(async () => {
|
||||
await new Promise((resolve) => {
|
||||
let totalHeight = 0;
|
||||
const distance = 400;
|
||||
const timer = setInterval(() => {
|
||||
const scrollHeight = document.body.scrollHeight;
|
||||
window.scrollBy(0, distance);
|
||||
totalHeight += distance;
|
||||
// Final settlement wait
|
||||
await page.waitForTimeout(3000);
|
||||
|
||||
if (totalHeight >= scrollHeight) {
|
||||
clearInterval(timer);
|
||||
window.scrollTo(0, 0); // Reset to top
|
||||
resolve(true);
|
||||
}
|
||||
}, 100);
|
||||
console.log("💧 Final DOM Hydration & Sanitization...");
|
||||
await page.evaluate(() => {
|
||||
// A. Deterministic Attribute Hydration (Generic)
|
||||
// Scours every element for attributes that look like asset URLs and promotes them
|
||||
const assetPattern =
|
||||
/\.(jpg|jpeg|png|gif|svg|webp|mp4|webm|woff2?|ttf|otf)/i;
|
||||
|
||||
document.querySelectorAll("*").forEach((el) => {
|
||||
// 0. Skip Meta/Head/Script/Style/SVG tags for attribute promotion
|
||||
if (
|
||||
["META", "LINK", "HEAD", "SCRIPT", "STYLE", "SVG", "PATH"].includes(
|
||||
el.tagName,
|
||||
)
|
||||
)
|
||||
return;
|
||||
|
||||
// 1. Force Visibility (Anti-Flicker)
|
||||
const htmlEl = el as HTMLElement;
|
||||
const style = window.getComputedStyle(htmlEl);
|
||||
if (style.opacity === "0" || style.visibility === "hidden") {
|
||||
htmlEl.style.setProperty("opacity", "1", "important");
|
||||
htmlEl.style.setProperty("visibility", "visible", "important");
|
||||
}
|
||||
|
||||
// 2. Promote Data Attributes
|
||||
for (const attr of Array.from(el.attributes)) {
|
||||
const name = attr.name.toLowerCase();
|
||||
const val = attr.value;
|
||||
|
||||
if (
|
||||
assetPattern.test(val) ||
|
||||
name.includes("src") ||
|
||||
name.includes("image")
|
||||
) {
|
||||
// Standard Image/Video/Source promotion
|
||||
if (el.tagName === "IMG") {
|
||||
const img = el as HTMLImageElement;
|
||||
if (name.includes("srcset")) img.srcset = val;
|
||||
else if (!img.src || img.src.includes("data:")) img.src = val;
|
||||
}
|
||||
if (el.tagName === "SOURCE") {
|
||||
const source = el as HTMLSourceElement;
|
||||
if (name.includes("srcset")) source.srcset = val;
|
||||
}
|
||||
if (el.tagName === "VIDEO" || el.tagName === "AUDIO") {
|
||||
const media = el as HTMLMediaElement;
|
||||
if (!media.src) media.src = val;
|
||||
}
|
||||
|
||||
// Background Image Promotion
|
||||
if (val.match(/^(https?:\/\/|\/\/|\/)/) && !name.includes("href")) {
|
||||
const bg = htmlEl.style.backgroundImage;
|
||||
if (!bg || bg === "none") {
|
||||
htmlEl.style.backgroundImage = `url('${val}')`;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// B. Ensure basic structural elements are visible post-scroll
|
||||
const body = document.body;
|
||||
if (body) {
|
||||
body.style.setProperty("opacity", "1", "important");
|
||||
body.style.setProperty("visibility", "visible", "important");
|
||||
}
|
||||
});
|
||||
|
||||
console.log("⏳ Waiting for network idle...");
|
||||
await page.waitForLoadState("networkidle");
|
||||
|
||||
// 1.5 FINAL SETTLEMENT: Let any scroll-triggered JS finish
|
||||
await page.waitForTimeout(1000);
|
||||
|
||||
// 2. Static Snapshot
|
||||
let content = await page.content();
|
||||
|
||||
// 3. Post-Snapshot Asset Discovery (Regex)
|
||||
// Catches assets that never triggered a network request but exist in the markup
|
||||
const regexPatterns = [
|
||||
/(?:src|href|url|data-[a-z-]+|srcset)=["']([^"'<>\s]+?\.(?:css|js|png|jpg|jpeg|gif|svg|woff2?|ttf|otf|mp4|webm|webp|ico)(?:\?[^"']*)?)["']/gi,
|
||||
// Capture CSS url() inside style blocks
|
||||
/url\(["']?([^"'\)]+)["']?\)/gi,
|
||||
];
|
||||
|
||||
for (const pattern of regexPatterns) {
|
||||
let match;
|
||||
while ((match = pattern.exec(content)) !== null) {
|
||||
try {
|
||||
foundAssets.add(new URL(match[1], targetUrl).href);
|
||||
} catch {
|
||||
// Ignore invalid URLs in content
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Specific srcset parsing
|
||||
const srcsetRegex = /[a-z0-9-]+srcset=["']([^"']+)["']/gi;
|
||||
let match;
|
||||
while ((match = srcsetRegex.exec(content)) !== null) {
|
||||
match[1].split(",").forEach((rule) => {
|
||||
const parts = rule.trim().split(/\s+/);
|
||||
if (parts[0] && !parts[0].startsWith("data:")) {
|
||||
try {
|
||||
foundAssets.add(new URL(parts[0], targetUrl).href);
|
||||
} catch {
|
||||
// Ignore invalid srcset URLs
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`🔍 Processing ${foundAssets.size} discovered assets...`);
|
||||
|
||||
// 4. Download & Map
|
||||
for (const url of foundAssets) {
|
||||
const local = await downloadFile(url, assetsDir);
|
||||
if (local) {
|
||||
urlMap[url] = local;
|
||||
const clean = url.split("?")[0];
|
||||
urlMap[clean] = local;
|
||||
|
||||
// Handle CSS recursively
|
||||
if (clean.endsWith(".css")) {
|
||||
try {
|
||||
const { data } = await axios.get(url, {
|
||||
headers: { "User-Agent": USER_AGENT },
|
||||
});
|
||||
});
|
||||
|
||||
console.log('📐 Expanding Viewport to "Giant Mode" for final asset capture...');
|
||||
const fullHeight = await page.evaluate(() => document.body.scrollHeight);
|
||||
await page.setViewportSize({ width: 1920, height: fullHeight + 1000 });
|
||||
|
||||
// Final settlement wait
|
||||
await page.waitForTimeout(3000);
|
||||
|
||||
console.log('💧 Final DOM Hydration & Sanitization...');
|
||||
await page.evaluate(() => {
|
||||
// A. Deterministic Attribute Hydration (Generic)
|
||||
// Scours every element for attributes that look like asset URLs and promotes them
|
||||
const assetPattern = /\.(jpg|jpeg|png|gif|svg|webp|mp4|webm|woff2?|ttf|otf)/i;
|
||||
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
// 0. Skip Meta/Head/Script/Style/SVG tags for attribute promotion
|
||||
if (['META', 'LINK', 'HEAD', 'SCRIPT', 'STYLE', 'SVG', 'PATH'].includes(el.tagName)) return;
|
||||
|
||||
// 1. Force Visibility (Anti-Flicker)
|
||||
const htmlEl = el as HTMLElement;
|
||||
const style = window.getComputedStyle(htmlEl);
|
||||
if (style.opacity === '0' || style.visibility === 'hidden') {
|
||||
htmlEl.style.setProperty('opacity', '1', 'important');
|
||||
htmlEl.style.setProperty('visibility', 'visible', 'important');
|
||||
}
|
||||
|
||||
// 2. Promote Data Attributes
|
||||
for (const attr of Array.from(el.attributes)) {
|
||||
const name = attr.name.toLowerCase();
|
||||
const val = attr.value;
|
||||
|
||||
if (assetPattern.test(val) || name.includes('src') || name.includes('image')) {
|
||||
// Standard Image/Video/Source promotion
|
||||
if (el.tagName === 'IMG') {
|
||||
const img = el as HTMLImageElement;
|
||||
if (name.includes('srcset')) img.srcset = val;
|
||||
else if (!img.src || img.src.includes('data:')) img.src = val;
|
||||
}
|
||||
if (el.tagName === 'SOURCE') {
|
||||
const source = el as HTMLSourceElement;
|
||||
if (name.includes('srcset')) source.srcset = val;
|
||||
}
|
||||
if (el.tagName === 'VIDEO' || el.tagName === 'AUDIO') {
|
||||
const media = el as HTMLMediaElement;
|
||||
if (!media.src) media.src = val;
|
||||
}
|
||||
|
||||
// Background Image Promotion
|
||||
if (val.match(/^(https?:\/\/|\/\/|\/)/) && !name.includes('href')) {
|
||||
const bg = htmlEl.style.backgroundImage;
|
||||
if (!bg || bg === 'none') {
|
||||
htmlEl.style.backgroundImage = `url('${val}')`;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// B. Ensure basic structural elements are visible post-scroll
|
||||
const body = document.body;
|
||||
if (body) {
|
||||
body.style.setProperty('opacity', '1', 'important');
|
||||
body.style.setProperty('visibility', 'visible', 'important');
|
||||
}
|
||||
});
|
||||
|
||||
console.log('⏳ Waiting for network idle...');
|
||||
await page.waitForLoadState('networkidle');
|
||||
|
||||
// 1.5 FINAL SETTLEMENT: Let any scroll-triggered JS finish
|
||||
await page.waitForTimeout(1000);
|
||||
|
||||
// 2. Static Snapshot
|
||||
let content = await page.content();
|
||||
|
||||
// 3. Post-Snapshot Asset Discovery (Regex)
|
||||
// Catches assets that never triggered a network request but exist in the markup
|
||||
const regexPatterns = [
|
||||
/(?:src|href|url|data-[a-z-]+|srcset)=["']([^"'<>\s]+?\.(?:css|js|png|jpg|jpeg|gif|svg|woff2?|ttf|otf|mp4|webm|webp|ico)(?:\?[^"']*)?)["']/gi,
|
||||
// Capture CSS url() inside style blocks
|
||||
/url\(["']?([^"'\)]+)["']?\)/gi
|
||||
];
|
||||
|
||||
for (const pattern of regexPatterns) {
|
||||
let match;
|
||||
while ((match = pattern.exec(content)) !== null) {
|
||||
try { foundAssets.add(new URL(match[1], targetUrl).href); } catch { }
|
||||
}
|
||||
// Process CSS and save it
|
||||
const processedCss = await processCssRecursively(
|
||||
data,
|
||||
url,
|
||||
assetsDir,
|
||||
urlMap,
|
||||
);
|
||||
const relPath = sanitizePath(
|
||||
new URL(url).hostname + new URL(url).pathname,
|
||||
);
|
||||
fs.writeFileSync(path.join(assetsDir, relPath), processedCss);
|
||||
} catch {
|
||||
// Ignore CSS fetch/process errors
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Specific srcset parsing
|
||||
const srcsetRegex = /[a-z0-9-]+srcset=["']([^"']+)["']/gi;
|
||||
let match;
|
||||
while ((match = srcsetRegex.exec(content)) !== null) {
|
||||
match[1].split(',').forEach(rule => {
|
||||
const parts = rule.trim().split(/\s+/);
|
||||
if (parts[0] && !parts[0].startsWith('data:')) {
|
||||
try { foundAssets.add(new URL(parts[0], targetUrl).href); } catch { }
|
||||
}
|
||||
});
|
||||
console.log("🛠️ Finalizing Static Mirror...");
|
||||
let finalContent = content;
|
||||
|
||||
// A. Apply URL Map Replacements
|
||||
// Longer paths first to prevent partial replacement errors
|
||||
const sortedUrls = Object.keys(urlMap).sort((a, b) => b.length - a.length);
|
||||
if (sortedUrls.length > 0) {
|
||||
const escaped = sortedUrls.map((u) =>
|
||||
u.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"),
|
||||
);
|
||||
// Create a massive regex for single-pass replacement
|
||||
const masterRegex = new RegExp(`(${escaped.join("|")})`, "g");
|
||||
finalContent = finalContent.replace(
|
||||
masterRegex,
|
||||
(match) => urlMap[match] || match,
|
||||
);
|
||||
}
|
||||
|
||||
// B. Global Root-Relative Path Cleanup
|
||||
// Catches things like /wp-content/ that weren't distinct assets or were missed
|
||||
const commonDirs = [
|
||||
"/wp-content/",
|
||||
"/wp-includes/",
|
||||
"/assets/",
|
||||
"/static/",
|
||||
"/images/",
|
||||
];
|
||||
for (const dir of commonDirs) {
|
||||
const localDir = `./assets/${urlObj.hostname}${dir}`;
|
||||
finalContent = finalContent.split(`"${dir}`).join(`"${localDir}`);
|
||||
finalContent = finalContent.split(`'${dir}`).join(`'${localDir}`);
|
||||
finalContent = finalContent.split(`(${dir}`).join(`(${localDir}`);
|
||||
}
|
||||
|
||||
// C. Domain Nuke
|
||||
// Replace absolute links to the original domain with relative or #
|
||||
const domainPattern = new RegExp(
|
||||
`https?://(www\\.)?${urlObj.hostname.replace(/\./g, "\\.")}[^"']*`,
|
||||
"gi",
|
||||
);
|
||||
// We carefully only replace if it looks like a resource link, or neutralize if it's a navigation link
|
||||
// For simplicity and "solidness", we'll rely on the specific replacements above first.
|
||||
// This catch-all nuke ensures we don't leak requests.
|
||||
// Convert remaining absolute domain links to relative .
|
||||
finalContent = finalContent.replace(domainPattern, (match) => {
|
||||
// If we have a map for it, it should have been replaced.
|
||||
// If not, it's likely a navigation link or an uncaptured asset.
|
||||
// Safe fallback:
|
||||
return "./";
|
||||
});
|
||||
|
||||
// D. Static Stability & Cleanup
|
||||
// Remove tracking/analytics/lazy-load scripts that ruins stability
|
||||
finalContent = finalContent.replace(
|
||||
/<script\b[^>]*>([\s\S]*?)<\/script>/gi,
|
||||
(match, content) => {
|
||||
const lower = content.toLowerCase();
|
||||
if (
|
||||
lower.includes("google-analytics") ||
|
||||
lower.includes("gtag") ||
|
||||
lower.includes("fbq") ||
|
||||
lower.includes("lazy") ||
|
||||
lower.includes("tracker")
|
||||
) {
|
||||
return "";
|
||||
}
|
||||
return match;
|
||||
},
|
||||
);
|
||||
|
||||
console.log(`🔍 Processing ${foundAssets.size} discovered assets...`);
|
||||
|
||||
// 4. Download & Map
|
||||
for (const url of foundAssets) {
|
||||
const local = await downloadFile(url, assetsDir);
|
||||
if (local) {
|
||||
urlMap[url] = local;
|
||||
const clean = url.split('?')[0];
|
||||
urlMap[clean] = local;
|
||||
|
||||
// Handle CSS recursively
|
||||
if (clean.endsWith('.css')) {
|
||||
try {
|
||||
const { data } = await axios.get(url, { headers: { 'User-Agent': USER_AGENT } });
|
||||
// Process CSS and save it
|
||||
const processedCss = await processCssRecursively(data, url, assetsDir, urlMap);
|
||||
const relPath = sanitizePath(new URL(url).hostname + new URL(url).pathname);
|
||||
fs.writeFileSync(path.join(assetsDir, relPath), processedCss);
|
||||
} catch { }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log('🛠️ Finalizing Static Mirror...');
|
||||
let finalContent = content;
|
||||
|
||||
// A. Apply URL Map Replacements
|
||||
// Longer paths first to prevent partial replacement errors
|
||||
const sortedUrls = Object.keys(urlMap).sort((a, b) => b.length - a.length);
|
||||
if (sortedUrls.length > 0) {
|
||||
const escaped = sortedUrls.map(u => u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
|
||||
// Create a massive regex for single-pass replacement
|
||||
const masterRegex = new RegExp(`(${escaped.join('|')})`, 'g');
|
||||
finalContent = finalContent.replace(masterRegex, (match) => urlMap[match] || match);
|
||||
}
|
||||
|
||||
// B. Global Root-Relative Path Cleanup
|
||||
// Catches things like /wp-content/ that weren't distinct assets or were missed
|
||||
const commonDirs = ['/wp-content/', '/wp-includes/', '/assets/', '/static/', '/images/'];
|
||||
for (const dir of commonDirs) {
|
||||
const localDir = `./assets/${urlObj.hostname}${dir}`;
|
||||
finalContent = finalContent.split(`"${dir}`).join(`"${localDir}`);
|
||||
finalContent = finalContent.split(`'${dir}`).join(`'${localDir}`);
|
||||
finalContent = finalContent.split(`(${dir}`).join(`(${localDir}`);
|
||||
}
|
||||
|
||||
// C. Domain Nuke
|
||||
// Replace absolute links to the original domain with relative or #
|
||||
const domainPattern = new RegExp(`https?://(www\\.)?${urlObj.hostname.replace(/\./g, '\\.')}[^"']*`, 'gi');
|
||||
// We carefully only replace if it looks like a resource link, or neutralize if it's a navigation link
|
||||
// For simplicity and "solidness", we'll rely on the specific replacements above first.
|
||||
// This catch-all nuke ensures we don't leak requests.
|
||||
// Convert remaining absolute domain links to relative .
|
||||
finalContent = finalContent.replace(domainPattern, (match) => {
|
||||
// If we have a map for it, it should have been replaced.
|
||||
// If not, it's likely a navigation link or an uncaptured asset.
|
||||
// Safe fallback:
|
||||
return './';
|
||||
});
|
||||
|
||||
// D. Static Stability & Cleanup
|
||||
// Remove tracking/analytics/lazy-load scripts that ruins stability
|
||||
finalContent = finalContent.replace(/<script\b[^>]*>([\s\S]*?)<\/script>/gi, (match, content) => {
|
||||
const lower = content.toLowerCase();
|
||||
if (lower.includes('google-analytics') ||
|
||||
lower.includes('gtag') ||
|
||||
lower.includes('fbq') ||
|
||||
lower.includes('lazy') ||
|
||||
lower.includes('tracker')) {
|
||||
return '';
|
||||
}
|
||||
return match;
|
||||
});
|
||||
|
||||
// E. CSS Injections for Stability
|
||||
const headEnd = finalContent.indexOf('</head>');
|
||||
if (headEnd > -1) {
|
||||
const stabilityCss = `
|
||||
// E. CSS Injections for Stability
|
||||
const headEnd = finalContent.indexOf("</head>");
|
||||
if (headEnd > -1) {
|
||||
const stabilityCss = `
|
||||
<style>
|
||||
/* UNIVERSAL CLONE STABILIZATION */
|
||||
* {
|
||||
@@ -340,19 +416,21 @@ async function run() {
|
||||
cursor: default;
|
||||
}
|
||||
</style>`;
|
||||
finalContent = finalContent.slice(0, headEnd) + stabilityCss + finalContent.slice(headEnd);
|
||||
}
|
||||
|
||||
// Save
|
||||
const finalPath = path.join(domainDir, htmlFilename);
|
||||
fs.writeFileSync(finalPath, finalContent);
|
||||
console.log(`✅ SUCCESS: Cloned to ${finalPath}`);
|
||||
|
||||
} catch (err) {
|
||||
console.error('❌ FATAL ERROR:', err);
|
||||
} finally {
|
||||
await browser.close();
|
||||
finalContent =
|
||||
finalContent.slice(0, headEnd) +
|
||||
stabilityCss +
|
||||
finalContent.slice(headEnd);
|
||||
}
|
||||
|
||||
// Save
|
||||
const finalPath = path.join(domainDir, htmlFilename);
|
||||
fs.writeFileSync(finalPath, finalContent);
|
||||
console.log(`✅ SUCCESS: Cloned to ${finalPath}`);
|
||||
} catch (err) {
|
||||
console.error("❌ FATAL ERROR:", err);
|
||||
} finally {
|
||||
await browser.close();
|
||||
}
|
||||
}
|
||||
|
||||
run();
|
||||
|
||||
@@ -1,228 +1,223 @@
|
||||
// @ts-ignore
|
||||
import scrape from 'website-scraper';
|
||||
import scrape from "website-scraper";
|
||||
// @ts-ignore
|
||||
import PuppeteerPlugin from 'website-scraper-puppeteer';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import fs from 'node:fs';
|
||||
import PuppeteerPlugin from "website-scraper-puppeteer";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import fs from "node:fs";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
class CustomFilenameGeneratorPlugin {
|
||||
apply(registerAction: any) {
|
||||
registerAction('generateFilename', ({ resource }: any) => {
|
||||
const url = new URL(resource.url);
|
||||
const ext = path.extname(url.pathname);
|
||||
async function run() {
|
||||
const targetUrl = process.argv[2];
|
||||
if (!targetUrl) {
|
||||
console.error("Usage: npm run clone-website <URL> [output-dir]");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Clean the path
|
||||
let safePath = url.pathname;
|
||||
if (safePath.endsWith('/')) {
|
||||
safePath += 'index.html';
|
||||
} else if (!ext && !resource.isHtml()) {
|
||||
// If no extension and not HTML, guess based on content type?
|
||||
// But usually safe to leave as is or add extension if known.
|
||||
} else if (!ext && resource.isHtml()) {
|
||||
safePath += '.html';
|
||||
const urlObj = new URL(targetUrl);
|
||||
const domain = urlObj.hostname;
|
||||
const safeDomain = domain.replace(/[^a-z0-9-]/gi, "_");
|
||||
const outputDir = process.argv[3]
|
||||
? path.resolve(process.cwd(), process.argv[3])
|
||||
: path.resolve(__dirname, "../cloned-websites", safeDomain);
|
||||
|
||||
if (fs.existsSync(outputDir)) {
|
||||
console.log(`Cleaning existing directory: ${outputDir}`);
|
||||
fs.rmSync(outputDir, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
console.log(`🚀 Starting recursive clone of ${targetUrl}`);
|
||||
console.log(`📂 Output: ${outputDir}`);
|
||||
|
||||
const options = {
|
||||
urls: [targetUrl],
|
||||
directory: outputDir,
|
||||
recursive: true,
|
||||
maxDepth: 5,
|
||||
// Custom filename generation to avoid "https:/" folders
|
||||
plugins: [
|
||||
new PuppeteerPlugin({
|
||||
launchOptions: {
|
||||
headless: true,
|
||||
args: [
|
||||
"--no-sandbox",
|
||||
"--disable-setuid-sandbox",
|
||||
"--disable-dev-shm-usage",
|
||||
],
|
||||
},
|
||||
scrollToBottom: { timeout: 10000, viewportN: 10 },
|
||||
blockNavigation: false,
|
||||
}),
|
||||
new (class LoggerPlugin {
|
||||
apply(registerAction: any) {
|
||||
registerAction("onResourceSaved", ({ resource }: any) => {
|
||||
console.log(` 💾 Saved: ${resource.url} -> ${resource.filename}`);
|
||||
});
|
||||
registerAction("onResourceError", ({ resource, error }: any) => {
|
||||
console.error(` ❌ Error: ${resource.url} - ${error.message}`);
|
||||
});
|
||||
}
|
||||
})(),
|
||||
new (class FilenamePlugin {
|
||||
apply(registerAction: any) {
|
||||
registerAction("generateFilename", ({ resource }: any) => {
|
||||
const u = new URL(resource.url);
|
||||
let filename = u.pathname;
|
||||
|
||||
// normalize
|
||||
if (filename.endsWith("/")) filename += "index.html";
|
||||
else if (!path.extname(filename) && resource.url.includes(domain))
|
||||
filename += "/index.html"; // Assume folder if internal link without ext
|
||||
|
||||
// If it's an external asset, put it in a separate folder
|
||||
if (u.hostname !== domain) {
|
||||
filename = `_external/${u.hostname}${filename}`;
|
||||
}
|
||||
|
||||
// Handle query strings if needed (simplifying by ignoring them for static local files usually better,
|
||||
// unless they determine content. For a clean clone, we usually ignore unique query params)
|
||||
// But if the site relies on routing via query params (e.g. ?page=2), we might want to encode them.
|
||||
// For now, let's keep it simple and clean.
|
||||
// Sanitize filename
|
||||
filename = filename
|
||||
.split("/")
|
||||
.map((part) => part.replace(/[^a-z0-9._-]/gi, "_"))
|
||||
.join("/");
|
||||
|
||||
// Remove leading slash
|
||||
if (safePath.startsWith('/')) safePath = safePath.substring(1);
|
||||
if (filename.startsWith("/")) filename = filename.substring(1);
|
||||
|
||||
// Sanitization
|
||||
safePath = safePath.replace(/[:*?"<>|]/g, '_');
|
||||
// Handle "Unnamed page" by checking if empty
|
||||
if (!filename || filename === "index.html")
|
||||
return { filename: "index.html" };
|
||||
|
||||
// External assets go to a separate folder to avoid collision
|
||||
// We can detect external by checking if the resource parent is different?
|
||||
// Actually, simply using the hostname mapping is safer.
|
||||
|
||||
// However, the USER wants "local cloned pages".
|
||||
// If we just use the path, we merge everything into one root.
|
||||
// If there are collision (e.g. same path on different domains), this is bad.
|
||||
// But typically we clone ONE site.
|
||||
|
||||
return { filename: safePath };
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function run() {
|
||||
const targetUrl = process.argv[2];
|
||||
if (!targetUrl) {
|
||||
console.error('Usage: npm run clone-website <URL> [output-dir]');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const urlObj = new URL(targetUrl);
|
||||
const domain = urlObj.hostname;
|
||||
const safeDomain = domain.replace(/[^a-z0-9-]/gi, '_');
|
||||
const outputDir = process.argv[3]
|
||||
? path.resolve(process.cwd(), process.argv[3])
|
||||
: path.resolve(__dirname, '../cloned-websites', safeDomain);
|
||||
|
||||
if (fs.existsSync(outputDir)) {
|
||||
console.log(`Cleaning existing directory: ${outputDir}`);
|
||||
fs.rmSync(outputDir, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
console.log(`🚀 Starting recursive clone of ${targetUrl}`);
|
||||
console.log(`📂 Output: ${outputDir}`);
|
||||
|
||||
const options = {
|
||||
urls: [targetUrl],
|
||||
directory: outputDir,
|
||||
recursive: true,
|
||||
maxDepth: 5,
|
||||
// Custom filename generation to avoid "https:/" folders
|
||||
plugins: [
|
||||
new PuppeteerPlugin({
|
||||
launchOptions: {
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
|
||||
},
|
||||
scrollToBottom: { timeout: 10000, viewportN: 10 },
|
||||
blockNavigation: false
|
||||
}),
|
||||
new class LoggerPlugin {
|
||||
apply(registerAction: any) {
|
||||
registerAction('onResourceSaved', ({ resource }: any) => {
|
||||
console.log(` 💾 Saved: ${resource.url} -> ${resource.filename}`);
|
||||
});
|
||||
registerAction('onResourceError', ({ resource, error }: any) => {
|
||||
console.error(` ❌ Error: ${resource.url} - ${error.message}`);
|
||||
});
|
||||
}
|
||||
},
|
||||
new class FilenamePlugin {
|
||||
apply(registerAction: any) {
|
||||
registerAction('generateFilename', ({ resource }: any) => {
|
||||
const u = new URL(resource.url);
|
||||
let filename = u.pathname;
|
||||
|
||||
// normalize
|
||||
if (filename.endsWith('/')) filename += 'index.html';
|
||||
else if (!path.extname(filename) && resource.url.includes(domain)) filename += '/index.html'; // Assume folder if internal link without ext
|
||||
|
||||
// If it's an external asset, put it in a separate folder
|
||||
if (u.hostname !== domain) {
|
||||
filename = `_external/${u.hostname}${filename}`;
|
||||
}
|
||||
|
||||
// Sanitize filename
|
||||
filename = filename.split('/').map(part => part.replace(/[^a-z0-9._-]/gi, '_')).join('/');
|
||||
|
||||
// Remove leading slash
|
||||
if (filename.startsWith('/')) filename = filename.substring(1);
|
||||
|
||||
// Handle "Unnamed page" by checking if empty
|
||||
if (!filename || filename === 'index.html') return { filename: 'index.html' };
|
||||
|
||||
return { filename };
|
||||
});
|
||||
}
|
||||
}
|
||||
],
|
||||
|
||||
urlFilter: (url: string) => {
|
||||
const u = new URL(url);
|
||||
const isTargetDomain = u.hostname === domain;
|
||||
const isGoogleFonts = u.hostname.includes('fonts.googleapis.com') || u.hostname.includes('fonts.gstatic.com');
|
||||
// Allow assets from anywhere
|
||||
const isAsset = /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json|webp)$/i.test(u.pathname);
|
||||
// Allow fonts/css from common CDNs if standard extension check fails
|
||||
const isCommonAsset = u.pathname.includes('/css/') || u.pathname.includes('/js/') || u.pathname.includes('/static/') || u.pathname.includes('/assets/') || u.pathname.includes('/uploads/');
|
||||
|
||||
return isTargetDomain || isAsset || isCommonAsset || isGoogleFonts;
|
||||
},
|
||||
|
||||
|
||||
sources: [
|
||||
{ selector: 'img', attr: 'src' },
|
||||
{ selector: 'img', attr: 'srcset' },
|
||||
{ selector: 'source', attr: 'src' },
|
||||
{ selector: 'source', attr: 'srcset' },
|
||||
{ selector: 'link[rel="stylesheet"]', attr: 'href' },
|
||||
{ selector: 'link[rel="preload"]', attr: 'href' },
|
||||
{ selector: 'link[rel="prefetch"]', attr: 'href' },
|
||||
{ selector: 'script', attr: 'src' },
|
||||
{ selector: 'video', attr: 'src' },
|
||||
{ selector: 'video', attr: 'poster' },
|
||||
{ selector: 'iframe', attr: 'src' },
|
||||
{ selector: 'link[rel*="icon"]', attr: 'href' },
|
||||
{ selector: 'link[rel="manifest"]', attr: 'href' },
|
||||
{ selector: 'meta[property="og:image"]', attr: 'content' }
|
||||
],
|
||||
|
||||
request: {
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
|
||||
}
|
||||
return { filename };
|
||||
});
|
||||
}
|
||||
};
|
||||
})(),
|
||||
],
|
||||
|
||||
try {
|
||||
// @ts-ignore
|
||||
const result = await scrape(options);
|
||||
console.log(`\n✅ Successfully cloned ${result.length} resources to ${outputDir}`);
|
||||
urlFilter: (url: string) => {
|
||||
const u = new URL(url);
|
||||
const isTargetDomain = u.hostname === domain;
|
||||
const isGoogleFonts =
|
||||
u.hostname.includes("fonts.googleapis.com") ||
|
||||
u.hostname.includes("fonts.gstatic.com");
|
||||
// Allow assets from anywhere
|
||||
const isAsset =
|
||||
/\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json|webp)$/i.test(
|
||||
u.pathname,
|
||||
);
|
||||
// Allow fonts/css from common CDNs if standard extension check fails
|
||||
const isCommonAsset =
|
||||
u.pathname.includes("/css/") ||
|
||||
u.pathname.includes("/js/") ||
|
||||
u.pathname.includes("/static/") ||
|
||||
u.pathname.includes("/assets/") ||
|
||||
u.pathname.includes("/uploads/");
|
||||
|
||||
// Post-processing: Sanitize HTML to remove Next.js hydration scripts
|
||||
// This prevents the static site from trying to "hydrate" and breaking images/links
|
||||
console.log('🧹 Sanitizing HTML files...');
|
||||
sanitizeHtmlFiles(outputDir);
|
||||
return isTargetDomain || isAsset || isCommonAsset || isGoogleFonts;
|
||||
},
|
||||
|
||||
console.log(`open "${path.join(outputDir, 'index.html')}"`);
|
||||
} catch (error) {
|
||||
console.error('❌ Error cloning website:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
sources: [
|
||||
{ selector: "img", attr: "src" },
|
||||
{ selector: "img", attr: "srcset" },
|
||||
{ selector: "source", attr: "src" },
|
||||
{ selector: "source", attr: "srcset" },
|
||||
{ selector: 'link[rel="stylesheet"]', attr: "href" },
|
||||
{ selector: 'link[rel="preload"]', attr: "href" },
|
||||
{ selector: 'link[rel="prefetch"]', attr: "href" },
|
||||
{ selector: "script", attr: "src" },
|
||||
{ selector: "video", attr: "src" },
|
||||
{ selector: "video", attr: "poster" },
|
||||
{ selector: "iframe", attr: "src" },
|
||||
{ selector: 'link[rel*="icon"]', attr: "href" },
|
||||
{ selector: 'link[rel="manifest"]', attr: "href" },
|
||||
{ selector: 'meta[property="og:image"]', attr: "content" },
|
||||
],
|
||||
|
||||
request: {
|
||||
headers: {
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
try {
|
||||
// @ts-ignore
|
||||
const result = await scrape(options);
|
||||
console.log(
|
||||
`\n✅ Successfully cloned ${result.length} resources to ${outputDir}`,
|
||||
);
|
||||
|
||||
// Post-processing: Sanitize HTML to remove Next.js hydration scripts
|
||||
// This prevents the static site from trying to "hydrate" and breaking images/links
|
||||
console.log("🧹 Sanitizing HTML files...");
|
||||
sanitizeHtmlFiles(outputDir);
|
||||
|
||||
console.log(`open "${path.join(outputDir, "index.html")}"`);
|
||||
} catch (error) {
|
||||
console.error("❌ Error cloning website:", error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
function sanitizeHtmlFiles(dir: string) {
|
||||
const files = fs.readdirSync(dir);
|
||||
for (const file of files) {
|
||||
const fullPath = path.join(dir, file);
|
||||
if (fs.statSync(fullPath).isDirectory()) {
|
||||
sanitizeHtmlFiles(fullPath);
|
||||
} else if (file.endsWith('.html')) {
|
||||
let content = fs.readFileSync(fullPath, 'utf8');
|
||||
const files = fs.readdirSync(dir);
|
||||
for (const file of files) {
|
||||
const fullPath = path.join(dir, file);
|
||||
if (fs.statSync(fullPath).isDirectory()) {
|
||||
sanitizeHtmlFiles(fullPath);
|
||||
} else if (file.endsWith(".html")) {
|
||||
let content = fs.readFileSync(fullPath, "utf8");
|
||||
|
||||
// Remove Next.js data script
|
||||
content = content.replace(/<script id="__NEXT_DATA__"[\s\S]*?<\/script>/gi, '');
|
||||
// Remove Next.js data script
|
||||
content = content.replace(
|
||||
/<script id="__NEXT_DATA__"[\s\S]*?<\/script>/gi,
|
||||
"",
|
||||
);
|
||||
|
||||
// Remove Next.js chunk scripts (hydration)
|
||||
// match <script src="..._next/static/chunks..." ...
|
||||
content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/chunks\/[^"]*"[^>]*><\/script>/gi, '');
|
||||
content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/[^"]*Manifest\.js"[^>]*><\/script>/gi, '');
|
||||
// Remove Next.js chunk scripts (hydration)
|
||||
// match <script src="..._next/static/chunks..." ...
|
||||
content = content.replace(
|
||||
/<script[^>]+src="[^"]*\/_next\/static\/chunks\/[^"]*"[^>]*><\/script>/gi,
|
||||
"",
|
||||
);
|
||||
content = content.replace(
|
||||
/<script[^>]+src="[^"]*\/_next\/static\/[^"]*Manifest\.js"[^>]*><\/script>/gi,
|
||||
"",
|
||||
);
|
||||
|
||||
// Convert Breeze dynamic script/styles into actual tags if possible
|
||||
// match <div class="breeze-scripts-load" ...>URL</div>
|
||||
content = content.replace(/<div[^>]+class="breeze-scripts-load"[^>]*>([^<]+)<\/div>/gi, (match, url) => {
|
||||
if (url.endsWith('.css')) return `<link rel="stylesheet" href="${url}">`;
|
||||
return `<script src="${url}"></script>`;
|
||||
});
|
||||
// Convert Breeze dynamic script/styles into actual tags if possible
|
||||
// match <div class="breeze-scripts-load" ...>URL</div>
|
||||
content = content.replace(
|
||||
/<div[^>]+class="breeze-scripts-load"[^>]*>([^<]+)<\/div>/gi,
|
||||
(match, url) => {
|
||||
if (url.endsWith(".css"))
|
||||
return `<link rel="stylesheet" href="${url}">`;
|
||||
return `<script src="${url}"></script>`;
|
||||
},
|
||||
);
|
||||
|
||||
// Inject Fonts (Fix for missing dynamic fonts)
|
||||
// We inject Inter and Montserrat as safe defaults for industrial/modern sites
|
||||
// Check specifically for a stylesheet link to google fonts
|
||||
const hasGoogleFontStylesheet = /<link[^>]+rel="stylesheet"[^>]+href="[^"]*fonts\.googleapis\.com/i.test(content);
|
||||
if (!hasGoogleFontStylesheet) {
|
||||
const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Montserrat:wght@300;400;500;600;700&display=swap">`;
|
||||
const styleBlock = `<style>
|
||||
// Inject Fonts (Fix for missing dynamic fonts)
|
||||
// We inject Inter and Montserrat as safe defaults for industrial/modern sites
|
||||
// Check specifically for a stylesheet link to google fonts
|
||||
const hasGoogleFontStylesheet =
|
||||
/<link[^>]+rel="stylesheet"[^>]+href="[^"]*fonts\.googleapis\.com/i.test(
|
||||
content,
|
||||
);
|
||||
if (!hasGoogleFontStylesheet) {
|
||||
const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Montserrat:wght@300;400;500;600;700&display=swap">`;
|
||||
const styleBlock = `<style>
|
||||
:root { --main-font: 'Inter', sans-serif; --heading-font: 'Montserrat', sans-serif; }
|
||||
body, .body-font, p, span, li, a { font-family: var(--main-font) !important; }
|
||||
h1, h2, h3, h4, h5, h6, .title-font, .heading-font { font-family: var(--heading-font) !important; }
|
||||
</style>`;
|
||||
content = content.replace('</head>', `${fontLink}${styleBlock}</head>`);
|
||||
}
|
||||
content = content.replace("</head>", `${fontLink}${styleBlock}</head>`);
|
||||
}
|
||||
|
||||
// Force column layout on product pages
|
||||
if (content.includes('class="products')) {
|
||||
const layoutScript = `
|
||||
// Force column layout on product pages
|
||||
if (content.includes('class="products')) {
|
||||
const layoutScript = `
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const products = document.querySelector('.products');
|
||||
@@ -233,12 +228,12 @@ function sanitizeHtmlFiles(dir: string) {
|
||||
}
|
||||
});
|
||||
</script>`;
|
||||
content = content.replace('</body>', `${layoutScript}</body>`);
|
||||
}
|
||||
content = content.replace("</body>", `${layoutScript}</body>`);
|
||||
}
|
||||
|
||||
fs.writeFileSync(fullPath, content);
|
||||
}
|
||||
fs.writeFileSync(fullPath, content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
run();
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import scrape from 'website-scraper';
|
||||
import PuppeteerPlugin from 'website-scraper-puppeteer';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import fs from 'fs';
|
||||
import scrape from "website-scraper";
|
||||
import PuppeteerPlugin from "website-scraper-puppeteer";
|
||||
import path from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import fs from "fs";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
@@ -11,45 +11,55 @@ const __dirname = path.dirname(__filename);
|
||||
class PortfolioPlugin {
|
||||
apply(registerAction: any) {
|
||||
// 1. Add more sources before starting
|
||||
registerAction('beforeStart', ({ options }: any) => {
|
||||
registerAction("beforeStart", ({ options }: any) => {
|
||||
if (!options.sources) options.sources = [];
|
||||
options.sources.push({ selector: 'img', attr: 'data-nimg' });
|
||||
options.sources.push({ selector: 'img', attr: 'data-src' });
|
||||
options.sources.push({ selector: 'img', attr: 'data-srcset' });
|
||||
options.sources.push({ selector: 'video', attr: 'poster' });
|
||||
options.sources.push({ selector: 'source', attr: 'data-srcset' });
|
||||
options.sources.push({ selector: '[style*="background-image"]', attr: 'style' });
|
||||
options.sources.push({ selector: 'link[as="font"]', attr: 'href' });
|
||||
options.sources.push({ selector: 'link[as="image"]', attr: 'href' });
|
||||
options.sources.push({ selector: 'link[as="style"]', attr: 'href' });
|
||||
options.sources.push({ selector: 'link[as="script"]', attr: 'href' });
|
||||
options.sources.push({ selector: "img", attr: "data-nimg" });
|
||||
options.sources.push({ selector: "img", attr: "data-src" });
|
||||
options.sources.push({ selector: "img", attr: "data-srcset" });
|
||||
options.sources.push({ selector: "video", attr: "poster" });
|
||||
options.sources.push({ selector: "source", attr: "data-srcset" });
|
||||
options.sources.push({
|
||||
selector: '[style*="background-image"]',
|
||||
attr: "style",
|
||||
});
|
||||
options.sources.push({ selector: 'link[as="font"]', attr: "href" });
|
||||
options.sources.push({ selector: 'link[as="image"]', attr: "href" });
|
||||
options.sources.push({ selector: 'link[as="style"]', attr: "href" });
|
||||
options.sources.push({ selector: 'link[as="script"]', attr: "href" });
|
||||
});
|
||||
|
||||
// 2. Sanitize filenames and handle Next.js optimized images
|
||||
registerAction('generateFilename', ({ resource, filename }: any) => {
|
||||
registerAction("generateFilename", ({ resource, filename }: any) => {
|
||||
const url = resource.getUrl();
|
||||
let result = filename;
|
||||
|
||||
// Handle Next.js optimized images: /_next/image?url=...&w=...
|
||||
if (url.includes('/_next/image')) {
|
||||
if (url.includes("/_next/image")) {
|
||||
try {
|
||||
const urlParams = new URL(url).searchParams;
|
||||
const originalUrl = urlParams.get('url');
|
||||
const originalUrl = urlParams.get("url");
|
||||
if (originalUrl) {
|
||||
const cleanPath = originalUrl.split('?')[0];
|
||||
const ext = path.extname(cleanPath) || '.webp';
|
||||
const cleanPath = originalUrl.split("?")[0];
|
||||
const ext = path.extname(cleanPath) || ".webp";
|
||||
const name = path.basename(cleanPath, ext);
|
||||
const width = urlParams.get('w') || 'auto';
|
||||
const width = urlParams.get("w") || "auto";
|
||||
result = `_next/optimized/${name}-${width}${ext}`;
|
||||
}
|
||||
} catch (e) {}
|
||||
} catch (e) {
|
||||
// Ignore invalid optimized image URLs
|
||||
}
|
||||
}
|
||||
|
||||
// CRITICAL MAC FIX: Replace .app with -app in all paths to prevent hidden Application Bundles
|
||||
// We split by / to ensure we only replace .app at the end of a directory name or filename
|
||||
result = result.split('/').map((segment: string) =>
|
||||
segment.endsWith('.app') ? segment.replace(/\.app$/, '-app') : segment
|
||||
).join('/');
|
||||
result = result
|
||||
.split("/")
|
||||
.map((segment: string) =>
|
||||
segment.endsWith(".app")
|
||||
? segment.replace(/\.app$/, "-app")
|
||||
: segment,
|
||||
)
|
||||
.join("/");
|
||||
|
||||
return { filename: result };
|
||||
});
|
||||
@@ -59,19 +69,23 @@ class PortfolioPlugin {
|
||||
async function cloneWebsite() {
|
||||
const url = process.argv[2];
|
||||
if (!url) {
|
||||
console.error('Please provide a URL as an argument.');
|
||||
console.error("Please provide a URL as an argument.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const domain = new URL(url).hostname;
|
||||
let outputDirName = process.argv[3] || domain.replace(/\./g, '-');
|
||||
|
||||
let outputDirName = process.argv[3] || domain.replace(/\./g, "-");
|
||||
|
||||
// Sanitize top-level folder name for Mac
|
||||
if (outputDirName.endsWith('.app')) {
|
||||
outputDirName = outputDirName.replace(/\.app$/, '-app');
|
||||
if (outputDirName.endsWith(".app")) {
|
||||
outputDirName = outputDirName.replace(/\.app$/, "-app");
|
||||
}
|
||||
|
||||
const outputDir = path.resolve(__dirname, '../cloned-websites', outputDirName);
|
||||
|
||||
const outputDir = path.resolve(
|
||||
__dirname,
|
||||
"../cloned-websites",
|
||||
outputDirName,
|
||||
);
|
||||
|
||||
if (fs.existsSync(outputDir)) {
|
||||
fs.rmSync(outputDir, { recursive: true, force: true });
|
||||
@@ -88,61 +102,84 @@ async function cloneWebsite() {
|
||||
requestConcurrency: 10,
|
||||
plugins: [
|
||||
new PuppeteerPlugin({
|
||||
launchOptions: { headless: true, args: ['--no-sandbox'] },
|
||||
gotoOptions: { waitUntil: 'networkidle0', timeout: 60000 },
|
||||
scrollToBottom: { timeout: 20000, viewportN: 20 },
|
||||
launchOptions: { headless: true, args: ["--no-sandbox"] },
|
||||
gotoOptions: { waitUntil: "networkidle0", timeout: 60000 },
|
||||
scrollToBottom: { timeout: 20000, viewportN: 20 },
|
||||
}),
|
||||
new PortfolioPlugin()
|
||||
new PortfolioPlugin(),
|
||||
],
|
||||
sources: [
|
||||
{ selector: 'img', attr: 'src' },
|
||||
{ selector: 'img', attr: 'srcset' },
|
||||
{ selector: 'img', attr: 'data-src' },
|
||||
{ selector: 'img', attr: 'data-srcset' },
|
||||
{ selector: 'link[rel="stylesheet"]', attr: 'href' },
|
||||
{ selector: 'link[rel*="icon"]', attr: 'href' },
|
||||
{ selector: 'script', attr: 'src' },
|
||||
{ selector: 'link[rel="preload"]', attr: 'href' },
|
||||
{ selector: 'link[rel="prefetch"]', attr: 'href' },
|
||||
{ selector: 'link[rel="modulepreload"]', attr: 'href' },
|
||||
{ selector: 'link[rel="apple-touch-icon"]', attr: 'href' },
|
||||
{ selector: 'link[rel="mask-icon"]', attr: 'href' },
|
||||
{ selector: 'source', attr: 'src' },
|
||||
{ selector: 'source', attr: 'srcset' },
|
||||
{ selector: 'video', attr: 'src' },
|
||||
{ selector: 'video', attr: 'poster' },
|
||||
{ selector: 'audio', attr: 'src' },
|
||||
{ selector: 'iframe', attr: 'src' },
|
||||
{ selector: 'meta[property="og:image"]', attr: 'content' },
|
||||
{ selector: 'meta[name="twitter:image"]', attr: 'content' },
|
||||
{ selector: '[style]', attr: 'style' },
|
||||
{ selector: "img", attr: "src" },
|
||||
{ selector: "img", attr: "srcset" },
|
||||
{ selector: "img", attr: "data-src" },
|
||||
{ selector: "img", attr: "data-srcset" },
|
||||
{ selector: 'link[rel="stylesheet"]', attr: "href" },
|
||||
{ selector: 'link[rel*="icon"]', attr: "href" },
|
||||
{ selector: "script", attr: "src" },
|
||||
{ selector: 'link[rel="preload"]', attr: "href" },
|
||||
{ selector: 'link[rel="prefetch"]', attr: "href" },
|
||||
{ selector: 'link[rel="modulepreload"]', attr: "href" },
|
||||
{ selector: 'link[rel="apple-touch-icon"]', attr: "href" },
|
||||
{ selector: 'link[rel="mask-icon"]', attr: "href" },
|
||||
{ selector: "source", attr: "src" },
|
||||
{ selector: "source", attr: "srcset" },
|
||||
{ selector: "video", attr: "src" },
|
||||
{ selector: "video", attr: "poster" },
|
||||
{ selector: "audio", attr: "src" },
|
||||
{ selector: "iframe", attr: "src" },
|
||||
{ selector: 'meta[property="og:image"]', attr: "content" },
|
||||
{ selector: 'meta[name="twitter:image"]', attr: "content" },
|
||||
{ selector: "[style]", attr: "style" },
|
||||
],
|
||||
urlFilter: (link: string) => {
|
||||
const isAsset = /\.(js|css|jpg|jpeg|png|gif|svg|webp|woff|woff2|ttf|eot|otf|mp4|webm|mov|ogg|pdf|ico)(\?.*)?$/i.test(link);
|
||||
const isNextAsset = link.includes('/_next/');
|
||||
const isSameDomain = link.startsWith(url) || link.startsWith('/') || !link.includes('://') || link.includes(domain);
|
||||
const isGoogleTagManager = link.includes('googletagmanager.com');
|
||||
const isAnalytics = link.includes('analytics.mintel.me');
|
||||
const isVercelApp = link.includes('vercel.app');
|
||||
const isDataUrl = link.startsWith('data:');
|
||||
const isMailto = link.startsWith('mailto:');
|
||||
const isTel = link.startsWith('tel:');
|
||||
return (isAsset || isNextAsset || isSameDomain || isGoogleTagManager || isAnalytics || isVercelApp) && !isDataUrl && !isMailto && !isTel;
|
||||
const isAsset =
|
||||
/\.(js|css|jpg|jpeg|png|gif|svg|webp|woff|woff2|ttf|eot|otf|mp4|webm|mov|ogg|pdf|ico)(\?.*)?$/i.test(
|
||||
link,
|
||||
);
|
||||
const isNextAsset = link.includes("/_next/");
|
||||
const isSameDomain =
|
||||
link.startsWith(url) ||
|
||||
link.startsWith("/") ||
|
||||
!link.includes("://") ||
|
||||
link.includes(domain);
|
||||
const isGoogleTagManager = link.includes("googletagmanager.com");
|
||||
const isAnalytics = link.includes("analytics.mintel.me");
|
||||
const isVercelApp = link.includes("vercel.app");
|
||||
const isDataUrl = link.startsWith("data:");
|
||||
const isMailto = link.startsWith("mailto:");
|
||||
const isTel = link.startsWith("tel:");
|
||||
return (
|
||||
(isAsset ||
|
||||
isNextAsset ||
|
||||
isSameDomain ||
|
||||
isGoogleTagManager ||
|
||||
isAnalytics ||
|
||||
isVercelApp) &&
|
||||
!isDataUrl &&
|
||||
!isMailto &&
|
||||
!isTel
|
||||
);
|
||||
},
|
||||
filenameGenerator: 'bySiteStructure',
|
||||
filenameGenerator: "bySiteStructure",
|
||||
subdirectories: [
|
||||
{ directory: 'img', extensions: ['.jpg', '.png', '.svg', '.webp', '.gif', '.ico'] },
|
||||
{ directory: 'js', extensions: ['.js'] },
|
||||
{ directory: 'css', extensions: ['.css'] },
|
||||
{ directory: 'fonts', extensions: ['.woff', '.woff2', '.ttf', '.eot', '.otf'] },
|
||||
{ directory: 'videos', extensions: ['.mp4', '.webm', '.mov', '.ogg'] },
|
||||
{
|
||||
directory: "img",
|
||||
extensions: [".jpg", ".png", ".svg", ".webp", ".gif", ".ico"],
|
||||
},
|
||||
{ directory: "js", extensions: [".js"] },
|
||||
{ directory: "css", extensions: [".css"] },
|
||||
{
|
||||
directory: "fonts",
|
||||
extensions: [".woff", ".woff2", ".ttf", ".eot", ".otf"],
|
||||
},
|
||||
{ directory: "videos", extensions: [".mp4", ".webm", ".mov", ".ogg"] },
|
||||
],
|
||||
});
|
||||
|
||||
console.log('✅ Website cloned successfully!');
|
||||
console.log("✅ Website cloned successfully!");
|
||||
console.log(`Location: ${outputDir}`);
|
||||
} catch (error) {
|
||||
console.error('❌ Error cloning website:', error);
|
||||
console.error("❌ Error cloning website:", error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,10 +4,7 @@ import * as readline from "node:readline/promises";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { createElement } from "react";
|
||||
import { renderToFile } from "@react-pdf/renderer";
|
||||
import {
|
||||
calculatePositions,
|
||||
calculateTotals,
|
||||
} from "../src/logic/pricing/calculator.js";
|
||||
import { calculateTotals } from "../src/logic/pricing/calculator.js";
|
||||
import { CombinedQuotePDF } from "../src/components/CombinedQuotePDF.js";
|
||||
import { initialState, PRICING } from "../src/logic/pricing/constants.js";
|
||||
import {
|
||||
@@ -18,7 +15,6 @@ import {
|
||||
} from "../src/logic/content-provider.js";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
async function main() {
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import { execSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import axios from "axios";
|
||||
import * as cheerio from "cheerio";
|
||||
import { execSync } from "child_process";
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
|
||||
/**
|
||||
* PageSpeed Test Script
|
||||
@@ -13,10 +13,15 @@ import * as path from 'path';
|
||||
*/
|
||||
|
||||
const targetUrl =
|
||||
process.argv[2] || process.env.NEXT_PUBLIC_BASE_URL || 'https://testing.mintel.me';
|
||||
const limit = process.env.PAGESPEED_LIMIT ? parseInt(process.env.PAGESPEED_LIMIT) : 20;
|
||||
const gatekeeperPassword = process.env.GATEKEEPER_PASSWORD || 'mintel';
|
||||
const gatekeeperCookie = process.env.AUTH_COOKIE_NAME || 'mintel_gatekeeper_session';
|
||||
process.argv[2] ||
|
||||
process.env.NEXT_PUBLIC_BASE_URL ||
|
||||
"https://testing.mintel.me";
|
||||
const limit = process.env.PAGESPEED_LIMIT
|
||||
? parseInt(process.env.PAGESPEED_LIMIT)
|
||||
: 20;
|
||||
const gatekeeperPassword = process.env.GATEKEEPER_PASSWORD || "mintel";
|
||||
const gatekeeperCookie =
|
||||
process.env.AUTH_COOKIE_NAME || "mintel_gatekeeper_session";
|
||||
|
||||
async function main() {
|
||||
console.log(`\n🚀 Starting PageSpeed test for: ${targetUrl}`);
|
||||
@@ -24,7 +29,7 @@ async function main() {
|
||||
|
||||
try {
|
||||
// 1. Fetch Sitemap
|
||||
const sitemapUrl = `${targetUrl.replace(/\/$/, '')}/sitemap.xml`;
|
||||
const sitemapUrl = `${targetUrl.replace(/\/$/, "")}/sitemap.xml`;
|
||||
console.log(`📥 Fetching sitemap from ${sitemapUrl}...`);
|
||||
|
||||
// We might need to bypass gatekeeper for the sitemap fetch too
|
||||
@@ -36,21 +41,21 @@ async function main() {
|
||||
});
|
||||
|
||||
const $ = cheerio.load(response.data, { xmlMode: true });
|
||||
let urls = $('url loc')
|
||||
.map((i, el) => $(el).text())
|
||||
let urls = $("url loc")
|
||||
.map((_i, el) => $(el).text())
|
||||
.get();
|
||||
|
||||
// Cleanup, filter and normalize domains to targetUrl
|
||||
const urlPattern = /https?:\/\/[^\/]+/;
|
||||
urls = [...new Set(urls)]
|
||||
.filter((u) => u.startsWith('http'))
|
||||
.map((u) => u.replace(urlPattern, targetUrl.replace(/\/$/, '')))
|
||||
.filter((u) => u.startsWith("http"))
|
||||
.map((u) => u.replace(urlPattern, targetUrl.replace(/\/$/, "")))
|
||||
.sort();
|
||||
|
||||
console.log(`✅ Found ${urls.length} URLs in sitemap.`);
|
||||
|
||||
if (urls.length === 0) {
|
||||
console.error('❌ No URLs found in sitemap. Is the site up?');
|
||||
console.error("❌ No URLs found in sitemap. Is the site up?");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
@@ -59,7 +64,9 @@ async function main() {
|
||||
`⚠️ Too many pages (${urls.length}). Limiting to ${limit} representative pages.`,
|
||||
);
|
||||
// Try to pick a variety: home, some products, some blog posts
|
||||
const home = urls.filter((u) => u.endsWith('/de') || u.endsWith('/en') || u === targetUrl);
|
||||
const home = urls.filter(
|
||||
(u) => u.endsWith("/de") || u.endsWith("/en") || u === targetUrl,
|
||||
);
|
||||
const others = urls.filter((u) => !home.includes(u));
|
||||
urls = [...home, ...others.slice(0, limit - home.length)];
|
||||
}
|
||||
@@ -69,7 +76,7 @@ async function main() {
|
||||
|
||||
// 2. Prepare LHCI command
|
||||
// We use --collect.url multiple times
|
||||
const urlArgs = urls.map((u) => `--collect.url="${u}"`).join(' ');
|
||||
const urlArgs = urls.map((u) => `--collect.url="${u}"`).join(" ");
|
||||
|
||||
// Handle authentication for staging/testing
|
||||
// Lighthouse can set cookies via --collect.settings.extraHeaders
|
||||
@@ -77,12 +84,15 @@ async function main() {
|
||||
Cookie: `${gatekeeperCookie}=${gatekeeperPassword}`,
|
||||
});
|
||||
|
||||
const chromePath = process.env.CHROME_PATH || process.env.PUPPETEER_EXECUTABLE_PATH;
|
||||
const chromePathArg = chromePath ? `--collect.chromePath="${chromePath}"` : '';
|
||||
const chromePath =
|
||||
process.env.CHROME_PATH || process.env.PUPPETEER_EXECUTABLE_PATH;
|
||||
const chromePathArg = chromePath
|
||||
? `--collect.chromePath="${chromePath}"`
|
||||
: "";
|
||||
|
||||
// Clean up old reports
|
||||
if (fs.existsSync('.lighthouseci')) {
|
||||
fs.rmSync('.lighthouseci', { recursive: true, force: true });
|
||||
if (fs.existsSync(".lighthouseci")) {
|
||||
fs.rmSync(".lighthouseci", { recursive: true, force: true });
|
||||
}
|
||||
|
||||
// Using a more robust way to execute and capture output
|
||||
@@ -93,27 +103,31 @@ async function main() {
|
||||
|
||||
try {
|
||||
execSync(lhciCommand, {
|
||||
encoding: 'utf8',
|
||||
stdio: 'inherit',
|
||||
encoding: "utf8",
|
||||
stdio: "inherit",
|
||||
});
|
||||
} catch (err: any) {
|
||||
console.warn('⚠️ LHCI assertion finished with warnings or errors.');
|
||||
console.warn("⚠️ LHCI assertion finished with warnings or errors.");
|
||||
// We continue to show the table even if assertions failed
|
||||
}
|
||||
|
||||
// 3. Summarize Results (Local & Independent)
|
||||
const manifestPath = path.join(process.cwd(), '.lighthouseci', 'manifest.json');
|
||||
const manifestPath = path.join(
|
||||
process.cwd(),
|
||||
".lighthouseci",
|
||||
"manifest.json",
|
||||
);
|
||||
if (fs.existsSync(manifestPath)) {
|
||||
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
|
||||
const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
|
||||
console.log(`\n📊 PageSpeed Summary (FOSS - Local Report):\n`);
|
||||
|
||||
const summaryTable = manifest.map((entry: any) => {
|
||||
const s = entry.summary;
|
||||
return {
|
||||
URL: entry.url.replace(targetUrl, ''),
|
||||
URL: entry.url.replace(targetUrl, ""),
|
||||
Perf: Math.round(s.performance * 100),
|
||||
Acc: Math.round(s.accessibility * 100),
|
||||
BP: Math.round(s['best-practices'] * 100),
|
||||
BP: Math.round(s["best-practices"] * 100),
|
||||
SEO: Math.round(s.seo * 100),
|
||||
};
|
||||
});
|
||||
@@ -123,24 +137,30 @@ async function main() {
|
||||
// Calculate Average
|
||||
const avg = {
|
||||
Perf: Math.round(
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.Perf, 0) / summaryTable.length,
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.Perf, 0) /
|
||||
summaryTable.length,
|
||||
),
|
||||
Acc: Math.round(
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.Acc, 0) / summaryTable.length,
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.Acc, 0) /
|
||||
summaryTable.length,
|
||||
),
|
||||
BP: Math.round(
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.BP, 0) / summaryTable.length,
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.BP, 0) /
|
||||
summaryTable.length,
|
||||
),
|
||||
SEO: Math.round(
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.SEO, 0) / summaryTable.length,
|
||||
summaryTable.reduce((acc: any, curr: any) => acc + curr.SEO, 0) /
|
||||
summaryTable.length,
|
||||
),
|
||||
};
|
||||
|
||||
console.log(`\n📈 Average Scores:`);
|
||||
console.log(` Performance: ${avg.Perf > 90 ? '✅' : '⚠️'} ${avg.Perf}`);
|
||||
console.log(` Accessibility: ${avg.Acc > 90 ? '✅' : '⚠️'} ${avg.Acc}`);
|
||||
console.log(` Best Practices: ${avg.BP > 90 ? '✅' : '⚠️'} ${avg.BP}`);
|
||||
console.log(` SEO: ${avg.SEO > 90 ? '✅' : '⚠️'} ${avg.SEO}`);
|
||||
console.log(
|
||||
` Performance: ${avg.Perf > 90 ? "✅" : "⚠️"} ${avg.Perf}`,
|
||||
);
|
||||
console.log(` Accessibility: ${avg.Acc > 90 ? "✅" : "⚠️"} ${avg.Acc}`);
|
||||
console.log(` Best Practices: ${avg.BP > 90 ? "✅" : "⚠️"} ${avg.BP}`);
|
||||
console.log(` SEO: ${avg.SEO > 90 ? "✅" : "⚠️"} ${avg.SEO}`);
|
||||
}
|
||||
|
||||
console.log(`\n✨ PageSpeed tests completed successfully!`);
|
||||
|
||||
@@ -3,83 +3,90 @@
|
||||
* Verify components can be imported and used
|
||||
*/
|
||||
|
||||
import { join } from 'path';
|
||||
import { join } from "path";
|
||||
|
||||
console.log('🔍 Verifying Embed Components...\n');
|
||||
console.log("🔍 Verifying Embed Components...\n");
|
||||
|
||||
// Test 1: Check if components can be imported
|
||||
try {
|
||||
const YouTubePath = join(process.cwd(), 'src', 'components', 'YouTubeEmbed.astro');
|
||||
const TwitterPath = join(process.cwd(), 'src', 'components', 'TwitterEmbed.astro');
|
||||
const GenericPath = join(process.cwd(), 'src', 'components', 'GenericEmbed.astro');
|
||||
|
||||
console.log('✅ YouTubeEmbed.astro exists');
|
||||
console.log('✅ TwitterEmbed.astro exists');
|
||||
console.log('✅ GenericEmbed.astro exists');
|
||||
|
||||
console.log("✅ YouTubeEmbed.astro exists");
|
||||
console.log("✅ TwitterEmbed.astro exists");
|
||||
console.log("✅ GenericEmbed.astro exists");
|
||||
} catch (error) {
|
||||
console.log('❌ Component import error:', error);
|
||||
console.log("❌ Component import error:", error);
|
||||
}
|
||||
|
||||
// Test 2: Check demo post accessibility
|
||||
try {
|
||||
const demoPath = join(process.cwd(), 'src', 'pages', 'blog', 'embed-demo.astro');
|
||||
const { readFileSync } = require('fs');
|
||||
|
||||
if (require('fs').existsSync(demoPath)) {
|
||||
const content = readFileSync(demoPath, 'utf-8');
|
||||
|
||||
const demoPath = join(
|
||||
process.cwd(),
|
||||
"src",
|
||||
"pages",
|
||||
"blog",
|
||||
"embed-demo.astro",
|
||||
);
|
||||
const { readFileSync } = require("fs");
|
||||
|
||||
if (require("fs").existsSync(demoPath)) {
|
||||
const content = readFileSync(demoPath, "utf-8");
|
||||
|
||||
// Check if demo has proper structure
|
||||
const hasImports = content.includes('import YouTubeEmbed') &&
|
||||
content.includes('import TwitterEmbed') &&
|
||||
content.includes('import GenericEmbed');
|
||||
|
||||
const hasUsage = content.includes('<YouTubeEmbed') &&
|
||||
content.includes('<TwitterEmbed') &&
|
||||
content.includes('<GenericEmbed>');
|
||||
|
||||
const hasImports =
|
||||
content.includes("import YouTubeEmbed") &&
|
||||
content.includes("import TwitterEmbed") &&
|
||||
content.includes("import GenericEmbed");
|
||||
|
||||
const hasUsage =
|
||||
content.includes("<YouTubeEmbed") &&
|
||||
content.includes("<TwitterEmbed") &&
|
||||
content.includes("<GenericEmbed>");
|
||||
|
||||
if (hasImports && hasUsage) {
|
||||
console.log('✅ Demo post has correct imports and usage');
|
||||
console.log("✅ Demo post has correct imports and usage");
|
||||
} else {
|
||||
console.log('❌ Demo post missing imports or usage');
|
||||
console.log("❌ Demo post missing imports or usage");
|
||||
}
|
||||
|
||||
|
||||
// Check if it has BaseLayout
|
||||
if (content.includes('BaseLayout')) {
|
||||
console.log('✅ Demo post uses BaseLayout');
|
||||
if (content.includes("BaseLayout")) {
|
||||
console.log("✅ Demo post uses BaseLayout");
|
||||
} else {
|
||||
console.log('❌ Demo post missing BaseLayout');
|
||||
console.log("❌ Demo post missing BaseLayout");
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('❌ Demo post check error:', error);
|
||||
console.log("❌ Demo post check error:", error);
|
||||
}
|
||||
|
||||
// Test 3: Check blogPosts array
|
||||
try {
|
||||
const blogPostsPath = join(process.cwd(), 'src', 'data', 'blogPosts.ts');
|
||||
const { readFileSync } = require('fs');
|
||||
|
||||
const content = readFileSync(blogPostsPath, 'utf-8');
|
||||
|
||||
const blogPostsPath = join(process.cwd(), "src", "data", "blogPosts.ts");
|
||||
const { readFileSync } = require("fs");
|
||||
|
||||
const content = readFileSync(blogPostsPath, "utf-8");
|
||||
|
||||
// Check if embed-demo needs to be added
|
||||
if (!content.includes('embed-demo')) {
|
||||
console.log('⚠️ embed-demo not in blogPosts array - this is why it won\'t show in blog list');
|
||||
console.log(' But it should still be accessible at /blog/embed-demo directly');
|
||||
if (!content.includes("embed-demo")) {
|
||||
console.log(
|
||||
"⚠️ embed-demo not in blogPosts array - this is why it won't show in blog list",
|
||||
);
|
||||
console.log(
|
||||
" But it should still be accessible at /blog/embed-demo directly",
|
||||
);
|
||||
} else {
|
||||
console.log('✅ embed-demo found in blogPosts array');
|
||||
console.log("✅ embed-demo found in blogPosts array");
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('❌ blogPosts check error:', error);
|
||||
console.log("❌ blogPosts check error:", error);
|
||||
}
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('📋 SUMMARY:');
|
||||
console.log('• Components are created and structured correctly');
|
||||
console.log('• Demo post exists at src/pages/blog/embed-demo.astro');
|
||||
console.log('• Demo post has all required imports and usage');
|
||||
console.log('\n🔧 TO FIX BLOG LISTING:');
|
||||
console.log('Add embed-demo to src/data/blogPosts.ts array');
|
||||
console.log('\n🚀 TO TEST COMPONENTS:');
|
||||
console.log('Visit: http://localhost:4321/blog/embed-demo');
|
||||
console.log('If that 404s, the demo post needs to be added to blogPosts.ts');
|
||||
console.log("\n" + "=".repeat(60));
|
||||
console.log("📋 SUMMARY:");
|
||||
console.log("• Components are created and structured correctly");
|
||||
console.log("• Demo post exists at src/pages/blog/embed-demo.astro");
|
||||
console.log("• Demo post has all required imports and usage");
|
||||
console.log("\n🔧 TO FIX BLOG LISTING:");
|
||||
console.log("Add embed-demo to src/data/blogPosts.ts array");
|
||||
console.log("\n🚀 TO TEST COMPONENTS:");
|
||||
console.log("Visit: http://localhost:4321/blog/embed-demo");
|
||||
console.log("If that 404s, the demo post needs to be added to blogPosts.ts");
|
||||
|
||||
Reference in New Issue
Block a user