import axios from 'axios';
import * as cheerio from 'cheerio';
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';

/** Scratch directory (relative to the current working directory) that holds the downloaded pages. */
const tempDirName = '.htmlvalidate-tmp';

// `||` (not `??`) is deliberate: an empty argument or env value falls through to the next source.
const targetUrl = process.argv[2] || process.env.NEXT_PUBLIC_BASE_URL || 'http://localhost:3000';

/** Target URL without a trailing slash; every sitemap URL is rebased onto this origin. */
const baseUrl = targetUrl.replace(/\/$/, '');

/** Maximum number of pages to validate; 0 means no limit. */
const limit = parsePageLimit(process.env.PAGESPEED_LIMIT);

// NOTE(review): a fallback password committed to source is effectively public.
// It is kept so existing invocations keep working; consider requiring GATEKEEPER_PASSWORD.
const gatekeeperPassword = process.env.GATEKEEPER_PASSWORD || 'klz2026';

/** Cookie header sent with every request (sitemap and pages). */
const requestHeaders = { Cookie: `klz_gatekeeper_session=${gatekeeperPassword}` };

/**
 * Parses the page limit from its environment-variable form.
 *
 * @param raw Raw environment value, possibly undefined.
 * @returns A positive integer limit, or 0 ("no limit") when the value is missing,
 *          not a number, or not positive.
 */
function parsePageLimit(raw: string | undefined): number {
  if (!raw) return 0;
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) || parsed <= 0 ? 0 : parsed;
}

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Reduces a URL list to at most `maxPages` entries, keeping language home pages
 * (`/de`, `/en`) and the site root first.
 *
 * @param urls     Candidate URLs, already rebased onto `base`.
 * @param maxPages Positive upper bound on the number of returned URLs.
 * @param base     Site origin without a trailing slash.
 * @returns Home pages followed by the remaining pages, never more than `maxPages` in total.
 */
function selectRepresentativePages(urls: string[], maxPages: number, base: string): string[] {
  const isHome = (u: string): boolean =>
    u.endsWith('/de') || u.endsWith('/en') || u === base || u === `${base}/`;
  const home = urls.filter(isHome);
  const others = urls.filter((u) => !isHome(u));
  // Cap the combined list: slicing only `others` by `maxPages - home.length` goes
  // negative (and returns too many pages) when there are more home pages than the limit.
  return [...home, ...others].slice(0, maxPages);
}

/**
 * Downloads the sitemap, saves the HTML of each listed page into a scratch
 * directory, and runs `html-validate` over the saved files.
 *
 * The scratch directory is removed on every exit path. For that reason this
 * function reports the outcome as a return value instead of calling
 * `process.exit` itself, which would skip the `finally` cleanup.
 *
 * @returns 0 when validation passed, 1 on any failure.
 */
async function main(): Promise<number> {
  console.log(`\nšŸš€ Starting HTML Validation for: ${targetUrl}`);
  console.log(`šŸ“Š Limit: ${limit ? limit : 'None (Full Sitemap)'} pages\n`);

  const outputDir = path.join(process.cwd(), tempDirName);

  try {
    const sitemapUrl = `${baseUrl}/sitemap.xml`;
    console.log(`šŸ“„ Fetching sitemap from ${sitemapUrl}...`);

    const response = await axios.get(sitemapUrl, {
      headers: requestHeaders,
      validateStatus: (status) => status < 400,
    });

    const $ = cheerio.load(response.data, { xmlMode: true });
    const sitemapUrls: string[] = $('url loc')
      .map((_index, el) => $(el).text())
      .get();

    // Deduplicate, keep absolute URLs only, and swap each URL's origin for the
    // target so the pages are fetched from the environment under test.
    const originPattern = /https?:\/\/[^\/]+/;
    let urls = [...new Set(sitemapUrls)]
      .filter((u) => u.startsWith('http'))
      .map((u) => u.replace(originPattern, baseUrl))
      .sort();

    console.log(`āœ… Found ${urls.length} URLs in sitemap.`);

    if (urls.length === 0) {
      console.error('āŒ No URLs found in sitemap. Is the site up?');
      return 1;
    }

    if (limit && urls.length > limit) {
      console.log(
        `āš ļø Too many pages (${urls.length}). Limiting to ${limit} representative pages.`,
      );
      urls = selectRepresentativePages(urls, limit, baseUrl);
    }

    // Start from an empty scratch directory so stale files from an earlier run are not validated.
    fs.rmSync(outputDir, { recursive: true, force: true });
    fs.mkdirSync(outputDir, { recursive: true });

    console.log(`šŸ“„ Fetching HTML for ${urls.length} pages...`);

    // Pages are fetched one at a time; the first failure aborts the whole run.
    for (const [index, u] of urls.entries()) {
      try {
        const res = await axios.get(u, { headers: requestHeaders });
        fs.writeFileSync(path.join(outputDir, `page-${index}.html`), res.data);
      } catch (err: unknown) {
        console.error(`āŒ HTTP Error fetching ${u}: ${describeError(err)}`);
        throw err;
      }
    }

    console.log(`\nšŸ’» Executing html-validate...`);
    try {
      // execSync throws when the command exits non-zero, i.e. when html-validate reports issues.
      execSync(`npx html-validate ${tempDirName}/*.html`, { stdio: 'inherit' });
      console.log(`āœ… HTML Validation passed perfectly!`);
    } catch {
      console.error(`āŒ HTML Validation found issues.`);
      return 1;
    }

    return 0;
  } catch (error: unknown) {
    console.error(`\nāŒ Error during HTML Validation:`, describeError(error));
    return 1;
  } finally {
    // `force: true` makes this a no-op when the directory was never created.
    fs.rmSync(outputDir, { recursive: true, force: true });
  }
}

main()
  .then((exitCode) => {
    // Exit explicitly only on failure, and only after main's cleanup has completed.
    if (exitCode !== 0) process.exit(exitCode);
  })
  .catch((error: unknown) => {
    // Reached only if the cleanup in `finally` itself throws.
    console.error(describeError(error));
    process.exit(1);
  });