import axios from 'axios';
import * as cheerio from 'cheerio';
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';

/**
 * HTML validation script.
 *
 * Reads `<base>/sitemap.xml`, downloads every listed page into a temporary
 * directory and runs `html-validate` over the downloaded files. The process
 * exits with code 1 when the sitemap is empty, a page cannot be fetched, or
 * html-validate reports a failure.
 *
 * Usage: the target base URL is taken from the first CLI argument, then from
 * NEXT_PUBLIC_BASE_URL, and finally defaults to http://localhost:3000.
 *
 * NOTE(review): the emoji prefixes in the log strings below look mis-encoded
 * (e.g. `šŸš€`). They are reproduced unchanged here — confirm the file is
 * saved and read as UTF-8.
 */

const targetUrl = process.argv[2] || process.env.NEXT_PUBLIC_BASE_URL || 'http://localhost:3000';

// NOTE(review): a fallback password is hard-coded in source. Kept for backward
// compatibility; consider requiring GATEKEEPER_PASSWORD to be set instead.
const gatekeeperPassword = process.env.GATEKEEPER_PASSWORD || 'klz2026';

/** Target URL without a trailing slash; used to build and rewrite page URLs. */
const baseUrl = targetUrl.replace(/\/$/, '');

/** Temporary directory that holds the downloaded pages during a run. */
const outputDir = path.join(process.cwd(), '.htmlvalidate-tmp');

/** Request options shared by all fetches: gatekeeper cookie, reject on status >= 400. */
const requestConfig = {
  headers: { Cookie: `klz_gatekeeper_session=${gatekeeperPassword}` },
  validateStatus: (status: number) => status < 400,
};

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Removes the temporary output directory; a missing directory is not an error. */
function removeOutputDir(): void {
  fs.rmSync(outputDir, { recursive: true, force: true });
}

/**
 * Fetches the sitemap and returns the sorted, de-duplicated page URLs with
 * their origin replaced by the target base URL.
 */
async function fetchSitemapUrls(): Promise<string[]> {
  const sitemapUrl = `${baseUrl}/sitemap.xml`;
  console.log(`šŸ“„ Fetching sitemap from ${sitemapUrl}...`);

  const response = await axios.get(sitemapUrl, requestConfig);
  const $ = cheerio.load(response.data, { xmlMode: true });

  const originPattern = /https?:\/\/[^\/]+/;
  const rewritten = $('url loc')
    .map((_i, el) => $(el).text().trim()) // <loc> text may be padded with whitespace
    .get()
    .filter((u) => u.startsWith('http'))
    .map((u) => u.replace(originPattern, baseUrl));

  // De-duplicate after rewriting so entries that differ only by origin collapse.
  return [...new Set(rewritten)].sort();
}

/**
 * Downloads every URL into `outputDir` as `page-<index>.html`, one at a time.
 *
 * @throws Error when a page cannot be fetched or written.
 */
async function downloadPages(urls: readonly string[]): Promise<void> {
  // Start from an empty directory so stale files from an earlier run are not validated.
  removeOutputDir();
  fs.mkdirSync(outputDir, { recursive: true });

  console.log(`šŸ“„ Fetching HTML for ${urls.length} pages...`);
  for (const [i, u] of urls.entries()) {
    try {
      const res = await axios.get(u, requestConfig);
      fs.writeFileSync(path.join(outputDir, `page-${i}.html`), res.data);
    } catch (err: unknown) {
      const message = errorMessage(err);
      console.error(`āŒ HTTP Error fetching ${u}: ${message}`);
      throw new Error(`Failed to fetch page: ${u} - ${message}`);
    }
  }
}

/**
 * Runs html-validate over the downloaded pages.
 *
 * @returns `true` when the command exits successfully, `false` otherwise.
 */
function runHtmlValidate(): boolean {
  console.log(`\nšŸ’» Executing html-validate...`);
  try {
    execSync(`npx html-validate .htmlvalidate-tmp/*.html`, { stdio: 'inherit' });
    console.log(`āœ… HTML Validation passed perfectly!`);
    return true;
  } catch {
    // execSync throws when the command exits with a non-zero status.
    console.error(`āŒ HTML Validation found issues.`);
    return false;
  }
}

/**
 * Script entry point: fetch the sitemap, download the pages, validate them.
 *
 * The exit code is recorded and `process.exit` is only called after the
 * `finally` block, because `process.exit` terminates immediately and would
 * otherwise skip the temporary-directory cleanup.
 */
async function main(): Promise<void> {
  console.log(`\nšŸš€ Starting HTML Validation for: ${targetUrl}`);
  console.log(`šŸ“Š Limit: None (Full Sitemap)\n`);

  let exitCode = 0;
  try {
    const urls = await fetchSitemapUrls();
    console.log(`āœ… Found ${urls.length} URLs in sitemap.`);

    if (urls.length === 0) {
      console.error('āŒ No URLs found in sitemap. Is the site up?');
      exitCode = 1;
    } else {
      await downloadPages(urls);
      if (!runHtmlValidate()) exitCode = 1;
    }
  } catch (error: unknown) {
    console.error(`\nāŒ Error during HTML Validation:`, errorMessage(error));
    exitCode = 1;
  } finally {
    removeOutputDir();
  }

  if (exitCode !== 0) process.exit(exitCode);
}

main().catch((err: unknown) => {
  // Only reachable if cleanup itself fails; report it and signal failure.
  console.error(err);
  process.exit(1);
});