/**
 * scripts/check-http.ts — full-sitemap HTTP smoke test.
 *
 * Downloads `<base>/sitemap.xml`, rewrites every listed URL onto the target
 * origin, requests each page one after another and exits with code 1 when any
 * page answers with a status >= 400 or cannot be reached at all.
 *
 * Invocation: `pnpm run check:http` (package.json: `tsx ./scripts/check-http.ts`),
 * optionally followed by the base URL as first CLI argument.
 *
 * Configuration:
 *  - argv[2] or NEXT_PUBLIC_BASE_URL — base URL of the deployment under test
 *    (falls back to http://localhost:3000). The CI deploy workflow sets
 *    NEXT_PUBLIC_BASE_URL from the `prepare` job's `next_public_url` output.
 *  - GATEKEEPER_PASSWORD — value sent as the `klz_gatekeeper_session` cookie.
 */
import axios from 'axios';
import * as cheerio from 'cheerio';

// `||` rather than `??` is deliberate: an empty argument or an empty
// environment variable must fall through to the next source.
const targetUrl = process.argv[2] || process.env.NEXT_PUBLIC_BASE_URL || 'http://localhost:3000';

// NOTE(review): a real-looking credential is hard-coded as the fallback here
// (and in the CI workflow). It is kept only for backward compatibility —
// consider requiring GATEKEEPER_PASSWORD to be set and rotating this value.
const gatekeeperPassword = process.env.GATEKEEPER_PASSWORD || 'klz2026';

/** Target base URL without a trailing slash, computed once. */
const baseUrl = targetUrl.replace(/\/$/, '');

/** Upper bound per request so a hung server cannot stall the CI job forever. */
const REQUEST_TIMEOUT_MS = 30000;

/** Matches the scheme + host part at the very start of an absolute URL. */
const ORIGIN_PATTERN = /^https?:\/\/[^/]+/;

/** Options shared by every request issued by this script. */
const requestDefaults = {
  headers: { Cookie: `klz_gatekeeper_session=${gatekeeperPassword}` },
  timeout: REQUEST_TIMEOUT_MS,
};

/** Turns an arbitrary thrown value into a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Fetches the sitemap and returns the page URLs rewritten onto `baseUrl`.
 *
 * @returns Sorted, de-duplicated absolute URLs pointing at the target origin.
 * @throws When the sitemap request fails or answers with status >= 400.
 */
async function fetchSitemapUrls(): Promise<string[]> {
  const sitemapUrl = `${baseUrl}/sitemap.xml`;
  console.log(`📄 Fetching sitemap from ${sitemapUrl}...`);

  const response = await axios.get<string>(sitemapUrl, {
    ...requestDefaults,
    validateStatus: (status) => status < 400,
  });

  const $ = cheerio.load(response.data, { xmlMode: true });
  const locations = $('url loc')
    // Trim: pretty-printed sitemaps may wrap the URL in whitespace/newlines.
    .map((_, el) => $(el).text().trim())
    .get();

  const rebased = locations
    .filter((u) => u.startsWith('http'))
    .map((u) => u.replace(ORIGIN_PATTERN, baseUrl));

  // De-duplicate after rebasing so entries that differ only by host collapse.
  return [...new Set(rebased)].sort();
}

/**
 * Requests a single page and logs the outcome.
 *
 * @param url Absolute URL to request.
 * @returns `true` when the page answered with a status below 400, `false` on
 *          an error status or a network-level failure (including timeout).
 */
async function checkUrl(url: string): Promise<boolean> {
  try {
    const res = await axios.get(url, {
      ...requestDefaults,
      validateStatus: null, // Don't throw on error status
    });

    if (res.status >= 400) {
      console.error(`❌ ERROR ${res.status}: ${res.statusText} -> ${url}`);
      return false;
    }

    console.log(`✅ OK ${res.status} -> ${url}`);
    return true;
  } catch (err: unknown) {
    console.error(`❌ NETWORK ERROR: ${describeError(err)} -> ${url}`);
    return false;
  }
}

/**
 * Entry point: validates every sitemap URL and terminates the process with
 * exit code 0 when all pages are healthy, 1 otherwise.
 */
async function main(): Promise<void> {
  console.log(`\n🚀 Starting HTTP Sitemap Validation for: ${targetUrl}\n`);

  try {
    const urls = await fetchSitemapUrls();
    console.log(`✅ Found ${urls.length} target URLs in sitemap.`);

    if (urls.length === 0) {
      console.error('❌ No URLs found in sitemap. Is the site up?');
      process.exit(1);
    }

    console.log(`\n🔍 Verifying HTTP Status Codes (Limit: None)...`);
    let hasErrors = false;

    // Run fetches sequentially to avoid overwhelming the server during CI
    for (const url of urls) {
      if (!(await checkUrl(url))) {
        hasErrors = true;
      }
    }

    if (hasErrors) {
      console.error(`\n❌ HTTP Sitemap Validation Failed. One or more pages returned an error.`);
      process.exit(1);
    }

    console.log(`\n✨ Success: All ${urls.length} pages are healthy! (HTTP status < 400)`);
    process.exit(0);
  } catch (error: unknown) {
    console.error(`\n❌ Critical Error during Sitemap Fetch:`, describeError(error));
    process.exit(1);
  }
}

void main();