Files
at-mintel/packages/page-audit/src/auditor.ts
Marc Mintel 5da88356a8
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
feat: migrate npm registry from Verdaccio to Gitea Packages
2026-02-27 00:12:00 +01:00

153 lines
5.9 KiB
TypeScript

// ============================================================================
// @mintel/page-audit — Auditor Orchestrator
// Main entry point: runs the full audit pipeline for a domain.
// ============================================================================
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { DataForSeoClient, normalizePage } from "./dataforseo.js";
import { generateAuditReport } from "./report.js";
import type { AuditConfig, AuditIssue, DomainAuditResult, PageAuditData } from "./types.js";
export class PageAuditor {
  /** DataForSEO API client, built once from the audit credentials. */
  private readonly client: DataForSeoClient;

  constructor(private config: AuditConfig) {
    this.client = new DataForSeoClient(
      config.dataForSeoLogin,
      config.dataForSeoPassword,
    );
  }

  /**
   * Run a full audit for a domain.
   *
   * Steps:
   * 1. Start DataForSEO On-Page crawl task
   * 2. Wait for completion (5-minute timeout)
   * 3. Fetch page results + broken resources
   * 4. Normalize and aggregate issues
   * 5. Generate AI report (unless lightMode)
   * 6. Save to disk
   *
   * @param domain Domain to audit. The raw value is handed to the crawler;
   *   a leading http(s):// scheme and trailing slash are stripped only for
   *   display and storage.
   * @param opts.maxPages Crawl page limit (default 50).
   * @param opts.onProgress Progress callback (defaults to console.log).
   * @returns The aggregated audit result (also persisted to disk).
   */
  async audit(
    domain: string,
    opts?: { maxPages?: number; onProgress?: (msg: string) => void },
  ): Promise<DomainAuditResult> {
    const log = opts?.onProgress ?? console.log;
    const maxPages = opts?.maxPages ?? 50;
    const cleanDomain = domain.replace(/^https?:\/\//, "").replace(/\/$/, "");
    log(`🔍 Starting audit for ${cleanDomain}...`);

    // Step 1: Start crawl
    const taskId = await this.client.startCrawl(domain, maxPages);
    log(`📋 DataForSEO task started: ${taskId}`);

    // Step 2: Wait for completion (up to 300s)
    log("⏳ Waiting for crawl to complete...");
    await this.client.waitForTask(taskId, 300_000);
    log("✅ Crawl complete!");

    // Step 3: Fetch results in parallel.
    // Fix: the page-fetch limit was hard-coded to 100, silently truncating
    // results whenever maxPages exceeded 100. Request at least as many pages
    // as were crawled (keeping the old floor of 100 for backward compat).
    log("📥 Fetching page data...");
    const [rawPages, brokenResources] = await Promise.all([
      this.client.getPages(taskId, Math.max(maxPages, 100), 0),
      this.client.getBrokenResources(taskId),
    ]);

    // Step 4: Normalize pages
    const pages: PageAuditData[] = rawPages.map(normalizePage);

    // Count broken links per page. Index pages by URL first so this is
    // O(pages + resources) instead of a linear scan per broken resource.
    // NOTE(review): matching is by exact URL string — trailing-slash or
    // scheme variants won't match; confirm DataForSEO normalizes these.
    const pageByUrl = new Map(pages.map((p) => [p.url, p]));
    for (const broken of brokenResources) {
      const sourcePage = pageByUrl.get(broken.source_url);
      if (sourcePage) sourcePage.links.broken++;
    }

    // Aggregate issues across all pages, deduplicated by issue code,
    // tracking how many pages each code occurred on.
    const issueMap = new Map<string, AuditIssue & { count: number }>();
    for (const page of pages) {
      for (const issue of page.issues) {
        const existing = issueMap.get(issue.code);
        if (existing) {
          existing.count++;
        } else {
          issueMap.set(issue.code, { ...issue, count: 1 });
        }
      }
    }

    // Top 20 issues: critical → warning → info, ties broken by count
    // (descending). Hoisted so the comparator doesn't rebuild the lookup
    // object on every comparison.
    const severityOrder = { critical: 0, warning: 1, info: 2 } as const;
    const topIssues = Array.from(issueMap.values())
      .sort(
        (a, b) =>
          severityOrder[a.severity] - severityOrder[b.severity] ||
          b.count - a.count,
      )
      .slice(0, 20);

    const result: DomainAuditResult = {
      domain: cleanDomain,
      auditedAt: new Date().toISOString(),
      totalPages: pages.length,
      pages,
      topIssues,
      report: null,
      dataForSeoTaskId: taskId,
    };

    // Step 5: AI report — best-effort. A failure here must not lose the
    // audit data, so the error is logged and swallowed deliberately.
    if (!this.config.lightMode && this.config.openrouterKey) {
      log("🤖 Generating AI analysis...");
      try {
        result.report = await generateAuditReport(result, this.config.openrouterKey);
        log("✅ AI report generated!");
      } catch (err: unknown) {
        // Narrow instead of `any`: only Error instances carry `.message`.
        const message = err instanceof Error ? err.message : String(err);
        console.warn(`⚠️ AI report failed (audit data still saved): ${message}`);
      }
    }

    // Step 6: Save
    await this.saveResult(result);
    return result;
  }

  /**
   * Persist the audit result as pretty-printed JSON under the configured
   * output directory and print a human-readable summary to the console.
   *
   * File name: `<domain-with-dots-dashed>_<ISO-timestamp>.json`.
   */
  private async saveResult(result: DomainAuditResult): Promise<void> {
    const outputDir = this.config.outputDir || "./out/page-audits";
    await fs.mkdir(outputDir, { recursive: true });
    const slug = result.domain.replace(/\./g, "-");
    // ISO timestamp made filesystem-safe (':' and '.' → '-'), trimmed to
    // second precision (19 chars: YYYY-MM-DDTHH-MM-SS).
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const outputPath = path.join(outputDir, `${slug}_${timestamp}.json`);
    await fs.writeFile(outputPath, JSON.stringify(result, null, 2));
    console.log(`\n📦 Audit saved to: ${outputPath}`);

    // Print summary
    console.log("\n" + "─".repeat(50));
    console.log("📊 AUDIT SUMMARY — " + result.domain);
    console.log("─".repeat(50));
    console.log(`  Pages audited: ${result.totalPages}`);
    console.log(`  Critical issues: ${result.topIssues.filter((i) => i.severity === "critical").length}`);
    console.log(`  Warnings: ${result.topIssues.filter((i) => i.severity === "warning").length}`);
    if (result.report) {
      console.log("\n🤖 AI REPORT:");
      console.log(`  Overall Health: ${result.report.overallHealth.toUpperCase()}`);
      console.log(`  SEO Score: ${result.report.seoScore}/100`);
      console.log(`  UX Score: ${result.report.uxScore}/100`);
      console.log(`  Performance Score: ${result.report.performanceScore}/100`);
      console.log(`\n  Summary: ${result.report.executiveSummary}`);
      if (result.report.criticalIssues.length > 0) {
        console.log("\n  🔴 Critical Issues:");
        for (const issue of result.report.criticalIssues) {
          console.log(`  - ${issue}`);
        }
      }
      if (result.report.quickWins.length > 0) {
        console.log("\n  🟢 Quick Wins:");
        for (const win of result.report.quickWins) {
          console.log(`  - ${win}`);
        }
      }
    }
    console.log("─".repeat(50));
  }
}