// ============================================================================
// @mintel/page-audit — Auditor Orchestrator
// Main entry point: runs the full audit pipeline for a domain.
// ============================================================================

import * as fs from "node:fs/promises";
import * as path from "node:path";
import { DataForSeoClient, normalizePage } from "./dataforseo.js";
import { generateAuditReport } from "./report.js";
import type { AuditConfig, AuditIssue, DomainAuditResult, PageAuditData } from "./types.js";

/** An issue code aggregated across pages, with the number of pages it occurred on. */
type AggregatedIssue = AuditIssue & { count: number };

/** Sort rank per severity; lower ranks come first in `topIssues`. */
const SEVERITY_ORDER = { critical: 0, warning: 1, info: 2 } as const;

/** Crawl size used when the caller gives no (or a falsy) `maxPages`. */
const DEFAULT_MAX_PAGES = 50;

/** Second argument passed to `waitForTask` — presumably a timeout in milliseconds; TODO confirm. */
const CRAWL_WAIT_LIMIT = 300_000;

/** Second argument passed to `getPages` — presumably the page-size limit; TODO confirm. */
const PAGE_FETCH_LIMIT = 100;

/** Maximum number of aggregated issues kept in `topIssues`. */
const MAX_TOP_ISSUES = 20;

/** Width of the horizontal rule printed around the console summary. */
const SUMMARY_RULE_WIDTH = 50;

/**
 * Orchestrates a domain audit: crawl via DataForSEO, aggregate issues,
 * optionally generate an AI report, and persist the result as JSON.
 */
export class PageAuditor {
  private readonly client: DataForSeoClient;

  constructor(private readonly config: AuditConfig) {
    this.client = new DataForSeoClient(config.dataForSeoLogin, config.dataForSeoPassword);
  }

  /**
   * Run a full audit for a domain.
   * Steps:
   * 1. Start DataForSEO On-Page crawl task
   * 2. Wait for completion
   * 3. Fetch page results + broken resources
   * 4. Normalize and aggregate issues
   * 5. Generate AI report (unless lightMode or no OpenRouter key is configured)
   * 6. Save to disk
   *
   * @param domain - Domain or URL to audit. A leading `http(s)://` and one
   *   trailing slash are stripped for logging, the result and the file name.
   * @param opts - `maxPages` is forwarded to the crawl (defaults to 50);
   *   `onProgress` receives progress messages (defaults to `console.log`).
   * @returns The audit result that was also written to disk.
   * @throws Rejections from the DataForSEO client or from saving the result
   *   propagate to the caller. A failing AI report is only logged as a warning.
   */
  async audit(
    domain: string,
    opts?: { maxPages?: number; onProgress?: (msg: string) => void },
  ): Promise<DomainAuditResult> {
    const log = opts?.onProgress ?? console.log;
    const cleanDomain = domain.replace(/^https?:\/\//, "").replace(/\/$/, "");

    log(`🔍 Starting audit for ${cleanDomain}...`);

    // Step 1: Start crawl
    // `||` rather than `??`: a maxPages of 0 also falls back to the default.
    // NOTE(review): the raw `domain` (not `cleanDomain`) is sent to the client —
    // confirm that startCrawl accepts a value that still carries the protocol.
    const taskId = await this.client.startCrawl(domain, opts?.maxPages || DEFAULT_MAX_PAGES);
    log(`📋 DataForSEO task started: ${taskId}`);

    // Step 2: Wait for completion
    log("⏳ Waiting for crawl to complete...");
    await this.client.waitForTask(taskId, CRAWL_WAIT_LIMIT);
    log("✅ Crawl complete!");

    // Step 3: Fetch results
    log("📥 Fetching page data...");
    // NOTE(review): the arguments look like (taskId, limit, offset). If so, crawls
    // started with maxPages > 100 only have their first 100 pages fetched — confirm.
    const [rawPages, brokenResources] = await Promise.all([
      this.client.getPages(taskId, PAGE_FETCH_LIMIT, 0),
      this.client.getBrokenResources(taskId),
    ]);

    // Step 4: Normalize pages
    const pages: PageAuditData[] = rawPages.map(normalizePage);

    // Count broken links per page. The index keeps the FIRST page per URL so the
    // outcome matches a linear first-match search, without the O(n·m) scan.
    const pageByUrl = new Map<string, PageAuditData>();
    for (const page of pages) {
      if (!pageByUrl.has(page.url)) pageByUrl.set(page.url, page);
    }
    for (const broken of brokenResources) {
      const sourceUrl = broken.source_url;
      if (typeof sourceUrl !== "string") continue;
      const sourcePage = pageByUrl.get(sourceUrl);
      if (sourcePage) sourcePage.links.broken++;
    }

    const result: DomainAuditResult = {
      domain: cleanDomain,
      auditedAt: new Date().toISOString(),
      totalPages: pages.length,
      pages,
      topIssues: this.aggregateTopIssues(pages),
      report: null,
      dataForSeoTaskId: taskId,
    };

    // Step 5: AI Report
    if (!this.config.lightMode && this.config.openrouterKey) {
      log("🤖 Generating AI analysis...");
      try {
        result.report = await generateAuditReport(result, this.config.openrouterKey);
        log("✅ AI report generated!");
      } catch (err: unknown) {
        // Best effort: the audit data is still saved below without a report.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`⚠️ AI report failed (audit data still saved): ${reason}`);
      }
    }

    // Step 6: Save
    await this.saveResult(result);

    return result;
  }

  /**
   * Collapse the per-page issues into one entry per issue code, counting
   * occurrences, and return the highest-priority entries: ordered by severity
   * (critical, warning, info), then by descending count, capped at 20.
   * Each entry carries the fields of the first issue seen with that code.
   */
  private aggregateTopIssues(pages: PageAuditData[]): AggregatedIssue[] {
    const issueMap = new Map<string, AggregatedIssue>();

    for (const page of pages) {
      for (const issue of page.issues) {
        const existing = issueMap.get(issue.code);
        if (existing) {
          existing.count++;
        } else {
          issueMap.set(issue.code, { ...issue, count: 1 });
        }
      }
    }

    return Array.from(issueMap.values())
      .sort(
        (a, b) =>
          SEVERITY_ORDER[a.severity] - SEVERITY_ORDER[b.severity] || b.count - a.count,
      )
      .slice(0, MAX_TOP_ISSUES);
  }

  /**
   * Write the result as pretty-printed JSON to
   * `<outputDir>/<domain-slug>_<timestamp>.json` (outputDir defaults to
   * `./out/page-audits` and is created if missing), then print a summary.
   */
  private async saveResult(result: DomainAuditResult): Promise<void> {
    const outputDir = this.config.outputDir || "./out/page-audits";
    await fs.mkdir(outputDir, { recursive: true });

    // Dots become dashes as before. Path separators and other characters that are
    // reserved in file names are replaced too, so a domain that carries a path or
    // a port (e.g. "example.com/blog", "localhost:3000") still yields a single
    // file name inside outputDir instead of a nested or invalid path.
    const slug = result.domain.replace(/[./\\:*?"<>|\s]/g, "-");
    // ISO timestamp truncated to seconds, with ":" made file-name safe.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const outputPath = path.join(outputDir, `${slug}_${timestamp}.json`);

    await fs.writeFile(outputPath, JSON.stringify(result, null, 2));
    console.log(`\n📦 Audit saved to: ${outputPath}`);

    this.printSummary(result);
  }

  /** Print a human-readable summary of the audit (and AI report, if any) to stdout. */
  private printSummary(result: DomainAuditResult): void {
    const rule = "─".repeat(SUMMARY_RULE_WIDTH);
    // Counts are distinct issue codes within topIssues, not per-page occurrences.
    const countBySeverity = (severity: string): number =>
      result.topIssues.filter((i) => i.severity === severity).length;

    console.log("\n" + rule);
    console.log("📊 AUDIT SUMMARY — " + result.domain);
    console.log(rule);
    console.log(` Pages audited: ${result.totalPages}`);
    console.log(` Critical issues: ${countBySeverity("critical")}`);
    console.log(` Warnings: ${countBySeverity("warning")}`);

    const report = result.report;
    if (report) {
      console.log("\n🤖 AI REPORT:");
      console.log(` Overall Health: ${report.overallHealth.toUpperCase()}`);
      console.log(` SEO Score: ${report.seoScore}/100`);
      console.log(` UX Score: ${report.uxScore}/100`);
      console.log(` Performance Score: ${report.performanceScore}/100`);
      console.log(`\n Summary: ${report.executiveSummary}`);

      if (report.criticalIssues.length > 0) {
        console.log("\n 🔴 Critical Issues:");
        for (const issue of report.criticalIssues) {
          console.log(` - ${issue}`);
        }
      }

      if (report.quickWins.length > 0) {
        console.log("\n 🟢 Quick Wins:");
        for (const win of report.quickWins) {
          console.log(` - ${win}`);
        }
      }
    }

    console.log(rule);
  }
}