Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 1m1s
Monorepo Pipeline / 🧪 Test (push) Failing after 1m7s
Monorepo Pipeline / 🏗️ Build (push) Failing after 1m10s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
220 lines
8.4 KiB
TypeScript
220 lines
8.4 KiB
TypeScript
// ============================================================================
|
|
// @mintel/page-audit — DataForSEO API Client
|
|
// Uses native fetch (no axios) to avoid Node event loop exit during polling.
|
|
// Docs: https://docs.dataforseo.com/v3/on_page/
|
|
// ============================================================================
|
|
|
|
import type { PageAuditData, AuditIssue } from "./types.js";
|
|
|
|
/** Base URL of the DataForSEO v3 API; request paths are appended to it verbatim. */
const API_BASE = "https://api.dataforseo.com/v3";
|
|
|
|
/**
 * Authenticated DataForSEO client.
 *
 * Wraps the On-Page endpoints used by the audit: creating a crawl task,
 * polling until it is ready, and fetching summary / page / resource results.
 * Every request carries HTTP Basic credentials built from the login and
 * password passed to the constructor.
 */
export class DataForSeoClient {
  /** Base64-encoded `login:password` pair used for the Basic auth header. */
  private readonly auth: string;

  /**
   * @param login - DataForSEO API login.
   * @param password - DataForSEO API password.
   */
  constructor(login: string, password: string) {
    this.auth = Buffer.from(`${login}:${password}`).toString("base64");
  }

  /** Headers sent with every request (Basic auth + JSON content type). */
  private get headers(): Record<string, string> {
    return {
      Authorization: `Basic ${this.auth}`,
      "Content-Type": "application/json",
    };
  }

  /**
   * Issue a GET request against `API_BASE + path` and return the parsed JSON body.
   *
   * @throws Error when the HTTP response status is not OK.
   */
  private async apiGet(path: string): Promise<any> {
    const resp = await fetch(`${API_BASE}${path}`, { headers: this.headers });
    if (!resp.ok) throw new Error(`DataForSEO GET ${path} failed: ${resp.status}`);
    return resp.json();
  }

  /**
   * Issue a POST request with a JSON-serialized body against `API_BASE + path`
   * and return the parsed JSON response.
   *
   * @throws Error when the HTTP response status is not OK.
   */
  private async apiPost(path: string, body: any): Promise<any> {
    const resp = await fetch(`${API_BASE}${path}`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(body),
    });
    if (!resp.ok) throw new Error(`DataForSEO POST ${path} failed: ${resp.status}`);
    return resp.json();
  }

  /**
   * Start an On-Page crawl for a domain and return the task ID.
   *
   * @param domain - Bare host name or full URL. `https://` is prepended only
   *   when the value does not already start with an `http://` or `https://`
   *   scheme (case-insensitive), so hosts such as `httpbin.org` are handled
   *   correctly.
   * @param maxCrawlPages - Upper bound on the number of pages to crawl.
   * @returns The ID of the created task.
   * @throws Error when the response contains no task ID, or when the task
   *   carries an error status code (40000 and above).
   */
  async startCrawl(domain: string, maxCrawlPages = 50): Promise<string> {
    // Test for an actual scheme; a plain "starts with http" check would also
    // match host names like "httpbin.org" and leave them without a scheme.
    const url = /^https?:\/\//i.test(domain) ? domain : `https://${domain}`;

    const data = await this.apiPost("/on_page/task_post", [
      {
        target: url,
        max_crawl_pages: maxCrawlPages,
        load_resources: true,
        enable_javascript: true,
        enable_browser_rendering: true,
        check_spell: false,
        calculate_keyword_density: false,
      },
    ]);

    const task = data?.tasks?.[0];
    const statusCode = task?.status_code;
    // A task entry can carry an id and still report an error status; fail
    // right away instead of polling a task that was never accepted.
    const rejected = typeof statusCode === "number" && statusCode >= 40000;
    if (!task?.id || rejected) {
      throw new Error(`DataForSEO task creation failed: ${JSON.stringify(task?.status_message || "unknown")}`);
    }

    return task.id;
  }

  /**
   * Check if a task is ready via the tasks_ready endpoint.
   *
   * @returns `true` when `taskId` appears among the result IDs of the first
   *   task entry in the response, `false` otherwise.
   */
  async isTaskReady(taskId: string): Promise<boolean> {
    const data = await this.apiGet("/on_page/tasks_ready");
    const readyTasks: string[] = data?.tasks?.[0]?.result?.map((t: any) => t.id) || [];
    return readyTasks.includes(taskId);
  }

  /**
   * Poll for task completion using the tasks_ready endpoint.
   * DataForSEO crawls can take 2-5 minutes.
   *
   * The first poll happens after 15s; the delay then grows by 30% per poll up
   * to 30s. Each sleep is clamped to the time left, so the final poll happens
   * at roughly `timeoutMs` instead of up to one full delay later.
   *
   * @param taskId - ID returned by `startCrawl`.
   * @param timeoutMs - Maximum time to wait for readiness, in milliseconds.
   * @throws Error when the task is not ready within `timeoutMs`.
   */
  async waitForTask(taskId: string, timeoutMs = 300_000): Promise<void> {
    const start = Date.now();
    let delay = 15_000;
    let pollCount = 0;

    // Force Node event loop active
    const keepAlive = setInterval(() => { }, 1000);

    try {
      let remaining = timeoutMs;
      while (remaining > 0) {
        // Never sleep past the deadline.
        await this.sleep(Math.min(delay, remaining));
        pollCount++;

        const ready = await this.isTaskReady(taskId);
        const elapsed = Math.round((Date.now() - start) / 1000);
        console.log(` 📊 Poll #${pollCount}: ${ready ? "READY ✅" : "not ready"} (${elapsed}s elapsed)`);

        if (ready) {
          // Short grace period so the pages endpoint settles
          await this.sleep(5_000);
          return;
        }

        delay = Math.min(delay * 1.3, 30_000);
        remaining = timeoutMs - (Date.now() - start);
      }

      throw new Error(`DataForSEO task ${taskId} timed out after ${timeoutMs / 1000}s`);
    } finally {
      clearInterval(keepAlive);
    }
  }

  /**
   * Sleep that keeps the Node event loop alive.
   *
   * @param ms - Duration in milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => {
      const timer = setTimeout(resolve, ms);
      // Explicitly ref the timer to prevent Node from exiting
      if (timer && typeof timer === "object" && "ref" in timer) {
        (timer as NodeJS.Timeout).ref();
      }
    });
  }

  /**
   * Fetch the crawl summary.
   *
   * @returns The first result entry of the first task, or `null` when absent.
   */
  async getCrawlSummary(taskId: string): Promise<any> {
    const data = await this.apiGet(`/on_page/summary/${taskId}`);
    return data?.tasks?.[0]?.result?.[0] || null;
  }

  /**
   * Fetch page-level results for a task.
   *
   * @param taskId - ID of the crawl task.
   * @param limit - Maximum number of items requested.
   * @param offset - Offset passed through to the request.
   * @returns The `items` array of the first result, or an empty array.
   */
  async getPages(taskId: string, limit = 100, offset = 0): Promise<any[]> {
    const data = await this.apiPost("/on_page/pages", [
      { id: taskId, limit, offset },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }

  /**
   * Fetch non-indexable pages (first 100 entries).
   *
   * @returns The `items` array of the first result, or an empty array.
   */
  async getNonIndexable(taskId: string): Promise<any[]> {
    const data = await this.apiPost("/on_page/non_indexable", [
      { id: taskId, limit: 100, offset: 0 },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }

  /**
   * Fetch broken resources (404s, timeouts, etc.): up to 100 resources
   * filtered to a status code above 399.
   *
   * @returns The `items` array of the first result, or an empty array.
   */
  async getBrokenResources(taskId: string): Promise<any[]> {
    const data = await this.apiPost("/on_page/resources", [
      { id: taskId, limit: 100, filters: [["status_code", ">", "399"]] },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }
}
|
|
|
|
/**
 * Normalize a DataForSEO raw page result into our PageAuditData type.
 *
 * Each truthy flag in `raw.checks` is translated into an `AuditIssue`, and
 * the fields read from `raw.meta` / `raw.page_timing` are copied into the
 * flat audit shape, with fallbacks for missing values.
 *
 * Timing metrics (`loadTime` and everything under `performance`) fall back to
 * `null` only when the value is absent; a measured `0` (for example a
 * cumulative layout shift of 0) is kept as `0`.
 *
 * @param raw - One page item as returned by the On-Page pages endpoint.
 * @returns The normalized page audit record.
 */
export function normalizePage(raw: any): PageAuditData {
  const checks = raw.checks || {};
  const meta = raw.meta;
  const timing = raw.page_timing;

  const issues: AuditIssue[] = [];
  // Append the issue only when the corresponding check flag is set.
  const report = (flag: unknown, issue: AuditIssue): void => {
    if (flag) issues.push(issue);
  };

  report(checks.no_title, { code: "NO_TITLE", severity: "critical", message: "Seite hat keinen <title> Tag" });
  report(checks.title_too_long, { code: "TITLE_TOO_LONG", severity: "warning", message: `Title zu lang (${meta?.title?.length || "?"} Zeichen, max 60)` });
  report(checks.no_description, { code: "NO_META_DESCRIPTION", severity: "warning", message: "Keine Meta-Description" });
  report(checks.description_too_long, { code: "META_DESC_TOO_LONG", severity: "info", message: "Meta-Description zu lang (max 160)" });
  report(checks.no_h1_tag, { code: "NO_H1", severity: "critical", message: "Kein H1-Tag auf der Seite" });
  report(checks.duplicate_h1_tag, { code: "DUPLICATE_H1", severity: "warning", message: "Mehrere H1-Tags gefunden" });
  report(checks.is_broken, { code: "PAGE_BROKEN", severity: "critical", message: `HTTP ${raw.status_code}: Seite nicht erreichbar` });
  report(checks.low_content_rate, { code: "THIN_CONTENT", severity: "warning", message: "Zu wenig Content (dünne Seite)" });
  report(checks.has_render_blocking_resources, { code: "RENDER_BLOCKING", severity: "warning", message: "Render-blockierende Ressourcen gefunden" });
  report(checks.image_not_optimized, { code: "UNOPTIMIZED_IMAGES", severity: "info", message: "Nicht-optimierte Bilder vorhanden" });

  const imageCount = meta?.images_count || 0;
  // The check is a page-level flag, so when it is set the total image count
  // is used as the number of images missing alt text.
  const imagesWithoutAlt = checks.no_image_alt ? imageCount : 0;

  return {
    url: raw.url,
    statusCode: raw.status_code,
    pageTitle: meta?.title || null,
    metaDescription: meta?.description || null,
    h1: meta?.htags?.h1?.[0] || null,
    wordCount: meta?.content?.words_count || 0,
    // `??` instead of `||`: a metric of 0 is a real measurement, not "missing".
    loadTime: timing?.time_to_interactive ?? null,
    links: {
      internal: meta?.internal_links_count || 0,
      external: meta?.external_links_count || 0,
      broken: 0,
    },
    images: {
      total: imageCount,
      missingAlt: imagesWithoutAlt,
    },
    seo: {
      hasViewport: !checks.no_viewport_tag,
      hasCanonical: !!meta?.canonical,
      isIndexable: !checks.is_4xx_code && !checks.is_5xx_code,
      robotsTxt: meta?.robots || null,
      ogTitle: meta?.social_media_tags?.og_title || null,
      ogDescription: meta?.social_media_tags?.og_description || null,
    },
    performance: {
      cls: timing?.cumulative_layout_shift ?? null,
      lcp: timing?.largest_contentful_paint ?? null,
      fid: timing?.first_input_delay ?? null,
      ttfb: timing?.waiting_time ?? null,
    },
    issues,
  };
}
|