feat: migrate npm registry from Verdaccio to Gitea Packages
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
This commit is contained in:
212
packages/page-audit/src/dataforseo.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
// ============================================================================
|
||||
// @mintel/page-audit — DataForSEO API Client
|
||||
// Uses native fetch (no axios) to avoid Node event loop exit during polling.
|
||||
// Docs: https://docs.dataforseo.com/v3/on_page/
|
||||
// ============================================================================
|
||||
|
||||
import type { PageAuditData, AuditIssue } from "./types.js";
|
||||
|
||||
const API_BASE = "https://api.dataforseo.com/v3";
|
||||
|
||||
/** Authenticated DataForSEO client */
|
||||
export class DataForSeoClient {
|
||||
private auth: string;
|
||||
|
||||
constructor(login: string, password: string) {
|
||||
this.auth = Buffer.from(`${login}:${password}`).toString("base64");
|
||||
}
|
||||
|
||||
private get headers(): Record<string, string> {
|
||||
return {
|
||||
Authorization: `Basic ${this.auth}`,
|
||||
"Content-Type": "application/json",
|
||||
};
|
||||
}
|
||||
|
||||
private async apiGet(path: string): Promise<any> {
|
||||
const resp = await fetch(`${API_BASE}${path}`, { headers: this.headers });
|
||||
if (!resp.ok) throw new Error(`DataForSEO GET ${path} failed: ${resp.status}`);
|
||||
return resp.json();
|
||||
}
|
||||
|
||||
private async apiPost(path: string, body: any): Promise<any> {
|
||||
const resp = await fetch(`${API_BASE}${path}`, {
|
||||
method: "POST",
|
||||
headers: this.headers,
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!resp.ok) throw new Error(`DataForSEO POST ${path} failed: ${resp.status}`);
|
||||
return resp.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start an On-Page crawl for a domain and return the task ID.
|
||||
*/
|
||||
async startCrawl(domain: string, maxCrawlPages = 50): Promise<string> {
|
||||
const url = domain.startsWith("http") ? domain : `https://${domain}`;
|
||||
|
||||
const data = await this.apiPost("/on_page/task_post", [
|
||||
{
|
||||
target: url,
|
||||
max_crawl_pages: maxCrawlPages,
|
||||
load_resources: true,
|
||||
enable_javascript: true,
|
||||
enable_browser_rendering: true,
|
||||
check_spell: false,
|
||||
calculate_keyword_density: false,
|
||||
},
|
||||
]);
|
||||
|
||||
const task = data?.tasks?.[0];
|
||||
if (!task?.id) {
|
||||
throw new Error(`DataForSEO task creation failed: ${JSON.stringify(task?.status_message || "unknown")}`);
|
||||
}
|
||||
|
||||
return task.id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a task is ready via the tasks_ready endpoint.
|
||||
*/
|
||||
async isTaskReady(taskId: string): Promise<boolean> {
|
||||
const data = await this.apiGet("/on_page/tasks_ready");
|
||||
const readyTasks: string[] = data?.tasks?.[0]?.result?.map((t: any) => t.id) || [];
|
||||
return readyTasks.includes(taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll for task completion using tasks_ready endpoint.
|
||||
* DataForSEO crawls can take 2-5 minutes.
|
||||
*/
|
||||
async waitForTask(taskId: string, timeoutMs = 300_000): Promise<void> {
|
||||
const start = Date.now();
|
||||
let delay = 15_000;
|
||||
let pollCount = 0;
|
||||
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
await this.sleep(delay);
|
||||
pollCount++;
|
||||
|
||||
const ready = await this.isTaskReady(taskId);
|
||||
const elapsed = Math.round((Date.now() - start) / 1000);
|
||||
console.log(` 📊 Poll #${pollCount}: ${ready ? "READY ✅" : "not ready"} (${elapsed}s elapsed)`);
|
||||
|
||||
if (ready) {
|
||||
// Short grace period so the pages endpoint settles
|
||||
await this.sleep(5_000);
|
||||
return;
|
||||
}
|
||||
|
||||
delay = Math.min(delay * 1.3, 30_000);
|
||||
}
|
||||
|
||||
throw new Error(`DataForSEO task ${taskId} timed out after ${timeoutMs / 1000}s`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sleep that keeps the Node event loop alive.
|
||||
*/
|
||||
private sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => {
|
||||
const timer = setTimeout(resolve, ms);
|
||||
// Explicitly ref the timer to prevent Node from exiting
|
||||
if (timer && typeof timer === "object" && "ref" in timer) {
|
||||
(timer as NodeJS.Timeout).ref();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch the crawl summary.
|
||||
*/
|
||||
async getCrawlSummary(taskId: string): Promise<any> {
|
||||
const data = await this.apiGet(`/on_page/summary/${taskId}`);
|
||||
return data?.tasks?.[0]?.result?.[0] || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch all page-level results.
|
||||
*/
|
||||
async getPages(taskId: string, limit = 100, offset = 0): Promise<any[]> {
|
||||
const data = await this.apiPost("/on_page/pages", [
|
||||
{ id: taskId, limit, offset },
|
||||
]);
|
||||
return data?.tasks?.[0]?.result?.[0]?.items || [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch non-indexable pages.
|
||||
*/
|
||||
async getNonIndexable(taskId: string): Promise<any[]> {
|
||||
const data = await this.apiPost("/on_page/non_indexable", [
|
||||
{ id: taskId, limit: 100, offset: 0 },
|
||||
]);
|
||||
return data?.tasks?.[0]?.result?.[0]?.items || [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch broken resources (404s, timeouts, etc.)
|
||||
*/
|
||||
async getBrokenResources(taskId: string): Promise<any[]> {
|
||||
const data = await this.apiPost("/on_page/resources", [
|
||||
{ id: taskId, limit: 100, filters: [["status_code", ">", "399"]] },
|
||||
]);
|
||||
return data?.tasks?.[0]?.result?.[0]?.items || [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize a DataForSEO raw page result into our PageAuditData type.
|
||||
*/
|
||||
export function normalizePage(raw: any): PageAuditData {
|
||||
const issues: AuditIssue[] = [];
|
||||
|
||||
const checks = raw.checks || {};
|
||||
|
||||
if (checks.no_title) issues.push({ code: "NO_TITLE", severity: "critical", message: "Seite hat keinen <title> Tag" });
|
||||
if (checks.title_too_long) issues.push({ code: "TITLE_TOO_LONG", severity: "warning", message: `Title zu lang (${raw.meta?.title?.length || "?"} Zeichen, max 60)` });
|
||||
if (checks.no_description) issues.push({ code: "NO_META_DESCRIPTION", severity: "warning", message: "Keine Meta-Description" });
|
||||
if (checks.description_too_long) issues.push({ code: "META_DESC_TOO_LONG", severity: "info", message: "Meta-Description zu lang (max 160)" });
|
||||
if (checks.no_h1_tag) issues.push({ code: "NO_H1", severity: "critical", message: "Kein H1-Tag auf der Seite" });
|
||||
if (checks.duplicate_h1_tag) issues.push({ code: "DUPLICATE_H1", severity: "warning", message: "Mehrere H1-Tags gefunden" });
|
||||
if (checks.is_broken) issues.push({ code: "PAGE_BROKEN", severity: "critical", message: `HTTP ${raw.status_code}: Seite nicht erreichbar` });
|
||||
if (checks.low_content_rate) issues.push({ code: "THIN_CONTENT", severity: "warning", message: "Zu wenig Content (dünne Seite)" });
|
||||
if (checks.has_render_blocking_resources) issues.push({ code: "RENDER_BLOCKING", severity: "warning", message: "Render-blockierende Ressourcen gefunden" });
|
||||
if (checks.image_not_optimized) issues.push({ code: "UNOPTIMIZED_IMAGES", severity: "info", message: "Nicht-optimierte Bilder vorhanden" });
|
||||
|
||||
const imagesWithoutAlt = raw.checks?.no_image_alt ? (raw.meta?.images_count || 0) : 0;
|
||||
|
||||
return {
|
||||
url: raw.url,
|
||||
statusCode: raw.status_code,
|
||||
pageTitle: raw.meta?.title || null,
|
||||
metaDescription: raw.meta?.description || null,
|
||||
h1: raw.meta?.htags?.h1?.[0] || null,
|
||||
wordCount: raw.meta?.content?.words_count || 0,
|
||||
loadTime: raw.page_timing?.time_to_interactive || null,
|
||||
links: {
|
||||
internal: raw.meta?.internal_links_count || 0,
|
||||
external: raw.meta?.external_links_count || 0,
|
||||
broken: 0,
|
||||
},
|
||||
images: {
|
||||
total: raw.meta?.images_count || 0,
|
||||
missingAlt: imagesWithoutAlt,
|
||||
},
|
||||
seo: {
|
||||
hasViewport: !raw.checks?.no_viewport_tag,
|
||||
hasCanonical: !!raw.meta?.canonical,
|
||||
isIndexable: !raw.checks?.is_4xx_code && !raw.checks?.is_5xx_code,
|
||||
robotsTxt: raw.meta?.robots || null,
|
||||
ogTitle: raw.meta?.social_media_tags?.og_title || null,
|
||||
ogDescription: raw.meta?.social_media_tags?.og_description || null,
|
||||
},
|
||||
performance: {
|
||||
cls: raw.page_timing?.cumulative_layout_shift || null,
|
||||
lcp: raw.page_timing?.largest_contentful_paint || null,
|
||||
fid: raw.page_timing?.first_input_delay || null,
|
||||
ttfb: raw.page_timing?.waiting_time || null,
|
||||
},
|
||||
issues,
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user