feat: migrate npm registry from Verdaccio to Gitea Packages
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped

This commit is contained in:
2026-02-27 00:12:00 +01:00
parent efd1341762
commit 5da88356a8
69 changed files with 5397 additions and 114 deletions

View File

@@ -0,0 +1,152 @@
// ============================================================================
// @mintel/page-audit — Auditor Orchestrator
// Main entry point: runs the full audit pipeline for a domain.
// ============================================================================
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { DataForSeoClient, normalizePage } from "./dataforseo.js";
import { generateAuditReport } from "./report.js";
import type { AuditConfig, AuditIssue, DomainAuditResult, PageAuditData } from "./types.js";
export class PageAuditor {
  private client: DataForSeoClient;

  /**
   * @param config DataForSEO credentials plus output/report options.
   */
  constructor(private config: AuditConfig) {
    this.client = new DataForSeoClient(
      config.dataForSeoLogin,
      config.dataForSeoPassword,
    );
  }

  /**
   * Run a full audit for a domain.
   * Steps:
   * 1. Start DataForSEO On-Page crawl task
   * 2. Wait for completion
   * 3. Fetch page results + broken resources
   * 4. Normalize and aggregate issues
   * 5. Generate AI report (unless lightMode)
   * 6. Save to disk
   *
   * @param domain Domain or full URL to audit (scheme/trailing slash stripped for display).
   * @param opts.maxPages Crawl budget passed to DataForSEO (default 50).
   * @param opts.onProgress Progress callback; defaults to console.log.
   * @returns Aggregated audit result (also persisted to config.outputDir).
   */
  async audit(domain: string, opts?: { maxPages?: number; onProgress?: (msg: string) => void }): Promise<DomainAuditResult> {
    const log = opts?.onProgress || console.log;
    const cleanDomain = domain.replace(/^https?:\/\//, "").replace(/\/$/, "");
    const maxPages = opts?.maxPages || 50;
    log(`🔍 Starting audit for ${cleanDomain}...`);

    // Step 1: Start crawl
    const taskId = await this.client.startCrawl(domain, maxPages);
    log(`📋 DataForSEO task started: ${taskId}`);

    // Step 2: Wait for completion
    log("⏳ Waiting for crawl to complete...");
    await this.client.waitForTask(taskId, 300_000);
    log("✅ Crawl complete!");

    // Step 3: Fetch results.
    // BUGFIX: fetch limit now follows maxPages (was hard-coded to 100, which
    // silently dropped pages whenever the crawl budget exceeded 100).
    log("📥 Fetching page data...");
    const [rawPages, brokenResources] = await Promise.all([
      this.client.getPages(taskId, Math.max(100, maxPages), 0),
      this.client.getBrokenResources(taskId),
    ]);

    // Step 4: Normalize pages
    const pages: PageAuditData[] = rawPages.map(normalizePage);

    // Count broken links per page (resources report which page referenced them).
    for (const broken of brokenResources) {
      const sourceUrl = broken.source_url;
      const sourcePage = pages.find((p) => p.url === sourceUrl);
      if (sourcePage) sourcePage.links.broken++;
    }

    // Aggregate top issues: dedupe by issue code, tracking occurrence count.
    const issueMap = new Map<string, AuditIssue & { count: number }>();
    for (const page of pages) {
      for (const issue of page.issues) {
        const existing = issueMap.get(issue.code);
        if (existing) {
          existing.count++;
        } else {
          issueMap.set(issue.code, { ...issue, count: 1 });
        }
      }
    }
    // Sort by severity first (critical > warning > info), then by frequency.
    const topIssues = Array.from(issueMap.values())
      .sort((a, b) => {
        const severityOrder = { critical: 0, warning: 1, info: 2 };
        return severityOrder[a.severity] - severityOrder[b.severity] || b.count - a.count;
      })
      .slice(0, 20);

    const result: DomainAuditResult = {
      domain: cleanDomain,
      auditedAt: new Date().toISOString(),
      totalPages: pages.length,
      pages,
      topIssues,
      report: null,
      dataForSeoTaskId: taskId,
    };

    // Step 5: AI Report — optional; a failure here must not lose the raw audit.
    if (!this.config.lightMode && this.config.openrouterKey) {
      log("🤖 Generating AI analysis...");
      try {
        result.report = await generateAuditReport(result, this.config.openrouterKey);
        log("✅ AI report generated!");
      } catch (err) {
        // Narrow the unknown catch value instead of `err: any` (strict TS).
        const msg = err instanceof Error ? err.message : String(err);
        console.warn(`⚠️ AI report failed (audit data still saved): ${msg}`);
      }
    }

    // Step 6: Save
    await this.saveResult(result);
    return result;
  }

  /**
   * Persist the audit result as pretty-printed JSON and print a console summary.
   * Output file name: <domain-slug>_<timestamp>.json inside config.outputDir.
   */
  private async saveResult(result: DomainAuditResult): Promise<void> {
    const outputDir = this.config.outputDir || "./out/page-audits";
    await fs.mkdir(outputDir, { recursive: true });
    const slug = result.domain.replace(/\./g, "-");
    // ISO timestamp with ':' and '.' replaced so it is filesystem-safe.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const outputPath = path.join(outputDir, `${slug}_${timestamp}.json`);
    await fs.writeFile(outputPath, JSON.stringify(result, null, 2));
    console.log(`\n📦 Audit saved to: ${outputPath}`);

    // Print summary
    console.log("\n" + "─".repeat(50));
    console.log("📊 AUDIT SUMMARY — " + result.domain);
    console.log("─".repeat(50));
    console.log(`  Pages audited: ${result.totalPages}`);
    console.log(`  Critical issues: ${result.topIssues.filter((i) => i.severity === "critical").length}`);
    console.log(`  Warnings: ${result.topIssues.filter((i) => i.severity === "warning").length}`);
    if (result.report) {
      console.log("\n🤖 AI REPORT:");
      console.log(`  Overall Health: ${result.report.overallHealth.toUpperCase()}`);
      console.log(`  SEO Score: ${result.report.seoScore}/100`);
      console.log(`  UX Score: ${result.report.uxScore}/100`);
      console.log(`  Performance Score: ${result.report.performanceScore}/100`);
      console.log(`\n  Summary: ${result.report.executiveSummary}`);
      if (result.report.criticalIssues.length > 0) {
        console.log("\n  🔴 Critical Issues:");
        for (const issue of result.report.criticalIssues) {
          console.log(`   - ${issue}`);
        }
      }
      if (result.report.quickWins.length > 0) {
        console.log("\n  🟢 Quick Wins:");
        for (const win of result.report.quickWins) {
          console.log(`   - ${win}`);
        }
      }
    }
    console.log("─".repeat(50));
  }
}

View File

@@ -0,0 +1,70 @@
#!/usr/bin/env node
// ============================================================================
// @mintel/page-audit — CLI
// ============================================================================
import { Command } from "commander";
import * as path from "node:path";
import { config as dotenvConfig } from "dotenv";
import { PageAuditor } from "./auditor.js";
// Load env from the monorepo root first, then allow a package-local .env to override.
dotenvConfig({ path: path.resolve(process.cwd(), "../../.env") });
dotenvConfig({ path: path.resolve(process.cwd(), ".env") });

const program = new Command();

program
  .name("page-audit")
  .description("AI-powered website IST-analysis using DataForSEO + Gemini")
  .version("1.0.0");

program
  .command("run")
  .description("Run a full SEO and UX audit for a website")
  .argument("<url>", "Website URL or domain to audit")
  .option("--max-pages <n>", "Maximum pages to crawl", "50")
  .option("--output <dir>", "Output directory", "../../out/page-audits")
  .option("--light", "Skip AI report (faster)")
  .action(async (url: string, options: any) => {
    // Support both DATA_FOR_SEO_API_KEY (login:password) and separate vars
    let login = process.env.DATA_FOR_SEO_LOGIN;
    let password = process.env.DATA_FOR_SEO_PASSWORD;
    if (!login && process.env.DATA_FOR_SEO_API_KEY) {
      const parts = process.env.DATA_FOR_SEO_API_KEY.split(":");
      login = parts[0];
      password = parts.slice(1).join(":"); // passwords may contain colons
    }
    if (!login || !password) {
      console.error("\n❌ DataForSEO credentials not found.");
      console.error("   Set in .env:");
      console.error("   DATA_FOR_SEO_LOGIN=yourlogin@example.com");
      console.error("   DATA_FOR_SEO_PASSWORD=your_api_password");
      console.error("   OR: DATA_FOR_SEO_API_KEY=email:password");
      process.exit(1);
    }

    // BUGFIX: guard against a non-numeric or non-positive --max-pages value —
    // parseInt would otherwise forward NaN to the API. Fall back to the
    // documented default of 50.
    const parsedMaxPages = Number.parseInt(options.maxPages, 10);
    const maxPages = Number.isNaN(parsedMaxPages) || parsedMaxPages <= 0 ? 50 : parsedMaxPages;

    const auditor = new PageAuditor({
      dataForSeoLogin: login,
      dataForSeoPassword: password,
      openrouterKey: process.env.OPENROUTER_API_KEY,
      outputDir: path.resolve(process.cwd(), options.output),
      lightMode: options.light || false,
    });
    try {
      await auditor.audit(url, {
        maxPages,
        onProgress: (msg) => console.log(msg),
      });
    } catch (err) {
      // Narrow the unknown catch value instead of `err: any` (strict TS).
      const msg = err instanceof Error ? err.message : String(err);
      console.error(`\n❌ Audit failed: ${msg}`);
      process.exit(1);
    }
  });

program.parseAsync(process.argv).catch((err) => {
  console.error(err);
  process.exit(1);
});

View File

@@ -0,0 +1,212 @@
// ============================================================================
// @mintel/page-audit — DataForSEO API Client
// Uses native fetch (no axios) to avoid Node event loop exit during polling.
// Docs: https://docs.dataforseo.com/v3/on_page/
// ============================================================================
import type { PageAuditData, AuditIssue } from "./types.js";
const API_BASE = "https://api.dataforseo.com/v3";

/** Authenticated DataForSEO client */
export class DataForSeoClient {
  private auth: string;

  constructor(login: string, password: string) {
    // DataForSEO uses HTTP Basic auth: base64("login:password").
    this.auth = Buffer.from(`${login}:${password}`).toString("base64");
  }

  private get headers(): Record<string, string> {
    return {
      Authorization: `Basic ${this.auth}`,
      "Content-Type": "application/json",
    };
  }

  /**
   * GET a v3 endpoint and return the parsed body.
   * @throws on HTTP errors and on DataForSEO body-level errors.
   */
  private async apiGet(path: string): Promise<any> {
    const resp = await fetch(`${API_BASE}${path}`, { headers: this.headers });
    if (!resp.ok) throw new Error(`DataForSEO GET ${path} failed: ${resp.status}`);
    const json: any = await resp.json();
    // BUGFIX: DataForSEO reports API-level errors inside an HTTP-200 body;
    // top-level status_code 20000 means OK (see v3 docs). Previously such
    // errors were silently treated as empty results.
    if (json?.status_code && json.status_code !== 20000) {
      throw new Error(`DataForSEO GET ${path} error ${json.status_code}: ${json.status_message}`);
    }
    return json;
  }

  /**
   * POST a v3 endpoint with a JSON body and return the parsed response.
   * @throws on HTTP errors and on DataForSEO body-level errors.
   */
  private async apiPost(path: string, body: any): Promise<any> {
    const resp = await fetch(`${API_BASE}${path}`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(body),
    });
    if (!resp.ok) throw new Error(`DataForSEO POST ${path} failed: ${resp.status}`);
    const json: any = await resp.json();
    // Same body-level error check as apiGet (status_code 20000 == OK).
    if (json?.status_code && json.status_code !== 20000) {
      throw new Error(`DataForSEO POST ${path} error ${json.status_code}: ${json.status_message}`);
    }
    return json;
  }

  /**
   * Start an On-Page crawl for a domain and return the task ID.
   */
  async startCrawl(domain: string, maxCrawlPages = 50): Promise<string> {
    const url = domain.startsWith("http") ? domain : `https://${domain}`;
    const data = await this.apiPost("/on_page/task_post", [
      {
        target: url,
        max_crawl_pages: maxCrawlPages,
        load_resources: true,
        enable_javascript: true,
        enable_browser_rendering: true,
        check_spell: false,
        calculate_keyword_density: false,
      },
    ]);
    const task = data?.tasks?.[0];
    if (!task?.id) {
      throw new Error(`DataForSEO task creation failed: ${JSON.stringify(task?.status_message || "unknown")}`);
    }
    return task.id;
  }

  /**
   * Check if a task is ready via the tasks_ready endpoint.
   */
  async isTaskReady(taskId: string): Promise<boolean> {
    const data = await this.apiGet("/on_page/tasks_ready");
    const readyTasks: string[] = data?.tasks?.[0]?.result?.map((t: any) => t.id) || [];
    return readyTasks.includes(taskId);
  }

  /**
   * Poll for task completion using tasks_ready endpoint.
   * DataForSEO crawls can take 2-5 minutes, so polling starts at 15s with
   * exponential backoff capped at 30s.
   * @throws when the task is not ready within timeoutMs.
   */
  async waitForTask(taskId: string, timeoutMs = 300_000): Promise<void> {
    const start = Date.now();
    let delay = 15_000;
    let pollCount = 0;
    while (Date.now() - start < timeoutMs) {
      await this.sleep(delay);
      pollCount++;
      const ready = await this.isTaskReady(taskId);
      const elapsed = Math.round((Date.now() - start) / 1000);
      console.log(`   📊 Poll #${pollCount}: ${ready ? "READY ✅" : "not ready"} (${elapsed}s elapsed)`);
      if (ready) {
        // Short grace period so the pages endpoint settles
        await this.sleep(5_000);
        return;
      }
      delay = Math.min(delay * 1.3, 30_000);
    }
    throw new Error(`DataForSEO task ${taskId} timed out after ${timeoutMs / 1000}s`);
  }

  /**
   * Sleep that keeps the Node event loop alive.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => {
      const timer = setTimeout(resolve, ms);
      // Explicitly ref the timer to prevent Node from exiting
      // (timers are ref'd by default; this is a defensive no-op guard).
      if (timer && typeof timer === "object" && "ref" in timer) {
        (timer as NodeJS.Timeout).ref();
      }
    });
  }

  /**
   * Fetch the crawl summary.
   */
  async getCrawlSummary(taskId: string): Promise<any> {
    const data = await this.apiGet(`/on_page/summary/${taskId}`);
    return data?.tasks?.[0]?.result?.[0] || null;
  }

  /**
   * Fetch page-level results (one API page of up to `limit` items at `offset`).
   */
  async getPages(taskId: string, limit = 100, offset = 0): Promise<any[]> {
    const data = await this.apiPost("/on_page/pages", [
      { id: taskId, limit, offset },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }

  /**
   * Fetch non-indexable pages.
   */
  async getNonIndexable(taskId: string): Promise<any[]> {
    const data = await this.apiPost("/on_page/non_indexable", [
      { id: taskId, limit: 100, offset: 0 },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }

  /**
   * Fetch broken resources (404s, timeouts, etc.)
   */
  async getBrokenResources(taskId: string): Promise<any[]> {
    // BUGFIX: status_code is a numeric field — compare against the number 399,
    // not the string "399", per the DataForSEO filters syntax.
    const data = await this.apiPost("/on_page/resources", [
      { id: taskId, limit: 100, filters: [["status_code", ">", 399]] },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }
}
/**
* Normalize a DataForSEO raw page result into our PageAuditData type.
*/
/**
 * Normalize a DataForSEO raw page result into our PageAuditData type.
 *
 * Maps the relevant `checks` flags to AuditIssue entries (user-facing German
 * messages) and extracts meta/timing fields, tolerating missing sub-objects
 * in the raw payload.
 *
 * @param raw Raw item from the DataForSEO `/on_page/pages` endpoint.
 * @returns The normalized page record with collected issues.
 */
export function normalizePage(raw: any): PageAuditData {
  const issues: AuditIssue[] = [];
  const checks = raw.checks || {};

  // Map DataForSEO boolean check flags to issue records.
  if (checks.no_title) issues.push({ code: "NO_TITLE", severity: "critical", message: "Seite hat keinen <title> Tag" });
  if (checks.title_too_long) issues.push({ code: "TITLE_TOO_LONG", severity: "warning", message: `Title zu lang (${raw.meta?.title?.length || "?"} Zeichen, max 60)` });
  if (checks.no_description) issues.push({ code: "NO_META_DESCRIPTION", severity: "warning", message: "Keine Meta-Description" });
  if (checks.description_too_long) issues.push({ code: "META_DESC_TOO_LONG", severity: "info", message: "Meta-Description zu lang (max 160)" });
  if (checks.no_h1_tag) issues.push({ code: "NO_H1", severity: "critical", message: "Kein H1-Tag auf der Seite" });
  if (checks.duplicate_h1_tag) issues.push({ code: "DUPLICATE_H1", severity: "warning", message: "Mehrere H1-Tags gefunden" });
  if (checks.is_broken) issues.push({ code: "PAGE_BROKEN", severity: "critical", message: `HTTP ${raw.status_code}: Seite nicht erreichbar` });
  if (checks.low_content_rate) issues.push({ code: "THIN_CONTENT", severity: "warning", message: "Zu wenig Content (dünne Seite)" });
  if (checks.has_render_blocking_resources) issues.push({ code: "RENDER_BLOCKING", severity: "warning", message: "Render-blockierende Ressourcen gefunden" });
  if (checks.image_not_optimized) issues.push({ code: "UNOPTIMIZED_IMAGES", severity: "info", message: "Nicht-optimierte Bilder vorhanden" });

  // When the no_image_alt flag is set, only the page-level image count is
  // available, so every image is counted as missing alt — an upper bound.
  // TODO confirm whether DataForSEO exposes a per-image breakdown.
  const imagesWithoutAlt = raw.checks?.no_image_alt ? (raw.meta?.images_count || 0) : 0;

  return {
    url: raw.url,
    statusCode: raw.status_code,
    pageTitle: raw.meta?.title || null,
    metaDescription: raw.meta?.description || null,
    h1: raw.meta?.htags?.h1?.[0] || null,
    wordCount: raw.meta?.content?.words_count || 0,
    // BUGFIX: use `??` instead of `||` for numeric metrics below — `||`
    // coerced a legitimate value of 0 (e.g. a perfect CLS score or a 0ms
    // timing) to null, losing real measurements.
    loadTime: raw.page_timing?.time_to_interactive ?? null,
    links: {
      internal: raw.meta?.internal_links_count || 0,
      external: raw.meta?.external_links_count || 0,
      broken: 0, // filled in later from the broken-resources endpoint
    },
    images: {
      total: raw.meta?.images_count || 0,
      missingAlt: imagesWithoutAlt,
    },
    seo: {
      hasViewport: !raw.checks?.no_viewport_tag,
      hasCanonical: !!raw.meta?.canonical,
      isIndexable: !raw.checks?.is_4xx_code && !raw.checks?.is_5xx_code,
      robotsTxt: raw.meta?.robots || null,
      ogTitle: raw.meta?.social_media_tags?.og_title || null,
      ogDescription: raw.meta?.social_media_tags?.og_description || null,
    },
    performance: {
      cls: raw.page_timing?.cumulative_layout_shift ?? null,
      lcp: raw.page_timing?.largest_contentful_paint ?? null,
      fid: raw.page_timing?.first_input_delay ?? null,
      ttfb: raw.page_timing?.waiting_time ?? null,
    },
    issues,
  };
}

View File

@@ -0,0 +1,14 @@
// ============================================================================
// @mintel/page-audit — Public API
// ============================================================================
// Orchestrator: runs crawl → normalize → report → save for a domain.
export { PageAuditor } from "./auditor.js";
// Low-level DataForSEO API client and raw-result normalizer.
export { DataForSeoClient, normalizePage } from "./dataforseo.js";
// LLM-backed report generation from an aggregated audit result.
export { generateAuditReport } from "./report.js";
// Public data shapes (type-only re-exports; erased at runtime).
export type {
  PageAuditData,
  AuditIssue,
  DomainAuditResult,
  AuditReport,
  AuditConfig,
} from "./types.js";

View File

@@ -0,0 +1,149 @@
// ============================================================================
// @mintel/page-audit — AI Report Generator
// Uses Gemini Pro (via OpenRouter) to synthesize DataForSEO data into
// a structured IST-analysis report in German.
// ============================================================================
import type { DomainAuditResult, AuditReport, PageAuditData, AuditIssue } from "./types.js";
const OPENROUTER_BASE = "https://openrouter.ai/api/v1";
const REPORT_MODEL = "google/gemini-3.1-pro-preview";

/**
 * Generate an AI-powered IST-analysis report from audit data.
 *
 * Sends an aggregated summary of the audit to an OpenRouter-hosted model and
 * parses the JSON response into an AuditReport.
 *
 * @param audit Aggregated audit result (pages + top issues).
 * @param openrouterKey OpenRouter API key (Bearer token).
 * @throws when the HTTP call fails, the model returns no content, or the
 *         response cannot be parsed into a usable report.
 */
export async function generateAuditReport(
  audit: DomainAuditResult,
  openrouterKey: string,
): Promise<AuditReport> {
  const summary = buildAuditSummary(audit);
  const systemPrompt = `
Du bist ein Senior SEO- und UX-Stratege. Analysiere die technischen Audit-Daten einer Website
und erstelle einen präzisen IST-Analyse-Bericht auf DEUTSCH.
Stil:
- Faktisch, direkt, kein Bullshit
- Konkrete Handlungsempfehlungen, keine vagen Floskeln
- Technik-verständlich für Entscheider (nicht für Entwickler)
Output: JSON (kein Markdown drumherum)
`;
  const userPrompt = `
Website: ${audit.domain}
Seiten gecrawlt: ${audit.totalPages}
Audit-Datum: ${audit.auditedAt}
=== TECHNISCHE AUSWERTUNG ===
${summary}
=== TOP-ISSUES ===
${audit.topIssues.map((i) => `[${i.severity.toUpperCase()}] ${i.message}${i.count ? ` (${i.count}x)` : ""}`).join("\n")}
Erstelle jetzt den IST-Analyse-Report als JSON:
{
"executiveSummary": string (2-3 Sätze über den aktuellen Zustand der Website),
"strengths": string[] (max 4, was gut läuft),
"criticalIssues": string[] (max 5, sofort zu beheben),
"quickWins": string[] (max 4, einfach umzusetzen mit großer Wirkung),
"strategicRecommendations": string[] (max 4, mittel-/langfristig),
"seoScore": number (0-100, realistisch),
"uxScore": number (0-100, realistisch),
"performanceScore": number (0-100, realistisch),
"overallHealth": "critical" | "needs-work" | "good" | "excellent"
}
`;
  const response = await fetch(`${OPENROUTER_BASE}/chat/completions`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${openrouterKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: REPORT_MODEL,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt },
      ],
      response_format: { type: "json_object" },
    }),
  });
  if (!response.ok) {
    throw new Error(`LLM request failed: ${response.status} ${await response.text()}`);
  }
  const json = await response.json();

  // BUGFIX: previously a missing message content fell back to "{}", which
  // parsed into an empty object asserted `as AuditReport` — consumers then
  // crashed on report.overallHealth.toUpperCase(). Fail loudly instead.
  const content = json.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("LLM returned an empty response (no message content)");
  }

  // Clean up markdown JSON wrappers if present
  const cleaned = content.replace(/^```(?:json)?\n?/m, "").replace(/```$/m, "").trim();
  let parsed: AuditReport;
  try {
    parsed = JSON.parse(cleaned) as AuditReport;
  } catch {
    throw new Error(`Could not parse AI report: ${cleaned.slice(0, 200)}`);
  }
  // Minimal shape validation: overallHealth is the field consumers read
  // unconditionally (see the auditor's summary printer).
  if (typeof parsed?.overallHealth !== "string") {
    throw new Error(`AI report missing required fields: ${cleaned.slice(0, 200)}`);
  }
  return parsed;
}
/**
* Build a human-readable text summary of the audit data for the LLM prompt.
*/
function buildAuditSummary(audit: DomainAuditResult): string {
const pages = audit.pages;
const brokenPages = pages.filter((p) => p.statusCode >= 400);
const noTitle = pages.filter((p) => !p.pageTitle);
const noDesc = pages.filter((p) => !p.metaDescription);
const noH1 = pages.filter((p) => !p.h1);
const notIndexable = pages.filter((p) => !p.seo.isIndexable);
const noViewport = pages.filter((p) => !p.seo.hasViewport);
const slowPages = pages.filter((p) => p.loadTime !== null && p.loadTime > 3000);
const imagesWithoutAlt = pages.reduce((sum, p) => sum + p.images.missingAlt, 0);
const totalImages = pages.reduce((sum, p) => sum + p.images.total, 0);
const avgLoad = pages
.filter((p) => p.loadTime !== null)
.reduce((sum, p, _, arr) => sum + (p.loadTime || 0) / arr.length, 0);
const lines = [
`Seiten gesamt: ${pages.length}`,
`Seiten mit Fehler (4xx/5xx): ${brokenPages.length}`,
`Seiten ohne <title>: ${noTitle.length}`,
`Seiten ohne Meta-Description: ${noDesc.length}`,
`Seiten ohne H1: ${noH1.length}`,
`Nicht-indexierbare Seiten: ${notIndexable.length}`,
`Seiten ohne Viewport-Meta: ${noViewport.length}`,
`Bilder gesamt: ${totalImages}, davon ohne alt-Attribut: ${imagesWithoutAlt}`,
`Langsame Seiten (>3s): ${slowPages.length}`,
`Ø Ladezeit: ${avgLoad > 0 ? `${(avgLoad / 1000).toFixed(1)}s` : "unbekannt"}`,
];
// Core Web Vitals (from first valid page)
const lcpPages = pages.filter((p) => p.performance.lcp !== null);
if (lcpPages.length > 0) {
const avgLcp = lcpPages.reduce((s, p) => s + (p.performance.lcp || 0), 0) / lcpPages.length;
lines.push(`Ø LCP: ${(avgLcp / 1000).toFixed(1)}s (Ziel: <2.5s)`);
}
const clsPages = pages.filter((p) => p.performance.cls !== null);
if (clsPages.length > 0) {
const avgCls = clsPages.reduce((s, p) => s + (p.performance.cls || 0), 0) / clsPages.length;
lines.push(`Ø CLS: ${avgCls.toFixed(3)} (Ziel: <0.1)`);
}
// Top pages by issues
const worstPages = [...pages]
.sort((a, b) => b.issues.length - a.issues.length)
.slice(0, 5);
if (worstPages.length > 0) {
lines.push("\nSeiten mit den meisten Problemen:");
for (const page of worstPages) {
lines.push(` ${page.url}: ${page.issues.length} Issues (${page.issues.map((i) => i.code).join(", ")})`);
}
}
return lines.join("\n");
}

View File

@@ -0,0 +1,83 @@
// ============================================================================
// @mintel/page-audit — Types
// ============================================================================
/** DataForSEO On-Page audit result for a single page */
export interface PageAuditData {
  url: string;
  statusCode: number; // HTTP status of the crawled page
  pageTitle: string | null; // null when the page has no <title>
  metaDescription: string | null;
  h1: string | null; // first H1 on the page, if any
  wordCount: number;
  loadTime: number | null; // ms
  links: {
    internal: number;
    external: number;
    broken: number; // filled in post-crawl from the broken-resources endpoint
  };
  images: {
    total: number;
    missingAlt: number; // upper bound — see normalizePage
  };
  seo: {
    hasViewport: boolean;
    hasCanonical: boolean;
    isIndexable: boolean; // derived: neither 4xx nor 5xx
    robotsTxt: string | null; // robots meta value as reported by the crawler
    ogTitle: string | null;
    ogDescription: string | null;
  };
  performance: {
    cls: number | null; // Cumulative Layout Shift
    lcp: number | null; // Largest Contentful Paint (ms)
    fid: number | null; // First Input Delay (ms)
    ttfb: number | null; // Time to First Byte (ms)
  };
  issues: AuditIssue[];
}
/** A single issue found during audit */
export interface AuditIssue {
  code: string; // stable machine-readable identifier, e.g. "NO_TITLE"
  severity: "critical" | "warning" | "info";
  message: string; // human-readable description (German)
  count?: number; // occurrence count when aggregated across pages
}
/** Full crawled audit result for a domain */
export interface DomainAuditResult {
  domain: string; // scheme-less domain used for display and file naming
  auditedAt: string; // ISO-8601 timestamp of the audit run
  totalPages: number;
  pages: PageAuditData[];
  /** Aggregated issues sorted by severity */
  topIssues: AuditIssue[];
  /** AI-generated analysis */
  report: AuditReport | null;
  /** Raw DataForSEO task ID for reference */
  dataForSeoTaskId?: string;
}
/** AI-generated IST analysis report */
export interface AuditReport {
  executiveSummary: string;
  strengths: string[];
  criticalIssues: string[];
  quickWins: string[];
  strategicRecommendations: string[];
  seoScore: number; // 0-100
  uxScore: number; // 0-100
  performanceScore: number; // 0-100
  overallHealth: "critical" | "needs-work" | "good" | "excellent";
}
/** Config for running an audit */
export interface AuditConfig {
  dataForSeoLogin: string;
  dataForSeoPassword: string;
  openrouterKey?: string; // omit to skip the AI report
  outputDir?: string; // defaults to ./out/page-audits
  /** If true, only analyze — no AI report */
  lightMode?: boolean;
}