diff --git a/packages/concept-engine/src/scraper.ts b/packages/concept-engine/src/scraper.ts index fce2261..ab49c9c 100644 --- a/packages/concept-engine/src/scraper.ts +++ b/packages/concept-engine/src/scraper.ts @@ -1,9 +1,8 @@ // ============================================================================ // Scraper — Zyte API + Local Persistence // Crawls all pages of a website, stores them locally for reuse. +// Crawls all pages of a website, stores them locally for reuse. // ============================================================================ - -import axios from "axios"; import * as cheerio from "cheerio"; import * as fs from "node:fs/promises"; import * as path from "node:path"; @@ -171,32 +170,39 @@ function extractServices(text: string): string[] { */ async function fetchWithZyte(url: string, apiKey: string): Promise { try { - const resp = await axios.post( - "https://api.zyte.com/v1/extract", - { + const auth = Buffer.from(`${apiKey}:`).toString("base64"); + const resp = await fetch("https://api.zyte.com/v1/extract", { + method: "POST", + headers: { + "Authorization": `Basic ${auth}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ url, browserHtml: true, - }, - { - auth: { username: apiKey, password: "" }, - timeout: 60000, - }, - ); - const html = resp.data.browserHtml || ""; + }), + signal: AbortSignal.timeout(60000), + }); + + if (!resp.ok) { + const errorText = await resp.text(); + console.error(` ❌ Zyte API error ${resp.status} for ${url}: ${errorText}`); + // Rate limited — wait and retry once + if (resp.status === 429) { + console.log(" ⏳ Rate limited, waiting 5s and retrying..."); + await new Promise((r) => setTimeout(r, 5000)); + return fetchWithZyte(url, apiKey); + } + throw new Error(`HTTP ${resp.status}: ${errorText}`); + } + + const data = await resp.json(); + const html = data.browserHtml || ""; if (!html) { console.warn(` ⚠️ Zyte returned empty browserHtml for ${url}`); } return html; } catch (err: any) { - if (err.response) { - console.error(` ❌ Zyte API error ${err.response.status} for ${url}: ${err.response.data?.detail || err.response.statusText}`); - // Rate limited — wait and retry once - if (err.response.status === 429) { - console.log(" ⏳ Rate limited, waiting 5s and retrying..."); - await new Promise((r) => setTimeout(r, 5000)); - return fetchWithZyte(url, apiKey); - } - } throw err; } } @@ -205,14 +211,19 @@ async function fetchWithZyte(url: string, apiKey: string): Promise { * Fetch a page via simple HTTP GET (fallback). */ async function fetchDirect(url: string): Promise { - const resp = await axios.get(url, { - timeout: 30000, - headers: { - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", - }, - }); - return typeof resp.data === "string" ? resp.data : ""; + try { + const resp = await fetch(url, { + headers: { + "User-Agent": + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", + }, + signal: AbortSignal.timeout(30000), + }); + if (!resp.ok) return ""; + return await resp.text(); + } catch { + return ""; + } } /** diff --git a/packages/page-audit/src/dataforseo.ts b/packages/page-audit/src/dataforseo.ts index 1ffd44d..d8cac21 100644 --- a/packages/page-audit/src/dataforseo.ts +++ b/packages/page-audit/src/dataforseo.ts @@ -83,24 +83,31 @@ export class DataForSeoClient { let delay = 15_000; let pollCount = 0; - while (Date.now() - start < timeoutMs) { - await this.sleep(delay); - pollCount++; + // Force Node event loop active + const keepAlive = setInterval(() => { }, 1000); - const ready = await this.isTaskReady(taskId); - const elapsed = Math.round((Date.now() - start) / 1000); - console.log(` 📊 Poll #${pollCount}: ${ready ? "READY ✅" : "not ready"} (${elapsed}s elapsed)`); + try { + while (Date.now() - start < timeoutMs) { + await this.sleep(delay); + pollCount++; - if (ready) { - // Short grace period so the pages endpoint settles - await this.sleep(5_000); - return; + const ready = await this.isTaskReady(taskId); + const elapsed = Math.round((Date.now() - start) / 1000); + console.log(` 📊 Poll #${pollCount}: ${ready ? "READY ✅" : "not ready"} (${elapsed}s elapsed)`); + + if (ready) { + // Short grace period so the pages endpoint settles + await this.sleep(5_000); + return; + } + + delay = Math.min(delay * 1.3, 30_000); } - delay = Math.min(delay * 1.3, 30_000); + throw new Error(`DataForSEO task ${taskId} timed out after ${timeoutMs / 1000}s`); + } finally { + clearInterval(keepAlive); } - - throw new Error(`DataForSEO task ${taskId} timed out after ${timeoutMs / 1000}s`); } /** diff --git a/packages/pdf-library/src/components/AgbsPDF.tsx b/packages/pdf-library/src/components/AgbsPDF.tsx index df6b14c..f970ade 100644 --- a/packages/pdf-library/src/components/AgbsPDF.tsx +++ b/packages/pdf-library/src/components/AgbsPDF.tsx @@ -6,6 +6,7 @@ import { Text as PDFText, View as PDFView, StyleSheet as PDFStyleSheet, + Document as PDFDocument, } from "@react-pdf/renderer"; import { pdfStyles, @@ -213,30 +214,34 @@ export const AgbsPDF = ({ if (mode === "full") { return ( - - {content} - + + + {content} + + ); } return ( - - -
- {content} -