// ============================================================================
// Analyzer — Deterministic Site Analysis (NO LLM!)
// Builds a SiteProfile from crawled pages using pure code logic.
// This is the core fix against hallucinated page structures.
// ============================================================================

import type {
  CrawledPage,
  SiteProfile,
  NavItem,
  CompanyInfo,
  PageInventoryItem,
} from "./types.js";

/**
 * Assemble the full SiteProfile for a crawled site.
 *
 * Every field is derived from `pages` by plain code — no LLM is consulted.
 * The only value that does not come from the inputs is `crawledAt`, which is
 * the wall-clock time at which this function runs.
 *
 * @param pages - The crawled pages to analyze.
 * @param domain - The site's domain; copied into the profile and passed to
 *   the external-domain extraction.
 * @returns The assembled profile. `totalPages` counts only pages whose
 *   `type` is not `"legal"`.
 */
export function analyzeSite(pages: CrawledPage[], domain: string): SiteProfile {
  // Run each extractor once, in a fixed order, before assembling the result.
  const nav = extractNavigation(pages);
  const features = extractExistingFeatures(pages);
  const serviceList = extractAllServices(pages);
  const company = extractCompanyInfo(pages);
  const palette = extractColors(pages);
  const socials = extractSocialLinks(pages);
  const outboundDomains = extractExternalDomains(pages, domain);
  const imageList = extractAllImages(pages);
  const headcount = extractEmployeeCount(pages);
  const inventory = buildPageInventory(pages);

  // Timestamp of the analysis, in ISO 8601 form.
  const analyzedAt = new Date().toISOString();

  // Legal pages are left out of the headline page count.
  const nonLegalPageCount = pages.reduce(
    (count, page) => (page.type !== "legal" ? count + 1 : count),
    0,
  );

  const profile: SiteProfile = {
    domain,
    crawledAt: analyzedAt,
    totalPages: nonLegalPageCount,
    navigation: nav,
    existingFeatures: features,
    services: serviceList,
    companyInfo: company,
    pageInventory: inventory,
    colors: palette,
    socialLinks: socials,
    externalDomains: outboundDomains,
    images: imageList,
    employeeCount: headcount,
  };
  return profile;
}

/**
 * Extract the site's main navigation structure from