// ============================================================================
// Validators — Deterministic Math & Logic Checks (NO LLM!)
// Catches all the issues reported by the user programmatically.
// ============================================================================

import type {
  EstimationState,
  ValidationResult,
  ValidationError,
  ValidationWarning,
} from "./types.js";

/**
 * Loosely typed form state. The validators below only probe a handful of
 * optional properties (`sitemap`, `positionDescriptions`, `selectedPages`,
 * `otherPages`, `functions`, `features`) and guard each access at runtime.
 */
type FormState = Record<string, any>;

/** Maximum tolerated difference between described and actual page count. */
const PAGE_COUNT_TOLERANCE = 2;

/** Substrings that mark a page name as a video asset. */
const VIDEO_KEYWORDS = ["video", "film", "messefilm", "imagefilm", "clip"];

/** Key fragments accepted as evidence of a Sorglos-Betrieb position. */
const SORGLOS_KEY_MARKERS = ["sorglos", "betrieb", "pflege"];

/** Returns `value` when it is an array, otherwise an empty array. */
function asArray(value: unknown): unknown[] {
  return Array.isArray(value) ? value : [];
}

/** Returns the `pages` list of a sitemap category, or `[]` when absent or malformed. */
function pagesOfCategory(category: any): any[] {
  return Array.isArray(category?.pages) ? category.pages : [];
}

/** Counts all page entries across every sitemap category. */
function countSitemapPages(fs: FormState): number {
  let total = 0;
  for (const category of asArray(fs.sitemap)) {
    total += pagesOfCategory(category).length;
  }
  return total;
}

/** Collects every string `title` found in the sitemap (original casing). */
function collectSitemapTitles(fs: FormState): string[] {
  const titles: string[] = [];
  for (const category of asArray(fs.sitemap)) {
    for (const page of pagesOfCategory(category)) {
      if (typeof page?.title === "string") titles.push(page.title);
    }
  }
  return titles;
}

/** Returns `fs.positionDescriptions` when it is an object, otherwise `{}`. */
function getPositions(fs: FormState): Record<string, unknown> {
  const positions: unknown = fs.positionDescriptions;
  return typeof positions === "object" && positions !== null
    ? (positions as Record<string, unknown>)
    : {};
}

/** Returns the "Individuelle Seiten" description text, or `""` if missing or not a string. */
function getPagesDescription(positions: Record<string, unknown>): string {
  const raw =
    positions["Individuelle Seiten"] ||
    positions["2. Individuelle Seiten"] ||
    "";
  return typeof raw === "string" ? raw : "";
}

/**
 * Counts the pages a description text mentions.
 *
 * Only the text after the first colon (if any) is considered, and it is split
 * on commas only: real page names such as "Wartung & Störungsdienst" or
 * "Genehmigungs- und Ausführungsplanung" contain "&" / "und".
 * "Leistungen (6 Unterseiten)" counts as 6, "Unternehmen (inkl. Messen)"
 * counts as 2, every other segment longer than two characters counts as 1.
 */
function countMentionedPages(description: string): number {
  const listPart = description.includes(":")
    ? description.split(":").slice(1).join(":")
    : description;

  let mentioned = 0;
  for (const rawSegment of listPart.split(/,/)) {
    const segment = rawSegment.replace(/\.$/, "").trim();
    if (segment.length <= 2) continue;

    const subPageMatch = segment.match(/\((\d+)\s+(?:Unter)?[Ss]eiten?\)/);
    if (subPageMatch) {
      mentioned += parseInt(subPageMatch[1] ?? "0", 10);
    } else if (/\(inkl\.\s+/.test(segment)) {
      mentioned += 2;
    } else {
      mentioned += 1;
    }
  }
  return mentioned;
}

/**
 * Reduces a domain to its first label, lowercased, without protocol or a
 * leading "www." (e.g. "https://www.etib-ing.com" -> "etib-ing").
 */
function extractDomainBaseName(domain: string): string {
  const host = domain
    .trim()
    .toLowerCase()
    .replace(/^[a-z][a-z0-9+.-]*:\/\//, "")
    .replace(/^www\./, "");
  return host.split(".")[0] ?? "";
}

/**
 * Run all deterministic validation checks on the final estimation state.
 *
 * @param state Estimation state; `formState` is required, `concept.siteProfile`
 *   is optional and only feeds the site-profile based checks (4-6).
 * @returns `passed` is true only when no check produced an error; warnings
 *   never affect `passed`.
 */
export function validateEstimation(state: EstimationState): ValidationResult {
  const errors: ValidationError[] = [];
  const warnings: ValidationWarning[] = [];

  if (!state.formState) {
    return {
      passed: false,
      errors: [
        {
          code: "NO_FORM_STATE",
          message: "No form state available for validation.",
        },
      ],
      warnings: [],
    };
  }

  const fs = state.formState;
  const siteProfile = state.concept?.siteProfile;

  // 1. PAGE COUNT PARITY
  validatePageCountParity(fs, errors);
  // 2. SORGLOS-BETRIEB IN SUMMARY
  validateSorglosBetrieb(fs, errors, warnings);
  // 3. NO VIDEOS AS PAGES
  validateNoVideosAsPages(fs, errors);
  // 4. EXTERNAL DOMAINS NOT AS PAGES
  validateExternalDomains(fs, siteProfile, errors);
  // 5. SERVICE COVERAGE
  validateServiceCoverage(fs, siteProfile, warnings);
  // 6. EXISTING FEATURE DETECTION
  validateExistingFeatures(fs, siteProfile, warnings);
  // 7. MULTILANG LABEL CORRECTNESS
  validateMultilangLabeling(fs, errors);
  // 8. INITIAL-PFLEGE UNITS
  validateInitialPflegeUnits(fs, warnings);
  // 9. SITEMAP vs PAGE LIST CONSISTENCY
  validateSitemapConsistency(fs, errors);

  return {
    passed: errors.length === 0,
    errors,
    warnings,
  };
}

/**
 * 1. Page count: the "Individuelle Seiten" position description must mention
 * roughly the same number of pages as the sitemap contains.
 * User report: "it calculates 15 pages but only names 11".
 *
 * NOTE: fs.pages is a conceptual list of page groups (e.g. "Leistungen")
 * while the sitemap expands those into sub-pages. Therefore fs.pages.length
 * is NOT compared to the sitemap count; instead the description text is
 * checked against the sitemap, which is the source of truth.
 *
 * Pushes PAGE_COUNT_MISMATCH when the counts differ by more than
 * PAGE_COUNT_TOLERANCE. Does nothing when the sitemap is empty or the
 * description is missing.
 */
function validatePageCountParity(
  fs: FormState,
  errors: ValidationError[],
): void {
  const sitemapPageCount = countSitemapPages(fs);
  if (sitemapPageCount === 0) return;

  const description = getPagesDescription(getPositions(fs));
  if (!description) return;

  const mentionedCount = countMentionedPages(description);
  if (
    mentionedCount > 0 &&
    Math.abs(mentionedCount - sitemapPageCount) > PAGE_COUNT_TOLERANCE
  ) {
    errors.push({
      code: "PAGE_COUNT_MISMATCH",
      message: `Seiten-Beschreibung nennt ~${mentionedCount} Seiten, aber ${sitemapPageCount} Seiten in der Sitemap.`,
      field: "positionDescriptions.Individuelle Seiten",
      expected: sitemapPageCount,
      actual: mentionedCount,
    });
  }
}

/**
 * 2. Sorglos-Betrieb must be included in the summary.
 * User report: "the estimation summary did not include Sorglos-Betrieb".
 *
 * Passes when any position key contains one of SORGLOS_KEY_MARKERS
 * (case-insensitive); otherwise pushes MISSING_SORGLOS_BETRIEB.
 *
 * NOTE(review): because "betrieb" and "pflege" are accepted on their own, a
 * key such as "Initial-Pflege" alone satisfies this check — confirm that this
 * leniency is intended.
 *
 * `_warnings` is accepted for signature parity with the other checks and is
 * not used.
 */
function validateSorglosBetrieb(
  fs: FormState,
  errors: ValidationError[],
  _warnings: ValidationWarning[],
): void {
  const hasPosition = Object.keys(getPositions(fs)).some((key) => {
    const lowerKey = key.toLowerCase();
    return SORGLOS_KEY_MARKERS.some((marker) => lowerKey.includes(marker));
  });

  if (!hasPosition) {
    errors.push({
      code: "MISSING_SORGLOS_BETRIEB",
      message: "Der Sorglos-Betrieb fehlt in den Position-Beschreibungen.",
      field: "positionDescriptions",
    });
  }
}

/**
 * 3. Videos must not be treated as separate pages.
 * User report: "it added videos as their own page".
 *
 * Scans `selectedPages`, `otherPages` and all sitemap titles. A name that
 * contains a video keyword is flagged unless it also contains "leistung".
 * Non-string entries are ignored.
 */
function validateNoVideosAsPages(
  fs: FormState,
  errors: ValidationError[],
): void {
  const candidateNames = [
    ...asArray(fs.selectedPages),
    ...asArray(fs.otherPages),
    ...collectSitemapTitles(fs),
  ].filter((name): name is string => typeof name === "string");

  for (const pageName of candidateNames) {
    const lower = pageName.toLowerCase();
    if (lower.includes("leistung")) continue;
    if (!VIDEO_KEYWORDS.some((keyword) => lower.includes(keyword))) continue;

    errors.push({
      code: "VIDEO_AS_PAGE",
      message: `"${pageName}" ist ein Video-Asset, keine eigene Seite.`,
      field: "sitemap",
    });
  }
}

/**
 * 4. External sister-company domains must not be proposed as sub-pages.
 * User report: "it integrated the engineering company as a page, but they
 * have their own website".
 *
 * A sitemap title is flagged when it contains the domain's base name, or when
 * it contains "ingenieur" and the domain contains "ing".
 *
 * Domains that are not strings or that reduce to an empty base name are
 * skipped: an empty base name is a substring of every title and would flag
 * the entire sitemap.
 */
function validateExternalDomains(
  fs: FormState,
  siteProfile: any,
  errors: ValidationError[],
): void {
  if (!siteProfile?.externalDomains?.length) return;

  const sitemapTitles = collectSitemapTitles(fs);

  for (const extDomain of asArray(siteProfile.externalDomains)) {
    if (typeof extDomain !== "string") continue;

    const baseName = extractDomainBaseName(extDomain);
    if (baseName.length === 0) continue;

    const domainLooksLikeEngineering = extDomain.toLowerCase().includes("ing");

    for (const pageTitle of sitemapTitles) {
      const lowerTitle = pageTitle.toLowerCase();
      const referencesDomain =
        lowerTitle.includes(baseName) ||
        (lowerTitle.includes("ingenieur") && domainLooksLikeEngineering);

      if (referencesDomain) {
        errors.push({
          code: "EXTERNAL_DOMAIN_AS_PAGE",
          message: `"${pageTitle}" hat eine eigene Website (${extDomain}) und darf nicht als Unterseite vorgeschlagen werden.`,
          field: "sitemap",
        });
      }
    }
  }
}

/**
 * 5. Services from the existing site should be covered.
 * User report: "it left out services that are clearly named on the
 * competence page".
 *
 * A service counts as covered when any of its words longer than four
 * characters occurs anywhere in the JSON-serialized form state. When a
 * service has no such word (e.g. "Web & App"), the whole phrase is searched
 * instead, so short-word services are not flagged unconditionally.
 * Services of five characters or fewer, and non-string entries, are ignored.
 */
function validateServiceCoverage(
  fs: FormState,
  siteProfile: any,
  warnings: ValidationWarning[],
): void {
  if (!siteProfile?.services?.length) return;

  const allContent = JSON.stringify(fs).toLowerCase();

  for (const service of asArray(siteProfile.services)) {
    if (typeof service !== "string" || service.length <= 5) continue;

    const phrase = service.toLowerCase();
    const keywords = phrase
      .split(/[\s,&-]+/)
      .filter((word) => word.length > 4);

    const isCovered =
      keywords.length > 0
        ? keywords.some((keyword) => allContent.includes(keyword))
        : allContent.includes(phrase.trim());

    if (!isCovered) {
      warnings.push({
        code: "MISSING_SERVICE",
        message: `Bestehende Leistung "${service}" ist nicht in der Schätzung berücksichtigt.`,
        suggestion: `Prüfen ob "${service}" im Briefing gewünscht ist und ggf. in die Seitenstruktur aufnehmen.`,
      });
    }
  }
}

/**
 * 6. Existing features (search, forms) must be acknowledged.
 * User report: "it did not notice the search function, which already exists
 * on the site".
 *
 * Each detected feature must equal (case-insensitively) an entry of
 * `fs.functions` or `fs.features`. "cookie-consent" and "video" are never
 * flagged. Non-string entries on either side are ignored.
 */
function validateExistingFeatures(
  fs: FormState,
  siteProfile: any,
  warnings: ValidationWarning[],
): void {
  if (!siteProfile?.existingFeatures?.length) return;

  const selected = new Set<string>();
  for (const entry of [...asArray(fs.functions), ...asArray(fs.features)]) {
    if (typeof entry === "string") selected.add(entry.toLowerCase());
  }

  for (const existingFeature of asArray(siteProfile.existingFeatures)) {
    if (typeof existingFeature !== "string") continue;
    if (existingFeature === "cookie-consent") continue; // Standard, don't flag
    if (existingFeature === "video") continue; // Usually an asset, not a feature

    if (!selected.has(existingFeature.toLowerCase())) {
      warnings.push({
        code: "EXISTING_FEATURE_IGNORED",
        message: `Die bestehende Suchfunktion/Feature "${existingFeature}" wurde auf der aktuellen Website erkannt, aber nicht in der Schätzung berücksichtigt.`,
        suggestion: `"${existingFeature}" als Function oder Feature aufnehmen, da es bereits existiert und der Kunde es erwartet.`,
      });
    }
  }
}

/**
 * 7. Multilang +20% must not be labeled as API.
 * User report: "the +20% do not refer to API".
 *
 * For every position whose key contains "api" or "schnittstelle", the
 * description must not mention "mehrsprach", "multilang" or "20%".
 */
function validateMultilangLabeling(
  fs: FormState,
  errors: ValidationError[],
): void {
  for (const [key, desc] of Object.entries(getPositions(fs))) {
    const lowerKey = key.toLowerCase();
    if (!lowerKey.includes("api") && !lowerKey.includes("schnittstelle")) {
      continue;
    }

    const lowerDesc = typeof desc === "string" ? desc.toLowerCase() : "";
    if (
      lowerDesc.includes("mehrsprach") ||
      lowerDesc.includes("multilang") ||
      lowerDesc.includes("20%")
    ) {
      errors.push({
        code: "MULTILANG_WRONG_POSITION",
        message: `Mehrsprachigkeit (+20%) ist unter "${key}" eingeordnet, gehört aber nicht zu API/Schnittstellen.`,
        field: key,
      });
    }
  }
}

/**
 * 8. Initial-Pflege should refer to "Datensätze" (records), not "Seiten" (pages).
 * User report: "Initialpflege => 100€ per item => these are records, not pages".
 *
 * Warns for every position whose key contains "pflege" or "initial" and whose
 * description mentions "seiten" without mentioning "datensätz".
 */
function validateInitialPflegeUnits(
  fs: FormState,
  warnings: ValidationWarning[],
): void {
  for (const [key, desc] of Object.entries(getPositions(fs))) {
    const lowerKey = key.toLowerCase();
    if (!lowerKey.includes("pflege") && !lowerKey.includes("initial")) {
      continue;
    }

    const lowerDesc = typeof desc === "string" ? desc.toLowerCase() : "";
    if (lowerDesc.includes("seiten") && !lowerDesc.includes("datensätz")) {
      warnings.push({
        code: "INITIALPFLEGE_WRONG_UNIT",
        message: `"${key}" spricht von "Seiten", aber gemeint sind Datensätze (z.B. Produkte, Referenzen).`,
        suggestion: `Beschreibung auf "Datensätze" statt "Seiten" ändern.`,
      });
    }
  }
}

/**
 * Legacy variant of the page-count check; it is not called by
 * validateEstimation. It reports PAGES_DESC_COUNT_MISMATCH instead of
 * PAGE_COUNT_MISMATCH and uses the same comma-based counting as
 * validatePageCountParity, so page names containing "und" or "&" are not
 * split into several pages.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
function validatePositionDescriptionsMath(
  fs: FormState,
  errors: ValidationError[],
): void {
  const description = getPagesDescription(getPositions(fs));
  if (!description) return;

  // The sitemap is the authoritative source of truth for the page count.
  const sitemapPageCount = countSitemapPages(fs);
  const mentionedCount = countMentionedPages(description);

  if (
    sitemapPageCount > 0 &&
    mentionedCount > 0 &&
    Math.abs(mentionedCount - sitemapPageCount) > PAGE_COUNT_TOLERANCE
  ) {
    errors.push({
      code: "PAGES_DESC_COUNT_MISMATCH",
      message: `Seiten-Beschreibung nennt ~${mentionedCount} Seiten, aber ${sitemapPageCount} in der Sitemap.`,
      field: "positionDescriptions.Individuelle Seiten",
      expected: sitemapPageCount,
      actual: mentionedCount,
    });
  }
}

/**
 * 9. Sitemap consistency: flags sitemap pages that look hallucinated.
 *
 * Currently a single rule: a title containing "verwaltung" but not "inhalt"
 * is reported as HALLUCINATED_MANAGEMENT_PAGE. Does nothing when
 * `fs.sitemap` is not an array.
 */
function validateSitemapConsistency(
  fs: FormState,
  errors: ValidationError[],
): void {
  if (!Array.isArray(fs.sitemap)) return;

  for (const title of collectSitemapTitles(fs)) {
    const lowerTitle = title.toLowerCase();
    if (lowerTitle.includes("verwaltung") && !lowerTitle.includes("inhalt")) {
      errors.push({
        code: "HALLUCINATED_MANAGEMENT_PAGE",
        message: `"Verwaltung" als Seite ist vermutlich halluziniert. Verwaltung ist typischerweise eine interne Funktion, keine öffentliche Webseite.`,
        field: "sitemap",
      });
    }
  }
}