import { CheerioCrawler } from "crawlee";
import * as path from "node:path";
import * as fs from "node:fs/promises";
import { existsSync } from "node:fs";
import { URL } from "node:url";
import { execSync } from "node:child_process";
import axios from "axios";
import { FileCacheAdapter } from "../src/utils/cache/file-adapter.js";
import { initialState, PRICING } from "@mintel/pdf";
import { calculateTotals } from "@mintel/pdf";

/**
 * CLI entry point for the AI estimation workflow.
 *
 * Steps, in order:
 *  1. Parse CLI arguments (briefing text or `@file`, `--url`, `--comments`/`--notes`,
 *     `--budget`, `--cache-key`, `--json`, `--estimation`/`-E`, `--clear-cache`).
 *  2. Optionally crawl the target URL and distill the crawl via the LLM (both cached for 24h).
 *  3. Obtain the form state: from a JSON file, from the cache, or from `getAiEstimation`.
 *  4. Write the state to `out/estimations/json` and to a temp file under `.cache`,
 *     then run `scripts/generate-estimate.ts` on the temp file.
 *
 * Exits the process with code 1 when `OPENROUTER_KEY` is missing or no input was given.
 */
async function main() {
  const OPENROUTER_KEY = process.env.OPENROUTER_KEY;
  if (!OPENROUTER_KEY) {
    console.error("❌ Error: OPENROUTER_KEY not found in environment.");
    process.exit(1);
  }

  let briefing = "";
  let targetUrl: string | null = null;
  let comments: string | null = null;
  let budget: string | null = null;
  let cacheKey: string | null = null;
  let jsonStatePath: string | null = null;

  const isEstimation =
    process.argv.includes("--estimation") || process.argv.includes("-E");

  const args = process.argv.slice(2);
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    // `?? null` keeps the declared `string | null` type when a flag is the last argument.
    if (arg === "--url") {
      targetUrl = args[++i] ?? null;
    } else if (arg === "--comments" || arg === "--notes") {
      comments = args[++i] ?? null;
    } else if (arg === "--budget") {
      budget = args[++i] ?? null;
    } else if (arg === "--cache-key") {
      cacheKey = args[++i] ?? null;
    } else if (arg === "--json") {
      jsonStatePath = args[++i] ?? null;
    } else if (arg === "--estimation" || arg === "-E") {
      // Handled above
    } else if (!arg.startsWith("--")) {
      briefing = arg;
    }
  }

  // A briefing of the form "@path" is read from that file.
  if (briefing && briefing.startsWith("@")) {
    const rawPath = briefing.substring(1);
    // path.resolve returns absolute paths unchanged and resolves relative ones against cwd.
    const filePath = path.resolve(process.cwd(), rawPath);
    briefing = await fs.readFile(filePath, "utf8");
  }

  // Discovery ONLY if not provided
  if (!targetUrl && briefing) {
    const urlMatch = briefing.match(/https?:\/\/[^\s]+/);
    if (urlMatch) {
      // Drop sentence punctuation / closing brackets that the \S+ match drags along.
      targetUrl = urlMatch[0].replace(/[.,;:!?)\]}>"']+$/, "");
      console.log(`🔗 Discovered URL in briefing: ${targetUrl}`);
    }
  }

  if (!briefing && !targetUrl && !comments && !jsonStatePath) {
    console.error(
      '❌ Usage: npm run ai-estimate -- "Briefing text" [--url https://example.com] [--comments "Manual notes"]',
    );
    console.error(
      " Or: npm run ai-estimate -- @briefing.txt [--url https://example.com]",
    );
    console.error(" Or: npm run ai-estimate -- --json path/to/state.json");
    process.exit(1);
  }

  const clearCache = process.argv.includes("--clear-cache");
  if (clearCache) {
    console.log("🧹 Clearing cache...");
    const cacheDir = path.join(process.cwd(), ".cache");
    // Nothing to clear on a fresh checkout; readdir would throw ENOENT.
    if (existsSync(cacheDir)) {
      const cacheFiles = await fs.readdir(cacheDir);
      for (const file of cacheFiles) {
        if (file.startsWith("ai_est_")) {
          await fs.unlink(path.join(cacheDir, file));
        }
      }
    }
  }

  const cache = new FileCacheAdapter({ prefix: "ai_est_" });
  const finalCacheKey =
    cacheKey || `${briefing}_${targetUrl}_${comments}_${budget}`;

  // 1. Crawl if URL provided
  let crawlContext = "";
  if (targetUrl) {
    console.log(`🔍 Crawling ${targetUrl} for context...`);
    const cachedCrawl = await cache.get(`crawl_${targetUrl}`);
    if (cachedCrawl && !clearCache) {
      console.log("📦 Using cached crawl results.");
      crawlContext = cachedCrawl;
    } else {
      try {
        crawlContext = await performCrawl(targetUrl);
      } catch (err: unknown) {
        // A broken crawl must not abort the estimation; the fallback below handles it.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`⚠️ Crawl error: ${reason}`);
        crawlContext = "";
      }
      // Only cache successful crawls so a transient failure is not pinned for 24h.
      if (crawlContext) {
        await cache.set(`crawl_${targetUrl}`, crawlContext, 86400); // 24h cache
      }
    }
  }

  // 2. Distill Crawl Context (Context Filtering)
  let distilledCrawl = "";
  if (crawlContext) {
    const cachedDistilled = await cache.get(`distilled_${targetUrl}`);
    if (cachedDistilled && !clearCache) {
      distilledCrawl = cachedDistilled;
    } else {
      distilledCrawl = await distillCrawlContext(crawlContext, OPENROUTER_KEY);
      await cache.set(`distilled_${targetUrl}`, distilledCrawl, 86400);
    }
  } else if (targetUrl) {
    distilledCrawl =
      `WARNING: The crawl of ${targetUrl} failed (ENOTFOUND or timeout). ` +
      `The AI must NOT hallucinate details about the current website. ` +
      `Focus ONLY on the BRIEFING provided. ` +
      `If details are missing, mark them as 'unknown'.`;
    console.warn(
      "⚠️ Crawl failed. AI will be notified to avoid hallucinations.",
    );
  }

  // 3. AI Prompting
  console.log("🤖 Consultating Gemini 3 Flash...");
  const cachedAi = !clearCache ? await cache.get(finalCacheKey) : null;
  // Shape is produced by the LLM passes / external JSON and is not validated here.
  let formState: any;
  let usage: { prompt: number; completion: number; cost: number } = {
    prompt: 0,
    completion: 0,
    cost: 0,
  };

  if (jsonStatePath) {
    console.log(`📂 Loading state from JSON: ${jsonStatePath}`);
    const rawJson = await fs.readFile(
      path.resolve(process.cwd(), jsonStatePath),
      "utf8",
    );
    formState = JSON.parse(rawJson);
  } else if (cachedAi) {
    console.log("📦 Using cached AI response.");
    formState = cachedAi;
  } else {
    // Load Context Documents (only needed when the AI pipeline actually runs)
    const [principles, techStandards, tone] = await Promise.all([
      fs.readFile(path.resolve(process.cwd(), "docs/PRINCIPLES.md"), "utf8"),
      fs.readFile(path.resolve(process.cwd(), "docs/TECH.md"), "utf8"),
      fs.readFile(path.resolve(process.cwd(), "docs/TONE.md"), "utf8"),
    ]);

    const result = await getAiEstimation(
      briefing,
      distilledCrawl,
      comments,
      budget,
      OPENROUTER_KEY,
      principles,
      techStandards,
      tone,
    );
    formState = result.state;
    usage = result.usage;
    await cache.set(finalCacheKey, formState);
  }

  // 4. Save Data & Generate PDF
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
  const jsonOutDir = path.resolve(process.cwd(), "out/estimations/json");
  await fs.mkdir(jsonOutDir, { recursive: true });

  // companyName comes from the model / external JSON: neutralise characters that
  // would create sub-directories or are invalid in file names.
  const safeCompanyName = String(formState.companyName || "unknown").replace(
    /[\\/:*?"<>|]/g,
    "_",
  );
  const finalJsonPath = path.join(
    jsonOutDir,
    `${safeCompanyName}_${timestamp}.json`,
  );
  await fs.writeFile(finalJsonPath, JSON.stringify(formState, null, 2));

  const tempJsonPath = path.resolve(
    process.cwd(),
    ".cache",
    `temp_state_${Date.now()}.json`,
  );
  // Make sure .cache exists before writing the temp file into it.
  await fs.mkdir(path.dirname(tempJsonPath), { recursive: true });
  await fs.writeFile(tempJsonPath, JSON.stringify(formState, null, 2));

  console.log(`📦 Saved detailed state to: ${finalJsonPath}`);
  console.log("📄 Generating PDF estimation...");
  try {
    const genArgs = isEstimation ? "--estimation" : "";
    // Quote the path so a working directory containing spaces does not split the argument.
    execSync(
      `npx tsx ./scripts/generate-estimate.ts --input "${tempJsonPath}" ${genArgs}`,
      { stdio: "inherit" },
    );
  } finally {
    // NOTE(review): temp-file cleanup is disabled, so temp_state_*.json files accumulate
    // in .cache — presumably kept for debugging; confirm before re-enabling.
    // await fs.unlink(tempJsonPath);
  }

  console.log("\n✨ AI Estimation Complete!");
  if (usage.prompt > 0) {
    console.log("--------------------------------------------------");
    console.log("📊 ACCUMULATED API USAGE (SUM OF 6 PASSES)");
    console.log(` Model: google/gemini-2.5-flash`);
    console.log(` Total Prompt: ${usage.prompt.toLocaleString()}`);
    console.log(` Total Completion: ${usage.completion.toLocaleString()}`);
    console.log(
      ` Total Tokens: ${(usage.prompt + usage.completion).toLocaleString()}`,
    );
    console.log(` Total Cost (USD): $${usage.cost.toFixed(6)}`);
    console.log("--------------------------------------------------\n");
  }
}

/**
 * Condenses raw crawl text into a few German bullet points via the OpenRouter chat API.
 *
 * @param rawCrawl - Concatenated crawl output; only the first 30,000 characters are sent.
 * @param apiKey - OpenRouter API key used as bearer token.
 * @returns The model's message content, or a fixed German placeholder when `rawCrawl` is blank.
 * @throws Error when the response carries no message content.
 */
async function distillCrawlContext(
  rawCrawl: string,
  apiKey: string,
): Promise<string> {
  if (!rawCrawl || rawCrawl.trim().length === 0)
    return "Keine Crawl-Daten vorhanden.";

  console.log(" ↳ Distilling Crawl Context (Noise Filtering)...");

  // Assembled from pieces for readability; the resulting string is unchanged.
  const systemPrompt =
    ` You are a context distiller. ` +
    `Your goal is to strip away HTML noise, legal footers, and generic fluff from a website crawl. ` +
    `Extract the "Company DNA" in 5-8 bullet points (GERMAN). ` +
    `### FOCUS ON: 1. Core Business / Services. 2. \n` +
    `Unique Selling Points (USPs). 3. Target Audience (if clear). ` +
    `4. Tech Stack or industry-specific equipment mentioned. ` +
    `5. Brand tone (e.g. "industrial", "friendly", "technical"). ` +
    `### OUTPUT: Return ONLY the bullet points. No intro/outro. `;

  const resp = await axios.post(
    "https://openrouter.ai/api/v1/chat/completions",
    {
      model: "google/gemini-2.5-flash",
      messages: [
        { role: "system", content: systemPrompt },
        {
          role: "user",
          content: `RAW_CRAWL:\n${rawCrawl.substring(0, 30000)}`,
        },
      ],
    },
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
  );

  // Same guard as the later passes: fail with a clear message instead of a TypeError.
  const content = resp.data?.choices?.[0]?.message?.content;
  if (!content) {
    console.error(
      "❌ Distillation failed. Response:",
      JSON.stringify(resp.data, null, 2),
    );
    throw new Error("Distillation: No content in response");
  }
  return content;
}

/**
 * Crawls up to 20 same-origin pages starting at `url` and renders them as one text blob.
 *
 * For each page it records title, a coarse page type derived from the URL path,
 * H1 headings, nav link texts, up to five distinct hex colours found in the HTML,
 * and up to 50,000 characters of whitespace-normalised body text.
 *
 * @param url - Start URL; its origin restricts which links are followed.
 * @returns A summary header followed by one section per page, or `""` when no page
 *          could be fetched (so callers can treat the crawl as failed).
 */
async function performCrawl(url: string): Promise<string> {
  const pages: { url: string; content: string; type: string }[] = [];
  const origin = new URL(url).origin;

  // Ordered rules: the first type whose keyword occurs in the path wins.
  const typeRules: ReadonlyArray<readonly [string, readonly string[]]> = [
    ["service", ["service", "leistung"]],
    ["blog", ["blog", "news", "aktuelles", "magazin"]],
    ["contact", ["contact", "kontakt"]],
    ["career", ["job", "karriere", "career", "human-resources"]],
    ["portfolio", ["portfolio", "referenz", "projekt", "case-study"]],
    ["legal", ["legal", "impressum", "datenschutz", "privacy"]],
  ];

  // Maps a lower-cased URL path to one of the coarse page types.
  const classify = (urlPath: string): string => {
    if (urlPath === "/" || urlPath === "") return "home";
    for (const [pageType, keywords] of typeRules) {
      if (keywords.some((keyword) => urlPath.includes(keyword))) {
        return pageType;
      }
    }
    return "other";
  };

  const crawler = new CheerioCrawler({
    maxRequestsPerCrawl: 20,
    async requestHandler({ $, request, enqueueLinks }) {
      const title = $("title").text();
      const urlPath = new URL(request.url).pathname.toLowerCase();
      const type = classify(urlPath);

      const h1s = $("h1")
        .map((_, el) => $(el).text())
        .get();
      const navLinks = $("nav a")
        .map((_, el) => $(el).text().trim())
        .get()
        .filter((t) => t.length > 0);
      const bodyText = $("body")
        .text()
        .replace(/\s+/g, " ")
        .substring(0, 50000);

      const html = $.html();
      const hexColors = html.match(/#(?:[0-9a-fA-F]{3}){1,2}\b/g) || [];
      const uniqueColors = Array.from(new Set(hexColors)).slice(0, 5);

      pages.push({
        url: request.url,
        type,
        content: `Title: ${title}\nType: ${type}\nHeadings: ${h1s.join(", ")}\nNav: ${navLinks.join(", ")}\nColors: ${uniqueColors.join(", ")}\nText: ${bodyText}`,
      });

      await enqueueLinks({
        limit: 15,
        transformRequestFunction: (req) => {
          const reqUrl = new URL(req.url);
          if (reqUrl.origin !== origin) return false;
          // Skip assets
          if (/\.(pdf|zip|jpg|png|svg|webp)$/i.test(reqUrl.pathname))
            return false;
          return req;
        },
      });
    },
  });

  await crawler.run([url]);

  // No page handled means the crawl failed; an empty string lets the caller
  // take its "crawl failed" path instead of distilling an empty summary.
  if (pages.length === 0) return "";

  const typeCounts: Record<string, number> = {};
  for (const page of pages) {
    typeCounts[page.type] = (typeCounts[page.type] || 0) + 1;
  }

  let summary = `\nCrawl Summary: Identified ${pages.length} pages total on ${origin}.\n`;
  summary +=
    Object.entries(typeCounts)
      .map(([type, count]) => `- ${type}: ${count}`)
      .join("\n") + "\n\n";

  return (
    summary +
    pages.map((p) => `--- PAGE: ${p.url} ---\n${p.content}`).join("\n\n")
  );
}

/**
 * Best-effort clean-up of an LLM response so it can be passed to JSON.parse:
 * strips markdown code fences, blanks out control characters and removes
 * trailing commas before a closing bracket/brace.
 */
const cleanJson = (str: string) => {
  // Remove markdown code blocks if present
  let cleaned = str.replace(/```json\n?|```/g, "").trim();

  // Remove potential control characters that break JSON.parse.
  // \n and \r are kept as they might be escaped or need handling; every other
  // control character (including tab) is replaced with a space.
  cleaned = cleaned.replace(
    /[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F-\u009F]/g,
    " ",
  );

  // Specific fix for Gemini: raw newlines inside strings
  // This is tricky. We'll try to escape newlines that are NOT followed by a quote and colon (property start)
  // or a closing brace/bracket. This is heuristic.
  // A better way is to replace all raw newlines that are preceded by a non-backslash with \n
  // but only if they are inside double quotes.
  // Simplest robust approach: Remove trailing commas and hope response_format does its job.
cleaned = cleaned.replace(/,\s*([\]}])/g, "$1"); return cleaned; }; const getAiEstimation = async ( briefing: string, distilledCrawl: string, comments: string | null, budget: string | null, apiKey: string, principles: string, techStandards: string, tone: string, ) => { let usage = { prompt: 0, completion: 0, cost: 0 }; const addUsage = (data: any) => { if (data?.usage) { usage.prompt += data.usage.prompt_tokens || 0; usage.completion += data.usage.completion_tokens || 0; // OpenRouter provides 'cost' field in USD (as per documentation) // If missing, we use a fallback calculation for transparency if (data.usage.cost !== undefined) { usage.cost += data.usage.cost; } else { // Fallback: Gemini 3 Flash Flash pricing (~$0.1 / 1M prompt, ~$0.4 / 1M completion) usage.cost += (data.usage.prompt_tokens || 0) * (0.1 / 1000000) + (data.usage.completion_tokens || 0) * (0.4 / 1000000); } } }; // 1. PASS 1: Fact Extraction (Briefing Sensor) console.log(" ↳ Pass 1: Fact Extraction (Briefing Sensor)..."); const pass1SystemPrompt = ` You are a precision sensor. Analyze the BRIEFING and extract ONLY the raw facts. Tone: Literal, non-interpretive. Output language: GERMAN (Strict). Output format: ROOT LEVEL JSON (No wrapper keys like '0' or 'data'). ### LEGAL CONSTRAINT (MANDATORY): Do NOT use the term "rechtssicher" (legally secure) or make any claims about legal compliance. We provide technical infrastructure, not legal advice. Use terms like "DSGVO-konforme Konfiguration" or "Datenschutz-Standard" instead. ### MISSION: Focus 100% on the BRIEFING text provided by the user. Use the DISTILLED_CRAWL only as background context for terms or company details. If there is a conflict, the BRIEFING is the absolute source of truth. 
### PRICING REFERENCE (FOR CALCULATION): - Base Project (Infrastructure + 12 Months Hosting): 5.440 € (MANDATORY START) - Additional Pages: 600 € / stk - System-Modules (Features): 1.500 € / stk - Logic-Functions: 800 € / stk - API Integrations: 800 € / stk - Inhalts-Verwaltung (CMS-Modul): 1.500 € (optional) ${budget ? `### BUDGET LOGIC (ULTRA-STRICT): 1. **Mental Calculation**: Start with 7.000 €. Add items based on the reference above. 2. **Hard Ceiling**: If total > ${budget}, you MUST discard lower priority items. 3. **Priority**: High-End Design and Core Pages > Features. 4. **Restriction**: For ${budget}, do NOT exceed 2 features and 4 extra pages. 5. THE TOTAL COST CALCULATED BY THESE RULES MUST BE <= ${budget}. 6. Do NOT mention the budget in any string fields.` : "" } - ** features **: Items from the FEATURE_REFERENCE. - ** ABSOLUTE CONSERVATIVE RULE **: Only use features if the briefing implies *dynamic complexity* (CMS, filtering, search, database). - Simple keywords like 'Karriere', 'Referenzen', 'Messen' or lists of items MUST be treated as simple pages. Add them to 'otherPages' instead. - If in doubt, categorizing as a PAGE is the mandatory default. - ** otherPages **: Any specific pages mentioned (e.g. 'Historie', 'Team', 'Partner') that are not in the standard list. Use this for static lists of jobs or references too. - ** companyName **: The full legal and brand name (e.g., "E-TIB GmbH"). Use signatures and crawl data. - ** personName **: The name of the primary human contact (e.g., "Danny Joseph"). ** CRITICAL **: Check email signatures and "Mit freundlichen Grüßen" blocks. - ** email **: The email address of the contact person if found in the briefing / signature. - ** existingWebsite **: The primary URL mentioned in the briefing or signature (e.g., "www.e-tib.com"). - ** websiteTopic **: A short descriptor of the CORE BUSINESS (e.g., "Kabeltiefbau"). MAX 3 WORDS. 
- ** isRelaunch **: Set to TRUE if the briefing mentions an existing website, a URL, or if the company is an established entity. - ** CRITICAL LOGIC **: If a URL is mentioned, isRelaunch MUST be TRUE. - For all textual values: USE GERMAN. - ** multilang **: ONLY if the briefing EXPLICITLY mentions multiple target languages (e.g. "Seite soll auch auf Englisch verfügbar sein"). ABSOLUTE DEFAULT IS FALSE. - ** maps **: If "Google Maps" or location maps are mentioned or implicit. - ** CRITICAL **: Do NOT include "social" in apiSystems unless the user explicitly wants to SYNC / POST content. ### CATEGORY MAPPING(IDs ONLY): - ** selectedPages **: [Home, About, Services, Contact, Landing, Legal] - ** features **: [blog_news, products, jobs, refs, events] - ** functions **: [search, filter, pdf, forms, members, calendar, multilang, chat] - ** apiSystems **: [crm_erp, payment, marketing, ecommerce, maps, social, analytics] - ** assets **: [existing_website, logo, styleguide, content_concept, media, icons, illustrations, fonts] ### OUTPUT FORMAT(Strict JSON - ROOT LEVEL): { "companyName": string, "companyAddress": string, "personName": string, "email": string, "existingWebsite": string, "websiteTopic": string, "isRelaunch": boolean, "selectedPages": string[], "features": string[], "functions": string[], "apiSystems": string[], "assets": string[], "deadline": string(GERMAN), "targetAudience": "B2B" | "B2C" | "Internal" | string(GERMAN), "expectedAdjustments": "low" | "medium" | "high" | string(GERMAN), "employeeCount": "ca. 10+" | "ca. 50+" | "ca. 100+" | "ca. 250+" | "ca. 500+" | "ca. 
1000+" } `; const pass1UserPrompt = `BRIEFING(TRUTH SOURCE): \n${briefing} \n\nCOMMENTS: \n${comments} \n\nDISTILLED_CRAWL(CONTEXT ONLY): \n${distilledCrawl} `; const p1Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass1SystemPrompt }, { role: "user", content: pass1UserPrompt }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey} `, "Content-Type": "application/json", }, }, ); if (!p1Resp.data.choices?.[0]?.message?.content) { console.error( "❌ Pass 1 failed. Response:", JSON.stringify(p1Resp.data, null, 2), ); throw new Error("Pass 1: No content in response"); } const facts = JSON.parse(cleanJson(p1Resp.data.choices[0].message.content)); // 1.5. PASS 1.5: The Feature Auditor (Skeptical Review) console.log(" ↳ Pass 1.5: The Feature Auditor (Skeptical Review)..."); const pass15SystemPrompt = ` You are a "Strict Cost Controller". Your mission is to prevent over-billing. Review the extracted FEATURES and the BRIEFING. ### RULE OF THUMB: - A "Feature" (1.500 €) is ONLY justified for complex, dynamic systems (logic, database, CMS-driven management, advanced filtering). - Simple lists, information sections, or static descriptions (e.g., "Messen", "Team", "Historie", "Jobs" as mere text) are ALWAYS "Pages" (600 €). - If the briefing doesn't explicitly mention "Management System", "Filterable Database", or "Client Login", it is likely a PAGE. ### MISSION: Analyze each feature in the list. Decide if it should stay a "Feature" or be downgraded to the "otherPages" array. ### OUTPUT FORMAT: Return only the corrected 'features' and 'otherPages' arrays. 
{ "features": string[], "otherPages": string[] } `; const p15Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass15SystemPrompt }, { role: "user", content: `EXTRACTED_FEATURES: ${JSON.stringify(facts.features)} \nBRIEFING: \n${briefing}`, }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json", }, }, ); addUsage(p15Resp.data); const auditResult = JSON.parse( cleanJson(p15Resp.data.choices[0].message.content), ); // Apply Audit: Downgrade features to otherPages facts.features = auditResult.features || []; facts.otherPages = Array.from( new Set([...(facts.otherPages || []), ...(auditResult.otherPages || [])]), ); // 2. PASS 2: Feature Deep-Dive console.log(" ↳ Pass 2: Feature Deep-Dive..."); const pass2SystemPrompt = ` You are a detail - oriented Solution Architect. For EVERY item selected in Pass 1(pages, features, functions, apiSystems), write a specific justification and technical scope. ### RULES: 1. ** CONCRETE & SPECIFIC **: Do NOT say "Implementation of X". Say "Displaying X with Y filters". 2. ** JUSTIFICATION (CRITICAL) **: For every entry in 'featureDetails', explicitly explain WHY this is a complex system (1.500 €) and not just a static page (600 €). If it's just a list of items, it's a PAGE. 3. ** NO EFFECTS **: Do not mention "fade-ins", "animations" or "visual styling". Focus on FUNCTION. 4. ** ABSOLUTE RULE **: EVERYTHING MUST BE GERMAN. 5. ** TRANSPARENCY **: Explain exactly what the USER gets. 6. ** API NOTE **: For 'media' or 'video', explicitly state "Upload & Integration" (NO STREAMING). ### INPUT (from Pass 1): ${JSON.stringify(facts, null, 2)} ### OUTPUT FORMAT(Strict JSON): { "pageDetails": { "Home": string, ... }, "featureDetails": { "blog_news": string, ... }, "functionDetails": { "search": string, ... }, "apiDetails": { "crm_erp": string, ... 
} } `; const p2Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass2SystemPrompt }, { role: "user", content: briefing }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey} `, "Content-Type": "application/json", }, }, ); addUsage(p2Resp.data); if (!p2Resp.data.choices?.[0]?.message?.content) { console.error( "❌ Pass 2 failed. Response:", JSON.stringify(p2Resp.data, null, 2), ); throw new Error("Pass 2: No content in response"); } const details = JSON.parse(cleanJson(p2Resp.data.choices[0].message.content)); // 3. PASS 3: Strategic Content (Bespoke Strategy) console.log(" ↳ Pass 3: Strategic Content (Bespoke Strategy)..."); const pass3SystemPrompt = ` You are a high - end Digital Architect.Your goal is to make the CUSTOMER feel 100 % understood. Analyze the BRIEFING and the EXISTING WEBSITE context. ### TONE & COMMUNICATION PRINCIPLES(STRICT): ${tone} ### OBJECTIVE: 3. ** briefingSummary **: Ein sachlicher, tiefgehender Überblick der Unternehmenslage. - ** STIL **: Keine Ich - Form.Keine Marketing - Floskeln.Nutze präzise Fachbegriffe.Sei prägnant und effizient(ca. 70 % der vorherigen Länge). - ** FORM **: EXAKT ZWEI ABSÄTZE.Insgesamt ca. 6 Sätze. - ** INHALT **: Welcher technologische Sprung ist notwendig ? Was ist der Status Quo ? (Bezug zur URL / Briefing). - ** ABSOLUTE REGEL **: Keine Halluzinationen über fehlende Präsenzen bei Relaunches. - ** DATENSCHUTZ **: KEINERLEI namentliche Nennungen von Personen(z.B. "Danny Joseph") in diesen Texten. 4. ** designVision **: Ein abstraktes, strategisches Konzept. - ** STIL **: Rein konzeptionell.Keine Umsetzungsschritte.Keinerlei "To-dos".Keine Ich - Form.Sei prägnant. - ** FORM **: EXAKT ZWEI ABSÄTZE.Insgesamt ca. 4 Sätze. - ** DATENSCHUTZ **: KEINERLEI namentliche Nennungen von Personen in diesen Texten. - ** FOKUS **: Welche strategische Wirkung soll erzielt werden ? (Z.B. 
"Industrielle Souveränität"). ### OUTPUT FORMAT(Strict JSON): { "briefingSummary": string, "designVision": string } `; const p3Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass3SystemPrompt }, { role: "user", content: `BRIEFING(TRUTH SOURCE): \n${briefing} \n\nEXISTING WEBSITE(CONTEXT): \n${distilledCrawl} \n\nEXTRACTED FACTS: \n${JSON.stringify(facts, null, 2)} `, }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey} `, "Content-Type": "application/json", }, }, ); addUsage(p3Resp.data); if (!p3Resp.data.choices?.[0]?.message?.content) { console.error( "❌ Pass 3 failed. Response:", JSON.stringify(p3Resp.data, null, 2), ); throw new Error("Pass 3: No content in response"); } const strategy = JSON.parse( cleanJson(p3Resp.data.choices[0].message.content), ); // 4. PASS 4: Information Architecture (Sitemap) console.log(" ↳ Pass 4: Information Architecture..."); const pass4SystemPrompt = ` You are a Senior UX Architect.Design a hierarchical sitemap following the 'Industrial Logic' principle. EVERYTHING MUST BE IN GERMAN. ### SITEMAP RULES: 1. ** HIERARCHY **: Build a logical tree.Group by category(e.g., "Kern-Präsenz", "Lösungen", "Vertrauen", "Rechtliches"). 2. ** INTENT **: Each page MUST have a title and a brief functional conversion intent(desc). 3. ** COMPREHENSIVENESS **: Ensure all 'selectedPages' and 'features' from Pass 1 are represented. 4. ** LANGUAGE **: STRICT GERMAN TITLES.Do NOT use "Home", "About", "Services".Use "Startseite", "Über uns", "Leistungen". 5. ** NO IMPLEMENTATION NOTES **: Do NOT add implementation details in parentheses to titles (e.g. NO "Startseite (Hero-Video)", NO "About (Timeline)"). Keep titles clean and abstract. 
### DATA CONTEXT: ${JSON.stringify({ facts, strategy }, null, 2)} ### OUTPUT FORMAT(Strict JSON): { "websiteTopic": string, "sitemap": [{ "category": string, "pages": [{ "title": string, "desc": string }] }] } `; const p4Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass4SystemPrompt }, { role: "user", content: `BRIEFING(TRUTH SOURCE): \n${briefing} \n\nDISTILLED_CRAWL(CONTEXT): \n${distilledCrawl} `, }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey} `, "Content-Type": "application/json", }, }, ); addUsage(p4Resp.data); if (!p4Resp.data.choices?.[0]?.message?.content) { console.error( "❌ Pass 4 failed. Response:", JSON.stringify(p4Resp.data, null, 2), ); throw new Error("Pass 4: No content in response"); } const ia = JSON.parse(cleanJson(p4Resp.data.choices[0].message.content)); // 5. PASS 5: Position Synthesis & Pricing Transparency console.log(" ↳ Pass 5: Position Synthesis..."); // Determine which positions are actually relevant to avoid hallucinations const requiredPositions = [ "Das technische Fundament", facts.selectedPages.length + facts.otherPages.length > 0 ? "Individuelle Seiten" : null, facts.features.length > 0 ? "System-Module (Features)" : null, facts.functions.length > 0 ? "Logik-Funktionen" : null, facts.apiSystems.length > 0 ? "Schnittstellen (API)" : null, facts.cmsSetup ? "Inhalts-Verwaltung" : null, facts.multilang ? "Mehrsprachigkeit" : null, "Inhaltliche Initial-Pflege", "Sorglos Betrieb", ].filter(Boolean); const pass5SystemPrompt = ` You are a Senior Solution Architect. Your goal is ABSOLUTE TRANSPARENCY and professionalism. Each position in the quote must be perfectly justified and detailed using an objective, technical tone. 
### REQUIRED POSITION TITLES (STRICT - ONLY DESCRIBE THESE): ${requiredPositions.map((p) => `"${p}"`).join(", ")} ### MAPPING RULES (STRICT): - ** Das technische Fundament **: Infrastructure, Hosting setup, SEO-Basics, Analytics, Environments. - ** Individuelle Seiten **: Layout / structure for specific pages. ** RULE **: If quantity is high (e.g. > 10), lead with "Umsetzung von [QTY] individuellen Einzelseiten...". - ** System-Module (Features) **: Functional systems like Blog, News, Products, Jobs, References. ** RULE **: Describe exactly 1 thing if qty is 1. If qty is 0, DO NOT DESCRIBE THIS. - ** Logik-Funktionen **: Logic modules like Search, Filter, Forms, PDF-Export. - ** Schnittstellen (API) **: Data Syncs with CRM, ERP, Payment systems. - ** Inhalts-Verwaltung **: Setup and mapping of the administration system for self-managed content. - ** Mehrsprachigkeit **: Architecture scaling for multiple languages. - ** Inhaltliche Initial-Pflege **: Manual data entry / cleanup. - ** Sorglos Betrieb **: ** RULE **: Describe as "Inklusive 1 Jahr Sicherung des technischen Betriebs, Hosting, Instandhaltung, Sicherheits-Updates und techn. Support gemäß AGB Punkt 7a." ### RULES FOR positionDescriptions(STRICT): 1. ** ABSOLUTE RULE: NO FIRST PERSON **: NEVER use "Ich", "Mein", "Wir" or "Unser". Lead with nouns or passive verbs. 2. ** QUANTITY PARITY (ULTRA-STRICT) **: The description MUST list EXACTLY the number of items matching the 'qty' for that position. If qty is 3, describe exactly 3 items. If qty is 1, describe exactly 1 item. Do NOT "stuff" additional features into one description. 3. ** LOGIC GUARD (CMS) **: If 'cmsSetup' is false in the DATA CONTEXT, you MUST NOT mention "CMS", "Modul", "Management System" or "Inhaltsverwaltung". Use "Statische Seite" or "Darstellung". 4. ** STATIC vs DYNAMIC **: If no complex logic was extracted in Pass 2 for a feature, describe it as a technical layout/page, not as a system. 5. 
** PROFESSIONAL TONE **: Use "Erstellung von...", "Anbindung der...", "Implementierung technischer...", "Bereitstellung von...". 6. ** CONCISE & ITEM-BASED **: Use technical, high-density sentences. Name specific industry terms from context. 7. ** ITEMIZED SYNTHESIS **: Mention EVERY component selected in Pass 1. 8. ** HARD SPECIFICS **: If the briefing mentions "Glasfaser-Trassen" or "Schwerlast-Logistik", IT MUST BE IN THE DESCRIPTION. 9. ** INDUSTRIAL AMBITION **: Describe it as a high-end technical solution. Avoid "schöne Website" or marketing fluff. 10. ** PAGES **: For "Individuelle Seiten", list the pages. ** ABSOLUTE RULE **: Do NOT add implementation details or technical notes in parentheses (e.g. NO "(Matrix-Struktur)", NO "(Timeline-Modul)"). Use clean titles like "Startseite, Über uns, Leistungen". 11. ** LOGIC **: Describe the ACTUAL logic (e.g., "Volltextsuche mit Auto-Complete", not "eine Suche"). 12. ** KEYS **: Return EXACTLY the keys defined in "POSITION TITLES". 13. ** NO AGB **: NEVER mention "AGB" or "Geschäftsbedingungen". ### EXAMPLES(PASSIVE & TECHNICAL): - ** GOOD **: "Konfiguration der CMS-Infrastruktur zur unabhängigen Verwaltung von Produkt-Katalogen und News-Beiträgen." - ** GOOD **: "Implementierung einer Volltextsuche inkl. Kategorisierungs-Logik für effizientes Auffinden von Projektreferenzen." - ** GOOD **: "Native API-Anbindung an das ERP-System zur Echtzeit-Synchronisation von Bestandsdaten." - ** BAD **: "Ich richte dir das CMS ein." - ** BAD **: "Ich programmiere eine tolle Suche für deine Seite." ### DATA CONTEXT: ${JSON.stringify({ facts, details, strategy, ia }, null, 2)} ### OUTPUT FORMAT(Strict JSON): { "positionDescriptions": { "Das technische Fundament": string, ... 
} } `; const p5Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass5SystemPrompt }, { role: "user", content: briefing }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey} `, "Content-Type": "application/json", }, }, ); addUsage(p5Resp.data); if (!p5Resp.data.choices?.[0]?.message?.content) { console.error( "❌ Pass 5 failed. Response:", JSON.stringify(p5Resp.data, null, 2), ); throw new Error("Pass 5: No content in response"); } const positionsData = JSON.parse( cleanJson(p5Resp.data.choices[0].message.content), ); // 6. PASS 6: The Industrial Critic console.log(" ↳ Pass 6: The Industrial Critic (Quality Gate)..."); const pass6SystemPrompt = ` You are the "Industrial Critic".Your goal is to catch quality regressions and ensure the document is bespoke, technical, and professional. Analyze the CURRENT_STATE against the BRIEFING_TRUTH. ### CRITICAL ERROR CHECKLIST(FAIL IF FOUND): 1. ** Hallucination Leakage **: FAIL if names of people(e.g., "Frieder Helmich"), specific software versions, or invented details are used unless they appear EXACTLY in the BRIEFING. - ** CRITICAL **: Forbid "Sie", "Ansprechpartner" or "Unternehmen" for personName if a name IS in the briefing.If none is in briefing, use empty string. 2. ** Logic Conflict **: FAIL if isRelaunch is true but briefingSummary claims no website exists. - FAIL if the description in positionDescriptions mentions more items than extracted in facts. 3. ** Implementation Fluff **: FAIL if tech - stack details are mentioned(React, etc.).Focus on Concept & Result. 4. ** Genericism Check(CRITICAL) **: FAIL if any text sounds like it could apply to ANY company.It MUST mention specific industry details(e.g., "Kabeltiefbau", "Infrastruktur-Zentrum") from the Briefing or Crawl. 6. ** Namen-Verbot (STRICT) **: FAIL if any personal names (e.g. "Danny Joseph", "Joseph", etc.) 
appear in 'briefingSummary' or 'designVision'. Use abstract terms like "Unternehmensführung" or "Management" if necessary. 7. ** LOGIC GUARD (CMS) **: If 'cmsSetup' is false in the DATA CONTEXT, FAIL if any 'positionDescriptions' or 'briefingSummary' mentions "CMS", "Content Management System", "Inhaltsverwaltung" or "Redaktionssystem". 8. ** AGB BAN **: FAIL if "Allgemeine Geschäftsbedingungen" or "AGB" appear anywhere. 9. ** Length Check **: Briefing (ca. 6 Sätze) und Vision (ca. 4 Sätze). Kürze Texte, die zu ausschweifend sind, auf das Wesentliche. 10. **LEGAL SAFETY**: FAIL if the term "rechtssicher" or similar absolute legal claims are used. Ensure terminology remains technical (e.g., "Standard-konform", "Best Practice"). ### MISSION: Return updated fields ONLY.Specifically focus on hardening 'positionDescriptions', 'sitemap', 'briefingSummary', and 'designVision'. ### DATA CONTEXT: ${JSON.stringify({ facts, strategy, ia, positionsData }, null, 2)} `; const p6Resp = await axios.post( "https://openrouter.ai/api/v1/chat/completions", { model: "google/gemini-2.5-flash", messages: [ { role: "system", content: pass6SystemPrompt }, { role: "user", content: `BRIEFING_TRUTH: \n${briefing} ` }, ], response_format: { type: "json_object" }, }, { headers: { Authorization: `Bearer ${apiKey} `, "Content-Type": "application/json", }, }, ); addUsage(p6Resp.data); if (!p6Resp.data.choices?.[0]?.message?.content) { console.error( "❌ Pass 6 failed. Response:", JSON.stringify(p6Resp.data, null, 2), ); throw new Error("Pass 6: No content in response"); } const reflection = JSON.parse( cleanJson(p6Resp.data.choices[0].message.content), ); // 6. 
// Reflection Merge Utility
  /**
   * Overlays the critic's reflection onto the assembled state.
   *
   * The reflection is first unwrapped (models tend to nest the payload under
   * "0", "state" or "facts"). Only truthy values other than the literal string
   * "null" overwrite existing keys, so the reflection cannot blank out a field
   * (this also means it cannot set a flag to `false` or a number to `0`).
   *
   * @param state - Base state; it is shallow-copied, never mutated.
   * @param reflection - Parsed JSON returned by the critic pass.
   * @returns A new object containing the merged fields.
   */
  const mergeReflection = (state: any, reflection: any) => {
    const result = { ...state };

    const unwrap = (obj: any): any => {
      if (!obj || typeof obj !== "object" || Array.isArray(obj)) return obj;
      // Always unwrap "0" if it exists, regardless of other keys (AI often nests)
      if (obj["0"]) return unwrap(obj["0"]);
      if (obj.state && Object.keys(obj).length === 1) return unwrap(obj.state);
      if (obj.facts && Object.keys(obj).length === 1) return unwrap(obj.facts);
      return obj;
    };

    const cleanedReflection = unwrap(reflection);

    // JSON.parse can legitimately produce null, a primitive or an array.
    // Object.entries(null) throws, and arrays/strings would inject index keys
    // ("0", "1", ...) into the state, so anything but a plain object is ignored.
    if (
      !cleanedReflection ||
      typeof cleanedReflection !== "object" ||
      Array.isArray(cleanedReflection)
    ) {
      return result;
    }

    Object.entries(cleanedReflection).forEach(([key, value]) => {
      if (value && value !== "null") {
        result[key] = value;
      }
    });
    return result;
  };

  let finalState = mergeReflection(
    {
      ...initialState,
      ...facts,
      ...strategy,
      ...ia,
      ...positionsData,
    },
    reflection,
  );

  finalState.statusQuo = facts.isRelaunch ? "Relaunch" : "Neuentwicklung";

  // Recipient Mapping
  // (The former `finalState.email = finalState.email` self-assignment was a
  // no-op and has been dropped; `email` is already carried by the merge.)
  if (finalState.personName) finalState.name = finalState.personName;

  // Normalization Layer: Map hallucinated German keys back to internal keys
  const normalizationMap: Record<string, string> = {
    "Briefing-Zusammenfassung": "briefingSummary",
    "Design-Vision": "designVision",
    Zusammenfassung: "briefingSummary",
    Vision: "designVision",
    BRIEFING_SUMMARY: "briefingSummary",
    DESIGN_VISION: "designVision",
  };

  Object.entries(normalizationMap).forEach(([gerKey, intKey]) => {
    // An already populated internal key always wins over an alias.
    if (finalState[gerKey] && !finalState[intKey]) {
      if (
        typeof finalState[gerKey] === "object" &&
        !Array.isArray(finalState[gerKey])
      ) {
        // Sectioned object -> one text, sections separated by blank lines.
        finalState[intKey] = Object.values(finalState[gerKey]).join("\n\n");
      } else {
        finalState[intKey] = finalState[gerKey];
      }
    }
  });

  // Final Logic Guard: Strictly strip CMS from ALL descriptions and fields if not enabled
  // NOTE(review): the Pass 6 prompt also bans "Content Management System"
  // (with spaces) and "Redaktionssystem"; neither is matched by this pattern.
  if (!finalState.cmsSetup) {
    // Recursively rebuilds the value; every string gets the CMS terms replaced.
    const stripCMS = (obj: any): any => {
      if (typeof obj === "string") {
        return obj.replace(
          /CMS|Content-Management-System|Inhaltsverwaltung/gi,
          "Plattform-Struktur",
        );
      }
      if (Array.isArray(obj)) {
        return obj.map(stripCMS);
      }
      if (obj !== null && typeof obj === "object") {
        const newObj: any = {};
        Object.entries(obj).forEach(([k, v]) => {
          newObj[k] = stripCMS(v);
        });
        return newObj;
      }
      return obj;
    };
    finalState = stripCMS(finalState);
  }

  // Sitemap shape: coerce object-shaped sitemaps into an array FIRST, so the
  // key normalization below also applies to them (previously this ran last and
  // such sitemaps kept their raw keys).
  if (finalState.sitemap && !Array.isArray(finalState.sitemap)) {
    if (finalState.sitemap.categories)
      finalState.sitemap = finalState.sitemap.categories;
    else if (finalState.sitemap.sitemap)
      finalState.sitemap = finalState.sitemap.sitemap;
    else {
      // { "Category": [pages...] } -> [{ category, pages }]
      const entries = Object.entries(finalState.sitemap);
      if (entries.every(([, v]) => Array.isArray(v))) {
        finalState.sitemap = entries.map(([category, pages]) => ({
          category,
          pages,
        }));
      }
    }
  }

  // Sitemap Normalization (German keys to internal)
  if (Array.isArray(finalState.sitemap)) {
    // A page given as a bare string is kept as the page title.
    const toPage = (page: any) =>
      typeof page === "string"
        ? { title: page, desc: "" }
        : {
            title: page?.title || page?.titel || page?.Titel || "Seite",
            desc:
              page?.desc ||
              page?.beschreibung ||
              page?.Beschreibung ||
              page?.description ||
              "",
          };

    finalState.sitemap = finalState.sitemap.map((cat: any) => {
      const rawPages = cat?.pages || cat?.seiten || cat?.Seiten;
      return {
        category:
          cat?.category ||
          cat?.kategorie ||
          cat?.Kategorie ||
          cat?.title ||
          "Allgemein",
        // Anything that is not a list of pages becomes an empty list.
        pages: (Array.isArray(rawPages) ? rawPages : []).map(toPage),
      };
    });
  }

  // Position Descriptions: convert the array form into a keyed record FIRST.
  // The object cleanup below accepts arrays too (Object.entries yields index
  // keys), which previously made this conversion unreachable.
  if (Array.isArray(finalState.positionDescriptions)) {
    const keyed: Record<string, string> = {};
    finalState.positionDescriptions.forEach((item: any) => {
      const key = item?.feature || item?.id || item?.title || item?.pos;
      if (key) keyed[key] = item.description || item.desc;
    });
    finalState.positionDescriptions = keyed;
  }

  // Position Descriptions Normalization (Strict Title Mapping + Index-based Fallback)
  if (
    finalState.positionDescriptions &&
    typeof finalState.positionDescriptions === "object"
  ) {
    const normalized: Record<string, string> = {};
    const rawPositions = finalState.positionDescriptions;

    // 1. Initial cleanup: flatten object values to their description text
    Object.entries(rawPositions).forEach(([key, value]) => {
      const normalizedValue =
        value !== null && typeof value === "object"
          ? (value as any).beschreibung ||
            (value as any).description ||
            JSON.stringify(value)
          : value;
      normalized[key] = normalizedValue as string;
    });

    // 2. Index-based matching (Map "10. Foo" to "10. Bar")
    const standardTitles = [
      "1. Das technische Fundament",
      "2. Individuelle Seiten",
      "3. System-Module (Features)",
      "4. Logik-Funktionen",
      "5. Schnittstellen (API)",
      "6. Inhalts-Verwaltung",
      "7. Mehrsprachigkeit",
      "8. Inhaltliche Initial-Pflege",
      "9. Sorglos Betrieb",
    ];

    standardTitles.forEach((std) => {
      const prefix = std.split(".")[0] + "."; // e.g., "10."
      // Find any key in the AI output that starts with this number
      const matchingKey = Object.keys(normalized).find((k) =>
        k.trim().startsWith(prefix),
      );
      if (matchingKey && matchingKey !== std) {
        normalized[std] = normalized[matchingKey];
        // Keep the old key too just in case, but prioritize the standard one
      }
    });

    finalState.positionDescriptions = normalized;
  }

  // Final Post-Reflection Budget Sync (Hard Pruning if still over)
  if (budget) {
    // Every non-digit is dropped before parsing, so "5.000 €" becomes 5000.
    const targetValue = parseInt(budget.replace(/[^0-9]/g, ""), 10);
    if (!Number.isNaN(targetValue)) {
      console.log(`⚖️ Final Budget Audit(${targetValue} € target)...`);

      let currentTotals = calculateTotals(finalState, PRICING);

      // Step-by-step pruning if too expensive
      if (currentTotals.totalPrice > targetValue) {
        console.log(
          `⚠️ Budget exceeded(${currentTotals.totalPrice} €).Pruning scope to fit ${targetValue} €...`,
        );

        // 1. Remove optional "other" stuff
        finalState.otherFeatures = [];
        finalState.otherFunctions = [];
        finalState.otherTech = [];
        // Re-price right away; otherwise step 2 would compare a stale total
        // and drop a function even if the estimate already fits.
        currentTotals = calculateTotals(finalState, PRICING);

        // 2. Remove non-critical functions
        const funcPriority = ["search", "filter", "calendar", "multilang"];
        for (const f of funcPriority) {
          if (currentTotals.totalPrice <= targetValue) break;
          if (
            Array.isArray(finalState.functions) &&
            finalState.functions.includes(f)
          ) {
            finalState.functions = finalState.functions.filter(
              (x: string) => x !== f,
            );
            currentTotals = calculateTotals(finalState, PRICING);
          }
        }

        // 3. Remove least critical features if still over
        const featurePriority = ["events", "blog_news", "products"];
        for (const p of featurePriority) {
          if (currentTotals.totalPrice <= targetValue) break;
          if (
            Array.isArray(finalState.features) &&
            finalState.features.includes(p)
          ) {
            finalState.features = finalState.features.filter(
              (f: string) => f !== p,
            );
            currentTotals = calculateTotals(finalState, PRICING);
          }
        }

        // 4. Reduce page count (Selected Pages AND Sitemap)
        const selectedCount = (): number =>
          Array.isArray(finalState.selectedPages)
            ? finalState.selectedPages.length
            : 0;

        while (
          currentTotals.totalPrice > targetValue &&
          (selectedCount() > 4 || currentTotals.totalPagesCount > 5)
        ) {
          let pruned = false;

          if (selectedCount() > 4) {
            // Copy instead of pop(): the array may still be the one owned by
            // the imported initialState (the state was built by shallow spread).
            finalState.selectedPages = finalState.selectedPages.slice(0, -1);
            pruned = true;
          }

          // Prune Sitemap to match
          if (Array.isArray(finalState.sitemap)) {
            const lastCat = finalState.sitemap[finalState.sitemap.length - 1];
            if (
              lastCat &&
              Array.isArray(lastCat.pages) &&
              lastCat.pages.length > 0
            ) {
              lastCat.pages.pop();
              if (lastCat.pages.length === 0) finalState.sitemap.pop();
              pruned = true;
            }
          }

          // Nothing left to remove: stop, otherwise the loop would never end
          // while the total page count stays above the threshold.
          if (!pruned) break;

          currentTotals = calculateTotals(finalState, PRICING);
        }

        // 5. Final fallback: Remove second feature if still over
        if (
          currentTotals.totalPrice > targetValue &&
          Array.isArray(finalState.features) &&
          finalState.features.length > 1
        ) {
          finalState.features = finalState.features.slice(0, -1);
          currentTotals = calculateTotals(finalState, PRICING);
        }
      }
      console.log(
        `✅ Final budget audit complete: ${currentTotals.totalPrice} €`,
      );
    }
  }

  return { state: finalState, usage };
};

main().catch(console.error);