feat: migrate npm registry from Verdaccio to Gitea Packages
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
This commit is contained in:
40
packages/concept-engine/src/_test_pipeline.ts
Normal file
40
packages/concept-engine/src/_test_pipeline.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
// Manual smoke-test script for the estimation pipeline.
// Loads a local briefing file and runs the full pipeline against a fixed URL.
import { config as dotenvConfig } from 'dotenv';
import * as path from 'node:path';
import * as fs from 'node:fs/promises';
// NOTE(review): pipeline.ts in this commit exports `ConceptPipeline`; confirm
// that `EstimationPipeline` actually exists in ./pipeline.js or this import fails.
import { EstimationPipeline } from './pipeline.js';

// Load environment variables from the monorepo root .env
// (assumes the script is executed from the package directory).
dotenvConfig({ path: path.resolve(process.cwd(), '../../.env') });

// Read the sample briefing used for this smoke test.
const briefing = await fs.readFile(
  path.resolve(process.cwd(), '../../data/briefings/etib.txt'),
  'utf8',
);

console.log(`Briefing loaded: ${briefing.length} chars`);

// Wire the pipeline with console-logging callbacks for each step.
const pipeline = new EstimationPipeline(
  {
    openrouterKey: process.env.OPENROUTER_API_KEY || '',
    zyteApiKey: process.env.ZYTE_API_KEY,
    outputDir: path.resolve(process.cwd(), '../../out/estimations'),
    crawlDir: path.resolve(process.cwd(), '../../data/crawls'),
  },
  {
    onStepStart: (id, name) => console.log(`[CB] Starting: ${id}`),
    onStepComplete: (id) => console.log(`[CB] Done: ${id}`),
    onStepError: (id, err) => console.error(`[CB] Error in ${id}: ${err}`),
  },
);

try {
  const result = await pipeline.run({
    briefing,
    url: 'https://www.e-tib.com',
  });

  console.log('\n✨ Pipeline complete!');
  // NOTE(review): `validationResult` is not visible on the pipeline's result type
  // in this commit — confirm the returned object actually carries it.
  console.log('Validation:', result.validationResult?.passed ? 'PASSED' : 'FAILED');
} catch (err: any) {
  console.error('\n❌ Pipeline failed:', err.message);
  console.error(err.stack);
}
|
||||
334
packages/concept-engine/src/analyzer.ts
Normal file
334
packages/concept-engine/src/analyzer.ts
Normal file
@@ -0,0 +1,334 @@
|
||||
// ============================================================================
|
||||
// Analyzer — Deterministic Site Analysis (NO LLM!)
|
||||
// Builds a SiteProfile from crawled pages using pure code logic.
|
||||
// This is the core fix against hallucinated page structures.
|
||||
// ============================================================================
|
||||
|
||||
import type {
|
||||
CrawledPage,
|
||||
SiteProfile,
|
||||
NavItem,
|
||||
CompanyInfo,
|
||||
PageInventoryItem,
|
||||
} from "./types.js";
|
||||
|
||||
/**
|
||||
* Build a complete SiteProfile from an array of crawled pages.
|
||||
* This is 100% deterministic — no LLM calls involved.
|
||||
*/
|
||||
export function analyzeSite(pages: CrawledPage[], domain: string): SiteProfile {
|
||||
const navigation = extractNavigation(pages);
|
||||
const existingFeatures = extractExistingFeatures(pages);
|
||||
const services = extractAllServices(pages);
|
||||
const companyInfo = extractCompanyInfo(pages);
|
||||
const colors = extractColors(pages);
|
||||
const socialLinks = extractSocialLinks(pages);
|
||||
const externalDomains = extractExternalDomains(pages, domain);
|
||||
const images = extractAllImages(pages);
|
||||
const employeeCount = extractEmployeeCount(pages);
|
||||
const pageInventory = buildPageInventory(pages);
|
||||
|
||||
return {
|
||||
domain,
|
||||
crawledAt: new Date().toISOString(),
|
||||
totalPages: pages.filter((p) => p.type !== "legal").length,
|
||||
navigation,
|
||||
existingFeatures,
|
||||
services,
|
||||
companyInfo,
|
||||
pageInventory,
|
||||
colors,
|
||||
socialLinks,
|
||||
externalDomains,
|
||||
images,
|
||||
employeeCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the site's main navigation structure from <nav> elements.
|
||||
* Uses the HOME page's nav as the canonical source.
|
||||
*/
|
||||
function extractNavigation(pages: CrawledPage[]): NavItem[] {
|
||||
// Prefer the home page's nav
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
const sourcePage = homePage || pages[0];
|
||||
if (!sourcePage) return [];
|
||||
|
||||
// Deduplicate nav items
|
||||
const seen = new Set<string>();
|
||||
const navItems: NavItem[] = [];
|
||||
|
||||
for (const label of sourcePage.navItems) {
|
||||
const normalized = label.toLowerCase().trim();
|
||||
if (seen.has(normalized)) continue;
|
||||
if (normalized.length < 2) continue;
|
||||
seen.add(normalized);
|
||||
navItems.push({ label, href: "" });
|
||||
}
|
||||
|
||||
return navItems;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate all detected interactive features across all pages.
|
||||
*/
|
||||
function extractExistingFeatures(pages: CrawledPage[]): string[] {
|
||||
const allFeatures = new Set<string>();
|
||||
for (const page of pages) {
|
||||
for (const feature of page.features) {
|
||||
allFeatures.add(feature);
|
||||
}
|
||||
}
|
||||
return [...allFeatures];
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate all images found across all pages.
|
||||
*/
|
||||
function extractAllImages(pages: CrawledPage[]): string[] {
|
||||
const allImages = new Set<string>();
|
||||
for (const page of pages) {
|
||||
if (!page.images) continue;
|
||||
for (const img of page.images) {
|
||||
allImages.add(img);
|
||||
}
|
||||
}
|
||||
return [...allImages];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract employee count from page text.
|
||||
* Looks for patterns like "über 50 Mitarbeitern", "200 Mitarbeiter", "50+ employees".
|
||||
*/
|
||||
function extractEmployeeCount(pages: CrawledPage[]): string | null {
|
||||
const allText = pages.map((p) => p.text).join(" ");
|
||||
|
||||
// German patterns: 'über 50 Mitarbeitern', '120 Beschäftigte', '+200 MA'
|
||||
const patterns = [
|
||||
/(über|ca\.?|rund|mehr als|\+)?\s*(\d{1,4})\s*(Mitarbeiter(?:innen)?|Beschäftigte|MA|Fachkräfte)\b/gi,
|
||||
/(\d{1,4})\+?\s*(employees|team members)/gi,
|
||||
];
|
||||
|
||||
for (const pattern of patterns) {
|
||||
const match = allText.match(pattern);
|
||||
if (match && match[0]) {
|
||||
const num = match[0].match(/(\d{1,4})/)?.[1];
|
||||
const prefix = match[0].match(/über|ca\.?|rund|mehr als/i)?.[0];
|
||||
if (num) return prefix ? `${prefix} ${num}` : num;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract services/competencies from service-type pages.
|
||||
* Focuses on H2-H3 headings and list items on service pages.
|
||||
*/
|
||||
function extractAllServices(pages: CrawledPage[]): string[] {
|
||||
const servicePages = pages.filter(
|
||||
(p) => p.type === "service" || p.pathname.includes("kompetenz"),
|
||||
);
|
||||
|
||||
const services = new Set<string>();
|
||||
for (const page of servicePages) {
|
||||
// Use headings as primary service indicators
|
||||
for (const heading of page.headings) {
|
||||
const clean = heading.trim();
|
||||
if (clean.length > 3 && clean.length < 100) {
|
||||
// Skip generic headings
|
||||
if (/^(home|kontakt|impressum|datenschutz|menü|navigation|suche)/i.test(clean)) continue;
|
||||
services.add(clean);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no service pages found, look at the home page headings too
|
||||
if (services.size === 0) {
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
if (homePage) {
|
||||
for (const heading of homePage.headings) {
|
||||
const clean = heading.trim();
|
||||
if (clean.length > 3 && clean.length < 80) {
|
||||
services.add(clean);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return [...services];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract company information from Impressum / footer content.
|
||||
*/
|
||||
function extractCompanyInfo(pages: CrawledPage[]): CompanyInfo {
|
||||
const info: CompanyInfo = {};
|
||||
|
||||
// Find Impressum or legal page
|
||||
const legalPage = pages.find(
|
||||
(p) =>
|
||||
p.type === "legal" &&
|
||||
(p.pathname.includes("impressum") || p.title.toLowerCase().includes("impressum")),
|
||||
);
|
||||
|
||||
const sourceText = legalPage?.text || pages.find((p) => p.type === "home")?.text || "";
|
||||
|
||||
// USt-ID
|
||||
const taxMatch = sourceText.match(/USt[.\s-]*(?:ID[.\s-]*Nr\.?|IdNr\.?)[:\s]*([A-Z]{2}\d{9,11})/i);
|
||||
if (taxMatch) info.taxId = taxMatch[1];
|
||||
|
||||
// HRB number
|
||||
const hrbMatch = sourceText.match(/HRB[:\s]*(\d+\s*[A-Z]*)/i);
|
||||
if (hrbMatch) info.registerNumber = `HRB ${hrbMatch[1].trim()}`;
|
||||
|
||||
// Phone
|
||||
const phoneMatch = sourceText.match(/(?:Tel|Telefon|Fon)[.:\s]*([+\d\s()/-]{10,20})/i);
|
||||
if (phoneMatch) info.phone = phoneMatch[1].trim();
|
||||
|
||||
// Email
|
||||
const emailMatch = sourceText.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/);
|
||||
if (emailMatch) info.email = emailMatch[0];
|
||||
|
||||
// Address (look for German postal code pattern)
|
||||
const addressMatch = sourceText.match(
|
||||
/(?:[\w\s.-]+(?:straße|str\.|weg|platz|ring|allee|gasse)\s*\d+[a-z]?\s*,?\s*)?(?:D-)?(\d{5})\s+\w+/i,
|
||||
);
|
||||
if (addressMatch) info.address = addressMatch[0].trim();
|
||||
|
||||
// GF / Geschäftsführer
|
||||
const gfMatch = sourceText.match(
|
||||
/Geschäftsführ(?:er|ung)[:\s]*([A-ZÄÖÜ][a-zäöüß]+(?:\s+[A-ZÄÖÜ][a-zäöüß]+){1,3})/,
|
||||
);
|
||||
if (gfMatch) info.managingDirector = gfMatch[1].trim();
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract brand colors from HTML (inline styles, CSS variables).
|
||||
*/
|
||||
function extractColors(pages: CrawledPage[]): string[] {
|
||||
const colors = new Set<string>();
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
if (!homePage) return [];
|
||||
|
||||
const hexMatches = homePage.html.match(/#(?:[0-9a-fA-F]{3}){1,2}\b/g) || [];
|
||||
for (const hex of hexMatches) {
|
||||
colors.add(hex.toLowerCase());
|
||||
if (colors.size >= 8) break;
|
||||
}
|
||||
|
||||
return [...colors];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract social media links from footers / headers.
|
||||
*/
|
||||
function extractSocialLinks(pages: CrawledPage[]): Record<string, string> {
|
||||
const socials: Record<string, string> = {};
|
||||
const platforms = [
|
||||
{ key: "linkedin", patterns: ["linkedin.com"] },
|
||||
{ key: "instagram", patterns: ["instagram.com"] },
|
||||
{ key: "facebook", patterns: ["facebook.com", "fb.com"] },
|
||||
{ key: "youtube", patterns: ["youtube.com", "youtu.be"] },
|
||||
{ key: "twitter", patterns: ["twitter.com", "x.com"] },
|
||||
{ key: "xing", patterns: ["xing.com"] },
|
||||
];
|
||||
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
if (!homePage) return socials;
|
||||
|
||||
const urlMatches = homePage.html.match(/https?:\/\/[^\s"'<>]+/g) || [];
|
||||
for (const url of urlMatches) {
|
||||
for (const platform of platforms) {
|
||||
if (platform.patterns.some((p) => url.includes(p)) && !socials[platform.key]) {
|
||||
socials[platform.key] = url;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return socials;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find domains that are linked but separate from the main domain.
|
||||
* Critical for detecting sister companies with own websites (e.g. etib-ing.com).
|
||||
*/
|
||||
function extractExternalDomains(pages: CrawledPage[], mainDomain: string): string[] {
|
||||
const externalDomains = new Set<string>();
|
||||
const cleanMain = mainDomain.replace(/^www\./, "");
|
||||
// Extract meaningful base parts: "e-tib.com" → ["e", "tib", "etib"]
|
||||
const mainParts = cleanMain.split(".")[0].toLowerCase().split(/[-_]/).filter(p => p.length > 1);
|
||||
const mainJoined = mainParts.join(""); // "etib"
|
||||
|
||||
for (const page of pages) {
|
||||
const linkMatches = page.html.match(/https?:\/\/[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) || [];
|
||||
for (const url of linkMatches) {
|
||||
try {
|
||||
const urlObj = new URL(url);
|
||||
const domain = urlObj.hostname.replace(/^www\./, "");
|
||||
// Skip same domain
|
||||
if (domain === cleanMain) continue;
|
||||
// Skip common third-party services
|
||||
if (
|
||||
domain.includes("google") ||
|
||||
domain.includes("facebook") ||
|
||||
domain.includes("twitter") ||
|
||||
domain.includes("linkedin") ||
|
||||
domain.includes("instagram") ||
|
||||
domain.includes("youtube") ||
|
||||
domain.includes("cookie") ||
|
||||
domain.includes("analytics") ||
|
||||
domain.includes("cdn") ||
|
||||
domain.includes("cloudflare") ||
|
||||
domain.includes("fonts") ||
|
||||
domain.includes("jquery") ||
|
||||
domain.includes("bootstrap") ||
|
||||
domain.includes("wordpress") ||
|
||||
domain.includes("jimdo") ||
|
||||
domain.includes("wix")
|
||||
)
|
||||
continue;
|
||||
|
||||
// Fuzzy match: check if the domain contains any base part of the main domain
|
||||
// e.g. main="e-tib.com" → mainParts=["e","tib"], mainJoined="etib"
|
||||
// target="etib-ing.com" → domainBase="etib-ing", domainJoined="etibing"
|
||||
const domainBase = domain.split(".")[0].toLowerCase();
|
||||
const domainJoined = domainBase.replace(/[-_]/g, "");
|
||||
|
||||
const isRelated =
|
||||
domainJoined.includes(mainJoined) ||
|
||||
mainJoined.includes(domainJoined) ||
|
||||
mainParts.some(part => part.length > 2 && domainBase.includes(part));
|
||||
|
||||
if (isRelated) {
|
||||
externalDomains.add(domain);
|
||||
}
|
||||
} catch {
|
||||
// Invalid URL
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return [...externalDomains];
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a structured inventory of all pages.
|
||||
*/
|
||||
function buildPageInventory(pages: CrawledPage[]): PageInventoryItem[] {
|
||||
return pages.map((page) => ({
|
||||
url: page.url,
|
||||
pathname: page.pathname,
|
||||
title: page.title,
|
||||
type: page.type,
|
||||
headings: page.headings.slice(0, 10),
|
||||
services: page.type === "service" ? page.headings.filter((h) => h.length > 3 && h.length < 80) : [],
|
||||
hasSearch: page.features.includes("search"),
|
||||
hasForms: page.features.includes("forms"),
|
||||
hasMap: page.features.includes("maps"),
|
||||
hasVideo: page.features.includes("video"),
|
||||
contentSummary: page.text.substring(0, 500),
|
||||
}));
|
||||
}
|
||||
149
packages/concept-engine/src/cli.ts
Normal file
149
packages/concept-engine/src/cli.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env node
|
||||
// ============================================================================
|
||||
// @mintel/concept-engine — CLI Entry Point
|
||||
// Simple commander-based CLI for concept generation.
|
||||
// ============================================================================
|
||||
|
||||
import { Command } from "commander";
|
||||
import * as path from "node:path";
|
||||
import * as fs from "node:fs/promises";
|
||||
import { existsSync } from "node:fs";
|
||||
import { config as dotenvConfig } from "dotenv";
|
||||
import { ConceptPipeline } from "./pipeline.js";
|
||||
|
||||
// Load .env from monorepo root
|
||||
dotenvConfig({ path: path.resolve(process.cwd(), "../../.env") });
|
||||
dotenvConfig({ path: path.resolve(process.cwd(), ".env") });
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name("concept")
|
||||
.description("AI-powered project concept generator")
|
||||
.version("1.0.0");
|
||||
|
||||
program
|
||||
.command("run")
|
||||
.description("Run the full concept pipeline")
|
||||
.argument("[briefing]", "Briefing text or @path/to/file.txt")
|
||||
.option("--url <url>", "Target website URL")
|
||||
.option("--comments <comments>", "Additional notes")
|
||||
.option("--clear-cache", "Clear crawl cache and re-crawl")
|
||||
.option("--output <dir>", "Output directory", "../../out/concepts")
|
||||
.option("--crawl-dir <dir>", "Crawl data directory", "../../data/crawls")
|
||||
.action(async (briefingArg: string | undefined, options: any) => {
|
||||
const openrouterKey = process.env.OPENROUTER_API_KEY || process.env.OPENROUTER_KEY;
|
||||
if (!openrouterKey) {
|
||||
console.error("❌ OPENROUTER_API_KEY not found in environment.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
let briefing = briefingArg || "";
|
||||
|
||||
// Handle @file references
|
||||
if (briefing.startsWith("@")) {
|
||||
const rawPath = briefing.substring(1);
|
||||
const filePath = rawPath.startsWith("/")
|
||||
? rawPath
|
||||
: path.resolve(process.cwd(), rawPath);
|
||||
if (!existsSync(filePath)) {
|
||||
console.error(`❌ Briefing file not found: ${filePath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
briefing = await fs.readFile(filePath, "utf8");
|
||||
console.log(`📄 Loaded briefing from: ${filePath}`);
|
||||
}
|
||||
|
||||
// Auto-discover URL from briefing
|
||||
let url = options.url;
|
||||
if (!url && briefing) {
|
||||
const urlMatch = briefing.match(/https?:\/\/[^\s]+/);
|
||||
if (urlMatch) {
|
||||
url = urlMatch[0];
|
||||
console.log(`🔗 Discovered URL in briefing: ${url}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!briefing && !url) {
|
||||
console.error("❌ Provide a briefing text or --url");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const pipeline = new ConceptPipeline(
|
||||
{
|
||||
openrouterKey,
|
||||
zyteApiKey: process.env.ZYTE_API_KEY,
|
||||
outputDir: path.resolve(process.cwd(), options.output),
|
||||
crawlDir: path.resolve(process.cwd(), options.crawlDir),
|
||||
},
|
||||
{
|
||||
onStepStart: (id, name) => {
|
||||
// Will be enhanced with Ink spinner later
|
||||
},
|
||||
onStepComplete: (id, result) => {
|
||||
// Will be enhanced with Ink UI later
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
await pipeline.run({
|
||||
briefing,
|
||||
url,
|
||||
comments: options.comments,
|
||||
clearCache: options.clearCache,
|
||||
});
|
||||
|
||||
console.log("\n✨ Concept generation complete!");
|
||||
} catch (err) {
|
||||
console.error(`\n❌ Pipeline failed: ${(err as Error).message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
program
|
||||
.command("analyze")
|
||||
.description("Only crawl and analyze a website (no LLM)")
|
||||
.argument("<url>", "Website URL to analyze")
|
||||
.option("--crawl-dir <dir>", "Crawl data directory", "../../data/crawls")
|
||||
.option("--clear-cache", "Clear existing crawl cache")
|
||||
.action(async (url: string, options: any) => {
|
||||
const { crawlSite } = await import("./scraper.js");
|
||||
const { analyzeSite } = await import("./analyzer.js");
|
||||
|
||||
if (options.clearCache) {
|
||||
const { clearCrawlCache } = await import("./scraper.js");
|
||||
const domain = new URL(url).hostname;
|
||||
await clearCrawlCache(path.resolve(process.cwd(), options.crawlDir), domain);
|
||||
}
|
||||
|
||||
const pages = await crawlSite(url, {
|
||||
zyteApiKey: process.env.ZYTE_API_KEY,
|
||||
crawlDir: path.resolve(process.cwd(), options.crawlDir),
|
||||
});
|
||||
|
||||
const domain = new URL(url).hostname;
|
||||
const profile = analyzeSite(pages, domain);
|
||||
|
||||
console.log("\n📊 Site Profile:");
|
||||
console.log(` Domain: ${profile.domain}`);
|
||||
console.log(` Total Pages: ${profile.totalPages}`);
|
||||
console.log(` Navigation: ${profile.navigation.map((n) => n.label).join(", ")}`);
|
||||
console.log(` Features: ${profile.existingFeatures.join(", ") || "none"}`);
|
||||
console.log(` Services: ${profile.services.join(", ") || "none"}`);
|
||||
console.log(` External Domains: ${profile.externalDomains.join(", ") || "none"}`);
|
||||
console.log(` Company: ${profile.companyInfo.name || "unbekannt"}`);
|
||||
console.log(` Tax ID: ${profile.companyInfo.taxId || "unbekannt"}`);
|
||||
console.log(` Colors: ${profile.colors.join(", ")}`);
|
||||
console.log(` Images Found: ${profile.images.length}`);
|
||||
console.log(` Social: ${Object.entries(profile.socialLinks).map(([k, v]) => `${k}`).join(", ") || "none"}`);
|
||||
|
||||
const outputPath = path.join(
|
||||
path.resolve(process.cwd(), options.crawlDir),
|
||||
domain.replace(/\./g, "-"),
|
||||
"_site_profile.json",
|
||||
);
|
||||
console.log(`\n📦 Full profile saved to: ${outputPath}`);
|
||||
});
|
||||
|
||||
program.parse();
|
||||
10
packages/concept-engine/src/index.ts
Normal file
10
packages/concept-engine/src/index.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
// ============================================================================
// @mintel/concept-engine — Public API
// ============================================================================

// Pipeline orchestrator and its progress-callback contract.
export { ConceptPipeline } from "./pipeline.js";
export type { PipelineCallbacks } from "./pipeline.js";
// Deterministic crawling and site analysis (no LLM involved).
export { crawlSite, clearCrawlCache } from "./scraper.js";
export { analyzeSite } from "./analyzer.js";
// OpenRouter LLM helpers.
export { llmRequest, llmJsonRequest, cleanJson } from "./llm-client.js";
// All shared domain types.
export * from "./types.js";
|
||||
133
packages/concept-engine/src/llm-client.ts
Normal file
133
packages/concept-engine/src/llm-client.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
// ============================================================================
|
||||
// LLM Client — Unified interface with model routing via OpenRouter
|
||||
// ============================================================================
|
||||
|
||||
import axios from "axios";
|
||||
|
||||
// Parameters for a single chat-completion call routed through OpenRouter.
interface LLMRequestOptions {
  model: string; // OpenRouter model slug
  systemPrompt: string;
  userPrompt: string;
  jsonMode?: boolean; // request response_format json_object (llmRequest defaults this to true)
  apiKey: string; // OpenRouter API key
}

// Normalized LLM result: raw assistant text plus token/cost accounting.
interface LLMResponse {
  content: string;
  usage: {
    promptTokens: number;
    completionTokens: number;
    cost: number; // provider-reported when available, otherwise estimated in llmRequest
  };
}
|
||||
|
||||
/**
|
||||
* Clean raw LLM output to parseable JSON.
|
||||
* Handles markdown fences, control chars, trailing commas.
|
||||
*/
|
||||
export function cleanJson(str: string): string {
|
||||
let cleaned = str.replace(/```json\n?|```/g, "").trim();
|
||||
cleaned = cleaned.replace(
|
||||
/[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F-\u009F]/g,
|
||||
" ",
|
||||
);
|
||||
cleaned = cleaned.replace(/,\s*([\]}])/g, "$1");
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a request to an LLM via OpenRouter.
|
||||
*/
|
||||
export async function llmRequest(options: LLMRequestOptions): Promise<LLMResponse> {
|
||||
const { model, systemPrompt, userPrompt, jsonMode = true, apiKey } = options;
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
const resp = await axios.post(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
{
|
||||
model,
|
||||
messages: [
|
||||
{ role: "system", content: systemPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
],
|
||||
...(jsonMode ? { response_format: { type: "json_object" } } : {}),
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout: 120000,
|
||||
},
|
||||
).catch(err => {
|
||||
if (err.response) {
|
||||
console.error("OpenRouter API Error:", JSON.stringify(err.response.data, null, 2));
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
|
||||
const content = resp.data.choices?.[0]?.message?.content;
|
||||
if (!content) {
|
||||
throw new Error(`LLM returned no content. Model: ${model}`);
|
||||
}
|
||||
|
||||
let cost = 0;
|
||||
const usage = resp.data.usage || {};
|
||||
if (usage.cost !== undefined) {
|
||||
cost = usage.cost;
|
||||
} else {
|
||||
// Fallback estimation
|
||||
cost =
|
||||
(usage.prompt_tokens || 0) * (0.1 / 1_000_000) +
|
||||
(usage.completion_tokens || 0) * (0.4 / 1_000_000);
|
||||
}
|
||||
|
||||
return {
|
||||
content,
|
||||
usage: {
|
||||
promptTokens: usage.prompt_tokens || 0,
|
||||
completionTokens: usage.completion_tokens || 0,
|
||||
cost,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a request and parse the response as JSON.
|
||||
*/
|
||||
export async function llmJsonRequest<T = any>(
|
||||
options: LLMRequestOptions,
|
||||
): Promise<{ data: T; usage: LLMResponse["usage"] }> {
|
||||
const response = await llmRequest({ ...options, jsonMode: true });
|
||||
const cleaned = cleanJson(response.content);
|
||||
|
||||
let parsed: T;
|
||||
try {
|
||||
parsed = JSON.parse(cleaned);
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`Failed to parse LLM JSON response: ${(e as Error).message}\nRaw: ${cleaned.substring(0, 500)}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Unwrap common LLM artifacts: {"0": {...}}, {"state": {...}}, etc.
|
||||
const unwrapped = unwrapResponse(parsed);
|
||||
|
||||
return { data: unwrapped as T, usage: response.usage };
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively unwrap common LLM wrapping patterns.
|
||||
*/
|
||||
function unwrapResponse(obj: any): any {
|
||||
if (!obj || typeof obj !== "object" || Array.isArray(obj)) return obj;
|
||||
const keys = Object.keys(obj);
|
||||
if (keys.length === 1) {
|
||||
const key = keys[0];
|
||||
if (key === "0" || key === "state" || key === "facts" || key === "result" || key === "data") {
|
||||
return unwrapResponse(obj[key]);
|
||||
}
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
257
packages/concept-engine/src/pipeline.ts
Normal file
257
packages/concept-engine/src/pipeline.ts
Normal file
@@ -0,0 +1,257 @@
|
||||
// ============================================================================
|
||||
// Pipeline Orchestrator
|
||||
// Runs all steps sequentially, tracks state, supports re-running individual steps.
|
||||
// ============================================================================
|
||||
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as path from "node:path";
|
||||
import { existsSync } from "node:fs";
|
||||
import { crawlSite, clearCrawlCache } from "./scraper.js";
|
||||
import { analyzeSite } from "./analyzer.js";
|
||||
import { executeResearch } from "./steps/00b-research.js";
|
||||
import { executeExtract } from "./steps/01-extract.js";
|
||||
import { executeSiteAudit } from "./steps/00a-site-audit.js";
|
||||
import { executeAudit } from "./steps/02-audit.js";
|
||||
import { executeStrategize } from "./steps/03-strategize.js";
|
||||
import { executeArchitect } from "./steps/04-architect.js";
|
||||
import type {
|
||||
PipelineConfig,
|
||||
PipelineInput,
|
||||
ConceptState,
|
||||
ProjectConcept,
|
||||
StepResult,
|
||||
StepUsage,
|
||||
} from "./types.js";
|
||||
|
||||
// Optional progress hooks invoked by ConceptPipeline.runStep around each step.
export interface PipelineCallbacks {
  // Fired before a step's executor runs.
  onStepStart?: (stepId: string, stepName: string) => void;
  // Fired after a step finishes successfully, with its StepResult.
  onStepComplete?: (stepId: string, result: StepResult) => void;
  // Fired when a step fails; receives the error message.
  onStepError?: (stepId: string, error: string) => void;
}
|
||||
|
||||
/**
|
||||
* The main concept pipeline orchestrator.
|
||||
* Runs conceptual steps sequentially and builds the ProjectConcept.
|
||||
*/
|
||||
export class ConceptPipeline {
|
||||
// Directories and API keys for the run.
private config: PipelineConfig;
// Mutable accumulator for everything the steps produce (facts, sitemap, usage...).
private state: ConceptState;
// Optional progress hooks; defaults to an empty object (all hooks absent).
private callbacks: PipelineCallbacks;

/**
 * @param config    output/crawl directories plus API keys
 * @param callbacks optional step lifecycle hooks
 */
constructor(config: PipelineConfig, callbacks: PipelineCallbacks = {}) {
  this.config = config;
  this.callbacks = callbacks;
  this.state = this.createInitialState();
}
||||
|
||||
/**
 * Produce the empty pipeline state: a blank briefing and a zeroed
 * token/cost accumulator with no per-step entries yet.
 */
private createInitialState(): ConceptState {
  const emptyUsage = {
    totalPromptTokens: 0,
    totalCompletionTokens: 0,
    totalCost: 0,
    perStep: [],
  };
  return { briefing: "", usage: emptyUsage };
}
|
||||
|
||||
/**
 * Run the full concept pipeline from scratch.
 *
 * Steps execute strictly in order; each step stores its output on
 * `this.state`, which later steps read. A step failure aborts the run
 * (runStep rethrows the step error).
 *
 * @param input briefing text plus optional URL, comments, and cache control
 * @returns the assembled ProjectConcept (also handed to saveState)
 */
async run(input: PipelineInput): Promise<ProjectConcept> {
  this.state.briefing = input.briefing;
  this.state.url = input.url;
  this.state.comments = input.comments;

  // Ensure output directories
  await fs.mkdir(this.config.outputDir, { recursive: true });
  await fs.mkdir(this.config.crawlDir, { recursive: true });

  // Step 0: Scrape & Analyze (deterministic)
  if (input.url) {
    if (input.clearCache) {
      const domain = new URL(input.url).hostname;
      await clearCrawlCache(this.config.crawlDir, domain);
    }
    await this.runStep("00-scrape", "Scraping & Analyzing Website", async () => {
      const pages = await crawlSite(input.url!, {
        zyteApiKey: this.config.zyteApiKey,
        crawlDir: this.config.crawlDir,
      });
      const domain = new URL(input.url!).hostname;
      const siteProfile = analyzeSite(pages, domain);
      this.state.siteProfile = siteProfile;
      // Crawl cache lives under a dot-free directory name, e.g. "e-tib-com".
      this.state.crawlDir = path.join(this.config.crawlDir, domain.replace(/\./g, "-"));

      // Save site profile
      await fs.writeFile(
        path.join(this.state.crawlDir!, "_site_profile.json"),
        JSON.stringify(siteProfile, null, 2),
      );

      // Deterministic step: a zeroed usage entry keeps the accounting uniform.
      return {
        success: true,
        data: siteProfile,
        usage: { step: "00-scrape", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: 0 },
      };
    });
  }

  // Step 00a: Site Audit (DataForSEO)
  await this.runStep("00a-site-audit", "IST-Analysis (DataForSEO)", async () => {
    const result = await executeSiteAudit(this.state, this.config);
    if (result.success && result.data) {
      this.state.siteAudit = result.data;
    }
    return result;
  });

  // Step 00b: Research (real web data via journaling)
  await this.runStep("00b-research", "Industry & Company Research", async () => {
    const result = await executeResearch(this.state);
    if (result.success && result.data) {
      this.state.researchData = result.data;
    }
    return result;
  });

  // Step 1: Extract facts
  await this.runStep("01-extract", "Extracting Facts from Briefing", async () => {
    const result = await executeExtract(this.state, this.config);
    if (result.success) this.state.facts = result.data;
    return result;
  });

  // Step 2: Audit features
  await this.runStep("02-audit", "Auditing Features (Skeptical Review)", async () => {
    const result = await executeAudit(this.state, this.config);
    if (result.success) this.state.auditedFacts = result.data;
    return result;
  });

  // Step 3: Strategic analysis
  await this.runStep("03-strategize", "Strategic Analysis", async () => {
    const result = await executeStrategize(this.state, this.config);
    if (result.success) {
      this.state.briefingSummary = result.data.briefingSummary;
      this.state.designVision = result.data.designVision;
    }
    return result;
  });

  // Step 4: Sitemap architecture
  await this.runStep("04-architect", "Information Architecture", async () => {
    const result = await executeArchitect(this.state, this.config);
    if (result.success) {
      this.state.sitemap = result.data.sitemap;
      this.state.websiteTopic = result.data.websiteTopic;
    }
    return result;
  });

  // Assemble the final concept and persist it (saveState is defined elsewhere
  // in this class).
  const projectConcept = this.buildProjectConcept();
  await this.saveState(projectConcept);

  return projectConcept;
}
||||
|
||||
/**
|
||||
* Run a single step with callbacks and error handling.
|
||||
*/
|
||||
private async runStep(
|
||||
stepId: string,
|
||||
stepName: string,
|
||||
executor: () => Promise<StepResult>,
|
||||
): Promise<void> {
|
||||
this.callbacks.onStepStart?.(stepId, stepName);
|
||||
console.log(`\n📍 ${stepName}...`);
|
||||
|
||||
try {
|
||||
const result = await executor();
|
||||
if (result.usage) {
|
||||
this.state.usage.perStep.push(result.usage);
|
||||
this.state.usage.totalPromptTokens += result.usage.promptTokens;
|
||||
this.state.usage.totalCompletionTokens += result.usage.completionTokens;
|
||||
this.state.usage.totalCost += result.usage.cost;
|
||||
}
|
||||
|
||||
if (result.success) {
|
||||
const cost = result.usage?.cost ? ` ($${result.usage.cost.toFixed(4)})` : "";
|
||||
const duration = result.usage?.durationMs ? ` [${(result.usage.durationMs / 1000).toFixed(1)}s]` : "";
|
||||
console.log(` ✅ ${stepName} complete${cost}${duration}`);
|
||||
this.callbacks.onStepComplete?.(stepId, result);
|
||||
} else {
|
||||
console.error(` ❌ ${stepName} failed: ${result.error}`);
|
||||
this.callbacks.onStepError?.(stepId, result.error || "Unknown error");
|
||||
throw new Error(result.error);
|
||||
}
|
||||
} catch (err) {
|
||||
const errorMsg = (err as Error).message;
|
||||
this.callbacks.onStepError?.(stepId, errorMsg);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the final Concept object.
|
||||
*/
|
||||
private buildProjectConcept(): ProjectConcept {
|
||||
return {
|
||||
domain: this.state.siteProfile?.domain || "unknown",
|
||||
timestamp: new Date().toISOString(),
|
||||
briefing: this.state.briefing,
|
||||
auditedFacts: this.state.auditedFacts || {},
|
||||
siteProfile: this.state.siteProfile,
|
||||
siteAudit: this.state.siteAudit,
|
||||
researchData: this.state.researchData,
|
||||
strategy: {
|
||||
briefingSummary: this.state.briefingSummary || "",
|
||||
designVision: this.state.designVision || "",
|
||||
},
|
||||
architecture: {
|
||||
websiteTopic: this.state.websiteTopic || "",
|
||||
sitemap: this.state.sitemap || [],
|
||||
},
|
||||
usage: this.state.usage,
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Persist the finished concept (and a full debug trace of the internal
   * state) to `<outputDir>/concepts/`, then print a usage/cost summary.
   *
   * Two files are written per run, both keyed by company name + timestamp:
   *  - `<company>_<ts>.json`        — the final ProjectConcept
   *  - `<company>_<ts>_debug.json`  — the raw pipeline state for debugging
   *
   * @param concept - The assembled ProjectConcept to write.
   */
  private async saveState(concept: ProjectConcept): Promise<void> {
    // ':' and '.' are invalid/awkward in filenames on some platforms.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const companyName = this.state.auditedFacts?.companyName || "unknown";

    const stateDir = path.join(this.config.outputDir, "concepts");
    await fs.mkdir(stateDir, { recursive: true });

    const statePath = path.join(stateDir, `${companyName}_${timestamp}.json`);
    await fs.writeFile(statePath, JSON.stringify(concept, null, 2));
    console.log(`\n📦 Saved Project Concept to: ${statePath}`);

    // Save debug trace — the complete internal state, not just the concept.
    const debugPath = path.join(stateDir, `${companyName}_${timestamp}_debug.json`);
    await fs.writeFile(debugPath, JSON.stringify(this.state, null, 2));

    // Print usage summary (only steps that actually incurred cost).
    console.log("\n──────────────────────────────────────────────");
    console.log("📊 PIPELINE USAGE SUMMARY");
    console.log("──────────────────────────────────────────────");
    for (const step of this.state.usage.perStep) {
      if (step.cost > 0) {
        console.log(` ${step.step}: ${step.model} — $${step.cost.toFixed(6)} (${(step.durationMs / 1000).toFixed(1)}s)`);
      }
    }
    console.log("──────────────────────────────────────────────");
    console.log(` TOTAL: $${this.state.usage.totalCost.toFixed(6)}`);
    console.log(` Tokens: ${(this.state.usage.totalPromptTokens + this.state.usage.totalCompletionTokens).toLocaleString()}`);
    console.log("──────────────────────────────────────────────\n");
  }
|
||||
|
||||
  /**
   * Get the current internal pipeline state (for CLI inspection).
   *
   * Returns a live reference, not a copy — callers mutating the returned
   * object will affect the running pipeline.
   */
  getState(): ConceptState {
    return this.state;
  }
|
||||
}
|
||||
432
packages/concept-engine/src/scraper.ts
Normal file
432
packages/concept-engine/src/scraper.ts
Normal file
@@ -0,0 +1,432 @@
|
||||
// ============================================================================
|
||||
// Scraper — Zyte API + Local Persistence
|
||||
// Crawls all pages of a website, stores them locally for reuse.
|
||||
// ============================================================================
|
||||
|
||||
import axios from "axios";
|
||||
import * as cheerio from "cheerio";
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as path from "node:path";
|
||||
import { existsSync } from "node:fs";
|
||||
import type { CrawledPage, PageType } from "./types.js";
|
||||
|
||||
/** Configuration for the site crawler. */
interface ScraperConfig {
  /** Zyte API key; when absent, pages are fetched via plain HTTP GET instead. */
  zyteApiKey?: string;
  /** Directory under which crawled HTML and per-page metadata are persisted. */
  crawlDir: string;
  /** Upper bound on pages to crawl; crawlSite() defaults this to 30. */
  maxPages?: number;
}
|
||||
|
||||
/**
|
||||
* Classify a URL pathname into a page type.
|
||||
*/
|
||||
function classifyPage(pathname: string): PageType {
|
||||
const p = pathname.toLowerCase();
|
||||
if (p === "/" || p === "" || p === "/index.html") return "home";
|
||||
if (p.includes("service") || p.includes("leistung") || p.includes("kompetenz"))
|
||||
return "service";
|
||||
if (p.includes("about") || p.includes("ueber") || p.includes("über") || p.includes("unternehmen"))
|
||||
return "about";
|
||||
if (p.includes("contact") || p.includes("kontakt")) return "contact";
|
||||
if (p.includes("job") || p.includes("karriere") || p.includes("career") || p.includes("human-resources"))
|
||||
return "career";
|
||||
if (p.includes("portfolio") || p.includes("referenz") || p.includes("projekt") || p.includes("case-study"))
|
||||
return "portfolio";
|
||||
if (p.includes("blog") || p.includes("news") || p.includes("aktuelles") || p.includes("magazin"))
|
||||
return "blog";
|
||||
if (p.includes("legal") || p.includes("impressum") || p.includes("datenschutz") || p.includes("privacy") || p.includes("agb"))
|
||||
return "legal";
|
||||
return "other";
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect interactive features present on a page.
|
||||
*/
|
||||
function detectFeatures($: cheerio.CheerioAPI): string[] {
|
||||
const features: string[] = [];
|
||||
|
||||
// Search
|
||||
if (
|
||||
$('input[type="search"]').length > 0 ||
|
||||
$('form[role="search"]').length > 0 ||
|
||||
$(".search-form, .search-box, #search, .searchbar").length > 0 ||
|
||||
$('input[name="q"], input[name="s"], input[name="search"]').length > 0
|
||||
) {
|
||||
features.push("search");
|
||||
}
|
||||
|
||||
// Forms (beyond search)
|
||||
const formCount = $("form").length;
|
||||
const searchForms = $('form[role="search"], .search-form').length;
|
||||
if (formCount > searchForms) {
|
||||
features.push("forms");
|
||||
}
|
||||
|
||||
// Maps
|
||||
if (
|
||||
$('iframe[src*="google.com/maps"], iframe[src*="openstreetmap"], .map-container, #map, [data-map]').length > 0
|
||||
) {
|
||||
features.push("maps");
|
||||
}
|
||||
|
||||
// Video
|
||||
if (
|
||||
$("video, iframe[src*='youtube'], iframe[src*='vimeo'], .video-container").length > 0
|
||||
) {
|
||||
features.push("video");
|
||||
}
|
||||
|
||||
// Calendar / Events
|
||||
if ($(".calendar, .event, [data-calendar]").length > 0) {
|
||||
features.push("calendar");
|
||||
}
|
||||
|
||||
// Cookie consent
|
||||
if ($(".cookie-banner, .cookie-consent, #cookie-notice, [data-cookie]").length > 0) {
|
||||
features.push("cookie-consent");
|
||||
}
|
||||
|
||||
return features;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract all internal links from a page.
|
||||
*/
|
||||
function extractInternalLinks($: cheerio.CheerioAPI, origin: string): string[] {
|
||||
const links: string[] = [];
|
||||
$("a[href]").each((_, el) => {
|
||||
const href = $(el).attr("href");
|
||||
if (!href) return;
|
||||
try {
|
||||
const url = new URL(href, origin);
|
||||
if (url.origin === origin) {
|
||||
// Skip assets
|
||||
if (/\.(pdf|zip|jpg|jpeg|png|svg|webp|gif|css|js|ico|woff|woff2|ttf|eot)$/i.test(url.pathname)) return;
|
||||
// Skip anchors-only
|
||||
if (url.pathname === "/" && url.hash) return;
|
||||
links.push(url.pathname);
|
||||
}
|
||||
} catch {
|
||||
// Invalid URL, skip
|
||||
}
|
||||
});
|
||||
return [...new Set(links)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract all images from a page.
|
||||
*/
|
||||
function extractImages($: cheerio.CheerioAPI, origin: string): string[] {
|
||||
const images: string[] = [];
|
||||
|
||||
// Regular img tags
|
||||
$("img[src]").each((_, el) => {
|
||||
const src = $(el).attr("src");
|
||||
if (src) images.push(src);
|
||||
});
|
||||
|
||||
// CSS background images (inline styles)
|
||||
$("[style*='background-image']").each((_, el) => {
|
||||
const style = $(el).attr("style");
|
||||
const match = style?.match(/url\(['"]?(.*?)['"]?\)/);
|
||||
if (match && match[1]) {
|
||||
images.push(match[1]);
|
||||
}
|
||||
});
|
||||
|
||||
// Resolve URLs to absolute
|
||||
const absoluteImages: string[] = [];
|
||||
for (const img of images) {
|
||||
if (img.startsWith("data:image")) continue; // Skip inline base64
|
||||
try {
|
||||
const url = new URL(img, origin);
|
||||
// Ignore small tracking pixels or generic vectors
|
||||
if (url.pathname.endsWith(".svg") && !url.pathname.includes("logo")) continue;
|
||||
absoluteImages.push(url.href);
|
||||
} catch {
|
||||
// Invalid URL
|
||||
}
|
||||
}
|
||||
|
||||
return [...new Set(absoluteImages)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract services/competencies from text content.
|
||||
*/
|
||||
function extractServices(text: string): string[] {
|
||||
const services: string[] = [];
|
||||
// Common pattern: bulleted or newline-separated service lists
|
||||
const lines = text.split(/\n/).map((l) => l.trim()).filter((l) => l.length > 3 && l.length < 100);
|
||||
for (const line of lines) {
|
||||
// Skip generic boilerplate
|
||||
if (/cookie|datenschutz|impressum|copyright|©/i.test(line)) continue;
|
||||
if (/^(tel|fax|e-mail|mobil|web|http)/i.test(line)) continue;
|
||||
services.push(line);
|
||||
}
|
||||
return services;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch a page via Zyte API with browser rendering.
|
||||
*/
|
||||
async function fetchWithZyte(url: string, apiKey: string): Promise<string> {
|
||||
try {
|
||||
const resp = await axios.post(
|
||||
"https://api.zyte.com/v1/extract",
|
||||
{
|
||||
url,
|
||||
browserHtml: true,
|
||||
},
|
||||
{
|
||||
auth: { username: apiKey, password: "" },
|
||||
timeout: 60000,
|
||||
},
|
||||
);
|
||||
const html = resp.data.browserHtml || "";
|
||||
if (!html) {
|
||||
console.warn(` ⚠️ Zyte returned empty browserHtml for ${url}`);
|
||||
}
|
||||
return html;
|
||||
} catch (err: any) {
|
||||
if (err.response) {
|
||||
console.error(` ❌ Zyte API error ${err.response.status} for ${url}: ${err.response.data?.detail || err.response.statusText}`);
|
||||
// Rate limited — wait and retry once
|
||||
if (err.response.status === 429) {
|
||||
console.log(" ⏳ Rate limited, waiting 5s and retrying...");
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
return fetchWithZyte(url, apiKey);
|
||||
}
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch a page via simple HTTP GET (fallback).
|
||||
*/
|
||||
async function fetchDirect(url: string): Promise<string> {
|
||||
const resp = await axios.get(url, {
|
||||
timeout: 30000,
|
||||
headers: {
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
||||
},
|
||||
});
|
||||
return typeof resp.data === "string" ? resp.data : "";
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse an HTML string into a CrawledPage.
|
||||
*/
|
||||
function parsePage(html: string, url: string): CrawledPage {
|
||||
const $ = cheerio.load(html);
|
||||
const urlObj = new URL(url);
|
||||
|
||||
const title = $("title").text().trim();
|
||||
const headings = $("h1, h2, h3")
|
||||
.map((_, el) => $(el).text().trim())
|
||||
.get()
|
||||
.filter((h) => h.length > 0);
|
||||
|
||||
const navItems = $("nav a")
|
||||
.map((_, el) => $(el).text().trim())
|
||||
.get()
|
||||
.filter((t) => t.length > 0 && t.length < 100);
|
||||
|
||||
const bodyText = $("body")
|
||||
.text()
|
||||
.replace(/\s+/g, " ")
|
||||
.substring(0, 50000)
|
||||
.trim();
|
||||
|
||||
const features = detectFeatures($);
|
||||
const links = extractInternalLinks($, urlObj.origin);
|
||||
const images = extractImages($, urlObj.origin);
|
||||
|
||||
const description = $('meta[name="description"]').attr("content") || undefined;
|
||||
const ogTitle = $('meta[property="og:title"]').attr("content") || undefined;
|
||||
const ogImage = $('meta[property="og:image"]').attr("content") || undefined;
|
||||
|
||||
return {
|
||||
url,
|
||||
pathname: urlObj.pathname,
|
||||
title,
|
||||
html,
|
||||
text: bodyText,
|
||||
headings,
|
||||
navItems,
|
||||
features,
|
||||
type: classifyPage(urlObj.pathname),
|
||||
links,
|
||||
images,
|
||||
meta: { description, ogTitle, ogImage },
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Crawl a website breadth-first and persist every fetched page locally.
 *
 * If a completed crawl already exists on disk (marked by `_crawl_meta.json`),
 * it is loaded and returned instead of re-crawling. Otherwise up to
 * `config.maxPages` same-origin pages (default 30) are fetched — via Zyte
 * when an API key is configured, plain HTTP otherwise — and each page's
 * HTML plus a metadata JSON is written under `config.crawlDir/<domain>`.
 *
 * Note: a pathname counts toward the page budget once dequeued, even if
 * its fetch subsequently fails or returns a tiny response.
 *
 * @param targetUrl - Entry URL; its origin defines the crawl scope.
 * @param config - Crawl directory, optional Zyte key, optional page cap.
 * @returns All successfully fetched and parsed pages.
 */
export async function crawlSite(
  targetUrl: string,
  config: ScraperConfig,
): Promise<CrawledPage[]> {
  const urlObj = new URL(targetUrl);
  const origin = urlObj.origin;
  const domain = urlObj.hostname;
  // Dots become dashes so the directory name is filesystem-friendly.
  const domainDir = path.join(config.crawlDir, domain.replace(/\./g, "-"));

  // Check for existing crawl — the manifest marks a completed crawl.
  const metaFile = path.join(domainDir, "_crawl_meta.json");
  if (existsSync(metaFile)) {
    console.log(`📦 Found existing crawl for ${domain}. Loading from disk...`);
    return loadCrawlFromDisk(domainDir);
  }

  console.log(`🔍 Crawling ${targetUrl} via ${config.zyteApiKey ? "Zyte API" : "direct HTTP"}...`);

  // Ensure output dir
  await fs.mkdir(domainDir, { recursive: true });

  const maxPages = config.maxPages || 30;
  // Visited is keyed by pathname, so the same path with different query
  // strings is fetched only once.
  const visited = new Set<string>();
  const queue: string[] = [targetUrl];
  const pages: CrawledPage[] = [];

  while (queue.length > 0 && visited.size < maxPages) {
    const url = queue.shift()!;
    const urlPath = new URL(url).pathname;

    if (visited.has(urlPath)) continue;
    visited.add(urlPath);

    try {
      console.log(` ↳ Fetching ${url} (${visited.size}/${maxPages})...`);

      let html: string;
      if (config.zyteApiKey) {
        html = await fetchWithZyte(url, config.zyteApiKey);
      } else {
        html = await fetchDirect(url);
      }

      // Responses under 100 bytes are almost certainly errors/redirect stubs.
      if (!html || html.length < 100) {
        console.warn(` ⚠️ Empty/tiny response for ${url}, skipping.`);
        continue;
      }

      const page = parsePage(html, url);
      pages.push(page);

      // Save HTML + metadata to disk. The metadata deliberately omits the
      // raw `html` and `text` fields (HTML lives in its own file).
      const safeName = urlPath === "/" ? "index" : urlPath.replace(/\//g, "_").replace(/^_/, "");
      await fs.writeFile(path.join(domainDir, `${safeName}.html`), html);
      await fs.writeFile(
        path.join(domainDir, `${safeName}.meta.json`),
        JSON.stringify(
          {
            url: page.url,
            pathname: page.pathname,
            title: page.title,
            type: page.type,
            headings: page.headings,
            navItems: page.navItems,
            features: page.features,
            links: page.links,
            images: page.images,
            meta: page.meta,
          },
          null,
          2,
        ),
      );

      // Discover new links (breadth-first: appended to the queue tail).
      for (const link of page.links) {
        if (!visited.has(link)) {
          const fullUrl = `${origin}${link}`;
          queue.push(fullUrl);
        }
      }
    } catch (err) {
      // A single failed page should not abort the whole crawl.
      console.warn(` ⚠️ Failed to fetch ${url}: ${(err as Error).message}`);
    }
  }

  // Save crawl metadata — written last so its presence implies completion.
  await fs.writeFile(
    metaFile,
    JSON.stringify(
      {
        domain,
        crawledAt: new Date().toISOString(),
        totalPages: pages.length,
        urls: pages.map((p) => p.url),
      },
      null,
      2,
    ),
  );

  console.log(`✅ Crawled ${pages.length} pages for ${domain}. Saved to ${domainDir}`);
  return pages;
}
|
||||
|
||||
/**
|
||||
* Load a previously crawled site from disk.
|
||||
*/
|
||||
async function loadCrawlFromDisk(domainDir: string): Promise<CrawledPage[]> {
|
||||
const files = await fs.readdir(domainDir);
|
||||
const metaFiles = files.filter((f) => f.endsWith(".meta.json") && f !== "_crawl_meta.json");
|
||||
|
||||
const pages: CrawledPage[] = [];
|
||||
for (const metaFile of metaFiles) {
|
||||
const baseName = metaFile.replace(".meta.json", "");
|
||||
const htmlFile = `${baseName}.html`;
|
||||
|
||||
const meta = JSON.parse(await fs.readFile(path.join(domainDir, metaFile), "utf8"));
|
||||
let html = "";
|
||||
if (files.includes(htmlFile)) {
|
||||
html = await fs.readFile(path.join(domainDir, htmlFile), "utf8");
|
||||
}
|
||||
|
||||
const text = html
|
||||
? cheerio
|
||||
.load(html)("body")
|
||||
.text()
|
||||
.replace(/\s+/g, " ")
|
||||
.substring(0, 50000)
|
||||
.trim()
|
||||
: "";
|
||||
|
||||
pages.push({
|
||||
url: meta.url,
|
||||
pathname: meta.pathname,
|
||||
title: meta.title,
|
||||
html,
|
||||
text,
|
||||
headings: meta.headings || [],
|
||||
navItems: meta.navItems || [],
|
||||
features: meta.features || [],
|
||||
type: meta.type || "other",
|
||||
links: meta.links || [],
|
||||
images: meta.images || [],
|
||||
meta: meta.meta || {},
|
||||
});
|
||||
}
|
||||
|
||||
console.log(` 📂 Loaded ${pages.length} cached pages from disk.`);
|
||||
return pages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a cached crawl to force re-crawl.
|
||||
*/
|
||||
export async function clearCrawlCache(crawlDir: string, domain: string): Promise<void> {
|
||||
const domainDir = path.join(crawlDir, domain.replace(/\./g, "-"));
|
||||
if (existsSync(domainDir)) {
|
||||
await fs.rm(domainDir, { recursive: true, force: true });
|
||||
console.log(`🧹 Cleared crawl cache for ${domain}`);
|
||||
}
|
||||
}
|
||||
65
packages/concept-engine/src/steps/00a-site-audit.ts
Normal file
65
packages/concept-engine/src/steps/00a-site-audit.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
// ============================================================================
|
||||
// Step 00a: Site Audit (DataForSEO + AI)
|
||||
// ============================================================================
|
||||
|
||||
import { PageAuditor } from "@mintel/page-audit";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
|
||||
export async function executeSiteAudit(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.url) {
|
||||
return {
|
||||
success: true,
|
||||
data: null,
|
||||
usage: { step: "00a-site-audit", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const login = process.env.DATA_FOR_SEO_LOGIN || process.env.DATA_FOR_SEO_API_KEY?.split(":")?.[0];
|
||||
const password = process.env.DATA_FOR_SEO_PASSWORD || process.env.DATA_FOR_SEO_API_KEY?.split(":")?.slice(1)?.join(":");
|
||||
|
||||
if (!login || !password) {
|
||||
console.warn(" ⚠️ Site Audit skipped: DataForSEO credentials missing from environment.");
|
||||
return {
|
||||
success: true,
|
||||
data: null,
|
||||
usage: { step: "00a-site-audit", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
|
||||
};
|
||||
}
|
||||
|
||||
const auditor = new PageAuditor({
|
||||
dataForSeoLogin: login,
|
||||
dataForSeoPassword: password,
|
||||
openrouterKey: config.openrouterKey,
|
||||
outputDir: config.outputDir ? `${config.outputDir}/audits` : undefined,
|
||||
});
|
||||
|
||||
// Run audit (max 20 pages for the estimation phase to keep it fast)
|
||||
const result = await auditor.audit(state.url, { maxPages: 20 });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: result,
|
||||
usage: {
|
||||
step: "00a-site-audit",
|
||||
model: "dataforseo",
|
||||
cost: 0, // DataForSEO cost tracking could be added later
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err: any) {
|
||||
console.warn(` ⚠️ Site Audit failed, skipping: ${err.message}`);
|
||||
return {
|
||||
success: true,
|
||||
data: null,
|
||||
usage: { step: "00a-site-audit", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
|
||||
};
|
||||
}
|
||||
}
|
||||
121
packages/concept-engine/src/steps/00b-research.ts
Normal file
121
packages/concept-engine/src/steps/00b-research.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
// ============================================================================
|
||||
// Step 00b: Research — Industry Research via @mintel/journaling (No LLM hallus)
|
||||
// Uses Serper API for real web search results about the industry/company.
|
||||
// ============================================================================
|
||||
|
||||
import type { ConceptState, StepResult } from "../types.js";
|
||||
|
||||
/** Grounded web-research findings collected by Step 00b. */
interface ResearchResult {
  /** Facts found about the company itself (capped at 5 entries). */
  companyContext: string[];
  /** Industry-level insights (capped at 5 entries). */
  industryInsights: string[];
  /** Competitor information — currently never populated by executeResearch. */
  competitorInfo: string[];
}
|
||||
|
||||
/**
 * Step 00b: research the company and industry using real web search data.
 *
 * Uses @mintel/journaling's ResearchAgent (dynamically imported) so results
 * are grounded in real sources rather than LLM recall. The step never fails
 * the pipeline: every error path resolves with `success: true` and empty
 * result arrays.
 *
 * NOTE: The journaling package can cause unhandled promise rejections that
 * would otherwise crash the process, so each agent call goes through
 * `safeCall`, which temporarily installs a process-level
 * "unhandledRejection" listener.
 *
 * @param state - Pipeline state; reads siteProfile (company name, services,
 *   domain) to build search queries.
 */
export async function executeResearch(
  state: ConceptState,
): Promise<StepResult<ResearchResult>> {
  const startTime = Date.now();

  const companyName = state.siteProfile?.companyInfo?.name || "";
  // Top 3 services double as a rough "industry topic" for the queries.
  const websiteTopic = state.siteProfile?.services?.slice(0, 3).join(", ") || "";
  const domain = state.siteProfile?.domain || "";

  // Nothing to search for — return an empty (but successful) result.
  if (!companyName && !websiteTopic && !domain) {
    return {
      success: true,
      data: { companyContext: [], industryInsights: [], competitorInfo: [] },
      usage: { step: "00b-research", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: 0 },
    };
  }

  // Safety wrapper: catch ANY unhandled rejections during this step.
  // NOTE(review): the listener is process-global, so an unrelated unhandled
  // rejection elsewhere in the process during this window would also resolve
  // the call with `fallback` — acceptable here since fallback is "no data",
  // but worth confirming if this pattern is reused.
  const safeCall = <T>(fn: () => Promise<T>, fallback: T): Promise<T> => {
    return new Promise<T>((resolve) => {
      const handler = (err: any) => {
        console.warn(` ⚠️ Unhandled rejection caught in research: ${err?.message || err}`);
        process.removeListener("unhandledRejection", handler);
        resolve(fallback);
      };
      process.on("unhandledRejection", handler);

      fn()
        .then((result) => {
          process.removeListener("unhandledRejection", handler);
          resolve(result);
        })
        .catch((err) => {
          process.removeListener("unhandledRejection", handler);
          console.warn(` ⚠️ Research call failed: ${err?.message || err}`);
          resolve(fallback);
        });
    });
  };

  try {
    // Dynamic import keeps the journaling package optional at load time.
    const { ResearchAgent } = await import("@mintel/journaling");
    const agent = new ResearchAgent(process.env.OPENROUTER_API_KEY || "");

    const results: ResearchResult = {
      companyContext: [],
      industryInsights: [],
      competitorInfo: [],
    };

    // 1. Research the company itself
    if (companyName || domain) {
      const searchQuery = companyName
        ? `${companyName} ${websiteTopic} Unternehmen`
        : `site:${domain}`;

      console.log(` 🔍 Researching: "${searchQuery}"...`);
      const facts = await safeCall(
        () => agent.researchTopic(searchQuery),
        [] as any[],
      );
      // The agent's fact shape varies; accept any of the known field names.
      results.companyContext = (facts || [])
        .filter((f: any) => f?.fact || f?.value || f?.text || f?.statement)
        .map((f: any) => f.fact || f.value || f.text || f.statement)
        .slice(0, 5);
    }

    // 2. Industry research
    if (websiteTopic) {
      console.log(` 🔍 Researching industry: "${websiteTopic}"...`);
      const insights = await safeCall(
        () => agent.researchCompetitors(websiteTopic),
        [] as any[],
      );
      results.industryInsights = (insights || []).slice(0, 5);
    }

    const totalFacts = results.companyContext.length + results.industryInsights.length + results.competitorInfo.length;
    console.log(` 📊 Research found ${totalFacts} data points.`);

    return {
      success: true,
      data: results,
      usage: {
        step: "00b-research",
        model: "serper/datacommons",
        promptTokens: 0,
        completionTokens: 0,
        cost: 0,
        durationMs: Date.now() - startTime,
      },
    };
  } catch (err) {
    // Import failure or any other unexpected error: skip, don't abort.
    console.warn(` ⚠️ Research step skipped: ${(err as Error).message}`);
    return {
      success: true,
      data: { companyContext: [], industryInsights: [], competitorInfo: [] },
      usage: { step: "00b-research", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
    };
  }
}
|
||||
108
packages/concept-engine/src/steps/01-extract.ts
Normal file
108
packages/concept-engine/src/steps/01-extract.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
// ============================================================================
|
||||
// Step 01: Extract — Briefing Fact Extraction (Gemini Flash)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
/**
 * Step 01: extract verifiable facts from the briefing via an LLM (flash model).
 *
 * Builds a site-analysis context block from the deterministic crawler output
 * (state.siteProfile) and sends it together with the briefing text to
 * `llmJsonRequest`, expecting a flat German-language JSON fact object.
 *
 * @param state - Pipeline state; reads briefing, comments, and siteProfile.
 * @param config - Pipeline configuration (model overrides, OpenRouter key).
 * @returns success with the parsed fact object and usage, or failure with
 *   an error message when the LLM call throws.
 */
export async function executeExtract(
  state: ConceptState,
  config: PipelineConfig,
): Promise<StepResult> {
  const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
  const startTime = Date.now();

  // Build site context from the deterministic analyzer.
  // The prompt text below is runtime data — it must stay exactly as-is.
  const siteContext = state.siteProfile
    ? `
EXISTING WEBSITE ANALYSIS (FACTS — verifiably crawled, NOT guessed):
- Domain: ${state.siteProfile.domain}
- Total pages crawled: ${state.siteProfile.totalPages}
- Navigation items: ${state.siteProfile.navigation.map((n) => n.label).join(", ") || "nicht erkannt"}
- Existing features: ${state.siteProfile.existingFeatures.join(", ") || "keine"}
- Services / Kompetenzen: ${state.siteProfile.services.join(" | ") || "keine"}
- Employee count (from website text): ${(state.siteProfile as any).employeeCount || "nicht genannt"}
- Company name: ${state.siteProfile.companyInfo.name || "unbekannt"}
- Address: ${state.siteProfile.companyInfo.address || "unbekannt"}
- Tax ID (USt-ID): ${state.siteProfile.companyInfo.taxId || "unbekannt"}
- HRB: ${state.siteProfile.companyInfo.registerNumber || "unbekannt"}
- Managing Director: ${state.siteProfile.companyInfo.managingDirector || "unbekannt"}
- External related domains (HAVE OWN WEBSITES — DO NOT include as sub-pages!): ${state.siteProfile.externalDomains.join(", ") || "keine"}
- Social links: ${Object.entries(state.siteProfile.socialLinks).map(([k, v]) => `${k}: ${v}`).join(", ") || "keine"}
`
    : "No existing website data available.";

  const systemPrompt = `
You are a precision fact extractor. Your only job: extract verifiable facts from the BRIEFING.
Output language: GERMAN (strict).
Output format: flat JSON at root level. No nesting except arrays.

### CRITICAL RULES:
1. "employeeCount": take from SITE ANALYSIS if available. Only override if briefing states something more specific.
2. External domains (e.g. "etib-ing.com") have their OWN website. NEVER include them as sub-pages.
3. Videos (Messefilm, Imagefilm) are CONTENT ASSETS, not pages.
4. If existing site already has search, include "search" in functions.
5. DO NOT invent pages not mentioned in briefing or existing navigation.

### CONSERVATIVE RULE:
- simple lists (Jobs, Referenzen, Messen) = pages, NOT features
- Assume "page" as default. Only add "feature" for complex interactive systems.

### OUTPUT FORMAT:
{
"companyName": string,
"companyAddress": string,
"personName": string,
"email": string,
"existingWebsite": string,
"websiteTopic": string, // MAX 3 words
"isRelaunch": boolean,
"employeeCount": string, // from site analysis, e.g. "über 50"
"pages": string[], // ALL pages: ["Startseite", "Über Uns", "Leistungen", ...]
"functions": string[], // search, forms, maps, video, cookie_consent, etc.
"assets": string[], // existing_website, logo, media, photos, videos
"deadline": string,
"targetAudience": string,
"cmsSetup": boolean,
"multilang": boolean
}

BANNED OUTPUT KEYS: "selectedPages", "otherPages", "features", "apiSystems" — use pages[] and functions[] ONLY.
`;

  const userPrompt = `BRIEFING (TRUTH SOURCE):
${state.briefing}

COMMENTS:
${state.comments || "keine"}

${siteContext}`;

  try {
    const { data, usage } = await llmJsonRequest({
      model: models.flash,
      systemPrompt,
      userPrompt,
      apiKey: config.openrouterKey,
    });

    return {
      success: true,
      data,
      usage: {
        step: "01-extract",
        model: models.flash,
        promptTokens: usage.promptTokens,
        completionTokens: usage.completionTokens,
        cost: usage.cost,
        durationMs: Date.now() - startTime,
      },
    };
  } catch (err) {
    return {
      success: false,
      error: `Extract step failed: ${(err as Error).message}`,
    };
  }
}
|
||||
110
packages/concept-engine/src/steps/02-audit.ts
Normal file
110
packages/concept-engine/src/steps/02-audit.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
// ============================================================================
|
||||
// Step 02: Audit — Feature Auditor + Skeptical Review (Gemini Flash)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeAudit(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.facts) {
|
||||
return { success: false, error: "No facts from Step 01 available." };
|
||||
}
|
||||
|
||||
const systemPrompt = `
|
||||
You are a "Strict Cost Controller". Your mission is to prevent over-billing.
|
||||
Review the extracted FEATURES against the BRIEFING and the EXISTING SITE ANALYSIS.
|
||||
|
||||
### RULE OF THUMB:
|
||||
- A "Feature" (1.500 €) is ONLY justified for complex, dynamic systems (logic, database, CMS-driven management, advanced filtering).
|
||||
- Simple lists, information sections, or static descriptions (e.g., "Messen", "Team", "Historie", "Jobs" as mere text) are ALWAYS "Pages" (600 €).
|
||||
- If the briefing doesn't explicitly mention "Management System", "Filterable Database", or "Client Login", it is a PAGE.
|
||||
|
||||
### ADDITIONAL CHECKS:
|
||||
1. If any feature maps to an entity that has its own external website (listed in EXTERNAL_DOMAINS), remove it entirely — it's out of scope.
|
||||
2. Videos are ASSETS not pages. Remove any video-related entries from pages.
|
||||
3. If the existing site has features (search, forms, etc.), ensure they are in the functions list.
|
||||
|
||||
### MISSION:
|
||||
Return the corrected 'features', 'otherPages', and 'functions' arrays.
|
||||
|
||||
### OUTPUT FORMAT:
|
||||
{
|
||||
"features": string[],
|
||||
"otherPages": string[],
|
||||
"functions": string[],
|
||||
"removedItems": [{ "item": string, "reason": string }],
|
||||
"addedItems": [{ "item": string, "reason": string }]
|
||||
}
|
||||
`;
|
||||
|
||||
const userPrompt = `
|
||||
EXTRACTED FACTS:
|
||||
${JSON.stringify(state.facts, null, 2)}
|
||||
|
||||
BRIEFING:
|
||||
${state.briefing}
|
||||
|
||||
EXTERNAL DOMAINS (have own websites, OUT OF SCOPE):
|
||||
${state.siteProfile?.externalDomains?.join(", ") || "none"}
|
||||
|
||||
EXISTING FEATURES ON CURRENT SITE:
|
||||
${state.siteProfile?.existingFeatures?.join(", ") || "none"}
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.flash,
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
// Apply audit results to facts
|
||||
const auditedFacts = { ...state.facts };
|
||||
auditedFacts.features = data.features || [];
|
||||
auditedFacts.otherPages = [
|
||||
...new Set([...(auditedFacts.otherPages || []), ...(data.otherPages || [])]),
|
||||
];
|
||||
if (data.functions) {
|
||||
auditedFacts.functions = [
|
||||
...new Set([...(auditedFacts.functions || []), ...data.functions]),
|
||||
];
|
||||
}
|
||||
|
||||
// Log changes
|
||||
if (data.removedItems?.length) {
|
||||
console.log(" 📉 Audit removed:");
|
||||
for (const item of data.removedItems) {
|
||||
console.log(` - ${item.item}: ${item.reason}`);
|
||||
}
|
||||
}
|
||||
if (data.addedItems?.length) {
|
||||
console.log(" 📈 Audit added:");
|
||||
for (const item of data.addedItems) {
|
||||
console.log(` + ${item.item}: ${item.reason}`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: auditedFacts,
|
||||
usage: {
|
||||
step: "02-audit",
|
||||
model: models.flash,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Audit step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
99
packages/concept-engine/src/steps/03-strategize.ts
Normal file
99
packages/concept-engine/src/steps/03-strategize.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// ============================================================================
|
||||
// Step 03: Strategize — Briefing Summary + Design Vision (Gemini Pro)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeStrategize(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.auditedFacts) {
|
||||
return { success: false, error: "No audited facts from Step 02 available." };
|
||||
}
|
||||
|
||||
const systemPrompt = `
|
||||
You are a high-end Digital Architect. Your goal is to make the CUSTOMER feel 100% understood.
|
||||
Analyze the BRIEFING and the EXISTING WEBSITE context.
|
||||
|
||||
### OBJECTIVE:
|
||||
1. **briefingSummary**: Ein sachlicher, tiefgehender Überblick der Unternehmenslage.
|
||||
- STIL: Keine Ich-Form. Keine Marketing-Floskeln. Nutze präzise Fachbegriffe. Sei prägnant.
|
||||
- FORM: EXAKT ZWEI ABSÄTZE. Insgesamt ca. 6 Sätze.
|
||||
- INHALT: Status Quo, was der Kunde will, welcher Sprung notwendig ist.
|
||||
- ABSOLUTE REGEL: Keine Halluzinationen. Keine namentlichen Nennungen von Personen.
|
||||
- RELAUNCH-REGEL: Wenn isRelaunch=true, NICHT sagen "keine digitale Präsenz". Es GIBT eine Seite.
|
||||
- SORGLOS BETRIEB: MUSS erwähnt werden als Teil des Gesamtpakets.
|
||||
|
||||
2. **designVision**: Ein abstraktes, strategisches Konzept.
|
||||
- STIL: Rein konzeptionell. Keine Umsetzungsschritte. Keine Ich-Form. Sei prägnant.
|
||||
- FORM: EXAKT ZWEI ABSÄTZE. Insgesamt ca. 4 Sätze.
|
||||
- DATENSCHUTZ: KEINERLEI namentliche Nennungen.
|
||||
- FOKUS: Welche strategische Wirkung soll erzielt werden?
|
||||
|
||||
### RULES:
|
||||
- NO "wir/unser". NO "Ich/Mein". Objective, fact-oriented narrative.
|
||||
- NO marketing lingo. NO "innovativ", "revolutionär", "state-of-the-art".
|
||||
- NO hallucinations about features not in the briefing.
|
||||
- NO "SEO-Standards zur Fachkräftesicherung" or "B2B-Nutzerströme" — das ist Schwachsinn.
|
||||
Use specific industry terms from the briefing (e.g. "Kabeltiefbau", "HDD-Bohrverfahren").
|
||||
- LANGUAGE: Professional German. Simple but expert-level.
|
||||
|
||||
### OUTPUT FORMAT:
|
||||
{
|
||||
"briefingSummary": string,
|
||||
"designVision": string
|
||||
}
|
||||
`;
|
||||
|
||||
const userPrompt = `
|
||||
BRIEFING (TRUTH SOURCE):
|
||||
${state.briefing}
|
||||
|
||||
EXISTING WEBSITE DATA:
|
||||
- Services: ${state.siteProfile?.services?.join(", ") || "unbekannt"}
|
||||
- Navigation: ${state.siteProfile?.navigation?.map((n) => n.label).join(", ") || "unbekannt"}
|
||||
- Company: ${state.auditedFacts.companyName || "unbekannt"}
|
||||
|
||||
EXTRACTED & AUDITED FACTS:
|
||||
${JSON.stringify(state.auditedFacts, null, 2)}
|
||||
|
||||
${state.siteAudit?.report ? `
|
||||
TECHNICAL SITE AUDIT (IST-Analyse):
|
||||
Health: ${state.siteAudit.report.overallHealth} (SEO: ${state.siteAudit.report.seoScore}, UX: ${state.siteAudit.report.uxScore}, Perf: ${state.siteAudit.report.performanceScore})
|
||||
- Executive Summary: ${state.siteAudit.report.executiveSummary}
|
||||
- Strengths: ${state.siteAudit.report.strengths.join(", ")}
|
||||
- Critical Issues: ${state.siteAudit.report.criticalIssues.join(", ")}
|
||||
- Quick Wins: ${state.siteAudit.report.quickWins.join(", ")}
|
||||
` : ""}
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.pro,
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data,
|
||||
usage: {
|
||||
step: "03-strategize",
|
||||
model: models.pro,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Strategize step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
133
packages/concept-engine/src/steps/04-architect.ts
Normal file
133
packages/concept-engine/src/steps/04-architect.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
// ============================================================================
|
||||
// Step 04: Architect — Sitemap & Information Architecture (Gemini Pro)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeArchitect(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.auditedFacts) {
|
||||
return { success: false, error: "No audited facts available." };
|
||||
}
|
||||
|
||||
// Build navigation constraint from the real site
|
||||
const existingNav = state.siteProfile?.navigation?.map((n) => n.label).join(", ") || "unbekannt";
|
||||
const existingServices = state.siteProfile?.services?.join(", ") || "unbekannt";
|
||||
const externalDomains = state.siteProfile?.externalDomains?.join(", ") || "keine";
|
||||
|
||||
const systemPrompt = `
|
||||
Du bist ein Senior UX Architekt. Erstelle einen ECHTEN SEITENBAUM für die neue Website.
|
||||
Regelwerk für den Output:
|
||||
|
||||
### SEITENBAUM-REGELN:
|
||||
1. KEIN MARKETINGSPRECH als Kategoriename. Gültige Kategorien sind nur die echten Navigationspunkte der Website.
|
||||
ERLAUBT: "Startseite", "Leistungen", "Über uns", "Karriere", "Referenzen", "Kontakt", "Rechtliches"
|
||||
VERBOTEN: "Kern-Präsenz", "Vertrauen", "Business Areas", "Digitaler Auftritt"
|
||||
|
||||
2. LEISTUNGEN muss in ECHTE UNTERSEITEN aufgeteilt werden — nicht eine einzige "Leistungen"-Seite.
|
||||
Jede Kompetenz aus dem existierenden Leistungsspektrum = eine eigene Seite.
|
||||
Beispiel statt:
|
||||
{ category: "Leistungen", pages: [{ title: "Leistungen", desc: "..." }] }
|
||||
So:
|
||||
{ category: "Leistungen", pages: [
|
||||
{ title: "Kabeltiefbau", desc: "Mittelspannung, Niederspannung, Kabelpflugarbeiten..." },
|
||||
{ title: "Horizontalspülbohrungen", desc: "HDD in allen Bodenklassen..." },
|
||||
{ title: "Elektromontagen", desc: "Bis 110 kV, Glasfaserkabelmontagen..." },
|
||||
{ title: "Planung & Dokumentation", desc: "Genehmigungs- und Ausführungsplanung, Vermessung..." }
|
||||
]}
|
||||
|
||||
3. SEITENTITEL: Kurz, klar, faktisch. Kein Werbejargon.
|
||||
ERLAUBT: "Kabeltiefbau", "Über uns", "Karriere"
|
||||
VERBOTEN: "Unsere Expertise", "Kompetenzspektrum", "Community"
|
||||
|
||||
4. Gruppe die Leistungen nach dem ECHTEN Kompetenzkatalog der bestehenden Site — nicht erfinden.
|
||||
|
||||
5. Keine doppelten Seiten. Keine Phantomseiten.
|
||||
|
||||
6. Videos = Content-Assets, keine eigene Seite.
|
||||
|
||||
7. Entitäten mit eigener Domain (${externalDomains}) = NICHT als Seite. Nur als Teaser/Link wenn nötig.
|
||||
|
||||
### KONTEXT:
|
||||
Bestehende Navigation: ${existingNav}
|
||||
Bestehende Services: ${existingServices}
|
||||
Externe Domains (haben eigene Website): ${externalDomains}
|
||||
Angeforderte zusätzliche Seiten aus Briefing: ${(state.auditedFacts as any)?.pages?.join(", ") || "keine spezifischen"}
|
||||
|
||||
### OUTPUT FORMAT (JSON):
|
||||
{
|
||||
"websiteTopic": string, // MAX 3 Wörter, beschreibend
|
||||
"sitemap": [
|
||||
{
|
||||
"category": string, // Echter Nav-Eintrag. KEIN Marketingsprech.
|
||||
"pages": [
|
||||
{ "title": string, "desc": string } // Echte Unterseite, 1-2 Sätze Zweck
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
`;
|
||||
|
||||
const userPrompt = `
|
||||
BRIEFING:
|
||||
${state.briefing}
|
||||
|
||||
FAKTEN (aus Extraktion):
|
||||
${JSON.stringify({ facts: state.auditedFacts, strategy: { briefingSummary: state.briefingSummary } }, null, 2)}
|
||||
|
||||
Erstelle den Seitenbaum. Baue die Leistungen DETAILLIERT aus — echte Unterseiten pro Kompetenzbereich.
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.pro,
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
// Normalize sitemap structure
|
||||
let sitemap = data.sitemap;
|
||||
if (sitemap && !Array.isArray(sitemap)) {
|
||||
if (sitemap.categories) sitemap = sitemap.categories;
|
||||
else {
|
||||
const entries = Object.entries(sitemap);
|
||||
if (entries.every(([, v]) => Array.isArray(v))) {
|
||||
sitemap = entries.map(([category, pages]) => ({ category, pages }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(sitemap)) {
|
||||
sitemap = sitemap.map((cat: any) => ({
|
||||
category: cat.category || cat.kategorie || cat.Kategorie || "Allgemein",
|
||||
pages: (cat.pages || cat.seiten || []).map((page: any) => ({
|
||||
title: page.title || page.titel || "Seite",
|
||||
desc: page.desc || page.beschreibung || page.description || "",
|
||||
})),
|
||||
}));
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: { websiteTopic: data.websiteTopic, sitemap },
|
||||
usage: {
|
||||
step: "04-architect",
|
||||
model: models.pro,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Architect step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
233
packages/concept-engine/src/types.ts
Normal file
233
packages/concept-engine/src/types.ts
Normal file
@@ -0,0 +1,233 @@
|
||||
// ============================================================================
// @mintel/concept-engine — Core Type Definitions
// ============================================================================

/** Page types recognized during crawling. */
export type PageType =
  | "home"
  | "service"
  | "about"
  | "contact"
  | "career"
  | "portfolio"
  | "blog"
  | "legal"
  | "other";

/**
 * A single crawled page with extracted metadata.
 *
 * Holds both the raw capture (`html`, `text`) and structural data pulled
 * out of it (headings, nav items, links, images, meta tags).
 */
export interface CrawledPage {
  // Absolute URL the page was fetched from.
  url: string;
  // Path component of `url`.
  pathname: string;
  // Document title.
  title: string;
  // Raw HTML of the page.
  html: string;
  // Plain-text extraction of the page content.
  text: string;
  // Heading texts found on the page.
  headings: string[];
  // Labels of navigation entries found on the page.
  navItems: string[];
  // Detected feature markers — exact vocabulary defined by the crawler; confirm there.
  features: string[];
  // Classified page type.
  type: PageType;
  // Outgoing link URLs.
  links: string[];
  // Image URLs found on the page.
  images: string[];
  // Selected meta / OpenGraph tags.
  meta: {
    description?: string;
    ogTitle?: string;
    ogImage?: string;
  };
}

/** Navigation item extracted from <nav> elements. */
export interface NavItem {
  // Visible link text.
  label: string;
  // Link target.
  href: string;
  // Nested sub-navigation entries, if any.
  children?: NavItem[];
}

/**
 * Company info extracted from Impressum / footer.
 * All fields are best-effort and may be absent.
 */
export interface CompanyInfo {
  name?: string;
  address?: string;
  phone?: string;
  email?: string;
  // Tax identifier (presumably USt-IdNr.) — confirm against the extractor.
  taxId?: string;
  // Commercial register number (presumably HRB) — confirm against the extractor.
  registerNumber?: string;
  managingDirector?: string;
}
|
||||
|
||||
/**
 * A page in the site inventory — a condensed, LLM-friendly view of a
 * crawled page (no raw HTML).
 */
export interface PageInventoryItem {
  url: string;
  pathname: string;
  title: string;
  type: PageType;
  headings: string[];
  // Service names detected on this page.
  services: string[];
  hasSearch: boolean;
  hasForms: boolean;
  hasMap: boolean;
  hasVideo: boolean;
  // Short textual summary of the page content.
  contentSummary: string;
}

/** Full site profile — deterministic, no LLM involved. */
export interface SiteProfile {
  domain: string;
  // Crawl timestamp — presumably ISO 8601; confirm against the crawler.
  crawledAt: string;
  totalPages: number;
  navigation: NavItem[];
  // Features already present on the existing site (search, forms, …).
  existingFeatures: string[];
  services: string[];
  companyInfo: CompanyInfo;
  pageInventory: PageInventoryItem[];
  // Colors detected on the site — presumably brand colors; confirm against the crawler.
  colors: string[];
  // Social network name → profile URL.
  socialLinks: Record<string, string>;
  // Domains of related entities that have their own website (treated as out of scope).
  externalDomains: string[];
  images: string[];
  // Employee count as found in the site text, or null when not detected.
  employeeCount: string | null;
}
|
||||
|
||||
/** Configuration for the estimation pipeline. */
export interface PipelineConfig {
  // OpenRouter API key used for all LLM calls.
  openrouterKey: string;
  // Optional Zyte API key — presumably for crawling; confirm against the crawler.
  zyteApiKey?: string;
  // Directory where pipeline results are written.
  outputDir: string;
  // Directory where crawl data is stored.
  crawlDir: string;
  // Per-tier model overrides; steps merge these over DEFAULT_MODELS.
  modelsOverride?: Partial<ModelConfig>;
}
|
||||
|
||||
/**
 * Model routing configuration: OpenRouter model identifiers per
 * capability tier.
 */
export interface ModelConfig {
  // Fast, low-cost model (used by the extract and audit steps).
  flash: string;
  // Stronger model (used by the strategize and architect steps).
  pro: string;
  // Highest-capability model — usage not visible in this file.
  opus: string;
}

/** Default model routing; runs may override via PipelineConfig.modelsOverride. */
export const DEFAULT_MODELS: ModelConfig = {
  flash: "google/gemini-3-flash-preview",
  pro: "google/gemini-3.1-pro-preview",
  opus: "anthropic/claude-opus-4-6",
};
|
||||
|
||||
/** Input for a pipeline run. */
export interface PipelineInput {
  // Customer briefing text (the primary truth source for all steps).
  briefing: string;
  // URL of the customer's existing website, if any.
  url?: string;
  // Budget hint — free-form string.
  budget?: string;
  // Additional free-form comments.
  comments?: string;
  // Presumably discards cached crawl data when true — confirm against the pipeline.
  clearCache?: boolean;
}

/**
 * State that flows through all concept pipeline steps.
 * Each step reads earlier outputs and contributes its own.
 */
export interface ConceptState {
  // Input
  briefing: string;
  url?: string;
  comments?: string;

  // Output: Scrape & Analyze
  siteProfile?: SiteProfile;
  crawlDir?: string;

  // Output: Site Audit
  // NOTE(review): untyped; steps read `siteAudit.report.{overallHealth, seoScore,
  // uxScore, performanceScore, executiveSummary, strengths, criticalIssues,
  // quickWins}` — consider a concrete interface.
  siteAudit?: any;

  // Output: Research
  // NOTE(review): untyped; shape not visible in this file.
  researchData?: any;

  // Output: Extract (Step 01)
  facts?: Record<string, any>;

  // Output: Audit (Step 02)
  auditedFacts?: Record<string, any>;

  // Output: Strategy (Step 03)
  briefingSummary?: string;
  designVision?: string;

  // Output: Architecture (Step 04)
  sitemap?: SitemapCategory[];
  websiteTopic?: string;

  // Cost tracking
  usage: UsageStats;
}
|
||||
|
||||
/** Final output of the Concept Engine. */
export interface ProjectConcept {
  domain: string;
  // When the concept was produced — presumably ISO 8601; confirm at the call site.
  timestamp: string;
  briefing: string;
  auditedFacts: Record<string, any>;
  siteProfile?: SiteProfile;
  siteAudit?: any;
  researchData?: any;
  // Step 03 output.
  strategy: {
    briefingSummary: string;
    designVision: string;
  };
  // Step 04 output.
  architecture: {
    // Descriptive topic of the website (the prompt requests max 3 words).
    websiteTopic: string;
    sitemap: SitemapCategory[];
  };
  usage: UsageStats;
}

/** One sitemap category (a real navigation entry) with its sub-pages. */
export interface SitemapCategory {
  category: string;
  // Each page: short title plus a 1–2 sentence purpose description.
  pages: { title: string; desc: string }[];
}

/** Aggregated token/cost usage across all pipeline steps. */
export interface UsageStats {
  totalPromptTokens: number;
  totalCompletionTokens: number;
  // Total cost — presumably USD; confirm against the LLM client.
  totalCost: number;
  perStep: StepUsage[];
}

/** Usage record for a single pipeline step. */
export interface StepUsage {
  // Step identifier, e.g. "02-audit".
  step: string;
  // Model identifier used for the step.
  model: string;
  promptTokens: number;
  completionTokens: number;
  cost: number;
  durationMs: number;
}
|
||||
|
||||
/** Result of a single pipeline step. */
export interface StepResult<T = any> {
  // True when the step completed; `data` (and usually `usage`) are then set.
  success: boolean;
  // Step output; shape depends on the step.
  data?: T;
  // Human-readable error when `success` is false.
  error?: string;
  // Token/cost accounting, present when the step made an LLM call.
  usage?: StepUsage;
}

/** Validation result from the deterministic validator. */
export interface ValidationResult {
  passed: boolean;
  errors: ValidationError[];
  warnings: ValidationWarning[];
}

/** A hard validation failure. */
export interface ValidationError {
  // Machine-readable error code.
  code: string;
  message: string;
  // Field the error refers to, when applicable.
  field?: string;
  expected?: any;
  actual?: any;
}

/** A non-fatal validation finding. */
export interface ValidationWarning {
  code: string;
  message: string;
  // Suggested remediation, when available.
  suggestion?: string;
}

/** Step definition for the concept pipeline. */
export interface PipelineStep {
  // Stable step identifier, e.g. "02-audit".
  id: string;
  // Display name.
  name: string;
  description: string;
  // Model tier used by the step, or "none" for deterministic steps.
  model: "flash" | "pro" | "opus" | "none";
  // Executes the step against the shared state.
  execute: (
    state: ConceptState,
    config: PipelineConfig,
  ) => Promise<StepResult>;
}
|
||||
Reference in New Issue
Block a user