feat: migrate npm registry from Verdaccio to Gitea Packages
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"type": "module",
|
||||
"bin": {
|
||||
|
||||
40
packages/concept-engine/src/_test_pipeline.ts
Normal file
40
packages/concept-engine/src/_test_pipeline.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { config as dotenvConfig } from 'dotenv';
|
||||
import * as path from 'node:path';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { EstimationPipeline } from './pipeline.js';
|
||||
|
||||
dotenvConfig({ path: path.resolve(process.cwd(), '../../.env') });
|
||||
|
||||
const briefing = await fs.readFile(
|
||||
path.resolve(process.cwd(), '../../data/briefings/etib.txt'),
|
||||
'utf8',
|
||||
);
|
||||
|
||||
console.log(`Briefing loaded: ${briefing.length} chars`);
|
||||
|
||||
const pipeline = new EstimationPipeline(
|
||||
{
|
||||
openrouterKey: process.env.OPENROUTER_API_KEY || '',
|
||||
zyteApiKey: process.env.ZYTE_API_KEY,
|
||||
outputDir: path.resolve(process.cwd(), '../../out/estimations'),
|
||||
crawlDir: path.resolve(process.cwd(), '../../data/crawls'),
|
||||
},
|
||||
{
|
||||
onStepStart: (id, name) => console.log(`[CB] Starting: ${id}`),
|
||||
onStepComplete: (id) => console.log(`[CB] Done: ${id}`),
|
||||
onStepError: (id, err) => console.error(`[CB] Error in ${id}: ${err}`),
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
const result = await pipeline.run({
|
||||
briefing,
|
||||
url: 'https://www.e-tib.com',
|
||||
});
|
||||
|
||||
console.log('\n✨ Pipeline complete!');
|
||||
console.log('Validation:', result.validationResult?.passed ? 'PASSED' : 'FAILED');
|
||||
} catch (err: any) {
|
||||
console.error('\n❌ Pipeline failed:', err.message);
|
||||
console.error(err.stack);
|
||||
}
|
||||
334
packages/concept-engine/src/analyzer.ts
Normal file
334
packages/concept-engine/src/analyzer.ts
Normal file
@@ -0,0 +1,334 @@
|
||||
// ============================================================================
|
||||
// Analyzer — Deterministic Site Analysis (NO LLM!)
|
||||
// Builds a SiteProfile from crawled pages using pure code logic.
|
||||
// This is the core fix against hallucinated page structures.
|
||||
// ============================================================================
|
||||
|
||||
import type {
|
||||
CrawledPage,
|
||||
SiteProfile,
|
||||
NavItem,
|
||||
CompanyInfo,
|
||||
PageInventoryItem,
|
||||
} from "./types.js";
|
||||
|
||||
/**
|
||||
* Build a complete SiteProfile from an array of crawled pages.
|
||||
* This is 100% deterministic — no LLM calls involved.
|
||||
*/
|
||||
export function analyzeSite(pages: CrawledPage[], domain: string): SiteProfile {
|
||||
const navigation = extractNavigation(pages);
|
||||
const existingFeatures = extractExistingFeatures(pages);
|
||||
const services = extractAllServices(pages);
|
||||
const companyInfo = extractCompanyInfo(pages);
|
||||
const colors = extractColors(pages);
|
||||
const socialLinks = extractSocialLinks(pages);
|
||||
const externalDomains = extractExternalDomains(pages, domain);
|
||||
const images = extractAllImages(pages);
|
||||
const employeeCount = extractEmployeeCount(pages);
|
||||
const pageInventory = buildPageInventory(pages);
|
||||
|
||||
return {
|
||||
domain,
|
||||
crawledAt: new Date().toISOString(),
|
||||
totalPages: pages.filter((p) => p.type !== "legal").length,
|
||||
navigation,
|
||||
existingFeatures,
|
||||
services,
|
||||
companyInfo,
|
||||
pageInventory,
|
||||
colors,
|
||||
socialLinks,
|
||||
externalDomains,
|
||||
images,
|
||||
employeeCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the site's main navigation structure from <nav> elements.
|
||||
* Uses the HOME page's nav as the canonical source.
|
||||
*/
|
||||
function extractNavigation(pages: CrawledPage[]): NavItem[] {
|
||||
// Prefer the home page's nav
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
const sourcePage = homePage || pages[0];
|
||||
if (!sourcePage) return [];
|
||||
|
||||
// Deduplicate nav items
|
||||
const seen = new Set<string>();
|
||||
const navItems: NavItem[] = [];
|
||||
|
||||
for (const label of sourcePage.navItems) {
|
||||
const normalized = label.toLowerCase().trim();
|
||||
if (seen.has(normalized)) continue;
|
||||
if (normalized.length < 2) continue;
|
||||
seen.add(normalized);
|
||||
navItems.push({ label, href: "" });
|
||||
}
|
||||
|
||||
return navItems;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate all detected interactive features across all pages.
|
||||
*/
|
||||
function extractExistingFeatures(pages: CrawledPage[]): string[] {
|
||||
const allFeatures = new Set<string>();
|
||||
for (const page of pages) {
|
||||
for (const feature of page.features) {
|
||||
allFeatures.add(feature);
|
||||
}
|
||||
}
|
||||
return [...allFeatures];
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate all images found across all pages.
|
||||
*/
|
||||
function extractAllImages(pages: CrawledPage[]): string[] {
|
||||
const allImages = new Set<string>();
|
||||
for (const page of pages) {
|
||||
if (!page.images) continue;
|
||||
for (const img of page.images) {
|
||||
allImages.add(img);
|
||||
}
|
||||
}
|
||||
return [...allImages];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract employee count from page text.
|
||||
* Looks for patterns like "über 50 Mitarbeitern", "200 Mitarbeiter", "50+ employees".
|
||||
*/
|
||||
function extractEmployeeCount(pages: CrawledPage[]): string | null {
|
||||
const allText = pages.map((p) => p.text).join(" ");
|
||||
|
||||
// German patterns: 'über 50 Mitarbeitern', '120 Beschäftigte', '+200 MA'
|
||||
const patterns = [
|
||||
/(über|ca\.?|rund|mehr als|\+)?\s*(\d{1,4})\s*(Mitarbeiter(?:innen)?|Beschäftigte|MA|Fachkräfte)\b/gi,
|
||||
/(\d{1,4})\+?\s*(employees|team members)/gi,
|
||||
];
|
||||
|
||||
for (const pattern of patterns) {
|
||||
const match = allText.match(pattern);
|
||||
if (match && match[0]) {
|
||||
const num = match[0].match(/(\d{1,4})/)?.[1];
|
||||
const prefix = match[0].match(/über|ca\.?|rund|mehr als/i)?.[0];
|
||||
if (num) return prefix ? `${prefix} ${num}` : num;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract services/competencies from service-type pages.
|
||||
* Focuses on H2-H3 headings and list items on service pages.
|
||||
*/
|
||||
function extractAllServices(pages: CrawledPage[]): string[] {
|
||||
const servicePages = pages.filter(
|
||||
(p) => p.type === "service" || p.pathname.includes("kompetenz"),
|
||||
);
|
||||
|
||||
const services = new Set<string>();
|
||||
for (const page of servicePages) {
|
||||
// Use headings as primary service indicators
|
||||
for (const heading of page.headings) {
|
||||
const clean = heading.trim();
|
||||
if (clean.length > 3 && clean.length < 100) {
|
||||
// Skip generic headings
|
||||
if (/^(home|kontakt|impressum|datenschutz|menü|navigation|suche)/i.test(clean)) continue;
|
||||
services.add(clean);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no service pages found, look at the home page headings too
|
||||
if (services.size === 0) {
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
if (homePage) {
|
||||
for (const heading of homePage.headings) {
|
||||
const clean = heading.trim();
|
||||
if (clean.length > 3 && clean.length < 80) {
|
||||
services.add(clean);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return [...services];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract company information from Impressum / footer content.
|
||||
*/
|
||||
function extractCompanyInfo(pages: CrawledPage[]): CompanyInfo {
|
||||
const info: CompanyInfo = {};
|
||||
|
||||
// Find Impressum or legal page
|
||||
const legalPage = pages.find(
|
||||
(p) =>
|
||||
p.type === "legal" &&
|
||||
(p.pathname.includes("impressum") || p.title.toLowerCase().includes("impressum")),
|
||||
);
|
||||
|
||||
const sourceText = legalPage?.text || pages.find((p) => p.type === "home")?.text || "";
|
||||
|
||||
// USt-ID
|
||||
const taxMatch = sourceText.match(/USt[.\s-]*(?:ID[.\s-]*Nr\.?|IdNr\.?)[:\s]*([A-Z]{2}\d{9,11})/i);
|
||||
if (taxMatch) info.taxId = taxMatch[1];
|
||||
|
||||
// HRB number
|
||||
const hrbMatch = sourceText.match(/HRB[:\s]*(\d+\s*[A-Z]*)/i);
|
||||
if (hrbMatch) info.registerNumber = `HRB ${hrbMatch[1].trim()}`;
|
||||
|
||||
// Phone
|
||||
const phoneMatch = sourceText.match(/(?:Tel|Telefon|Fon)[.:\s]*([+\d\s()/-]{10,20})/i);
|
||||
if (phoneMatch) info.phone = phoneMatch[1].trim();
|
||||
|
||||
// Email
|
||||
const emailMatch = sourceText.match(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/);
|
||||
if (emailMatch) info.email = emailMatch[0];
|
||||
|
||||
// Address (look for German postal code pattern)
|
||||
const addressMatch = sourceText.match(
|
||||
/(?:[\w\s.-]+(?:straße|str\.|weg|platz|ring|allee|gasse)\s*\d+[a-z]?\s*,?\s*)?(?:D-)?(\d{5})\s+\w+/i,
|
||||
);
|
||||
if (addressMatch) info.address = addressMatch[0].trim();
|
||||
|
||||
// GF / Geschäftsführer
|
||||
const gfMatch = sourceText.match(
|
||||
/Geschäftsführ(?:er|ung)[:\s]*([A-ZÄÖÜ][a-zäöüß]+(?:\s+[A-ZÄÖÜ][a-zäöüß]+){1,3})/,
|
||||
);
|
||||
if (gfMatch) info.managingDirector = gfMatch[1].trim();
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract brand colors from HTML (inline styles, CSS variables).
|
||||
*/
|
||||
function extractColors(pages: CrawledPage[]): string[] {
|
||||
const colors = new Set<string>();
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
if (!homePage) return [];
|
||||
|
||||
const hexMatches = homePage.html.match(/#(?:[0-9a-fA-F]{3}){1,2}\b/g) || [];
|
||||
for (const hex of hexMatches) {
|
||||
colors.add(hex.toLowerCase());
|
||||
if (colors.size >= 8) break;
|
||||
}
|
||||
|
||||
return [...colors];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract social media links from footers / headers.
|
||||
*/
|
||||
function extractSocialLinks(pages: CrawledPage[]): Record<string, string> {
|
||||
const socials: Record<string, string> = {};
|
||||
const platforms = [
|
||||
{ key: "linkedin", patterns: ["linkedin.com"] },
|
||||
{ key: "instagram", patterns: ["instagram.com"] },
|
||||
{ key: "facebook", patterns: ["facebook.com", "fb.com"] },
|
||||
{ key: "youtube", patterns: ["youtube.com", "youtu.be"] },
|
||||
{ key: "twitter", patterns: ["twitter.com", "x.com"] },
|
||||
{ key: "xing", patterns: ["xing.com"] },
|
||||
];
|
||||
|
||||
const homePage = pages.find((p) => p.type === "home");
|
||||
if (!homePage) return socials;
|
||||
|
||||
const urlMatches = homePage.html.match(/https?:\/\/[^\s"'<>]+/g) || [];
|
||||
for (const url of urlMatches) {
|
||||
for (const platform of platforms) {
|
||||
if (platform.patterns.some((p) => url.includes(p)) && !socials[platform.key]) {
|
||||
socials[platform.key] = url;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return socials;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find domains that are linked but separate from the main domain.
|
||||
* Critical for detecting sister companies with own websites (e.g. etib-ing.com).
|
||||
*/
|
||||
function extractExternalDomains(pages: CrawledPage[], mainDomain: string): string[] {
|
||||
const externalDomains = new Set<string>();
|
||||
const cleanMain = mainDomain.replace(/^www\./, "");
|
||||
// Extract meaningful base parts: "e-tib.com" → ["e", "tib", "etib"]
|
||||
const mainParts = cleanMain.split(".")[0].toLowerCase().split(/[-_]/).filter(p => p.length > 1);
|
||||
const mainJoined = mainParts.join(""); // "etib"
|
||||
|
||||
for (const page of pages) {
|
||||
const linkMatches = page.html.match(/https?:\/\/[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g) || [];
|
||||
for (const url of linkMatches) {
|
||||
try {
|
||||
const urlObj = new URL(url);
|
||||
const domain = urlObj.hostname.replace(/^www\./, "");
|
||||
// Skip same domain
|
||||
if (domain === cleanMain) continue;
|
||||
// Skip common third-party services
|
||||
if (
|
||||
domain.includes("google") ||
|
||||
domain.includes("facebook") ||
|
||||
domain.includes("twitter") ||
|
||||
domain.includes("linkedin") ||
|
||||
domain.includes("instagram") ||
|
||||
domain.includes("youtube") ||
|
||||
domain.includes("cookie") ||
|
||||
domain.includes("analytics") ||
|
||||
domain.includes("cdn") ||
|
||||
domain.includes("cloudflare") ||
|
||||
domain.includes("fonts") ||
|
||||
domain.includes("jquery") ||
|
||||
domain.includes("bootstrap") ||
|
||||
domain.includes("wordpress") ||
|
||||
domain.includes("jimdo") ||
|
||||
domain.includes("wix")
|
||||
)
|
||||
continue;
|
||||
|
||||
// Fuzzy match: check if the domain contains any base part of the main domain
|
||||
// e.g. main="e-tib.com" → mainParts=["e","tib"], mainJoined="etib"
|
||||
// target="etib-ing.com" → domainBase="etib-ing", domainJoined="etibing"
|
||||
const domainBase = domain.split(".")[0].toLowerCase();
|
||||
const domainJoined = domainBase.replace(/[-_]/g, "");
|
||||
|
||||
const isRelated =
|
||||
domainJoined.includes(mainJoined) ||
|
||||
mainJoined.includes(domainJoined) ||
|
||||
mainParts.some(part => part.length > 2 && domainBase.includes(part));
|
||||
|
||||
if (isRelated) {
|
||||
externalDomains.add(domain);
|
||||
}
|
||||
} catch {
|
||||
// Invalid URL
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return [...externalDomains];
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a structured inventory of all pages.
|
||||
*/
|
||||
function buildPageInventory(pages: CrawledPage[]): PageInventoryItem[] {
|
||||
return pages.map((page) => ({
|
||||
url: page.url,
|
||||
pathname: page.pathname,
|
||||
title: page.title,
|
||||
type: page.type,
|
||||
headings: page.headings.slice(0, 10),
|
||||
services: page.type === "service" ? page.headings.filter((h) => h.length > 3 && h.length < 80) : [],
|
||||
hasSearch: page.features.includes("search"),
|
||||
hasForms: page.features.includes("forms"),
|
||||
hasMap: page.features.includes("maps"),
|
||||
hasVideo: page.features.includes("video"),
|
||||
contentSummary: page.text.substring(0, 500),
|
||||
}));
|
||||
}
|
||||
149
packages/concept-engine/src/cli.ts
Normal file
149
packages/concept-engine/src/cli.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env node
|
||||
// ============================================================================
|
||||
// @mintel/concept-engine — CLI Entry Point
|
||||
// Simple commander-based CLI for concept generation.
|
||||
// ============================================================================
|
||||
|
||||
import { Command } from "commander";
|
||||
import * as path from "node:path";
|
||||
import * as fs from "node:fs/promises";
|
||||
import { existsSync } from "node:fs";
|
||||
import { config as dotenvConfig } from "dotenv";
|
||||
import { ConceptPipeline } from "./pipeline.js";
|
||||
|
||||
// Load .env from monorepo root
|
||||
dotenvConfig({ path: path.resolve(process.cwd(), "../../.env") });
|
||||
dotenvConfig({ path: path.resolve(process.cwd(), ".env") });
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name("concept")
|
||||
.description("AI-powered project concept generator")
|
||||
.version("1.0.0");
|
||||
|
||||
program
|
||||
.command("run")
|
||||
.description("Run the full concept pipeline")
|
||||
.argument("[briefing]", "Briefing text or @path/to/file.txt")
|
||||
.option("--url <url>", "Target website URL")
|
||||
.option("--comments <comments>", "Additional notes")
|
||||
.option("--clear-cache", "Clear crawl cache and re-crawl")
|
||||
.option("--output <dir>", "Output directory", "../../out/concepts")
|
||||
.option("--crawl-dir <dir>", "Crawl data directory", "../../data/crawls")
|
||||
.action(async (briefingArg: string | undefined, options: any) => {
|
||||
const openrouterKey = process.env.OPENROUTER_API_KEY || process.env.OPENROUTER_KEY;
|
||||
if (!openrouterKey) {
|
||||
console.error("❌ OPENROUTER_API_KEY not found in environment.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
let briefing = briefingArg || "";
|
||||
|
||||
// Handle @file references
|
||||
if (briefing.startsWith("@")) {
|
||||
const rawPath = briefing.substring(1);
|
||||
const filePath = rawPath.startsWith("/")
|
||||
? rawPath
|
||||
: path.resolve(process.cwd(), rawPath);
|
||||
if (!existsSync(filePath)) {
|
||||
console.error(`❌ Briefing file not found: ${filePath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
briefing = await fs.readFile(filePath, "utf8");
|
||||
console.log(`📄 Loaded briefing from: ${filePath}`);
|
||||
}
|
||||
|
||||
// Auto-discover URL from briefing
|
||||
let url = options.url;
|
||||
if (!url && briefing) {
|
||||
const urlMatch = briefing.match(/https?:\/\/[^\s]+/);
|
||||
if (urlMatch) {
|
||||
url = urlMatch[0];
|
||||
console.log(`🔗 Discovered URL in briefing: ${url}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!briefing && !url) {
|
||||
console.error("❌ Provide a briefing text or --url");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const pipeline = new ConceptPipeline(
|
||||
{
|
||||
openrouterKey,
|
||||
zyteApiKey: process.env.ZYTE_API_KEY,
|
||||
outputDir: path.resolve(process.cwd(), options.output),
|
||||
crawlDir: path.resolve(process.cwd(), options.crawlDir),
|
||||
},
|
||||
{
|
||||
onStepStart: (id, name) => {
|
||||
// Will be enhanced with Ink spinner later
|
||||
},
|
||||
onStepComplete: (id, result) => {
|
||||
// Will be enhanced with Ink UI later
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
await pipeline.run({
|
||||
briefing,
|
||||
url,
|
||||
comments: options.comments,
|
||||
clearCache: options.clearCache,
|
||||
});
|
||||
|
||||
console.log("\n✨ Concept generation complete!");
|
||||
} catch (err) {
|
||||
console.error(`\n❌ Pipeline failed: ${(err as Error).message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
program
|
||||
.command("analyze")
|
||||
.description("Only crawl and analyze a website (no LLM)")
|
||||
.argument("<url>", "Website URL to analyze")
|
||||
.option("--crawl-dir <dir>", "Crawl data directory", "../../data/crawls")
|
||||
.option("--clear-cache", "Clear existing crawl cache")
|
||||
.action(async (url: string, options: any) => {
|
||||
const { crawlSite } = await import("./scraper.js");
|
||||
const { analyzeSite } = await import("./analyzer.js");
|
||||
|
||||
if (options.clearCache) {
|
||||
const { clearCrawlCache } = await import("./scraper.js");
|
||||
const domain = new URL(url).hostname;
|
||||
await clearCrawlCache(path.resolve(process.cwd(), options.crawlDir), domain);
|
||||
}
|
||||
|
||||
const pages = await crawlSite(url, {
|
||||
zyteApiKey: process.env.ZYTE_API_KEY,
|
||||
crawlDir: path.resolve(process.cwd(), options.crawlDir),
|
||||
});
|
||||
|
||||
const domain = new URL(url).hostname;
|
||||
const profile = analyzeSite(pages, domain);
|
||||
|
||||
console.log("\n📊 Site Profile:");
|
||||
console.log(` Domain: ${profile.domain}`);
|
||||
console.log(` Total Pages: ${profile.totalPages}`);
|
||||
console.log(` Navigation: ${profile.navigation.map((n) => n.label).join(", ")}`);
|
||||
console.log(` Features: ${profile.existingFeatures.join(", ") || "none"}`);
|
||||
console.log(` Services: ${profile.services.join(", ") || "none"}`);
|
||||
console.log(` External Domains: ${profile.externalDomains.join(", ") || "none"}`);
|
||||
console.log(` Company: ${profile.companyInfo.name || "unbekannt"}`);
|
||||
console.log(` Tax ID: ${profile.companyInfo.taxId || "unbekannt"}`);
|
||||
console.log(` Colors: ${profile.colors.join(", ")}`);
|
||||
console.log(` Images Found: ${profile.images.length}`);
|
||||
console.log(` Social: ${Object.entries(profile.socialLinks).map(([k, v]) => `${k}`).join(", ") || "none"}`);
|
||||
|
||||
const outputPath = path.join(
|
||||
path.resolve(process.cwd(), options.crawlDir),
|
||||
domain.replace(/\./g, "-"),
|
||||
"_site_profile.json",
|
||||
);
|
||||
console.log(`\n📦 Full profile saved to: ${outputPath}`);
|
||||
});
|
||||
|
||||
program.parse();
|
||||
10
packages/concept-engine/src/index.ts
Normal file
10
packages/concept-engine/src/index.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
// ============================================================================
// @mintel/concept-engine — Public API
// ============================================================================

// Pipeline orchestrator and its progress-callback contract.
export { ConceptPipeline } from "./pipeline.js";
export type { PipelineCallbacks } from "./pipeline.js";
// Deterministic crawling and site analysis (no LLM involved).
export { crawlSite, clearCrawlCache } from "./scraper.js";
export { analyzeSite } from "./analyzer.js";
// OpenRouter LLM client helpers (raw call, JSON call, output sanitizer).
export { llmRequest, llmJsonRequest, cleanJson } from "./llm-client.js";
// Shared domain types.
export * from "./types.js";
|
||||
133
packages/concept-engine/src/llm-client.ts
Normal file
133
packages/concept-engine/src/llm-client.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
// ============================================================================
|
||||
// LLM Client — Unified interface with model routing via OpenRouter
|
||||
// ============================================================================
|
||||
|
||||
import axios from "axios";
|
||||
|
||||
// Options for a single LLM call via OpenRouter.
interface LLMRequestOptions {
  model: string;        // OpenRouter model slug
  systemPrompt: string;
  userPrompt: string;
  jsonMode?: boolean;   // request response_format json_object (llmRequest defaults this to true)
  apiKey: string;       // OpenRouter API key
}

// Normalized LLM response with token usage and cost in USD.
interface LLMResponse {
  content: string;
  usage: {
    promptTokens: number;
    completionTokens: number;
    cost: number;       // provider-reported cost, or a rough per-token estimate
  };
}
|
||||
|
||||
/**
|
||||
* Clean raw LLM output to parseable JSON.
|
||||
* Handles markdown fences, control chars, trailing commas.
|
||||
*/
|
||||
export function cleanJson(str: string): string {
|
||||
let cleaned = str.replace(/```json\n?|```/g, "").trim();
|
||||
cleaned = cleaned.replace(
|
||||
/[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F-\u009F]/g,
|
||||
" ",
|
||||
);
|
||||
cleaned = cleaned.replace(/,\s*([\]}])/g, "$1");
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a request to an LLM via OpenRouter.
|
||||
*/
|
||||
export async function llmRequest(options: LLMRequestOptions): Promise<LLMResponse> {
|
||||
const { model, systemPrompt, userPrompt, jsonMode = true, apiKey } = options;
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
const resp = await axios.post(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
{
|
||||
model,
|
||||
messages: [
|
||||
{ role: "system", content: systemPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
],
|
||||
...(jsonMode ? { response_format: { type: "json_object" } } : {}),
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout: 120000,
|
||||
},
|
||||
).catch(err => {
|
||||
if (err.response) {
|
||||
console.error("OpenRouter API Error:", JSON.stringify(err.response.data, null, 2));
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
|
||||
const content = resp.data.choices?.[0]?.message?.content;
|
||||
if (!content) {
|
||||
throw new Error(`LLM returned no content. Model: ${model}`);
|
||||
}
|
||||
|
||||
let cost = 0;
|
||||
const usage = resp.data.usage || {};
|
||||
if (usage.cost !== undefined) {
|
||||
cost = usage.cost;
|
||||
} else {
|
||||
// Fallback estimation
|
||||
cost =
|
||||
(usage.prompt_tokens || 0) * (0.1 / 1_000_000) +
|
||||
(usage.completion_tokens || 0) * (0.4 / 1_000_000);
|
||||
}
|
||||
|
||||
return {
|
||||
content,
|
||||
usage: {
|
||||
promptTokens: usage.prompt_tokens || 0,
|
||||
completionTokens: usage.completion_tokens || 0,
|
||||
cost,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a request and parse the response as JSON.
|
||||
*/
|
||||
export async function llmJsonRequest<T = any>(
|
||||
options: LLMRequestOptions,
|
||||
): Promise<{ data: T; usage: LLMResponse["usage"] }> {
|
||||
const response = await llmRequest({ ...options, jsonMode: true });
|
||||
const cleaned = cleanJson(response.content);
|
||||
|
||||
let parsed: T;
|
||||
try {
|
||||
parsed = JSON.parse(cleaned);
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`Failed to parse LLM JSON response: ${(e as Error).message}\nRaw: ${cleaned.substring(0, 500)}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Unwrap common LLM artifacts: {"0": {...}}, {"state": {...}}, etc.
|
||||
const unwrapped = unwrapResponse(parsed);
|
||||
|
||||
return { data: unwrapped as T, usage: response.usage };
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively unwrap common LLM wrapping patterns.
|
||||
*/
|
||||
function unwrapResponse(obj: any): any {
|
||||
if (!obj || typeof obj !== "object" || Array.isArray(obj)) return obj;
|
||||
const keys = Object.keys(obj);
|
||||
if (keys.length === 1) {
|
||||
const key = keys[0];
|
||||
if (key === "0" || key === "state" || key === "facts" || key === "result" || key === "data") {
|
||||
return unwrapResponse(obj[key]);
|
||||
}
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
257
packages/concept-engine/src/pipeline.ts
Normal file
257
packages/concept-engine/src/pipeline.ts
Normal file
@@ -0,0 +1,257 @@
|
||||
// ============================================================================
|
||||
// Pipeline Orchestrator
|
||||
// Runs all steps sequentially, tracks state, supports re-running individual steps.
|
||||
// ============================================================================
|
||||
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as path from "node:path";
|
||||
import { existsSync } from "node:fs";
|
||||
import { crawlSite, clearCrawlCache } from "./scraper.js";
|
||||
import { analyzeSite } from "./analyzer.js";
|
||||
import { executeResearch } from "./steps/00b-research.js";
|
||||
import { executeExtract } from "./steps/01-extract.js";
|
||||
import { executeSiteAudit } from "./steps/00a-site-audit.js";
|
||||
import { executeAudit } from "./steps/02-audit.js";
|
||||
import { executeStrategize } from "./steps/03-strategize.js";
|
||||
import { executeArchitect } from "./steps/04-architect.js";
|
||||
import type {
|
||||
PipelineConfig,
|
||||
PipelineInput,
|
||||
ConceptState,
|
||||
ProjectConcept,
|
||||
StepResult,
|
||||
StepUsage,
|
||||
} from "./types.js";
|
||||
|
||||
/**
 * Optional progress hooks invoked by the pipeline around each step.
 */
export interface PipelineCallbacks {
  /** Fired before a step starts; `stepName` is the human-readable label. */
  onStepStart?: (stepId: string, stepName: string) => void;
  /** Fired after a step finishes successfully, with its StepResult. */
  onStepComplete?: (stepId: string, result: StepResult) => void;
  /** Fired when a step fails; `error` carries the failure message. */
  onStepError?: (stepId: string, error: string) => void;
}
|
||||
|
||||
/**
|
||||
* The main concept pipeline orchestrator.
|
||||
* Runs conceptual steps sequentially and builds the ProjectConcept.
|
||||
*/
|
||||
export class ConceptPipeline {
|
||||
private config: PipelineConfig;
|
||||
private state: ConceptState;
|
||||
private callbacks: PipelineCallbacks;
|
||||
|
||||
  /**
   * @param config    Paths and API keys for this pipeline run.
   * @param callbacks Optional progress hooks (defaults to none).
   */
  constructor(config: PipelineConfig, callbacks: PipelineCallbacks = {}) {
    this.config = config;
    this.callbacks = callbacks;
    this.state = this.createInitialState();
  }
|
||||
|
||||
private createInitialState(): ConceptState {
|
||||
return {
|
||||
briefing: "",
|
||||
usage: {
|
||||
totalPromptTokens: 0,
|
||||
totalCompletionTokens: 0,
|
||||
totalCost: 0,
|
||||
perStep: [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Run the full concept pipeline from scratch.
   *
   * Steps run sequentially; any step failure aborts the run (runStep rethrows):
   *   00  crawl + deterministic site analysis (only when input.url is set)
   *   00a site audit, 00b research, 01 fact extraction, 02 fact audit,
   *   03 strategic analysis, 04 information architecture.
   * Finally assembles the ProjectConcept and hands it to saveState.
   */
  async run(input: PipelineInput): Promise<ProjectConcept> {
    // Seed the mutable pipeline state from the caller's input.
    this.state.briefing = input.briefing;
    this.state.url = input.url;
    this.state.comments = input.comments;

    // Ensure output directories exist before any step writes to them.
    await fs.mkdir(this.config.outputDir, { recursive: true });
    await fs.mkdir(this.config.crawlDir, { recursive: true });

    // Step 0: Scrape & Analyze (deterministic — no LLM)
    if (input.url) {
      if (input.clearCache) {
        const domain = new URL(input.url).hostname;
        await clearCrawlCache(this.config.crawlDir, domain);
      }
      await this.runStep("00-scrape", "Scraping & Analyzing Website", async () => {
        const pages = await crawlSite(input.url!, {
          zyteApiKey: this.config.zyteApiKey,
          crawlDir: this.config.crawlDir,
        });
        const domain = new URL(input.url!).hostname;
        const siteProfile = analyzeSite(pages, domain);
        this.state.siteProfile = siteProfile;
        // Crawl data lives under a dot-free directory name ("example-com").
        this.state.crawlDir = path.join(this.config.crawlDir, domain.replace(/\./g, "-"));

        // Save site profile next to the crawl data
        await fs.writeFile(
          path.join(this.state.crawlDir!, "_site_profile.json"),
          JSON.stringify(siteProfile, null, 2),
        );

        return {
          success: true,
          data: siteProfile,
          // Deterministic step: a zeroed usage entry keeps per-step accounting complete.
          usage: { step: "00-scrape", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: 0 },
        };
      });
    }

    // Step 00a: Site Audit (DataForSEO)
    await this.runStep("00a-site-audit", "IST-Analysis (DataForSEO)", async () => {
      const result = await executeSiteAudit(this.state, this.config);
      if (result.success && result.data) {
        this.state.siteAudit = result.data;
      }
      return result;
    });

    // Step 00b: Research (real web data via journaling)
    await this.runStep("00b-research", "Industry & Company Research", async () => {
      const result = await executeResearch(this.state);
      if (result.success && result.data) {
        this.state.researchData = result.data;
      }
      return result;
    });

    // Step 1: Extract facts from the briefing
    await this.runStep("01-extract", "Extracting Facts from Briefing", async () => {
      const result = await executeExtract(this.state, this.config);
      if (result.success) this.state.facts = result.data;
      return result;
    });

    // Step 2: Audit the extracted features (skeptical review)
    await this.runStep("02-audit", "Auditing Features (Skeptical Review)", async () => {
      const result = await executeAudit(this.state, this.config);
      if (result.success) this.state.auditedFacts = result.data;
      return result;
    });

    // Step 3: Strategic analysis (briefing summary + design vision)
    await this.runStep("03-strategize", "Strategic Analysis", async () => {
      const result = await executeStrategize(this.state, this.config);
      if (result.success) {
        this.state.briefingSummary = result.data.briefingSummary;
        this.state.designVision = result.data.designVision;
      }
      return result;
    });

    // Step 4: Sitemap / information architecture
    await this.runStep("04-architect", "Information Architecture", async () => {
      const result = await executeArchitect(this.state, this.config);
      if (result.success) {
        this.state.sitemap = result.data.sitemap;
        this.state.websiteTopic = result.data.websiteTopic;
      }
      return result;
    });

    // Assemble the final ProjectConcept and let saveState record it.
    const projectConcept = this.buildProjectConcept();
    await this.saveState(projectConcept);

    return projectConcept;
  }
|
||||
|
||||
/**
|
||||
* Run a single step with callbacks and error handling.
|
||||
*/
|
||||
private async runStep(
|
||||
stepId: string,
|
||||
stepName: string,
|
||||
executor: () => Promise<StepResult>,
|
||||
): Promise<void> {
|
||||
this.callbacks.onStepStart?.(stepId, stepName);
|
||||
console.log(`\n📍 ${stepName}...`);
|
||||
|
||||
try {
|
||||
const result = await executor();
|
||||
if (result.usage) {
|
||||
this.state.usage.perStep.push(result.usage);
|
||||
this.state.usage.totalPromptTokens += result.usage.promptTokens;
|
||||
this.state.usage.totalCompletionTokens += result.usage.completionTokens;
|
||||
this.state.usage.totalCost += result.usage.cost;
|
||||
}
|
||||
|
||||
if (result.success) {
|
||||
const cost = result.usage?.cost ? ` ($${result.usage.cost.toFixed(4)})` : "";
|
||||
const duration = result.usage?.durationMs ? ` [${(result.usage.durationMs / 1000).toFixed(1)}s]` : "";
|
||||
console.log(` ✅ ${stepName} complete${cost}${duration}`);
|
||||
this.callbacks.onStepComplete?.(stepId, result);
|
||||
} else {
|
||||
console.error(` ❌ ${stepName} failed: ${result.error}`);
|
||||
this.callbacks.onStepError?.(stepId, result.error || "Unknown error");
|
||||
throw new Error(result.error);
|
||||
}
|
||||
} catch (err) {
|
||||
const errorMsg = (err as Error).message;
|
||||
this.callbacks.onStepError?.(stepId, errorMsg);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Assemble the final ProjectConcept from the accumulated pipeline state.
 * Optional state fields fall back to neutral defaults ("unknown", "", {},
 * []) so the resulting object is always fully populated.
 */
private buildProjectConcept(): ProjectConcept {
  const s = this.state;

  const concept: ProjectConcept = {
    domain: s.siteProfile?.domain || "unknown",
    timestamp: new Date().toISOString(),
    briefing: s.briefing,
    auditedFacts: s.auditedFacts || {},
    siteProfile: s.siteProfile,
    siteAudit: s.siteAudit,
    researchData: s.researchData,
    strategy: {
      briefingSummary: s.briefingSummary || "",
      designVision: s.designVision || "",
    },
    architecture: {
      websiteTopic: s.websiteTopic || "",
      sitemap: s.sitemap || [],
    },
    usage: s.usage,
  };

  return concept;
}
|
||||
|
||||
/**
|
||||
* Save the full concept generated state to disk.
|
||||
*/
|
||||
private async saveState(concept: ProjectConcept): Promise<void> {
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||
const companyName = this.state.auditedFacts?.companyName || "unknown";
|
||||
|
||||
const stateDir = path.join(this.config.outputDir, "concepts");
|
||||
await fs.mkdir(stateDir, { recursive: true });
|
||||
|
||||
const statePath = path.join(stateDir, `${companyName}_${timestamp}.json`);
|
||||
await fs.writeFile(statePath, JSON.stringify(concept, null, 2));
|
||||
console.log(`\n📦 Saved Project Concept to: ${statePath}`);
|
||||
|
||||
// Save debug trace
|
||||
const debugPath = path.join(stateDir, `${companyName}_${timestamp}_debug.json`);
|
||||
await fs.writeFile(debugPath, JSON.stringify(this.state, null, 2));
|
||||
|
||||
// Print usage summary
|
||||
console.log("\n──────────────────────────────────────────────");
|
||||
console.log("📊 PIPELINE USAGE SUMMARY");
|
||||
console.log("──────────────────────────────────────────────");
|
||||
for (const step of this.state.usage.perStep) {
|
||||
if (step.cost > 0) {
|
||||
console.log(` ${step.step}: ${step.model} — $${step.cost.toFixed(6)} (${(step.durationMs / 1000).toFixed(1)}s)`);
|
||||
}
|
||||
}
|
||||
console.log("──────────────────────────────────────────────");
|
||||
console.log(` TOTAL: $${this.state.usage.totalCost.toFixed(6)}`);
|
||||
console.log(` Tokens: ${(this.state.usage.totalPromptTokens + this.state.usage.totalCompletionTokens).toLocaleString()}`);
|
||||
console.log("──────────────────────────────────────────────\n");
|
||||
}
|
||||
|
||||
/**
 * Expose the live internal pipeline state (a direct reference, not a copy),
 * e.g. for CLI inspection after a run.
 */
getState(): ConceptState {
  const currentState = this.state;
  return currentState;
}
|
||||
}
|
||||
432
packages/concept-engine/src/scraper.ts
Normal file
432
packages/concept-engine/src/scraper.ts
Normal file
@@ -0,0 +1,432 @@
|
||||
// ============================================================================
|
||||
// Scraper — Zyte API + Local Persistence
|
||||
// Crawls all pages of a website, stores them locally for reuse.
|
||||
// ============================================================================
|
||||
|
||||
import axios from "axios";
|
||||
import * as cheerio from "cheerio";
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as path from "node:path";
|
||||
import { existsSync } from "node:fs";
|
||||
import type { CrawledPage, PageType } from "./types.js";
|
||||
|
||||
/** Configuration for crawlSite(). */
interface ScraperConfig {
  /** Zyte API key; when absent, pages are fetched via plain HTTP GET instead. */
  zyteApiKey?: string;
  /** Root directory under which per-domain crawl caches are stored. */
  crawlDir: string;
  /** Maximum number of pages to crawl; crawlSite() defaults this to 30. */
  maxPages?: number;
}
|
||||
|
||||
/**
|
||||
* Classify a URL pathname into a page type.
|
||||
*/
|
||||
function classifyPage(pathname: string): PageType {
|
||||
const p = pathname.toLowerCase();
|
||||
if (p === "/" || p === "" || p === "/index.html") return "home";
|
||||
if (p.includes("service") || p.includes("leistung") || p.includes("kompetenz"))
|
||||
return "service";
|
||||
if (p.includes("about") || p.includes("ueber") || p.includes("über") || p.includes("unternehmen"))
|
||||
return "about";
|
||||
if (p.includes("contact") || p.includes("kontakt")) return "contact";
|
||||
if (p.includes("job") || p.includes("karriere") || p.includes("career") || p.includes("human-resources"))
|
||||
return "career";
|
||||
if (p.includes("portfolio") || p.includes("referenz") || p.includes("projekt") || p.includes("case-study"))
|
||||
return "portfolio";
|
||||
if (p.includes("blog") || p.includes("news") || p.includes("aktuelles") || p.includes("magazin"))
|
||||
return "blog";
|
||||
if (p.includes("legal") || p.includes("impressum") || p.includes("datenschutz") || p.includes("privacy") || p.includes("agb"))
|
||||
return "legal";
|
||||
return "other";
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect interactive features present on a page.
|
||||
*/
|
||||
function detectFeatures($: cheerio.CheerioAPI): string[] {
|
||||
const features: string[] = [];
|
||||
|
||||
// Search
|
||||
if (
|
||||
$('input[type="search"]').length > 0 ||
|
||||
$('form[role="search"]').length > 0 ||
|
||||
$(".search-form, .search-box, #search, .searchbar").length > 0 ||
|
||||
$('input[name="q"], input[name="s"], input[name="search"]').length > 0
|
||||
) {
|
||||
features.push("search");
|
||||
}
|
||||
|
||||
// Forms (beyond search)
|
||||
const formCount = $("form").length;
|
||||
const searchForms = $('form[role="search"], .search-form').length;
|
||||
if (formCount > searchForms) {
|
||||
features.push("forms");
|
||||
}
|
||||
|
||||
// Maps
|
||||
if (
|
||||
$('iframe[src*="google.com/maps"], iframe[src*="openstreetmap"], .map-container, #map, [data-map]').length > 0
|
||||
) {
|
||||
features.push("maps");
|
||||
}
|
||||
|
||||
// Video
|
||||
if (
|
||||
$("video, iframe[src*='youtube'], iframe[src*='vimeo'], .video-container").length > 0
|
||||
) {
|
||||
features.push("video");
|
||||
}
|
||||
|
||||
// Calendar / Events
|
||||
if ($(".calendar, .event, [data-calendar]").length > 0) {
|
||||
features.push("calendar");
|
||||
}
|
||||
|
||||
// Cookie consent
|
||||
if ($(".cookie-banner, .cookie-consent, #cookie-notice, [data-cookie]").length > 0) {
|
||||
features.push("cookie-consent");
|
||||
}
|
||||
|
||||
return features;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract all internal links from a page.
|
||||
*/
|
||||
function extractInternalLinks($: cheerio.CheerioAPI, origin: string): string[] {
|
||||
const links: string[] = [];
|
||||
$("a[href]").each((_, el) => {
|
||||
const href = $(el).attr("href");
|
||||
if (!href) return;
|
||||
try {
|
||||
const url = new URL(href, origin);
|
||||
if (url.origin === origin) {
|
||||
// Skip assets
|
||||
if (/\.(pdf|zip|jpg|jpeg|png|svg|webp|gif|css|js|ico|woff|woff2|ttf|eot)$/i.test(url.pathname)) return;
|
||||
// Skip anchors-only
|
||||
if (url.pathname === "/" && url.hash) return;
|
||||
links.push(url.pathname);
|
||||
}
|
||||
} catch {
|
||||
// Invalid URL, skip
|
||||
}
|
||||
});
|
||||
return [...new Set(links)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract all images from a page.
|
||||
*/
|
||||
function extractImages($: cheerio.CheerioAPI, origin: string): string[] {
|
||||
const images: string[] = [];
|
||||
|
||||
// Regular img tags
|
||||
$("img[src]").each((_, el) => {
|
||||
const src = $(el).attr("src");
|
||||
if (src) images.push(src);
|
||||
});
|
||||
|
||||
// CSS background images (inline styles)
|
||||
$("[style*='background-image']").each((_, el) => {
|
||||
const style = $(el).attr("style");
|
||||
const match = style?.match(/url\(['"]?(.*?)['"]?\)/);
|
||||
if (match && match[1]) {
|
||||
images.push(match[1]);
|
||||
}
|
||||
});
|
||||
|
||||
// Resolve URLs to absolute
|
||||
const absoluteImages: string[] = [];
|
||||
for (const img of images) {
|
||||
if (img.startsWith("data:image")) continue; // Skip inline base64
|
||||
try {
|
||||
const url = new URL(img, origin);
|
||||
// Ignore small tracking pixels or generic vectors
|
||||
if (url.pathname.endsWith(".svg") && !url.pathname.includes("logo")) continue;
|
||||
absoluteImages.push(url.href);
|
||||
} catch {
|
||||
// Invalid URL
|
||||
}
|
||||
}
|
||||
|
||||
return [...new Set(absoluteImages)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract services/competencies from text content.
|
||||
*/
|
||||
function extractServices(text: string): string[] {
|
||||
const services: string[] = [];
|
||||
// Common pattern: bulleted or newline-separated service lists
|
||||
const lines = text.split(/\n/).map((l) => l.trim()).filter((l) => l.length > 3 && l.length < 100);
|
||||
for (const line of lines) {
|
||||
// Skip generic boilerplate
|
||||
if (/cookie|datenschutz|impressum|copyright|©/i.test(line)) continue;
|
||||
if (/^(tel|fax|e-mail|mobil|web|http)/i.test(line)) continue;
|
||||
services.push(line);
|
||||
}
|
||||
return services;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch a page via Zyte API with browser rendering.
|
||||
*/
|
||||
async function fetchWithZyte(url: string, apiKey: string): Promise<string> {
|
||||
try {
|
||||
const resp = await axios.post(
|
||||
"https://api.zyte.com/v1/extract",
|
||||
{
|
||||
url,
|
||||
browserHtml: true,
|
||||
},
|
||||
{
|
||||
auth: { username: apiKey, password: "" },
|
||||
timeout: 60000,
|
||||
},
|
||||
);
|
||||
const html = resp.data.browserHtml || "";
|
||||
if (!html) {
|
||||
console.warn(` ⚠️ Zyte returned empty browserHtml for ${url}`);
|
||||
}
|
||||
return html;
|
||||
} catch (err: any) {
|
||||
if (err.response) {
|
||||
console.error(` ❌ Zyte API error ${err.response.status} for ${url}: ${err.response.data?.detail || err.response.statusText}`);
|
||||
// Rate limited — wait and retry once
|
||||
if (err.response.status === 429) {
|
||||
console.log(" ⏳ Rate limited, waiting 5s and retrying...");
|
||||
await new Promise((r) => setTimeout(r, 5000));
|
||||
return fetchWithZyte(url, apiKey);
|
||||
}
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch a page via simple HTTP GET (fallback).
|
||||
*/
|
||||
async function fetchDirect(url: string): Promise<string> {
|
||||
const resp = await axios.get(url, {
|
||||
timeout: 30000,
|
||||
headers: {
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
||||
},
|
||||
});
|
||||
return typeof resp.data === "string" ? resp.data : "";
|
||||
}
|
||||
|
||||
/**
 * Parse an HTML string into a CrawledPage.
 *
 * Extracts title, h1–h3 headings, nav link labels, whitespace-collapsed
 * body text (capped at 50k chars), detected features, same-origin links,
 * image URLs, and basic meta/OpenGraph tags.
 *
 * @param html Raw page HTML.
 * @param url  Absolute page URL (must be parseable by `new URL`).
 */
function parsePage(html: string, url: string): CrawledPage {
  const $ = cheerio.load(html);
  const urlObj = new URL(url);

  const title = $("title").text().trim();
  // Headings in document order, empty ones dropped.
  const headings = $("h1, h2, h3")
    .map((_, el) => $(el).text().trim())
    .get()
    .filter((h) => h.length > 0);

  // Navigation labels; the <100-char cap filters out mega-menu text blobs.
  const navItems = $("nav a")
    .map((_, el) => $(el).text().trim())
    .get()
    .filter((t) => t.length > 0 && t.length < 100);

  // Collapsed page text, truncated to keep downstream prompt sizes bounded.
  const bodyText = $("body")
    .text()
    .replace(/\s+/g, " ")
    .substring(0, 50000)
    .trim();

  const features = detectFeatures($);
  const links = extractInternalLinks($, urlObj.origin);
  const images = extractImages($, urlObj.origin);

  // Meta tags resolve to undefined (not "") when absent.
  const description = $('meta[name="description"]').attr("content") || undefined;
  const ogTitle = $('meta[property="og:title"]').attr("content") || undefined;
  const ogImage = $('meta[property="og:image"]').attr("content") || undefined;

  return {
    url,
    pathname: urlObj.pathname,
    title,
    // Full raw HTML is retained on the page object (also persisted to disk by crawlSite).
    html,
    text: bodyText,
    headings,
    navItems,
    features,
    type: classifyPage(urlObj.pathname),
    links,
    images,
    meta: { description, ogTitle, ogImage },
  };
}
|
||||
|
||||
/**
 * Crawl a website and persist all pages locally.
 *
 * Behavior:
 * - If a cached crawl exists (marker file `_crawl_meta.json` in the domain
 *   directory), it is loaded from disk and no network requests are made.
 * - Otherwise performs a breadth-first crawl starting at `targetUrl`,
 *   restricted to the same origin, up to `config.maxPages` (default 30).
 * - Each fetched page's HTML plus extracted metadata are written to
 *   `<crawlDir>/<domain-with-dashes>/`.
 *
 * Returns an array of CrawledPage objects.
 */
export async function crawlSite(
  targetUrl: string,
  config: ScraperConfig,
): Promise<CrawledPage[]> {
  const urlObj = new URL(targetUrl);
  const origin = urlObj.origin;
  const domain = urlObj.hostname;
  // Dots in the hostname become dashes to form a flat directory name.
  const domainDir = path.join(config.crawlDir, domain.replace(/\./g, "-"));

  // Check for existing crawl
  const metaFile = path.join(domainDir, "_crawl_meta.json");
  if (existsSync(metaFile)) {
    console.log(`📦 Found existing crawl for ${domain}. Loading from disk...`);
    return loadCrawlFromDisk(domainDir);
  }

  console.log(`🔍 Crawling ${targetUrl} via ${config.zyteApiKey ? "Zyte API" : "direct HTTP"}...`);

  // Ensure output dir
  await fs.mkdir(domainDir, { recursive: true });

  const maxPages = config.maxPages || 30;
  // `visited` tracks pathnames (not full URLs); `queue` holds full URLs.
  const visited = new Set<string>();
  const queue: string[] = [targetUrl];
  const pages: CrawledPage[] = [];

  while (queue.length > 0 && visited.size < maxPages) {
    const url = queue.shift()!;
    const urlPath = new URL(url).pathname;

    if (visited.has(urlPath)) continue;
    // NOTE(review): the path is marked visited BEFORE fetching, so failed or
    // empty fetches still consume budget from maxPages — confirm intended.
    visited.add(urlPath);

    try {
      console.log(` ↳ Fetching ${url} (${visited.size}/${maxPages})...`);

      // Prefer Zyte (browser-rendered HTML) when a key is configured.
      let html: string;
      if (config.zyteApiKey) {
        html = await fetchWithZyte(url, config.zyteApiKey);
      } else {
        html = await fetchDirect(url);
      }

      if (!html || html.length < 100) {
        console.warn(` ⚠️ Empty/tiny response for ${url}, skipping.`);
        continue;
      }

      const page = parsePage(html, url);
      pages.push(page);

      // Save HTML + metadata to disk
      // "/" maps to "index"; other paths have slashes flattened to underscores.
      const safeName = urlPath === "/" ? "index" : urlPath.replace(/\//g, "_").replace(/^_/, "");
      await fs.writeFile(path.join(domainDir, `${safeName}.html`), html);
      await fs.writeFile(
        path.join(domainDir, `${safeName}.meta.json`),
        JSON.stringify(
          {
            url: page.url,
            pathname: page.pathname,
            title: page.title,
            type: page.type,
            headings: page.headings,
            navItems: page.navItems,
            features: page.features,
            links: page.links,
            images: page.images,
            meta: page.meta,
          },
          null,
          2,
        ),
      );

      // Discover new links (page.links are pathnames; re-anchor to origin).
      for (const link of page.links) {
        if (!visited.has(link)) {
          const fullUrl = `${origin}${link}`;
          queue.push(fullUrl);
        }
      }
    } catch (err) {
      // Best-effort crawl: a single failed page does not abort the run.
      console.warn(` ⚠️ Failed to fetch ${url}: ${(err as Error).message}`);
    }
  }

  // Save crawl metadata (also acts as the cache marker checked above).
  await fs.writeFile(
    metaFile,
    JSON.stringify(
      {
        domain,
        crawledAt: new Date().toISOString(),
        totalPages: pages.length,
        urls: pages.map((p) => p.url),
      },
      null,
      2,
    ),
  );

  console.log(`✅ Crawled ${pages.length} pages for ${domain}. Saved to ${domainDir}`);
  return pages;
}
|
||||
|
||||
/**
 * Load a previously crawled site from disk.
 *
 * Reads every `*.meta.json` (except the `_crawl_meta.json` marker) written
 * by crawlSite, pairs it with its sibling `*.html` file when present, and
 * re-derives the collapsed body text from the stored HTML.
 *
 * @param domainDir Per-domain cache directory produced by crawlSite.
 */
async function loadCrawlFromDisk(domainDir: string): Promise<CrawledPage[]> {
  const files = await fs.readdir(domainDir);
  const metaFiles = files.filter((f) => f.endsWith(".meta.json") && f !== "_crawl_meta.json");

  const pages: CrawledPage[] = [];
  for (const metaFile of metaFiles) {
    const baseName = metaFile.replace(".meta.json", "");
    const htmlFile = `${baseName}.html`;

    const meta = JSON.parse(await fs.readFile(path.join(domainDir, metaFile), "utf8"));
    // HTML is optional: a meta file without its HTML sibling yields html = "".
    let html = "";
    if (files.includes(htmlFile)) {
      html = await fs.readFile(path.join(domainDir, htmlFile), "utf8");
    }

    // Recompute the collapsed 50k-char body text (same transform as parsePage).
    const text = html
      ? cheerio
          .load(html)("body")
          .text()
          .replace(/\s+/g, " ")
          .substring(0, 50000)
          .trim()
      : "";

    // Missing metadata fields fall back to empty/neutral defaults.
    pages.push({
      url: meta.url,
      pathname: meta.pathname,
      title: meta.title,
      html,
      text,
      headings: meta.headings || [],
      navItems: meta.navItems || [],
      features: meta.features || [],
      type: meta.type || "other",
      links: meta.links || [],
      images: meta.images || [],
      meta: meta.meta || {},
    });
  }

  console.log(` 📂 Loaded ${pages.length} cached pages from disk.`);
  return pages;
}
|
||||
|
||||
/**
|
||||
* Delete a cached crawl to force re-crawl.
|
||||
*/
|
||||
export async function clearCrawlCache(crawlDir: string, domain: string): Promise<void> {
|
||||
const domainDir = path.join(crawlDir, domain.replace(/\./g, "-"));
|
||||
if (existsSync(domainDir)) {
|
||||
await fs.rm(domainDir, { recursive: true, force: true });
|
||||
console.log(`🧹 Cleared crawl cache for ${domain}`);
|
||||
}
|
||||
}
|
||||
65
packages/concept-engine/src/steps/00a-site-audit.ts
Normal file
65
packages/concept-engine/src/steps/00a-site-audit.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
// ============================================================================
|
||||
// Step 00a: Site Audit (DataForSEO + AI)
|
||||
// ============================================================================
|
||||
|
||||
import { PageAuditor } from "@mintel/page-audit";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
|
||||
export async function executeSiteAudit(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.url) {
|
||||
return {
|
||||
success: true,
|
||||
data: null,
|
||||
usage: { step: "00a-site-audit", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const login = process.env.DATA_FOR_SEO_LOGIN || process.env.DATA_FOR_SEO_API_KEY?.split(":")?.[0];
|
||||
const password = process.env.DATA_FOR_SEO_PASSWORD || process.env.DATA_FOR_SEO_API_KEY?.split(":")?.slice(1)?.join(":");
|
||||
|
||||
if (!login || !password) {
|
||||
console.warn(" ⚠️ Site Audit skipped: DataForSEO credentials missing from environment.");
|
||||
return {
|
||||
success: true,
|
||||
data: null,
|
||||
usage: { step: "00a-site-audit", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
|
||||
};
|
||||
}
|
||||
|
||||
const auditor = new PageAuditor({
|
||||
dataForSeoLogin: login,
|
||||
dataForSeoPassword: password,
|
||||
openrouterKey: config.openrouterKey,
|
||||
outputDir: config.outputDir ? `${config.outputDir}/audits` : undefined,
|
||||
});
|
||||
|
||||
// Run audit (max 20 pages for the estimation phase to keep it fast)
|
||||
const result = await auditor.audit(state.url, { maxPages: 20 });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: result,
|
||||
usage: {
|
||||
step: "00a-site-audit",
|
||||
model: "dataforseo",
|
||||
cost: 0, // DataForSEO cost tracking could be added later
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err: any) {
|
||||
console.warn(` ⚠️ Site Audit failed, skipping: ${err.message}`);
|
||||
return {
|
||||
success: true,
|
||||
data: null,
|
||||
usage: { step: "00a-site-audit", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
|
||||
};
|
||||
}
|
||||
}
|
||||
121
packages/concept-engine/src/steps/00b-research.ts
Normal file
121
packages/concept-engine/src/steps/00b-research.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
// ============================================================================
|
||||
// Step 00b: Research — Industry Research via @mintel/journaling (No LLM hallus)
|
||||
// Uses Serper API for real web search results about the industry/company.
|
||||
// ============================================================================
|
||||
|
||||
import type { ConceptState, StepResult } from "../types.js";
|
||||
|
||||
/** Aggregated web-research findings produced by executeResearch. */
interface ResearchResult {
  /** Facts about the company itself (capped at 5 by executeResearch). */
  companyContext: string[];
  /** Industry-level insights (capped at 5 by executeResearch). */
  industryInsights: string[];
  /** Competitor details; currently never populated by executeResearch. */
  competitorInfo: string[];
}
|
||||
|
||||
/**
 * Research the company and industry using real web search data.
 * Uses @mintel/journaling's ResearchAgent — results are grounded in real
 * sources rather than LLM recall.
 *
 * NOTE: The journaling package can cause unhandled rejections that crash the
 * process. Every agent call is therefore wrapped in an additional safety
 * layer (see `safeCall` below). This step never fails the pipeline: every
 * exit path returns `success: true`, at worst with empty data.
 *
 * @param state Pipeline state; reads the step-00 site profile for inputs.
 */
export async function executeResearch(
  state: ConceptState,
): Promise<StepResult<ResearchResult>> {
  const startTime = Date.now();

  // Research inputs derived from the deterministic site analysis (step 00).
  const companyName = state.siteProfile?.companyInfo?.name || "";
  const websiteTopic = state.siteProfile?.services?.slice(0, 3).join(", ") || "";
  const domain = state.siteProfile?.domain || "";

  // Nothing to research — return an empty but successful result.
  if (!companyName && !websiteTopic && !domain) {
    return {
      success: true,
      data: { companyContext: [], industryInsights: [], competitorInfo: [] },
      usage: { step: "00b-research", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: 0 },
    };
  }

  // Safety wrapper: catch ANY unhandled rejections during this step.
  // A temporary process-level "unhandledRejection" listener resolves with
  // `fallback` instead of letting Node crash; the listener is removed again
  // on every exit path. The returned promise always resolves, never rejects.
  const safeCall = <T>(fn: () => Promise<T>, fallback: T): Promise<T> => {
    return new Promise<T>((resolve) => {
      const handler = (err: any) => {
        console.warn(` ⚠️ Unhandled rejection caught in research: ${err?.message || err}`);
        process.removeListener("unhandledRejection", handler);
        resolve(fallback);
      };
      process.on("unhandledRejection", handler);

      fn()
        .then((result) => {
          process.removeListener("unhandledRejection", handler);
          resolve(result);
        })
        .catch((err) => {
          process.removeListener("unhandledRejection", handler);
          console.warn(` ⚠️ Research call failed: ${err?.message || err}`);
          resolve(fallback);
        });
    });
  };

  try {
    // Lazy import so a broken/missing journaling package only disables this
    // step instead of breaking module load of the whole pipeline.
    const { ResearchAgent } = await import("@mintel/journaling");
    const agent = new ResearchAgent(process.env.OPENROUTER_API_KEY || "");

    const results: ResearchResult = {
      companyContext: [],
      industryInsights: [],
      competitorInfo: [],
    };

    // 1. Research the company itself
    if (companyName || domain) {
      const searchQuery = companyName
        ? `${companyName} ${websiteTopic} Unternehmen`
        : `site:${domain}`;

      console.log(` 🔍 Researching: "${searchQuery}"...`);
      const facts = await safeCall(
        () => agent.researchTopic(searchQuery),
        [] as any[],
      );
      // Returned fact objects are not uniformly shaped — accept any of the
      // known value fields, then cap at 5 entries.
      results.companyContext = (facts || [])
        .filter((f: any) => f?.fact || f?.value || f?.text || f?.statement)
        .map((f: any) => f.fact || f.value || f.text || f.statement)
        .slice(0, 5);
    }

    // 2. Industry research
    if (websiteTopic) {
      console.log(` 🔍 Researching industry: "${websiteTopic}"...`);
      const insights = await safeCall(
        () => agent.researchCompetitors(websiteTopic),
        [] as any[],
      );
      results.industryInsights = (insights || []).slice(0, 5);
    }

    const totalFacts = results.companyContext.length + results.industryInsights.length + results.competitorInfo.length;
    console.log(` 📊 Research found ${totalFacts} data points.`);

    return {
      success: true,
      data: results,
      usage: {
        step: "00b-research",
        model: "serper/datacommons",
        promptTokens: 0,
        completionTokens: 0,
        cost: 0,
        durationMs: Date.now() - startTime,
      },
    };
  } catch (err) {
    // Still a "successful" (empty) step — research is strictly optional.
    console.warn(` ⚠️ Research step skipped: ${(err as Error).message}`);
    return {
      success: true,
      data: { companyContext: [], industryInsights: [], competitorInfo: [] },
      usage: { step: "00b-research", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: Date.now() - startTime },
    };
  }
}
|
||||
108
packages/concept-engine/src/steps/01-extract.ts
Normal file
108
packages/concept-engine/src/steps/01-extract.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
// ============================================================================
|
||||
// Step 01: Extract — Briefing Fact Extraction (Gemini Flash)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
/**
 * Step 01: Extract verifiable facts from the client briefing via an LLM
 * (the "flash" model through OpenRouter, JSON-mode).
 *
 * The crawled site profile (when available) is injected into the prompt as
 * ground truth so the model prefers crawled facts over guesses. On success
 * `data` holds the parsed flat fact object; on failure the step reports
 * `success: false` with an error message (no usage record).
 *
 * @param state  Pipeline state; reads briefing, comments, and siteProfile.
 * @param config Supplies the OpenRouter key and optional model overrides.
 */
export async function executeExtract(
  state: ConceptState,
  config: PipelineConfig,
): Promise<StepResult> {
  // Model table: defaults overridable per run via config.modelsOverride.
  const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
  const startTime = Date.now();

  // Build site context from the deterministic analyzer
  const siteContext = state.siteProfile
    ? `
EXISTING WEBSITE ANALYSIS (FACTS — verifiably crawled, NOT guessed):
- Domain: ${state.siteProfile.domain}
- Total pages crawled: ${state.siteProfile.totalPages}
- Navigation items: ${state.siteProfile.navigation.map((n) => n.label).join(", ") || "nicht erkannt"}
- Existing features: ${state.siteProfile.existingFeatures.join(", ") || "keine"}
- Services / Kompetenzen: ${state.siteProfile.services.join(" | ") || "keine"}
- Employee count (from website text): ${(state.siteProfile as any).employeeCount || "nicht genannt"}
- Company name: ${state.siteProfile.companyInfo.name || "unbekannt"}
- Address: ${state.siteProfile.companyInfo.address || "unbekannt"}
- Tax ID (USt-ID): ${state.siteProfile.companyInfo.taxId || "unbekannt"}
- HRB: ${state.siteProfile.companyInfo.registerNumber || "unbekannt"}
- Managing Director: ${state.siteProfile.companyInfo.managingDirector || "unbekannt"}
- External related domains (HAVE OWN WEBSITES — DO NOT include as sub-pages!): ${state.siteProfile.externalDomains.join(", ") || "keine"}
- Social links: ${Object.entries(state.siteProfile.socialLinks).map(([k, v]) => `${k}: ${v}`).join(", ") || "keine"}
`
    : "No existing website data available.";

  // System prompt: extraction contract (output keys, language, guardrails).
  // NOTE: the literal's exact text is behavior — do not edit casually.
  const systemPrompt = `
You are a precision fact extractor. Your only job: extract verifiable facts from the BRIEFING.
Output language: GERMAN (strict).
Output format: flat JSON at root level. No nesting except arrays.

### CRITICAL RULES:
1. "employeeCount": take from SITE ANALYSIS if available. Only override if briefing states something more specific.
2. External domains (e.g. "etib-ing.com") have their OWN website. NEVER include them as sub-pages.
3. Videos (Messefilm, Imagefilm) are CONTENT ASSETS, not pages.
4. If existing site already has search, include "search" in functions.
5. DO NOT invent pages not mentioned in briefing or existing navigation.

### CONSERVATIVE RULE:
- simple lists (Jobs, Referenzen, Messen) = pages, NOT features
- Assume "page" as default. Only add "feature" for complex interactive systems.

### OUTPUT FORMAT:
{
"companyName": string,
"companyAddress": string,
"personName": string,
"email": string,
"existingWebsite": string,
"websiteTopic": string, // MAX 3 words
"isRelaunch": boolean,
"employeeCount": string, // from site analysis, e.g. "über 50"
"pages": string[], // ALL pages: ["Startseite", "Über Uns", "Leistungen", ...]
"functions": string[], // search, forms, maps, video, cookie_consent, etc.
"assets": string[], // existing_website, logo, media, photos, videos
"deadline": string,
"targetAudience": string,
"cmsSetup": boolean,
"multilang": boolean
}

BANNED OUTPUT KEYS: "selectedPages", "otherPages", "features", "apiSystems" — use pages[] and functions[] ONLY.
`;

  // User prompt: the briefing is the source of truth; site context is appended.
  const userPrompt = `BRIEFING (TRUTH SOURCE):
${state.briefing}

COMMENTS:
${state.comments || "keine"}

${siteContext}`;

  try {
    const { data, usage } = await llmJsonRequest({
      model: models.flash,
      systemPrompt,
      userPrompt,
      apiKey: config.openrouterKey,
    });

    return {
      success: true,
      data,
      usage: {
        step: "01-extract",
        model: models.flash,
        promptTokens: usage.promptTokens,
        completionTokens: usage.completionTokens,
        cost: usage.cost,
        durationMs: Date.now() - startTime,
      },
    };
  } catch (err) {
    // No usage record on failure; the pipeline's runStep will abort the run.
    return {
      success: false,
      error: `Extract step failed: ${(err as Error).message}`,
    };
  }
}
|
||||
110
packages/concept-engine/src/steps/02-audit.ts
Normal file
110
packages/concept-engine/src/steps/02-audit.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
// ============================================================================
|
||||
// Step 02: Audit — Feature Auditor + Skeptical Review (Gemini Flash)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeAudit(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.facts) {
|
||||
return { success: false, error: "No facts from Step 01 available." };
|
||||
}
|
||||
|
||||
const systemPrompt = `
|
||||
You are a "Strict Cost Controller". Your mission is to prevent over-billing.
|
||||
Review the extracted FEATURES against the BRIEFING and the EXISTING SITE ANALYSIS.
|
||||
|
||||
### RULE OF THUMB:
|
||||
- A "Feature" (1.500 €) is ONLY justified for complex, dynamic systems (logic, database, CMS-driven management, advanced filtering).
|
||||
- Simple lists, information sections, or static descriptions (e.g., "Messen", "Team", "Historie", "Jobs" as mere text) are ALWAYS "Pages" (600 €).
|
||||
- If the briefing doesn't explicitly mention "Management System", "Filterable Database", or "Client Login", it is a PAGE.
|
||||
|
||||
### ADDITIONAL CHECKS:
|
||||
1. If any feature maps to an entity that has its own external website (listed in EXTERNAL_DOMAINS), remove it entirely — it's out of scope.
|
||||
2. Videos are ASSETS not pages. Remove any video-related entries from pages.
|
||||
3. If the existing site has features (search, forms, etc.), ensure they are in the functions list.
|
||||
|
||||
### MISSION:
|
||||
Return the corrected 'features', 'otherPages', and 'functions' arrays.
|
||||
|
||||
### OUTPUT FORMAT:
|
||||
{
|
||||
"features": string[],
|
||||
"otherPages": string[],
|
||||
"functions": string[],
|
||||
"removedItems": [{ "item": string, "reason": string }],
|
||||
"addedItems": [{ "item": string, "reason": string }]
|
||||
}
|
||||
`;
|
||||
|
||||
const userPrompt = `
|
||||
EXTRACTED FACTS:
|
||||
${JSON.stringify(state.facts, null, 2)}
|
||||
|
||||
BRIEFING:
|
||||
${state.briefing}
|
||||
|
||||
EXTERNAL DOMAINS (have own websites, OUT OF SCOPE):
|
||||
${state.siteProfile?.externalDomains?.join(", ") || "none"}
|
||||
|
||||
EXISTING FEATURES ON CURRENT SITE:
|
||||
${state.siteProfile?.existingFeatures?.join(", ") || "none"}
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.flash,
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
// Apply audit results to facts
|
||||
const auditedFacts = { ...state.facts };
|
||||
auditedFacts.features = data.features || [];
|
||||
auditedFacts.otherPages = [
|
||||
...new Set([...(auditedFacts.otherPages || []), ...(data.otherPages || [])]),
|
||||
];
|
||||
if (data.functions) {
|
||||
auditedFacts.functions = [
|
||||
...new Set([...(auditedFacts.functions || []), ...data.functions]),
|
||||
];
|
||||
}
|
||||
|
||||
// Log changes
|
||||
if (data.removedItems?.length) {
|
||||
console.log(" 📉 Audit removed:");
|
||||
for (const item of data.removedItems) {
|
||||
console.log(` - ${item.item}: ${item.reason}`);
|
||||
}
|
||||
}
|
||||
if (data.addedItems?.length) {
|
||||
console.log(" 📈 Audit added:");
|
||||
for (const item of data.addedItems) {
|
||||
console.log(` + ${item.item}: ${item.reason}`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: auditedFacts,
|
||||
usage: {
|
||||
step: "02-audit",
|
||||
model: models.flash,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Audit step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
99
packages/concept-engine/src/steps/03-strategize.ts
Normal file
99
packages/concept-engine/src/steps/03-strategize.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// ============================================================================
|
||||
// Step 03: Strategize — Briefing Summary + Design Vision (Gemini Pro)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeStrategize(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.auditedFacts) {
|
||||
return { success: false, error: "No audited facts from Step 02 available." };
|
||||
}
|
||||
|
||||
const systemPrompt = `
|
||||
You are a high-end Digital Architect. Your goal is to make the CUSTOMER feel 100% understood.
|
||||
Analyze the BRIEFING and the EXISTING WEBSITE context.
|
||||
|
||||
### OBJECTIVE:
|
||||
1. **briefingSummary**: Ein sachlicher, tiefgehender Überblick der Unternehmenslage.
|
||||
- STIL: Keine Ich-Form. Keine Marketing-Floskeln. Nutze präzise Fachbegriffe. Sei prägnant.
|
||||
- FORM: EXAKT ZWEI ABSÄTZE. Insgesamt ca. 6 Sätze.
|
||||
- INHALT: Status Quo, was der Kunde will, welcher Sprung notwendig ist.
|
||||
- ABSOLUTE REGEL: Keine Halluzinationen. Keine namentlichen Nennungen von Personen.
|
||||
- RELAUNCH-REGEL: Wenn isRelaunch=true, NICHT sagen "keine digitale Präsenz". Es GIBT eine Seite.
|
||||
- SORGLOS BETRIEB: MUSS erwähnt werden als Teil des Gesamtpakets.
|
||||
|
||||
2. **designVision**: Ein abstraktes, strategisches Konzept.
|
||||
- STIL: Rein konzeptionell. Keine Umsetzungsschritte. Keine Ich-Form. Sei prägnant.
|
||||
- FORM: EXAKT ZWEI ABSÄTZE. Insgesamt ca. 4 Sätze.
|
||||
- DATENSCHUTZ: KEINERLEI namentliche Nennungen.
|
||||
- FOKUS: Welche strategische Wirkung soll erzielt werden?
|
||||
|
||||
### RULES:
|
||||
- NO "wir/unser". NO "Ich/Mein". Objective, fact-oriented narrative.
|
||||
- NO marketing lingo. NO "innovativ", "revolutionär", "state-of-the-art".
|
||||
- NO hallucinations about features not in the briefing.
|
||||
- NO "SEO-Standards zur Fachkräftesicherung" or "B2B-Nutzerströme" — das ist Schwachsinn.
|
||||
Use specific industry terms from the briefing (e.g. "Kabeltiefbau", "HDD-Bohrverfahren").
|
||||
- LANGUAGE: Professional German. Simple but expert-level.
|
||||
|
||||
### OUTPUT FORMAT:
|
||||
{
|
||||
"briefingSummary": string,
|
||||
"designVision": string
|
||||
}
|
||||
`;
|
||||
|
||||
const userPrompt = `
|
||||
BRIEFING (TRUTH SOURCE):
|
||||
${state.briefing}
|
||||
|
||||
EXISTING WEBSITE DATA:
|
||||
- Services: ${state.siteProfile?.services?.join(", ") || "unbekannt"}
|
||||
- Navigation: ${state.siteProfile?.navigation?.map((n) => n.label).join(", ") || "unbekannt"}
|
||||
- Company: ${state.auditedFacts.companyName || "unbekannt"}
|
||||
|
||||
EXTRACTED & AUDITED FACTS:
|
||||
${JSON.stringify(state.auditedFacts, null, 2)}
|
||||
|
||||
${state.siteAudit?.report ? `
|
||||
TECHNICAL SITE AUDIT (IST-Analyse):
|
||||
Health: ${state.siteAudit.report.overallHealth} (SEO: ${state.siteAudit.report.seoScore}, UX: ${state.siteAudit.report.uxScore}, Perf: ${state.siteAudit.report.performanceScore})
|
||||
- Executive Summary: ${state.siteAudit.report.executiveSummary}
|
||||
- Strengths: ${state.siteAudit.report.strengths.join(", ")}
|
||||
- Critical Issues: ${state.siteAudit.report.criticalIssues.join(", ")}
|
||||
- Quick Wins: ${state.siteAudit.report.quickWins.join(", ")}
|
||||
` : ""}
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.pro,
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data,
|
||||
usage: {
|
||||
step: "03-strategize",
|
||||
model: models.pro,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Strategize step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
133
packages/concept-engine/src/steps/04-architect.ts
Normal file
133
packages/concept-engine/src/steps/04-architect.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
// ============================================================================
|
||||
// Step 04: Architect — Sitemap & Information Architecture (Gemini Pro)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeArchitect(
|
||||
state: ConceptState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.auditedFacts) {
|
||||
return { success: false, error: "No audited facts available." };
|
||||
}
|
||||
|
||||
// Build navigation constraint from the real site
|
||||
const existingNav = state.siteProfile?.navigation?.map((n) => n.label).join(", ") || "unbekannt";
|
||||
const existingServices = state.siteProfile?.services?.join(", ") || "unbekannt";
|
||||
const externalDomains = state.siteProfile?.externalDomains?.join(", ") || "keine";
|
||||
|
||||
const systemPrompt = `
|
||||
Du bist ein Senior UX Architekt. Erstelle einen ECHTEN SEITENBAUM für die neue Website.
|
||||
Regelwerk für den Output:
|
||||
|
||||
### SEITENBAUM-REGELN:
|
||||
1. KEIN MARKETINGSPRECH als Kategoriename. Gültige Kategorien sind nur die echten Navigationspunkte der Website.
|
||||
ERLAUBT: "Startseite", "Leistungen", "Über uns", "Karriere", "Referenzen", "Kontakt", "Rechtliches"
|
||||
VERBOTEN: "Kern-Präsenz", "Vertrauen", "Business Areas", "Digitaler Auftritt"
|
||||
|
||||
2. LEISTUNGEN muss in ECHTE UNTERSEITEN aufgeteilt werden — nicht eine einzige "Leistungen"-Seite.
|
||||
Jede Kompetenz aus dem existierenden Leistungsspektrum = eine eigene Seite.
|
||||
Beispiel statt:
|
||||
{ category: "Leistungen", pages: [{ title: "Leistungen", desc: "..." }] }
|
||||
So:
|
||||
{ category: "Leistungen", pages: [
|
||||
{ title: "Kabeltiefbau", desc: "Mittelspannung, Niederspannung, Kabelpflugarbeiten..." },
|
||||
{ title: "Horizontalspülbohrungen", desc: "HDD in allen Bodenklassen..." },
|
||||
{ title: "Elektromontagen", desc: "Bis 110 kV, Glasfaserkabelmontagen..." },
|
||||
{ title: "Planung & Dokumentation", desc: "Genehmigungs- und Ausführungsplanung, Vermessung..." }
|
||||
]}
|
||||
|
||||
3. SEITENTITEL: Kurz, klar, faktisch. Kein Werbejargon.
|
||||
ERLAUBT: "Kabeltiefbau", "Über uns", "Karriere"
|
||||
VERBOTEN: "Unsere Expertise", "Kompetenzspektrum", "Community"
|
||||
|
||||
4. Gruppe die Leistungen nach dem ECHTEN Kompetenzkatalog der bestehenden Site — nicht erfinden.
|
||||
|
||||
5. Keine doppelten Seiten. Keine Phantomseiten.
|
||||
|
||||
6. Videos = Content-Assets, keine eigene Seite.
|
||||
|
||||
7. Entitäten mit eigener Domain (${externalDomains}) = NICHT als Seite. Nur als Teaser/Link wenn nötig.
|
||||
|
||||
### KONTEXT:
|
||||
Bestehende Navigation: ${existingNav}
|
||||
Bestehende Services: ${existingServices}
|
||||
Externe Domains (haben eigene Website): ${externalDomains}
|
||||
Angeforderte zusätzliche Seiten aus Briefing: ${(state.auditedFacts as any)?.pages?.join(", ") || "keine spezifischen"}
|
||||
|
||||
### OUTPUT FORMAT (JSON):
|
||||
{
|
||||
"websiteTopic": string, // MAX 3 Wörter, beschreibend
|
||||
"sitemap": [
|
||||
{
|
||||
"category": string, // Echter Nav-Eintrag. KEIN Marketingsprech.
|
||||
"pages": [
|
||||
{ "title": string, "desc": string } // Echte Unterseite, 1-2 Sätze Zweck
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
`;
|
||||
|
||||
const userPrompt = `
|
||||
BRIEFING:
|
||||
${state.briefing}
|
||||
|
||||
FAKTEN (aus Extraktion):
|
||||
${JSON.stringify({ facts: state.auditedFacts, strategy: { briefingSummary: state.briefingSummary } }, null, 2)}
|
||||
|
||||
Erstelle den Seitenbaum. Baue die Leistungen DETAILLIERT aus — echte Unterseiten pro Kompetenzbereich.
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.pro,
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
// Normalize sitemap structure
|
||||
let sitemap = data.sitemap;
|
||||
if (sitemap && !Array.isArray(sitemap)) {
|
||||
if (sitemap.categories) sitemap = sitemap.categories;
|
||||
else {
|
||||
const entries = Object.entries(sitemap);
|
||||
if (entries.every(([, v]) => Array.isArray(v))) {
|
||||
sitemap = entries.map(([category, pages]) => ({ category, pages }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(sitemap)) {
|
||||
sitemap = sitemap.map((cat: any) => ({
|
||||
category: cat.category || cat.kategorie || cat.Kategorie || "Allgemein",
|
||||
pages: (cat.pages || cat.seiten || []).map((page: any) => ({
|
||||
title: page.title || page.titel || "Seite",
|
||||
desc: page.desc || page.beschreibung || page.description || "",
|
||||
})),
|
||||
}));
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: { websiteTopic: data.websiteTopic, sitemap },
|
||||
usage: {
|
||||
step: "04-architect",
|
||||
model: models.pro,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Architect step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
233
packages/concept-engine/src/types.ts
Normal file
233
packages/concept-engine/src/types.ts
Normal file
@@ -0,0 +1,233 @@
|
||||
// ============================================================================
|
||||
// @mintel/concept-engine — Core Type Definitions
|
||||
// ============================================================================
|
||||
|
||||
/** Page types recognized during crawling. */
export type PageType =
  | "home"
  | "service"
  | "about"
  | "contact"
  | "career"
  | "portfolio"
  | "blog"
  | "legal"
  | "other";

/** A single crawled page with extracted metadata. */
export interface CrawledPage {
  /** Fully qualified URL the page was fetched from. */
  url: string;
  /** Path component of the URL (e.g. "/kontakt"). */
  pathname: string;
  title: string;
  /** Raw HTML of the page. */
  html: string;
  /** Plain-text extraction of the page content. */
  text: string;
  headings: string[];
  /** Labels found in the page's navigation — presumably from <nav>; see NavItem. */
  navItems: string[];
  /** Free-form feature labels detected on the page — TODO confirm vocabulary. */
  features: string[];
  /** Classified page type (see PageType). */
  type: PageType;
  links: string[];
  images: string[];
  /** Selected <meta>/OpenGraph tags; absent when not present in the HTML. */
  meta: {
    description?: string;
    ogTitle?: string;
    ogImage?: string;
  };
}

/** Navigation item extracted from <nav> elements. */
export interface NavItem {
  label: string;
  href: string;
  /** Nested sub-menu entries, when the nav has a hierarchy. */
  children?: NavItem[];
}

/** Company info extracted from Impressum / footer. All fields are best-effort. */
export interface CompanyInfo {
  name?: string;
  address?: string;
  phone?: string;
  email?: string;
  taxId?: string;
  registerNumber?: string;
  managingDirector?: string;
}

/** A page in the site inventory (condensed view of a CrawledPage). */
export interface PageInventoryItem {
  url: string;
  pathname: string;
  title: string;
  type: PageType;
  headings: string[];
  services: string[];
  hasSearch: boolean;
  hasForms: boolean;
  hasMap: boolean;
  hasVideo: boolean;
  /** Short textual summary of the page content. */
  contentSummary: string;
}

/** Full site profile — deterministic, no LLM involved. */
export interface SiteProfile {
  domain: string;
  /** Timestamp of the crawl — presumably ISO 8601; confirm with producer. */
  crawledAt: string;
  totalPages: number;
  navigation: NavItem[];
  /** Feature labels detected on the existing site (search, forms, etc.). */
  existingFeatures: string[];
  services: string[];
  companyInfo: CompanyInfo;
  pageInventory: PageInventoryItem[];
  colors: string[];
  /** Presumably platform name → profile URL; verify against the crawler. */
  socialLinks: Record<string, string>;
  /** Linked domains that host separate websites (treated as out of scope downstream). */
  externalDomains: string[];
  images: string[];
  /** Employee count as free text, or null when not found. */
  employeeCount: string | null;
}
|
||||
|
||||
/** Configuration for the estimation pipeline. */
export interface PipelineConfig {
  /** OpenRouter API key used for all LLM calls. */
  openrouterKey: string;
  /** Optional Zyte API key for crawling. */
  zyteApiKey?: string;
  /** Directory where pipeline outputs are written. */
  outputDir: string;
  /** Directory holding crawl data. */
  crawlDir: string;
  /** Per-tier model overrides; unspecified tiers fall back to DEFAULT_MODELS. */
  modelsOverride?: Partial<ModelConfig>;
}

/** Model routing configuration — one OpenRouter model id per tier. */
export interface ModelConfig {
  /** Fast/cheap tier (extraction, auditing). */
  flash: string;
  /** Mid tier (strategy, architecture). */
  pro: string;
  /** Highest-capability tier. */
  opus: string;
}

/** Default OpenRouter model ids for each tier. */
export const DEFAULT_MODELS: ModelConfig = {
  flash: "google/gemini-3-flash-preview",
  pro: "google/gemini-3.1-pro-preview",
  opus: "anthropic/claude-opus-4-6",
};
|
||||
|
||||
/** Input for a pipeline run. */
export interface PipelineInput {
  /** Raw client briefing text — treated as the source of truth downstream. */
  briefing: string;
  /** URL of the client's existing website, if any. */
  url?: string;
  /** Budget constraint as free text — presumably e.g. "15.000 €"; verify with callers. */
  budget?: string;
  /** Free-form additional comments. */
  comments?: string;
  /** When true, skip cached data — TODO confirm which caches this affects. */
  clearCache?: boolean;
}

/** State that flows through all concept pipeline steps. */
export interface ConceptState {
  // Input
  briefing: string;
  url?: string;
  comments?: string;

  // Output: Scrape & Analyze
  siteProfile?: SiteProfile;
  crawlDir?: string;

  // Output: Site Audit
  // NOTE(review): typed `any` — the audit report shape is defined elsewhere;
  // consumers access `siteAudit.report.*` fields directly.
  siteAudit?: any;

  // Output: Research
  researchData?: any;

  // Output: Extract (Step 01) — free-form facts object produced by the LLM.
  facts?: Record<string, any>;

  // Output: Audit (Step 02) — facts after the skeptical review pass.
  auditedFacts?: Record<string, any>;

  // Output: Strategy (Step 03)
  briefingSummary?: string;
  designVision?: string;

  // Output: Architecture (Step 04)
  sitemap?: SitemapCategory[];
  websiteTopic?: string;

  // Cost tracking accumulated across all steps.
  usage: UsageStats;
}
|
||||
|
||||
/** Final output of the Concept Engine. */
export interface ProjectConcept {
  domain: string;
  timestamp: string;
  /** The original briefing text the run was based on. */
  briefing: string;
  /** Facts after the Step-02 audit pass. */
  auditedFacts: Record<string, any>;
  siteProfile?: SiteProfile;
  siteAudit?: any;
  researchData?: any;
  /** Step-03 output: summary and vision texts. */
  strategy: {
    briefingSummary: string;
    designVision: string;
  };
  /** Step-04 output: topic label and page tree. */
  architecture: {
    websiteTopic: string;
    sitemap: SitemapCategory[];
  };
  /** Token/cost accounting for the whole run. */
  usage: UsageStats;
}

/** One sitemap category (real nav entry) and its pages. */
export interface SitemapCategory {
  category: string;
  pages: { title: string; desc: string }[];
}

/** Aggregated token and cost totals across all steps. */
export interface UsageStats {
  totalPromptTokens: number;
  totalCompletionTokens: number;
  /** Total cost — presumably USD as reported by OpenRouter; confirm. */
  totalCost: number;
  perStep: StepUsage[];
}

/** Per-step token/cost/duration record. */
export interface StepUsage {
  /** Step identifier, e.g. "02-audit". */
  step: string;
  /** Model id used for the step. */
  model: string;
  promptTokens: number;
  completionTokens: number;
  cost: number;
  /** Wall-clock duration of the step in milliseconds. */
  durationMs: number;
}

/** Result of a single pipeline step. */
export interface StepResult<T = any> {
  success: boolean;
  /** Step payload when success is true. */
  data?: T;
  /** Human-readable error message when success is false. */
  error?: string;
  /** Present when the step made an LLM call. */
  usage?: StepUsage;
}

/** Validation result from the deterministic validator. */
export interface ValidationResult {
  passed: boolean;
  errors: ValidationError[];
  warnings: ValidationWarning[];
}

/** A hard validation failure. */
export interface ValidationError {
  code: string;
  message: string;
  /** Field path the error applies to, when known. */
  field?: string;
  expected?: any;
  actual?: any;
}

/** A soft validation issue that does not fail the run. */
export interface ValidationWarning {
  code: string;
  message: string;
  /** Optional hint on how to resolve the warning. */
  suggestion?: string;
}

/** Step definition for the concept pipeline. */
export interface PipelineStep {
  id: string;
  name: string;
  description: string;
  /** Model tier the step uses, or "none" for deterministic steps. */
  model: "flash" | "pro" | "opus" | "none";
  /** Step implementation; receives current state and run configuration. */
  execute: (
    state: ConceptState,
    config: PipelineConfig,
  ) => Promise<StepResult>;
}
|
||||
28
packages/concept-engine/tsconfig.json
Normal file
28
packages/concept-engine/tsconfig.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"target": "ES2022",
|
||||
"lib": [
|
||||
"ES2022",
|
||||
"DOM"
|
||||
],
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"sourceMap": true
|
||||
},
|
||||
"include": [
|
||||
"src/**/*"
|
||||
],
|
||||
"exclude": [
|
||||
"node_modules",
|
||||
"dist",
|
||||
"**/*.test.ts"
|
||||
]
|
||||
}
|
||||
9
packages/concept-engine/tsup.config.ts
Normal file
9
packages/concept-engine/tsup.config.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
import { defineConfig } from "tsup";

// Bundler config: ESM-only output with type declarations (.d.ts) for both
// the library entry and the CLI; `clean` wipes the output dir before builds.
export default defineConfig({
  entry: ["src/index.ts", "src/cli.ts"],
  format: ["esm"],
  dts: true,
  clean: true,
  target: "es2022",
});
|
||||
@@ -52,11 +52,11 @@ interface Insertion {
|
||||
// Model configuration: specialized models for different tasks
|
||||
const MODELS = {
|
||||
// Structured JSON output, research planning, diagram models: {
|
||||
STRUCTURED: "google/gemini-2.5-flash",
|
||||
ROUTING: "google/gemini-2.5-flash",
|
||||
CONTENT: "google/gemini-2.5-pro",
|
||||
STRUCTURED: "google/gemini-3-flash-preview",
|
||||
ROUTING: "google/gemini-3-flash-preview",
|
||||
CONTENT: "google/gemini-3.1-pro-preview",
|
||||
// Mermaid diagram generation - User requested Pro
|
||||
DIAGRAM: "google/gemini-2.5-pro",
|
||||
DIAGRAM: "google/gemini-3.1-pro-preview",
|
||||
} as const;
|
||||
|
||||
/** Strip markdown fences that some models wrap around JSON despite response_format */
|
||||
@@ -831,12 +831,12 @@ Return ONLY the JSON.`,
|
||||
const componentsContext =
|
||||
components.length > 0
|
||||
? `\n\nAvailable Components:\n` +
|
||||
components
|
||||
.map(
|
||||
(c) =>
|
||||
`- <${c.name}>: ${c.description}\n Example: ${c.usageExample}`,
|
||||
)
|
||||
.join("\n")
|
||||
components
|
||||
.map(
|
||||
(c) =>
|
||||
`- <${c.name}>: ${c.description}\n Example: ${c.usageExample}`,
|
||||
)
|
||||
.join("\n")
|
||||
: "";
|
||||
|
||||
const response = await this.openai.chat.completions.create({
|
||||
|
||||
@@ -214,7 +214,7 @@ export class AiBlogPostOrchestrator {
|
||||
|
||||
async generateSlug(content: string, title?: string, instructions?: string): Promise<string> {
|
||||
const response = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
@@ -347,7 +347,7 @@ Example output: "A complex network of glowing fiber optic nodes forming a recurs
|
||||
|
||||
private async identifyTopics(content: string): Promise<string[]> {
|
||||
const response = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash", // fast structured model for topic extraction
|
||||
model: "google/gemini-3-flash-preview", // fast structured model for topic extraction
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
|
||||
40
packages/estimation-engine/src/_test_pipeline.ts
Normal file
40
packages/estimation-engine/src/_test_pipeline.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { config as dotenvConfig } from 'dotenv';
|
||||
import * as path from 'node:path';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { EstimationPipeline } from './pipeline.js';
|
||||
|
||||
dotenvConfig({ path: path.resolve(process.cwd(), '../../.env') });
|
||||
|
||||
const briefing = await fs.readFile(
|
||||
path.resolve(process.cwd(), '../../data/briefings/etib.txt'),
|
||||
'utf8',
|
||||
);
|
||||
|
||||
console.log(`Briefing loaded: ${briefing.length} chars`);
|
||||
|
||||
const pipeline = new EstimationPipeline(
|
||||
{
|
||||
openrouterKey: process.env.OPENROUTER_API_KEY || '',
|
||||
zyteApiKey: process.env.ZYTE_API_KEY,
|
||||
outputDir: path.resolve(process.cwd(), '../../out/estimations'),
|
||||
crawlDir: path.resolve(process.cwd(), '../../data/crawls'),
|
||||
},
|
||||
{
|
||||
onStepStart: (id, name) => console.log(`[CB] Starting: ${id}`),
|
||||
onStepComplete: (id) => console.log(`[CB] Done: ${id}`),
|
||||
onStepError: (id, err) => console.error(`[CB] Error in ${id}: ${err}`),
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
const result = await pipeline.run({
|
||||
briefing,
|
||||
url: 'https://www.e-tib.com',
|
||||
});
|
||||
|
||||
console.log('\n✨ Pipeline complete!');
|
||||
console.log('Validation:', result.validationResult?.passed ? 'PASSED' : 'FAILED');
|
||||
} catch (err: any) {
|
||||
console.error('\n❌ Pipeline failed:', err.message);
|
||||
console.error(err.stack);
|
||||
}
|
||||
78
packages/estimation-engine/src/cli.ts
Normal file
78
packages/estimation-engine/src/cli.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env node
|
||||
// ============================================================================
|
||||
// @mintel/estimation-engine — CLI Entry Point
|
||||
// ============================================================================
|
||||
|
||||
import { Command } from "commander";
|
||||
import * as path from "node:path";
|
||||
import * as fs from "node:fs/promises";
|
||||
import { existsSync } from "node:fs";
|
||||
import { config as dotenvConfig } from "dotenv";
|
||||
import { EstimationPipeline } from "./pipeline.js";
|
||||
import type { ProjectConcept } from "@mintel/concept-engine";
|
||||
|
||||
// Load .env from the monorepo root, then a package-local .env. Note that
// dotenv does not overwrite variables that are already set, so for keys
// present in both files the root .env wins — TODO confirm this precedence
// is the intended behavior.
dotenvConfig({ path: path.resolve(process.cwd(), "../../.env") });
dotenvConfig({ path: path.resolve(process.cwd(), ".env") });
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name("estimate")
|
||||
.description("AI-powered project estimation engine")
|
||||
.version("1.0.0");
|
||||
|
||||
program
|
||||
.command("run")
|
||||
.description("Run the financial estimation pipeline from a concept file")
|
||||
.argument("<concept-file>", "Path to the ProjectConcept JSON file")
|
||||
.option("--budget <budget>", "Budget constraint (e.g. '15.000 €')")
|
||||
.option("--output <dir>", "Output directory", "../../out/estimations")
|
||||
.action(async (conceptFile: string, options: any) => {
|
||||
const openrouterKey = process.env.OPENROUTER_API_KEY || process.env.OPENROUTER_KEY;
|
||||
if (!openrouterKey) {
|
||||
console.error("❌ OPENROUTER_API_KEY not found in environment.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const filePath = path.resolve(process.cwd(), conceptFile);
|
||||
if (!existsSync(filePath)) {
|
||||
console.error(`❌ Concept file not found: ${filePath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`📄 Loading concept from: ${filePath}`);
|
||||
const rawConcept = await fs.readFile(filePath, "utf8");
|
||||
const concept = JSON.parse(rawConcept) as ProjectConcept;
|
||||
|
||||
const pipeline = new EstimationPipeline(
|
||||
{
|
||||
openrouterKey,
|
||||
outputDir: path.resolve(process.cwd(), options.output),
|
||||
crawlDir: "" // No longer needed here
|
||||
},
|
||||
{
|
||||
onStepStart: (id, name) => { },
|
||||
onStepComplete: (id, result) => { },
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
const result = await pipeline.run({
|
||||
concept,
|
||||
budget: options.budget,
|
||||
});
|
||||
|
||||
console.log("\n✨ Estimation complete!");
|
||||
|
||||
if (result.validationResult && !result.validationResult.passed) {
|
||||
console.log(`\n⚠️ ${result.validationResult.errors.length} validation issues found.`);
|
||||
console.log(" Review the output JSON and re-run problematic steps.");
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(`\n❌ Pipeline failed: ${(err as Error).message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
program.parse();
|
||||
9
packages/estimation-engine/src/index.ts
Normal file
9
packages/estimation-engine/src/index.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
// ============================================================================
// @mintel/estimation-engine — Public API
// ============================================================================

// Pipeline orchestrator and the callback contract used to observe its steps.
export { EstimationPipeline } from "./pipeline.js";
export type { PipelineCallbacks } from "./pipeline.js";
// Validation of estimation output.
export { validateEstimation } from "./validators.js";
// Low-level OpenRouter LLM client helpers.
export { llmRequest, llmJsonRequest, cleanJson } from "./llm-client.js";
// Shared type definitions.
export * from "./types.js";
|
||||
128
packages/estimation-engine/src/llm-client.ts
Normal file
128
packages/estimation-engine/src/llm-client.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
// ============================================================================
|
||||
// LLM Client — Unified interface with model routing via OpenRouter
|
||||
// ============================================================================
|
||||
|
||||
import axios from "axios";
|
||||
|
||||
/** Input for a single LLM call routed through OpenRouter. */
interface LLMRequestOptions {
  /** OpenRouter model id, e.g. "google/gemini-3-flash-preview". */
  model: string;
  systemPrompt: string;
  userPrompt: string;
  /** When true (the default in llmRequest), request JSON-object output. */
  jsonMode?: boolean;
  /** OpenRouter API key; sent as a Bearer token. */
  apiKey: string;
}

/** LLM response content plus token/cost accounting. */
interface LLMResponse {
  /** Message content returned by the model. */
  content: string;
  usage: {
    promptTokens: number;
    completionTokens: number;
    /** Cost as reported by OpenRouter, or a rough token-based estimate. */
    cost: number;
  };
}
|
||||
|
||||
/**
|
||||
* Clean raw LLM output to parseable JSON.
|
||||
* Handles markdown fences, control chars, trailing commas.
|
||||
*/
|
||||
export function cleanJson(str: string): string {
|
||||
let cleaned = str.replace(/```json\n?|```/g, "").trim();
|
||||
cleaned = cleaned.replace(
|
||||
/[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F-\u009F]/g,
|
||||
" ",
|
||||
);
|
||||
cleaned = cleaned.replace(/,\s*([\]}])/g, "$1");
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a request to an LLM via OpenRouter.
|
||||
*/
|
||||
export async function llmRequest(options: LLMRequestOptions): Promise<LLMResponse> {
|
||||
const { model, systemPrompt, userPrompt, jsonMode = true, apiKey } = options;
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
const resp = await axios.post(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
{
|
||||
model,
|
||||
messages: [
|
||||
{ role: "system", content: systemPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
],
|
||||
...(jsonMode ? { response_format: { type: "json_object" } } : {}),
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout: 120000,
|
||||
},
|
||||
);
|
||||
|
||||
const content = resp.data.choices?.[0]?.message?.content;
|
||||
if (!content) {
|
||||
throw new Error(`LLM returned no content. Model: ${model}`);
|
||||
}
|
||||
|
||||
let cost = 0;
|
||||
const usage = resp.data.usage || {};
|
||||
if (usage.cost !== undefined) {
|
||||
cost = usage.cost;
|
||||
} else {
|
||||
// Fallback estimation
|
||||
cost =
|
||||
(usage.prompt_tokens || 0) * (0.1 / 1_000_000) +
|
||||
(usage.completion_tokens || 0) * (0.4 / 1_000_000);
|
||||
}
|
||||
|
||||
return {
|
||||
content,
|
||||
usage: {
|
||||
promptTokens: usage.prompt_tokens || 0,
|
||||
completionTokens: usage.completion_tokens || 0,
|
||||
cost,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a request and parse the response as JSON.
|
||||
*/
|
||||
export async function llmJsonRequest<T = any>(
|
||||
options: LLMRequestOptions,
|
||||
): Promise<{ data: T; usage: LLMResponse["usage"] }> {
|
||||
const response = await llmRequest({ ...options, jsonMode: true });
|
||||
const cleaned = cleanJson(response.content);
|
||||
|
||||
let parsed: T;
|
||||
try {
|
||||
parsed = JSON.parse(cleaned);
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`Failed to parse LLM JSON response: ${(e as Error).message}\nRaw: ${cleaned.substring(0, 500)}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Unwrap common LLM artifacts: {"0": {...}}, {"state": {...}}, etc.
|
||||
const unwrapped = unwrapResponse(parsed);
|
||||
|
||||
return { data: unwrapped as T, usage: response.usage };
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively unwrap common LLM wrapping patterns.
|
||||
*/
|
||||
function unwrapResponse(obj: any): any {
|
||||
if (!obj || typeof obj !== "object" || Array.isArray(obj)) return obj;
|
||||
const keys = Object.keys(obj);
|
||||
if (keys.length === 1) {
|
||||
const key = keys[0];
|
||||
if (key === "0" || key === "state" || key === "facts" || key === "result" || key === "data") {
|
||||
return unwrapResponse(obj[key]);
|
||||
}
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
228
packages/estimation-engine/src/pipeline.ts
Normal file
228
packages/estimation-engine/src/pipeline.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
// ============================================================================
|
||||
// Pipeline Orchestrator
|
||||
// Runs all steps sequentially, tracks state, supports re-running individual steps.
|
||||
// ============================================================================
|
||||
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as path from "node:path";
|
||||
import { existsSync } from "node:fs";
|
||||
import { validateEstimation } from "./validators.js";
|
||||
import { executeSynthesize } from "./steps/05-synthesize.js";
|
||||
import { executeCritique } from "./steps/06-critique.js";
|
||||
import type {
|
||||
PipelineConfig,
|
||||
PipelineInput,
|
||||
EstimationState,
|
||||
StepResult,
|
||||
StepUsage,
|
||||
} from "./types.js";
|
||||
|
||||
/**
 * Optional hooks invoked by the pipeline so a host (CLI, server) can observe
 * progress without coupling to the pipeline's console output.
 */
export interface PipelineCallbacks {
  // Fired right before a step's executor runs.
  onStepStart?: (stepId: string, stepName: string) => void;
  // Fired after a step finishes successfully.
  onStepComplete?: (stepId: string, result: StepResult) => void;
  // Fired when a step reports failure or throws; receives the error message.
  onStepError?: (stepId: string, error: string) => void;
}
|
||||
|
||||
/**
|
||||
* The main estimation pipeline orchestrator.
|
||||
* Runs steps sequentially, persists state between steps, supports re-entry.
|
||||
*/
|
||||
export class EstimationPipeline {
|
||||
private config: PipelineConfig;
|
||||
private state: EstimationState;
|
||||
private callbacks: PipelineCallbacks;
|
||||
|
||||
constructor(config: PipelineConfig, callbacks: PipelineCallbacks = {}) {
|
||||
this.config = config;
|
||||
this.callbacks = callbacks;
|
||||
this.state = this.createInitialState();
|
||||
}
|
||||
|
||||
private createInitialState(): EstimationState {
|
||||
return {
|
||||
concept: null as any, // Will be set in run()
|
||||
usage: {
|
||||
totalPromptTokens: 0,
|
||||
totalCompletionTokens: 0,
|
||||
totalCost: 0,
|
||||
perStep: [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the full estimation pipeline from a completed project concept.
|
||||
*/
|
||||
async run(input: PipelineInput): Promise<EstimationState> {
|
||||
this.state.concept = input.concept;
|
||||
this.state.budget = input.budget;
|
||||
|
||||
// Ensure output directories
|
||||
await fs.mkdir(this.config.outputDir, { recursive: true });
|
||||
|
||||
// Step 5: Position synthesis
|
||||
await this.runStep("05-synthesize", "Position Descriptions", async () => {
|
||||
const result = await executeSynthesize(this.state, this.config);
|
||||
if (result.success) this.state.positionDescriptions = result.data;
|
||||
return result;
|
||||
});
|
||||
|
||||
// Step 6: Quality critique
|
||||
await this.runStep("06-critique", "Quality Gate (Industrial Critic)", async () => {
|
||||
const result = await executeCritique(this.state, this.config);
|
||||
if (result.success) {
|
||||
this.state.critiquePassed = result.data.passed;
|
||||
this.state.critiqueErrors = result.data.errors?.map((e: any) => `${e.field}: ${e.issue}`) || [];
|
||||
|
||||
// Apply corrections
|
||||
if (result.data.corrections) {
|
||||
const corrections = result.data.corrections;
|
||||
// Note: We only correct the positionDescriptions since briefing/design/sitemap are locked in the concept phase.
|
||||
// If the critique suggests changes to those, it should be a warning or failure.
|
||||
if (corrections.positionDescriptions) {
|
||||
this.state.positionDescriptions = {
|
||||
...this.state.positionDescriptions,
|
||||
...corrections.positionDescriptions,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
});
|
||||
|
||||
// Step 7: Deterministic validation
|
||||
await this.runStep("07-validate", "Deterministic Validation", async () => {
|
||||
// Build the merged form state first
|
||||
this.state.formState = this.buildFormState();
|
||||
const validationResult = validateEstimation(this.state);
|
||||
this.state.validationResult = validationResult;
|
||||
|
||||
if (!validationResult.passed) {
|
||||
console.log("\n⚠️ Validation Issues:");
|
||||
for (const error of validationResult.errors) {
|
||||
console.log(` ❌ [${error.code}] ${error.message}`);
|
||||
}
|
||||
}
|
||||
if (validationResult.warnings.length > 0) {
|
||||
console.log("\n⚡ Warnings:");
|
||||
for (const warning of validationResult.warnings) {
|
||||
console.log(` ⚡ [${warning.code}] ${warning.message}`);
|
||||
if (warning.suggestion) console.log(` → ${warning.suggestion}`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: validationResult,
|
||||
usage: { step: "07-validate", model: "none", promptTokens: 0, completionTokens: 0, cost: 0, durationMs: 0 },
|
||||
};
|
||||
});
|
||||
|
||||
// Save final state
|
||||
await this.saveState();
|
||||
|
||||
return this.state;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a single step with callbacks and error handling.
|
||||
*/
|
||||
private async runStep(
|
||||
stepId: string,
|
||||
stepName: string,
|
||||
executor: () => Promise<StepResult>,
|
||||
): Promise<void> {
|
||||
this.callbacks.onStepStart?.(stepId, stepName);
|
||||
console.log(`\n📍 ${stepName}...`);
|
||||
|
||||
try {
|
||||
const result = await executor();
|
||||
if (result.usage) {
|
||||
this.state.usage.perStep.push(result.usage);
|
||||
this.state.usage.totalPromptTokens += result.usage.promptTokens;
|
||||
this.state.usage.totalCompletionTokens += result.usage.completionTokens;
|
||||
this.state.usage.totalCost += result.usage.cost;
|
||||
}
|
||||
|
||||
if (result.success) {
|
||||
const cost = result.usage?.cost ? ` ($${result.usage.cost.toFixed(4)})` : "";
|
||||
const duration = result.usage?.durationMs ? ` [${(result.usage.durationMs / 1000).toFixed(1)}s]` : "";
|
||||
console.log(` ✅ ${stepName} complete${cost}${duration}`);
|
||||
this.callbacks.onStepComplete?.(stepId, result);
|
||||
} else {
|
||||
console.error(` ❌ ${stepName} failed: ${result.error}`);
|
||||
this.callbacks.onStepError?.(stepId, result.error || "Unknown error");
|
||||
throw new Error(result.error);
|
||||
}
|
||||
} catch (err) {
|
||||
const errorMsg = (err as Error).message;
|
||||
this.callbacks.onStepError?.(stepId, errorMsg);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the final FormState compatible with @mintel/pdf.
|
||||
*/
|
||||
private buildFormState(): Record<string, any> {
|
||||
const facts = this.state.concept.auditedFacts || {};
|
||||
return {
|
||||
projectType: "website",
|
||||
...facts,
|
||||
briefingSummary: this.state.concept.strategy.briefingSummary || "",
|
||||
designVision: this.state.concept.strategy.designVision || "",
|
||||
sitemap: this.state.concept.architecture.sitemap || [],
|
||||
positionDescriptions: this.state.positionDescriptions || {},
|
||||
websiteTopic: this.state.concept.architecture.websiteTopic || facts.websiteTopic || "",
|
||||
statusQuo: facts.isRelaunch ? "Relaunch" : "Neuentwicklung",
|
||||
name: facts.personName || "",
|
||||
email: facts.email || "",
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Save the full state to disk for later re-use.
|
||||
*/
|
||||
private async saveState(): Promise<void> {
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||
const companyName = this.state.concept.auditedFacts?.companyName || "unknown";
|
||||
|
||||
// Save full state
|
||||
const stateDir = path.join(this.config.outputDir, "json");
|
||||
await fs.mkdir(stateDir, { recursive: true });
|
||||
|
||||
const statePath = path.join(stateDir, `${companyName}_${timestamp}.json`);
|
||||
await fs.writeFile(statePath, JSON.stringify(this.state.formState, null, 2));
|
||||
console.log(`\n📦 Saved state to: ${statePath}`);
|
||||
|
||||
// Save full pipeline state (for debugging / re-entry)
|
||||
const debugPath = path.join(stateDir, `${companyName}_${timestamp}_debug.json`);
|
||||
await fs.writeFile(debugPath, JSON.stringify(this.state, null, 2));
|
||||
|
||||
// Print usage summary
|
||||
console.log("\n──────────────────────────────────────────────");
|
||||
console.log("📊 PIPELINE USAGE SUMMARY");
|
||||
console.log("──────────────────────────────────────────────");
|
||||
for (const step of this.state.usage.perStep) {
|
||||
if (step.cost > 0) {
|
||||
console.log(` ${step.step}: ${step.model} — $${step.cost.toFixed(6)} (${(step.durationMs / 1000).toFixed(1)}s)`);
|
||||
}
|
||||
}
|
||||
console.log("──────────────────────────────────────────────");
|
||||
console.log(` TOTAL: $${this.state.usage.totalCost.toFixed(6)}`);
|
||||
console.log(` Tokens: ${(this.state.usage.totalPromptTokens + this.state.usage.totalCompletionTokens).toLocaleString()}`);
|
||||
console.log("──────────────────────────────────────────────\n");
|
||||
}
|
||||
|
||||
/** Get the current state (for CLI inspection). */
|
||||
getState(): EstimationState {
|
||||
return this.state;
|
||||
}
|
||||
|
||||
/** Load a saved state from JSON. */
|
||||
async loadState(jsonPath: string): Promise<void> {
|
||||
const raw = await fs.readFile(jsonPath, "utf8");
|
||||
const formState = JSON.parse(raw);
|
||||
this.state.formState = formState;
|
||||
}
|
||||
}
|
||||
95
packages/estimation-engine/src/steps/05-synthesize.ts
Normal file
95
packages/estimation-engine/src/steps/05-synthesize.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
// ============================================================================
|
||||
// Step 05: Synthesize — Position Descriptions (Gemini Pro)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { EstimationState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
export async function executeSynthesize(
|
||||
state: EstimationState,
|
||||
config: PipelineConfig,
|
||||
): Promise<StepResult> {
|
||||
const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!state.concept?.auditedFacts || !state.concept?.architecture?.sitemap) {
|
||||
return { success: false, error: "Missing audited facts or sitemap." };
|
||||
}
|
||||
|
||||
const facts = state.concept.auditedFacts;
|
||||
|
||||
// Determine which positions are required
|
||||
const requiredPositions = [
|
||||
"Das technische Fundament",
|
||||
(facts.selectedPages?.length || 0) + (facts.otherPages?.length || 0) > 0
|
||||
? "Individuelle Seiten"
|
||||
: null,
|
||||
facts.features?.length > 0 ? "System-Module (Features)" : null,
|
||||
facts.functions?.length > 0 ? "Logik-Funktionen" : null,
|
||||
facts.apiSystems?.length > 0 ? "Schnittstellen (API)" : null,
|
||||
facts.cmsSetup ? "Inhalts-Verwaltung" : null,
|
||||
facts.multilang ? "Mehrsprachigkeit" : null,
|
||||
"Inhaltliche Initial-Pflege",
|
||||
"Sorglos Betrieb",
|
||||
].filter(Boolean);
|
||||
|
||||
const systemPrompt = `
|
||||
You are a Senior Solution Architect. Write position descriptions for a professional B2B quote.
|
||||
|
||||
### REQUIRED POSITIONS (STRICT — ONLY DESCRIBE THESE):
|
||||
${requiredPositions.map((p) => `"${p}"`).join(", ")}
|
||||
|
||||
### RULES (STRICT):
|
||||
1. NO FIRST PERSON: NEVER "Ich", "Mein", "Wir", "Unser". Lead with nouns or passive verbs.
|
||||
2. QUANTITY PARITY: Description MUST list EXACTLY the number of items matching 'qty'.
|
||||
3. CMS GUARD: If cmsSetup=false, do NOT mention "CMS", "Inhaltsverwaltung". Use "Plattform-Struktur".
|
||||
4. TONE: "Erstellung von...", "Anbindung der...", "Bereitstellung von...". Technical, high-density.
|
||||
5. PAGES: List actual page names. NO implementation notes in parentheses.
|
||||
6. HARD SPECIFICS: Use industry terms from the briefing (e.g. "Kabeltiefbau", "110 kV").
|
||||
7. KEYS: Return EXACTLY the keys from REQUIRED POSITIONS.
|
||||
8. NO AGB: NEVER mention "AGB" or "Geschäftsbedingungen".
|
||||
9. Sorglos Betrieb: "Inklusive 1 Jahr technischer Betrieb, Hosting, SSL, Sicherheits-Updates, Monitoring und techn. Support."
|
||||
10. Inhaltliche Initial-Pflege: Refers to DATENSÄTZE (datasets like products, references), NOT Seiten.
|
||||
Use "Datensätze" in the description, not "Seiten".
|
||||
11. Mehrsprachigkeit: This is a +20% markup on the subtotal. NOT an API. NOT a Schnittstelle.
|
||||
|
||||
### EXAMPLES:
|
||||
- GOOD: "Erstellung der Seiten: Startseite, Über uns, Leistungen, Kontakt."
|
||||
- GOOD: "Native API-Anbindung an Google Maps mit individueller Standort-Visualisierung."
|
||||
- BAD: "Ich richte dir das CMS ein."
|
||||
- BAD: "Verschiedene Funktionen" (too generic — name the things!)
|
||||
|
||||
### DATA CONTEXT:
|
||||
${JSON.stringify({ facts, sitemap: state.concept.architecture.sitemap, strategy: { briefingSummary: state.concept.strategy.briefingSummary } }, null, 2)}
|
||||
|
||||
### OUTPUT FORMAT:
|
||||
{
|
||||
"positionDescriptions": { "Das technische Fundament": string, ... }
|
||||
}
|
||||
`;
|
||||
|
||||
try {
|
||||
const { data, usage } = await llmJsonRequest({
|
||||
model: models.pro,
|
||||
systemPrompt,
|
||||
userPrompt: state.concept.briefing,
|
||||
apiKey: config.openrouterKey,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: data.positionDescriptions || data,
|
||||
usage: {
|
||||
step: "05-synthesize",
|
||||
model: models.pro,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
cost: usage.cost,
|
||||
durationMs: Date.now() - startTime,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
return { success: false, error: `Synthesize step failed: ${(err as Error).message}` };
|
||||
}
|
||||
}
|
||||
99
packages/estimation-engine/src/steps/06-critique.ts
Normal file
99
packages/estimation-engine/src/steps/06-critique.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// ============================================================================
|
||||
// Step 06: Critique — Industrial Critic Quality Gate (Claude Opus)
|
||||
// ============================================================================
|
||||
|
||||
import { llmJsonRequest } from "../llm-client.js";
|
||||
import type { EstimationState, StepResult, PipelineConfig } from "../types.js";
|
||||
import { DEFAULT_MODELS } from "../types.js";
|
||||
|
||||
/**
 * Step 06: run the "Industrial Critic" quality gate (opus-tier model) over
 * the assembled estimation state.
 *
 * @param state - Pipeline state after position synthesis (step 05).
 * @param config - Pipeline configuration (API key, model overrides).
 * @returns StepResult whose data matches the prompt's OUTPUT FORMAT:
 *          { passed, errors[], corrections{} }.
 */
export async function executeCritique(
  state: EstimationState,
  config: PipelineConfig,
): Promise<StepResult> {
  const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
  const startTime = Date.now();

  // Snapshot of everything the critic is allowed to judge. siteProfile is
  // reduced to the fields the checklist references (features, services,
  // external domains, navigation, page count).
  const currentState = {
    facts: state.concept?.auditedFacts,
    briefingSummary: state.concept?.strategy?.briefingSummary,
    designVision: state.concept?.strategy?.designVision,
    sitemap: state.concept?.architecture?.sitemap,
    positionDescriptions: state.positionDescriptions,
    siteProfile: state.concept?.siteProfile
      ? {
          existingFeatures: state.concept.siteProfile.existingFeatures,
          services: state.concept.siteProfile.services,
          externalDomains: state.concept.siteProfile.externalDomains,
          navigation: state.concept.siteProfile.navigation,
          totalPages: state.concept.siteProfile.totalPages,
        }
      : null,
  };

  const systemPrompt = `
You are the "Industrial Critic" — the final quality gate for a professional B2B estimation.
Your job is to find EVERY error, hallucination, and inconsistency before this goes to the client.

### CRITICAL ERROR CHECKLIST (FAIL IF ANY FOUND):
1. HALLUCINATION: FAIL if names, software versions, or details not in the BRIEFING are used.
   - "Sie", "Ansprechpartner" for personName when an actual name exists = FAIL.
2. LOGIC CONFLICT: FAIL if isRelaunch=true but text claims "no website exists".
3. IMPLEMENTATION FLUFF: FAIL if "React", "Next.js", "TypeScript", "Tailwind" are mentioned.
4. GENERICISM: FAIL if text could apply to ANY company. Must use specific industry terms.
5. NAMEN-VERBOT: FAIL if personal names in briefingSummary or designVision.
6. CMS-LEAKAGE: FAIL if cmsSetup=false but descriptions mention "CMS", "Inhaltsverwaltung".
7. AGB BAN: FAIL if "AGB" or "Geschäftsbedingungen" appear anywhere.
8. LENGTH: briefingSummary ~6 sentences, designVision ~4 sentences. Shorten if too wordy.
9. LEGAL SAFETY: FAIL if "rechtssicher" is used. Use "Standard-konform" instead.
10. BULLSHIT DETECTOR: FAIL if jargon like "SEO-Standards zur Fachkräftesicherung",
    "B2B-Nutzerströme", "Digitale Konvergenzstrategie" or similar meaningless buzzwords are used.
    The text must make SENSE to a construction industry CEO.
11. PAGE STRUCTURE: FAIL if the sitemap contains:
    - Videos as pages (Messefilm, Imagefilm)
    - Internal functions as pages (Verwaltung)
    - Entities with their own domains as sub-pages (check externalDomains!)
12. SORGLOS-BETRIEB: FAIL if not mentioned in the summary or position descriptions.
13. TONE: FAIL if "wir/unser" or "Ich/Mein" in position descriptions. FAIL if marketing fluff.
14. MULTILANG: FAIL if Mehrsprachigkeit is described as an API or Schnittstelle.
15. INITIAL-PFLEGE: FAIL if described in terms of "Seiten" instead of "Datensätze".

### MISSION:
Return corrected fields ONLY for fields with issues. If everything passes, return empty corrections.

### OUTPUT FORMAT:
{
  "passed": boolean,
  "errors": [{ "field": string, "issue": string, "severity": "critical" | "warning" }],
  "corrections": {
    "briefingSummary"?: string,
    "designVision"?: string,
    "positionDescriptions"?: Record<string, string>,
    "sitemap"?: array
  }
}
`;

  try {
    // The briefing is passed as ground truth alongside the state under review.
    const { data, usage } = await llmJsonRequest({
      model: models.opus,
      systemPrompt,
      userPrompt: `BRIEFING_TRUTH:\n${state.concept?.briefing}\n\nCURRENT_STATE:\n${JSON.stringify(currentState, null, 2)}`,
      apiKey: config.openrouterKey,
    });

    return {
      success: true,
      data,
      usage: {
        step: "06-critique",
        model: models.opus,
        promptTokens: usage.promptTokens,
        completionTokens: usage.completionTokens,
        cost: usage.cost,
        durationMs: Date.now() - startTime,
      },
    };
  } catch (err) {
    return { success: false, error: `Critique step failed: ${(err as Error).message}` };
  }
}
|
||||
113
packages/estimation-engine/src/types.ts
Normal file
113
packages/estimation-engine/src/types.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
// ============================================================================
|
||||
// @mintel/estimation-engine — Core Type Definitions
|
||||
// ============================================================================
|
||||
|
||||
import type { ProjectConcept, SitemapCategory } from "@mintel/concept-engine";
|
||||
|
||||
/** Configuration for the estimation pipeline */
export interface PipelineConfig {
  // OpenRouter API key used for all LLM calls.
  openrouterKey: string;
  // Optional crawling API key — presumably for site profiling; not used by every step.
  zyteApiKey?: string;
  // Directory where JSON state and artifacts are written.
  outputDir: string;
  // Directory holding crawl results — TODO confirm consumer.
  crawlDir: string;
  // Per-tier model overrides; merged over DEFAULT_MODELS.
  modelsOverride?: Partial<ModelConfig>;
}

/** Model routing configuration */
export interface ModelConfig {
  // Fast/cheap tier.
  flash: string;
  // Mid tier (used for position synthesis).
  pro: string;
  // Strongest tier (used for the quality-gate critique).
  opus: string;
}

// Default OpenRouter model ids per tier.
export const DEFAULT_MODELS: ModelConfig = {
  flash: "google/gemini-3-flash-preview",
  pro: "google/gemini-3.1-pro-preview",
  opus: "anthropic/claude-opus-4-6",
};

/** Input for the estimation pipeline */
export interface PipelineInput {
  concept: ProjectConcept;
  // Free-text budget hint, carried through to the state.
  budget?: string;
}

/** State that flows through all pipeline steps */
export interface EstimationState {
  // Input
  concept: ProjectConcept;
  budget?: string;

  // Step 5 output: Position Synthesis
  positionDescriptions?: Record<string, string>;

  // Step 6 output: Critique
  critiquePassed?: boolean;
  critiqueErrors?: string[];

  // Step 7 output: Validation
  validationResult?: ValidationResult;

  // Final merged form state for PDF generation
  formState?: Record<string, any>;

  // Cost tracking
  usage: UsageStats;
}

/** Aggregated token/cost accounting across all steps. */
export interface UsageStats {
  totalPromptTokens: number;
  totalCompletionTokens: number;
  // Total cost in USD (provider-reported or estimated).
  totalCost: number;
  perStep: StepUsage[];
}

/** Usage record for one executed step. */
export interface StepUsage {
  // Step id, e.g. "05-synthesize".
  step: string;
  // Model id used for the step ("none" for deterministic steps).
  model: string;
  promptTokens: number;
  completionTokens: number;
  // Step cost in USD.
  cost: number;
  durationMs: number;
}

/** Result of a single pipeline step */
export interface StepResult<T = any> {
  success: boolean;
  // Step payload when success is true.
  data?: T;
  // Human-readable failure reason when success is false.
  error?: string;
  usage?: StepUsage;
}

/** Validation result from the deterministic validator */
export interface ValidationResult {
  // True when no errors were found (warnings do not fail validation).
  passed: boolean;
  errors: ValidationError[];
  warnings: ValidationWarning[];
}

/** A hard validation failure. */
export interface ValidationError {
  code: string;
  message: string;
  // Dotted path of the offending field, when known.
  field?: string;
  expected?: any;
  actual?: any;
}

/** A soft validation finding — does not fail validation. */
export interface ValidationWarning {
  code: string;
  message: string;
  // Suggested remediation, when available.
  suggestion?: string;
}

/** Step definition for the pipeline */
export interface PipelineStep {
  id: string;
  name: string;
  description: string;
  // Which model tier the step uses ("none" for deterministic steps).
  model: "flash" | "pro" | "opus" | "none";
  execute: (
    state: EstimationState,
    config: PipelineConfig,
  ) => Promise<StepResult>;
}
|
||||
380
packages/estimation-engine/src/validators.ts
Normal file
380
packages/estimation-engine/src/validators.ts
Normal file
@@ -0,0 +1,380 @@
|
||||
// ============================================================================
|
||||
// Validators — Deterministic Math & Logic Checks (NO LLM!)
|
||||
// Catches all the issues reported by the user programmatically.
|
||||
// ============================================================================
|
||||
|
||||
import type { EstimationState, ValidationResult, ValidationError, ValidationWarning } from "./types.js";
|
||||
|
||||
/**
|
||||
* Run all deterministic validation checks on the final estimation state.
|
||||
*/
|
||||
export function validateEstimation(state: EstimationState): ValidationResult {
|
||||
const errors: ValidationError[] = [];
|
||||
const warnings: ValidationWarning[] = [];
|
||||
|
||||
if (!state.formState) {
|
||||
return { passed: false, errors: [{ code: "NO_FORM_STATE", message: "No form state available for validation." }], warnings: [] };
|
||||
}
|
||||
|
||||
const fs = state.formState;
|
||||
|
||||
// 1. PAGE COUNT PARITY
|
||||
validatePageCountParity(fs, errors);
|
||||
|
||||
// 2. SORGLOS-BETRIEB IN SUMMARY
|
||||
validateSorglosBetrieb(fs, errors, warnings);
|
||||
|
||||
// 3. NO VIDEOS AS PAGES
|
||||
validateNoVideosAsPages(fs, errors);
|
||||
|
||||
// 4. EXTERNAL DOMAINS NOT AS PAGES
|
||||
validateExternalDomains(fs, state.concept?.siteProfile, errors);
|
||||
|
||||
// 5. SERVICE COVERAGE
|
||||
validateServiceCoverage(fs, state.concept?.siteProfile, warnings);
|
||||
|
||||
// 6. EXISTING FEATURE DETECTION
|
||||
validateExistingFeatures(fs, state.concept?.siteProfile, warnings);
|
||||
|
||||
// 7. MULTILANG LABEL CORRECTNESS
|
||||
validateMultilangLabeling(fs, errors);
|
||||
|
||||
// 8. INITIAL-PFLEGE UNITS
|
||||
validateInitialPflegeUnits(fs, warnings);
|
||||
|
||||
// 9. SITEMAP vs PAGE LIST CONSISTENCY
|
||||
validateSitemapConsistency(fs, errors);
|
||||
|
||||
return {
|
||||
passed: errors.length === 0,
|
||||
errors,
|
||||
warnings,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 1. Page count: the "Individuelle Seiten" position description must mention
|
||||
* roughly the same number of pages as the sitemap contains.
|
||||
* "er berechnet 15 Seiten nennt aber nur 11"
|
||||
*
|
||||
* NOTE: fs.pages (from auditedFacts) is a conceptual list of page groups
|
||||
* (e.g. "Leistungen") while the sitemap expands those into sub-pages.
|
||||
* Therefore we do NOT compare fs.pages.length to the sitemap count.
|
||||
* Instead, we verify that the position description text lists the right count.
|
||||
*/
|
||||
function validatePageCountParity(
|
||||
fs: Record<string, any>,
|
||||
errors: ValidationError[],
|
||||
): void {
|
||||
// Count pages listed in the sitemap (the source of truth)
|
||||
let sitemapPageCount = 0;
|
||||
if (Array.isArray(fs.sitemap)) {
|
||||
for (const cat of fs.sitemap) {
|
||||
sitemapPageCount += (cat.pages || []).length;
|
||||
}
|
||||
}
|
||||
if (sitemapPageCount === 0) return;
|
||||
|
||||
// Extract page names mentioned in the "Individuelle Seiten" position description
|
||||
const positions = fs.positionDescriptions || {};
|
||||
const pagesDesc = positions["Individuelle Seiten"] || positions["2. Individuelle Seiten"] || "";
|
||||
if (!pagesDesc) return;
|
||||
|
||||
const descStr = typeof pagesDesc === "string" ? pagesDesc : "";
|
||||
|
||||
// Count distinct page names mentioned (split by comma)
|
||||
// We avoid splitting by "&" or "und" because actual page names like
|
||||
// "Wartung & Störungsdienst" or "Genehmigungs- und Ausführungsplanung" contain them.
|
||||
const afterColon = descStr.includes(":") ? descStr.split(":").slice(1).join(":") : descStr;
|
||||
const segments = afterColon
|
||||
.split(/,/)
|
||||
.map((s: string) => s.replace(/\.$/, "").trim())
|
||||
.filter((s: string) => s.length > 2);
|
||||
|
||||
// Handle consolidated references like "Leistungen (6 Unterseiten)" or "(inkl. Messen)"
|
||||
let mentionedCount = 0;
|
||||
for (const seg of segments) {
|
||||
const subPageMatch = seg.match(/\((\d+)\s+(?:Unter)?[Ss]eiten?\)/);
|
||||
if (subPageMatch) {
|
||||
mentionedCount += parseInt(subPageMatch[1], 10);
|
||||
} else if (seg.match(/\(inkl\.\s+/)) {
|
||||
// "Unternehmen (inkl. Messen)" = 2 pages
|
||||
mentionedCount += 2;
|
||||
} else {
|
||||
mentionedCount += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (mentionedCount > 0 && Math.abs(mentionedCount - sitemapPageCount) > 2) {
|
||||
errors.push({
|
||||
code: "PAGE_COUNT_MISMATCH",
|
||||
message: `Seiten-Beschreibung nennt ~${mentionedCount} Seiten, aber ${sitemapPageCount} Seiten in der Sitemap.`,
|
||||
field: "positionDescriptions.Individuelle Seiten",
|
||||
expected: sitemapPageCount,
|
||||
actual: mentionedCount,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 2. Sorglos-Betrieb must be included in summary.
|
||||
* "Zusammenfassung der Schätzung hat Sorglos-Betrieb nicht miteingenommen"
|
||||
*/
|
||||
function validateSorglosBetrieb(
|
||||
fs: Record<string, any>,
|
||||
errors: ValidationError[],
|
||||
warnings: ValidationWarning[],
|
||||
): void {
|
||||
const positions = fs.positionDescriptions || {};
|
||||
const hasPosition = Object.keys(positions).some(
|
||||
(k) =>
|
||||
k.toLowerCase().includes("sorglos") ||
|
||||
k.toLowerCase().includes("betrieb") ||
|
||||
k.toLowerCase().includes("pflege"),
|
||||
);
|
||||
|
||||
if (!hasPosition) {
|
||||
errors.push({
|
||||
code: "MISSING_SORGLOS_BETRIEB",
|
||||
message: "Der Sorglos-Betrieb fehlt in den Position-Beschreibungen.",
|
||||
field: "positionDescriptions",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 3. Videos must not be treated as separate pages.
|
||||
* "Er hat Videos als eigene Seite aufgenommen"
|
||||
*/
|
||||
function validateNoVideosAsPages(
|
||||
fs: Record<string, any>,
|
||||
errors: ValidationError[],
|
||||
): void {
|
||||
const allPages = [...(fs.selectedPages || []), ...(fs.otherPages || [])];
|
||||
const sitemapPages = Array.isArray(fs.sitemap)
|
||||
? fs.sitemap.flatMap((cat: any) => (cat.pages || []).map((p: any) => p.title))
|
||||
: [];
|
||||
|
||||
const allPageNames = [...allPages, ...sitemapPages];
|
||||
const videoKeywords = ["video", "film", "messefilm", "imagefilm", "clip"];
|
||||
|
||||
for (const pageName of allPageNames) {
|
||||
const lower = (typeof pageName === "string" ? pageName : "").toLowerCase();
|
||||
if (videoKeywords.some((kw) => lower.includes(kw) && !lower.includes("leistung"))) {
|
||||
errors.push({
|
||||
code: "VIDEO_AS_PAGE",
|
||||
message: `"${pageName}" ist ein Video-Asset, keine eigene Seite.`,
|
||||
field: "sitemap",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 4. External sister-company domains must not be proposed as sub-pages.
|
||||
* "er hat ingenieursgesellschaft als seite integriert, die haben aber eine eigene website"
|
||||
*/
|
||||
function validateExternalDomains(
|
||||
fs: Record<string, any>,
|
||||
siteProfile: any,
|
||||
errors: ValidationError[],
|
||||
): void {
|
||||
if (!siteProfile?.externalDomains?.length) return;
|
||||
|
||||
const sitemapPages = Array.isArray(fs.sitemap)
|
||||
? fs.sitemap.flatMap((cat: any) => (cat.pages || []).map((p: any) => p.title || ""))
|
||||
: [];
|
||||
|
||||
for (const extDomain of siteProfile.externalDomains) {
|
||||
// Extract base name (e.g. "etib-ing" from "etib-ing.com")
|
||||
const baseName = extDomain.replace(/^www\./, "").split(".")[0].toLowerCase();
|
||||
|
||||
for (const pageTitle of sitemapPages) {
|
||||
const lower = pageTitle.toLowerCase();
|
||||
// Check if the page title references the external company
|
||||
if (lower.includes(baseName) || (lower.includes("ingenieur") && extDomain.includes("ing"))) {
|
||||
errors.push({
|
||||
code: "EXTERNAL_DOMAIN_AS_PAGE",
|
||||
message: `"${pageTitle}" hat eine eigene Website (${extDomain}) und darf nicht als Unterseite vorgeschlagen werden.`,
|
||||
field: "sitemap",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 5. Services from the existing site should be covered.
|
||||
* "er hat leistungen ausgelassen die ganz klar auf der kompetenz seite genannt werden"
|
||||
*/
|
||||
function validateServiceCoverage(
|
||||
fs: Record<string, any>,
|
||||
siteProfile: any,
|
||||
warnings: ValidationWarning[],
|
||||
): void {
|
||||
if (!siteProfile?.services?.length) return;
|
||||
|
||||
const allContent = JSON.stringify(fs).toLowerCase();
|
||||
|
||||
for (const service of siteProfile.services) {
|
||||
const keywords = service
|
||||
.toLowerCase()
|
||||
.split(/[\s,&-]+/)
|
||||
.filter((w: string) => w.length > 4);
|
||||
|
||||
const isCovered = keywords.some((kw: string) => allContent.includes(kw));
|
||||
|
||||
if (!isCovered && service.length > 5) {
|
||||
warnings.push({
|
||||
code: "MISSING_SERVICE",
|
||||
message: `Bestehende Leistung "${service}" ist nicht in der Schätzung berücksichtigt.`,
|
||||
suggestion: `Prüfen ob "${service}" im Briefing gewünscht ist und ggf. in die Seitenstruktur aufnehmen.`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 6. Existing features (search, forms) must be acknowledged.
|
||||
* "er hat die suchfunktion nicht bemerkt, die gibts schon auf der seite"
|
||||
*/
|
||||
function validateExistingFeatures(
|
||||
fs: Record<string, any>,
|
||||
siteProfile: any,
|
||||
warnings: ValidationWarning[],
|
||||
): void {
|
||||
if (!siteProfile?.existingFeatures?.length) return;
|
||||
|
||||
const functions = fs.functions || [];
|
||||
const features = fs.features || [];
|
||||
const allSelected = [...functions, ...features];
|
||||
|
||||
for (const existingFeature of siteProfile.existingFeatures) {
|
||||
if (existingFeature === "cookie-consent") continue; // Standard, don't flag
|
||||
if (existingFeature === "video") continue; // Usually an asset, not a feature
|
||||
|
||||
const isMapped = allSelected.some(
|
||||
(f: string) => f.toLowerCase() === existingFeature.toLowerCase(),
|
||||
);
|
||||
|
||||
if (!isMapped) {
|
||||
warnings.push({
|
||||
code: "EXISTING_FEATURE_IGNORED",
|
||||
message: `Die bestehende Suchfunktion/Feature "${existingFeature}" wurde auf der aktuellen Website erkannt, aber nicht in der Schätzung berücksichtigt.`,
|
||||
suggestion: `"${existingFeature}" als Function oder Feature aufnehmen, da es bereits existiert und der Kunde es erwartet.`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 7. Multilang +20% must not be labeled as API.
|
||||
* "die +20% beziehen sich nicht auf API"
|
||||
*/
|
||||
function validateMultilangLabeling(
|
||||
fs: Record<string, any>,
|
||||
errors: ValidationError[],
|
||||
): void {
|
||||
const positions = fs.positionDescriptions || {};
|
||||
|
||||
for (const [key, desc] of Object.entries(positions)) {
|
||||
if (key.toLowerCase().includes("api") || key.toLowerCase().includes("schnittstelle")) {
|
||||
const descStr = typeof desc === "string" ? desc : "";
|
||||
if (
|
||||
descStr.toLowerCase().includes("mehrsprach") ||
|
||||
descStr.toLowerCase().includes("multilang") ||
|
||||
descStr.toLowerCase().includes("20%")
|
||||
) {
|
||||
errors.push({
|
||||
code: "MULTILANG_WRONG_POSITION",
|
||||
message: `Mehrsprachigkeit (+20%) ist unter "${key}" eingeordnet, gehört aber nicht zu API/Schnittstellen.`,
|
||||
field: key,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 8. Initial-Pflege should refer to "Datensätze" not "Seiten".
|
||||
* "Initialpflege => 100€/Stk => damit sind keine Seiten sondern Datensätze"
|
||||
*/
|
||||
function validateInitialPflegeUnits(
|
||||
fs: Record<string, any>,
|
||||
warnings: ValidationWarning[],
|
||||
): void {
|
||||
const positions = fs.positionDescriptions || {};
|
||||
|
||||
for (const [key, desc] of Object.entries(positions)) {
|
||||
if (key.toLowerCase().includes("pflege") || key.toLowerCase().includes("initial")) {
|
||||
const descStr = typeof desc === "string" ? desc : "";
|
||||
if (descStr.toLowerCase().includes("seiten") && !descStr.toLowerCase().includes("datensätz")) {
|
||||
warnings.push({
|
||||
code: "INITIALPFLEGE_WRONG_UNIT",
|
||||
message: `"${key}" spricht von "Seiten", aber gemeint sind Datensätze (z.B. Produkte, Referenzen).`,
|
||||
suggestion: `Beschreibung auf "Datensätze" statt "Seiten" ändern.`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 9. Position descriptions must match calculated quantities.
|
||||
*/
|
||||
function validatePositionDescriptionsMath(
|
||||
fs: Record<string, any>,
|
||||
errors: ValidationError[],
|
||||
): void {
|
||||
const positions = fs.positionDescriptions || {};
|
||||
|
||||
// Check pages description mentions correct count
|
||||
const pagesDesc = positions["Individuelle Seiten"] || positions["2. Individuelle Seiten"] || "";
|
||||
if (pagesDesc) {
|
||||
// Use the sitemap as the authoritative source of truth for page count
|
||||
let sitemapPageCount = 0;
|
||||
if (Array.isArray(fs.sitemap)) {
|
||||
for (const cat of fs.sitemap) {
|
||||
sitemapPageCount += (cat.pages || []).length;
|
||||
}
|
||||
}
|
||||
|
||||
// Count how many page names are mentioned in the description
|
||||
const descStr = typeof pagesDesc === "string" ? pagesDesc : "";
|
||||
const mentionedPages = descStr.split(/,|und|&/).filter((s: string) => s.trim().length > 2);
|
||||
|
||||
if (sitemapPageCount > 0 && mentionedPages.length > 0 && Math.abs(mentionedPages.length - sitemapPageCount) > 2) {
|
||||
errors.push({
|
||||
code: "PAGES_DESC_COUNT_MISMATCH",
|
||||
message: `Seiten-Beschreibung nennt ~${mentionedPages.length} Seiten, aber ${sitemapPageCount} in der Sitemap.`,
|
||||
field: "positionDescriptions.Individuelle Seiten",
|
||||
expected: sitemapPageCount,
|
||||
actual: mentionedPages.length,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 10. Sitemap categories should be consistent with selected pages/features.
|
||||
*/
|
||||
function validateSitemapConsistency(
|
||||
fs: Record<string, any>,
|
||||
errors: ValidationError[],
|
||||
): void {
|
||||
if (!Array.isArray(fs.sitemap)) return;
|
||||
|
||||
const sitemapTitles = fs.sitemap
|
||||
.flatMap((cat: any) => (cat.pages || []).map((p: any) => (p.title || "").toLowerCase()));
|
||||
|
||||
// Check for "Verwaltung" page (hallucinated management page)
|
||||
for (const title of sitemapTitles) {
|
||||
if (title.includes("verwaltung") && !title.includes("inhalt")) {
|
||||
errors.push({
|
||||
code: "HALLUCINATED_MANAGEMENT_PAGE",
|
||||
message: `"Verwaltung" als Seite ist vermutlich halluziniert. Verwaltung ist typischerweise eine interne Funktion, keine öffentliche Webseite.`,
|
||||
field: "sitemap",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
14
packages/estimation-engine/tsconfig.json
Normal file
14
packages/estimation-engine/tsconfig.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"extends": "@mintel/tsconfig/base.json",
|
||||
"compilerOptions": {
|
||||
"module": "ESNext",
|
||||
"target": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx"
|
||||
},
|
||||
"include": [
|
||||
"src"
|
||||
]
|
||||
}
|
||||
@@ -3,6 +3,7 @@ import { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
basePath: '/gatekeeper',
|
||||
output: 'standalone',
|
||||
};
|
||||
|
||||
export default mintelNextConfig(nextConfig);
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"files": [
|
||||
"docker",
|
||||
|
||||
@@ -77,7 +77,7 @@ export class ResearchAgent {
|
||||
// but formatted as "facts".
|
||||
|
||||
const synthesis = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.0-flash-001",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
@@ -186,7 +186,7 @@ Return JSON: { "facts": [ { "statement": "...", "source": "Organization Name Onl
|
||||
// Step 1: Ask the LLM to generate a highly specific YouTube search query
|
||||
// We want tutorials, explanations, or deep dives.
|
||||
const queryGen = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
@@ -262,7 +262,7 @@ RULES:
|
||||
Return ONLY a JSON object: {"bestVideoId": number}`;
|
||||
|
||||
const evalResponse = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [{ role: "system", content: evalPrompt }],
|
||||
response_format: { type: "json_object" },
|
||||
});
|
||||
@@ -320,7 +320,7 @@ Return ONLY a JSON object: {"bestVideoId": number}`;
|
||||
topic: string,
|
||||
): Promise<{ trendsKeywords: string[]; dcVariables: string[] }> {
|
||||
const response = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.0-flash-001",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
@@ -378,7 +378,7 @@ CRITICAL: Do NOT provide more than 2 trendsKeywords. Keep it extremely focused.`
|
||||
|
||||
// Step 1: LLM generates the optimal Google Search query
|
||||
const queryGen = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
|
||||
@@ -34,7 +34,7 @@ export class TrendsClient {
|
||||
);
|
||||
try {
|
||||
const response = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"private": false,
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"type": "module",
|
||||
"main": "./dist/index.js",
|
||||
|
||||
@@ -82,7 +82,7 @@ export class MemeGenerator {
|
||||
.join(", ");
|
||||
|
||||
const response = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
model: "google/gemini-3-flash-preview",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"main": "./dist/index.js",
|
||||
"module": "./dist/index.mjs",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"main": "./dist/index.cjs",
|
||||
"module": "./dist/index.js",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"type": "module",
|
||||
"main": "./dist/index.js",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"main": "./dist/index.cjs",
|
||||
"module": "./dist/index.js",
|
||||
|
||||
85
packages/page-audit/README.md
Normal file
85
packages/page-audit/README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# @mintel/page-audit
|
||||
|
||||
AI-powered website IST-analysis — combines DataForSEO On-Page crawl data with Gemini Pro to generate a comprehensive German-language audit report.
|
||||
|
||||
## Setup
|
||||
|
||||
Add the following to your `.env`:
|
||||
|
||||
```env
|
||||
# DataForSEO — get credentials at https://dataforseo.com
|
||||
# Format: your login email + API password from the dashboard
|
||||
DATA_FOR_SEO_LOGIN=yourlogin@example.com
|
||||
DATA_FOR_SEO_PASSWORD=your_api_password
|
||||
|
||||
# Or as a single key (login:password)
|
||||
DATA_FOR_SEO_API_KEY=yourlogin@example.com:your_api_password
|
||||
|
||||
# OpenRouter (for AI report)
|
||||
OPENROUTER_API_KEY=sk-or-...
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### CLI
|
||||
|
||||
```bash
|
||||
# Full audit with AI report
|
||||
npx tsx src/cli.ts run https://www.e-tib.com
|
||||
|
||||
# Faster: skip AI report (data only)
|
||||
npx tsx src/cli.ts run https://www.e-tib.com --light
|
||||
|
||||
# Custom max pages and output dir
|
||||
npx tsx src/cli.ts run https://www.e-tib.com --max-pages 100 --output ./out/audits
|
||||
```
|
||||
|
||||
### Programmatic
|
||||
|
||||
```typescript
|
||||
import { PageAuditor } from '@mintel/page-audit';
|
||||
|
||||
const auditor = new PageAuditor({
|
||||
dataForSeoLogin: process.env.DATA_FOR_SEO_LOGIN!,
|
||||
dataForSeoPassword: process.env.DATA_FOR_SEO_PASSWORD!,
|
||||
openrouterKey: process.env.OPENROUTER_API_KEY,
|
||||
outputDir: './out/page-audits',
|
||||
});
|
||||
|
||||
const result = await auditor.audit('https://www.e-tib.com', {
|
||||
maxPages: 50,
|
||||
onProgress: (msg) => console.log(msg),
|
||||
});
|
||||
|
||||
console.log(result.report?.executiveSummary);
|
||||
console.log(result.report?.seoScore); // 0-100
|
||||
```
|
||||
|
||||
## What it checks
|
||||
|
||||
### DataForSEO On-Page (deterministic)
|
||||
- HTTP status codes, broken pages (4xx/5xx)
|
||||
- Title tags, meta descriptions, H1 presence
|
||||
- Image alt attributes
|
||||
- Internal/external link counts
|
||||
- Core Web Vitals: LCP, CLS, TTFB
|
||||
- Viewport meta, canonical tags
|
||||
- Indexability
|
||||
|
||||
### AI Report (Gemini Pro)
|
||||
- Executive summary of current state
|
||||
- Strengths (what's working)
|
||||
- Critical issues (urgent fixes)
|
||||
- Quick wins (high impact, low effort)
|
||||
- Strategic recommendations
|
||||
- Scores: SEO (0-100), UX (0-100), Performance (0-100)
|
||||
- Overall health: `critical` | `needs-work` | `good` | `excellent`
|
||||
|
||||
## Output
|
||||
|
||||
Results are saved as JSON to `out/page-audits/{domain}_{timestamp}.json`.
|
||||
|
||||
## Integration in Estimation Engine
|
||||
|
||||
`@mintel/page-audit` can be used as an optional pre-step in the `@mintel/estimation-engine` pipeline
|
||||
to enrich the site analysis with real SEO metrics from DataForSEO before the AI estimation runs.
|
||||
31
packages/page-audit/_debug_vitals.ts
Normal file
31
packages/page-audit/_debug_vitals.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { config } from "dotenv";
|
||||
import * as path from "node:path";
|
||||
|
||||
config({ path: path.resolve(process.cwd(), "../../.env") });
|
||||
|
||||
const login = process.env.DATA_FOR_SEO_LOGIN || "";
|
||||
const password = process.env.DATA_FOR_SEO_PASSWORD || "";
|
||||
const BASE = "https://api.dataforseo.com/v3";
|
||||
const auth = Buffer.from(`${login}:${password}`).toString("base64");
|
||||
const headers: Record<string, string> = { Authorization: `Basic ${auth}`, "Content-Type": "application/json" };
|
||||
|
||||
async function apiPost(path: string, body: any) {
|
||||
const r = await fetch(`${BASE}${path}`, { method: "POST", headers, body: JSON.stringify(body) });
|
||||
return r.json();
|
||||
}
|
||||
|
||||
async function run() {
|
||||
console.log("Starting test crawl with enable_browser_rendering = true...");
|
||||
const req = await apiPost("/on_page/task_post", [{
|
||||
target: "e-tib.com",
|
||||
max_crawl_pages: 1,
|
||||
load_resources: true,
|
||||
enable_javascript: true,
|
||||
enable_browser_rendering: true,
|
||||
check_spell: false,
|
||||
}]);
|
||||
|
||||
console.log(JSON.stringify(req?.tasks?.[0] || req, null, 2));
|
||||
}
|
||||
|
||||
run();
|
||||
152
packages/page-audit/src/auditor.ts
Normal file
152
packages/page-audit/src/auditor.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
// ============================================================================
// @mintel/page-audit — Auditor Orchestrator
// Main entry point: runs the full audit pipeline for a domain.
// ============================================================================

import * as fs from "node:fs/promises";
import * as path from "node:path";
import { DataForSeoClient, normalizePage } from "./dataforseo.js";
import { generateAuditReport } from "./report.js";
import type { AuditConfig, AuditIssue, DomainAuditResult, PageAuditData } from "./types.js";

/**
 * Orchestrates a full site audit: DataForSEO crawl, result normalization,
 * issue aggregation, optional AI report, and persistence to disk.
 */
export class PageAuditor {
  private client: DataForSeoClient;

  constructor(private config: AuditConfig) {
    this.client = new DataForSeoClient(
      config.dataForSeoLogin,
      config.dataForSeoPassword,
    );
  }

  /**
   * Run a full audit for a domain.
   * Steps:
   * 1. Start DataForSEO On-Page crawl task
   * 2. Wait for completion
   * 3. Fetch page results + broken resources
   * 4. Normalize and aggregate issues
   * 5. Generate AI report (unless lightMode)
   * 6. Save to disk
   *
   * @param domain URL or bare domain; the scheme and trailing slash are
   *               stripped only for display/persistence (cleanDomain).
   * @param opts   maxPages caps the crawl (default 50); onProgress receives
   *               human-readable status lines (defaults to console.log).
   * @throws if the crawl cannot be started or does not finish in time
   *         (waitForTask's 300s timeout).
   */
  async audit(domain: string, opts?: { maxPages?: number; onProgress?: (msg: string) => void }): Promise<DomainAuditResult> {
    const log = opts?.onProgress || console.log;
    const cleanDomain = domain.replace(/^https?:\/\//, "").replace(/\/$/, "");

    log(`🔍 Starting audit for ${cleanDomain}...`);

    // Step 1: Start crawl (note: the original, un-cleaned domain is passed on).
    const taskId = await this.client.startCrawl(domain, opts?.maxPages || 50);
    log(`📋 DataForSEO task started: ${taskId}`);

    // Step 2: Wait for completion
    log("⏳ Waiting for crawl to complete...");
    await this.client.waitForTask(taskId, 300_000);
    log("✅ Crawl complete!");

    // Step 3: Fetch results (pages and 4xx/5xx resources in parallel).
    log("📥 Fetching page data...");
    const [rawPages, brokenResources] = await Promise.all([
      this.client.getPages(taskId, 100, 0),
      this.client.getBrokenResources(taskId),
    ]);

    // Step 4: Normalize pages
    const pages: PageAuditData[] = rawPages.map(normalizePage);

    // Count broken links per page by attributing each broken resource to the
    // page it was found on. Resources whose source page was not crawled are
    // silently dropped.
    for (const broken of brokenResources) {
      const sourceUrl = broken.source_url;
      const sourcePage = pages.find((p) => p.url === sourceUrl);
      if (sourcePage) sourcePage.links.broken++;
    }

    // Aggregate top issues: one entry per issue code, with an occurrence count.
    const issueMap = new Map<string, AuditIssue & { count: number }>();
    for (const page of pages) {
      for (const issue of page.issues) {
        const existing = issueMap.get(issue.code);
        if (existing) {
          existing.count++;
        } else {
          issueMap.set(issue.code, { ...issue, count: 1 });
        }
      }
    }

    // Sort by severity (critical first), then by frequency; keep the top 20.
    const topIssues = Array.from(issueMap.values())
      .sort((a, b) => {
        const severityOrder = { critical: 0, warning: 1, info: 2 };
        return severityOrder[a.severity] - severityOrder[b.severity] || b.count - a.count;
      })
      .slice(0, 20);

    const result: DomainAuditResult = {
      domain: cleanDomain,
      auditedAt: new Date().toISOString(),
      totalPages: pages.length,
      pages,
      topIssues,
      report: null,
      dataForSeoTaskId: taskId,
    };

    // Step 5: AI Report — best-effort: a failure is logged but does not
    // abort the audit; the deterministic data is still saved.
    if (!this.config.lightMode && this.config.openrouterKey) {
      log("🤖 Generating AI analysis...");
      try {
        result.report = await generateAuditReport(result, this.config.openrouterKey);
        log("✅ AI report generated!");
      } catch (err: any) {
        console.warn(`⚠️ AI report failed (audit data still saved): ${err.message}`);
      }
    }

    // Step 6: Save
    await this.saveResult(result);

    return result;
  }

  /**
   * Persist the audit result as pretty-printed JSON under
   * `{outputDir}/{domain-slug}_{timestamp}.json` and print a console summary.
   */
  private async saveResult(result: DomainAuditResult): Promise<void> {
    const outputDir = this.config.outputDir || "./out/page-audits";
    await fs.mkdir(outputDir, { recursive: true });

    // Filesystem-safe name: dots in the domain and colons/dots in the ISO
    // timestamp are replaced with dashes.
    const slug = result.domain.replace(/\./g, "-");
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const outputPath = path.join(outputDir, `${slug}_${timestamp}.json`);

    await fs.writeFile(outputPath, JSON.stringify(result, null, 2));
    console.log(`\n📦 Audit saved to: ${outputPath}`);

    // Print summary
    console.log("\n" + "─".repeat(50));
    console.log("📊 AUDIT SUMMARY — " + result.domain);
    console.log("─".repeat(50));
    console.log(` Pages audited: ${result.totalPages}`);
    console.log(` Critical issues: ${result.topIssues.filter((i) => i.severity === "critical").length}`);
    console.log(` Warnings: ${result.topIssues.filter((i) => i.severity === "warning").length}`);

    if (result.report) {
      console.log("\n🤖 AI REPORT:");
      console.log(` Overall Health: ${result.report.overallHealth.toUpperCase()}`);
      console.log(` SEO Score: ${result.report.seoScore}/100`);
      console.log(` UX Score: ${result.report.uxScore}/100`);
      console.log(` Performance Score: ${result.report.performanceScore}/100`);
      console.log(`\n Summary: ${result.report.executiveSummary}`);
      if (result.report.criticalIssues.length > 0) {
        console.log("\n 🔴 Critical Issues:");
        for (const issue of result.report.criticalIssues) {
          console.log(` - ${issue}`);
        }
      }
      if (result.report.quickWins.length > 0) {
        console.log("\n 🟢 Quick Wins:");
        for (const win of result.report.quickWins) {
          console.log(` - ${win}`);
        }
      }
    }
    console.log("─".repeat(50));
  }
}
|
||||
70
packages/page-audit/src/cli.ts
Normal file
70
packages/page-audit/src/cli.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env node
// ============================================================================
// @mintel/page-audit — CLI
// Thin commander wrapper around PageAuditor: resolves credentials from env,
// builds the auditor config from CLI options, and runs the audit.
// ============================================================================

import { Command } from "commander";
import * as path from "node:path";
import { config as dotenvConfig } from "dotenv";
import { PageAuditor } from "./auditor.js";

// Load env from the monorepo root first, then the package-local .env
// (later loads do not override already-set vars — TODO confirm dotenv config).
dotenvConfig({ path: path.resolve(process.cwd(), "../../.env") });
dotenvConfig({ path: path.resolve(process.cwd(), ".env") });

const program = new Command();

program
  .name("page-audit")
  .description("AI-powered website IST-analysis using DataForSEO + Gemini")
  .version("1.0.0");

program
  .command("run")
  .description("Run a full SEO and UX audit for a website")
  .argument("<url>", "Website URL or domain to audit")
  .option("--max-pages <n>", "Maximum pages to crawl", "50")
  .option("--output <dir>", "Output directory", "../../out/page-audits")
  .option("--light", "Skip AI report (faster)")
  .action(async (url: string, options: any) => {
    // Support both DATA_FOR_SEO_API_KEY (login:password) and separate vars
    let login = process.env.DATA_FOR_SEO_LOGIN;
    let password = process.env.DATA_FOR_SEO_PASSWORD;

    if (!login && process.env.DATA_FOR_SEO_API_KEY) {
      const parts = process.env.DATA_FOR_SEO_API_KEY.split(":");
      login = parts[0];
      password = parts.slice(1).join(":"); // passwords may contain colons
    }

    // Fail fast with setup instructions when credentials are missing.
    if (!login || !password) {
      console.error("\n❌ DataForSEO credentials not found.");
      console.error(" Set in .env:");
      console.error(" DATA_FOR_SEO_LOGIN=yourlogin@example.com");
      console.error(" DATA_FOR_SEO_PASSWORD=your_api_password");
      console.error(" OR: DATA_FOR_SEO_API_KEY=email:password");
      process.exit(1);
    }

    const auditor = new PageAuditor({
      dataForSeoLogin: login,
      dataForSeoPassword: password,
      // Optional: without this key the AI report step is skipped by PageAuditor.
      openrouterKey: process.env.OPENROUTER_API_KEY,
      outputDir: path.resolve(process.cwd(), options.output),
      lightMode: options.light || false,
    });

    try {
      await auditor.audit(url, {
        maxPages: parseInt(options.maxPages, 10),
        onProgress: (msg) => console.log(msg),
      });
    } catch (err: any) {
      console.error(`\n❌ Audit failed: ${err.message}`);
      process.exit(1);
    }
  });

// Top-level error handler: any unhandled rejection exits non-zero.
program.parseAsync(process.argv).catch((err) => {
  console.error(err);
  process.exit(1);
});
||||
212
packages/page-audit/src/dataforseo.ts
Normal file
212
packages/page-audit/src/dataforseo.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
// ============================================================================
// @mintel/page-audit — DataForSEO API Client
// Uses native fetch (no axios) to avoid Node event loop exit during polling.
// Docs: https://docs.dataforseo.com/v3/on_page/
// ============================================================================

import type { PageAuditData, AuditIssue } from "./types.js";

const API_BASE = "https://api.dataforseo.com/v3";

/**
 * Authenticated DataForSEO client.
 * Uses HTTP Basic auth (base64 of `login:password`) on every request.
 */
export class DataForSeoClient {
  private auth: string;

  constructor(login: string, password: string) {
    this.auth = Buffer.from(`${login}:${password}`).toString("base64");
  }

  // Common headers for every request.
  private get headers(): Record<string, string> {
    return {
      Authorization: `Basic ${this.auth}`,
      "Content-Type": "application/json",
    };
  }

  // GET helper; throws on any non-2xx HTTP status.
  private async apiGet(path: string): Promise<any> {
    const resp = await fetch(`${API_BASE}${path}`, { headers: this.headers });
    if (!resp.ok) throw new Error(`DataForSEO GET ${path} failed: ${resp.status}`);
    return resp.json();
  }

  // POST helper; throws on any non-2xx HTTP status.
  private async apiPost(path: string, body: any): Promise<any> {
    const resp = await fetch(`${API_BASE}${path}`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(body),
    });
    if (!resp.ok) throw new Error(`DataForSEO POST ${path} failed: ${resp.status}`);
    return resp.json();
  }

  /**
   * Start an On-Page crawl for a domain and return the task ID.
   * A bare domain is prefixed with https://. JS execution and browser
   * rendering are enabled so SPA content and Web Vitals are captured.
   * @throws when the API response contains no task id.
   */
  async startCrawl(domain: string, maxCrawlPages = 50): Promise<string> {
    const url = domain.startsWith("http") ? domain : `https://${domain}`;

    const data = await this.apiPost("/on_page/task_post", [
      {
        target: url,
        max_crawl_pages: maxCrawlPages,
        load_resources: true,
        enable_javascript: true,
        enable_browser_rendering: true,
        check_spell: false,
        calculate_keyword_density: false,
      },
    ]);

    const task = data?.tasks?.[0];
    if (!task?.id) {
      throw new Error(`DataForSEO task creation failed: ${JSON.stringify(task?.status_message || "unknown")}`);
    }

    return task.id;
  }

  /**
   * Check if a task is ready via the tasks_ready endpoint.
   * Compares the task id against the endpoint's list of finished tasks.
   */
  async isTaskReady(taskId: string): Promise<boolean> {
    const data = await this.apiGet("/on_page/tasks_ready");
    const readyTasks: string[] = data?.tasks?.[0]?.result?.map((t: any) => t.id) || [];
    return readyTasks.includes(taskId);
  }

  /**
   * Poll for task completion using tasks_ready endpoint.
   * DataForSEO crawls can take 2-5 minutes.
   * Starts at a 15s interval and backs off by 1.3x up to a 30s cap.
   * @throws when the task is not ready within timeoutMs.
   */
  async waitForTask(taskId: string, timeoutMs = 300_000): Promise<void> {
    const start = Date.now();
    let delay = 15_000;
    let pollCount = 0;

    while (Date.now() - start < timeoutMs) {
      await this.sleep(delay);
      pollCount++;

      const ready = await this.isTaskReady(taskId);
      const elapsed = Math.round((Date.now() - start) / 1000);
      console.log(` 📊 Poll #${pollCount}: ${ready ? "READY ✅" : "not ready"} (${elapsed}s elapsed)`);

      if (ready) {
        // Short grace period so the pages endpoint settles
        await this.sleep(5_000);
        return;
      }

      delay = Math.min(delay * 1.3, 30_000);
    }

    throw new Error(`DataForSEO task ${taskId} timed out after ${timeoutMs / 1000}s`);
  }

  /**
   * Sleep that keeps the Node event loop alive.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => {
      const timer = setTimeout(resolve, ms);
      // Explicitly ref the timer to prevent Node from exiting
      if (timer && typeof timer === "object" && "ref" in timer) {
        (timer as NodeJS.Timeout).ref();
      }
    });
  }

  /**
   * Fetch the crawl summary.
   */
  async getCrawlSummary(taskId: string): Promise<any> {
    const data = await this.apiGet(`/on_page/summary/${taskId}`);
    return data?.tasks?.[0]?.result?.[0] || null;
  }

  /**
   * Fetch all page-level results.
   * NOTE(review): callers that expect "all" pages must page through offsets
   * themselves — a single call returns at most `limit` items.
   */
  async getPages(taskId: string, limit = 100, offset = 0): Promise<any[]> {
    const data = await this.apiPost("/on_page/pages", [
      { id: taskId, limit, offset },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }

  /**
   * Fetch non-indexable pages.
   */
  async getNonIndexable(taskId: string): Promise<any[]> {
    const data = await this.apiPost("/on_page/non_indexable", [
      { id: taskId, limit: 100, offset: 0 },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }

  /**
   * Fetch broken resources (404s, timeouts, etc.)
   * NOTE(review): the filter value "399" is a string — verify against the
   * DataForSEO filters spec whether a numeric 399 is expected.
   */
  async getBrokenResources(taskId: string): Promise<any[]> {
    const data = await this.apiPost("/on_page/resources", [
      { id: taskId, limit: 100, filters: [["status_code", ">", "399"]] },
    ]);
    return data?.tasks?.[0]?.result?.[0]?.items || [];
  }
}
||||
|
||||
/**
|
||||
* Normalize a DataForSEO raw page result into our PageAuditData type.
|
||||
*/
|
||||
export function normalizePage(raw: any): PageAuditData {
|
||||
const issues: AuditIssue[] = [];
|
||||
|
||||
const checks = raw.checks || {};
|
||||
|
||||
if (checks.no_title) issues.push({ code: "NO_TITLE", severity: "critical", message: "Seite hat keinen <title> Tag" });
|
||||
if (checks.title_too_long) issues.push({ code: "TITLE_TOO_LONG", severity: "warning", message: `Title zu lang (${raw.meta?.title?.length || "?"} Zeichen, max 60)` });
|
||||
if (checks.no_description) issues.push({ code: "NO_META_DESCRIPTION", severity: "warning", message: "Keine Meta-Description" });
|
||||
if (checks.description_too_long) issues.push({ code: "META_DESC_TOO_LONG", severity: "info", message: "Meta-Description zu lang (max 160)" });
|
||||
if (checks.no_h1_tag) issues.push({ code: "NO_H1", severity: "critical", message: "Kein H1-Tag auf der Seite" });
|
||||
if (checks.duplicate_h1_tag) issues.push({ code: "DUPLICATE_H1", severity: "warning", message: "Mehrere H1-Tags gefunden" });
|
||||
if (checks.is_broken) issues.push({ code: "PAGE_BROKEN", severity: "critical", message: `HTTP ${raw.status_code}: Seite nicht erreichbar` });
|
||||
if (checks.low_content_rate) issues.push({ code: "THIN_CONTENT", severity: "warning", message: "Zu wenig Content (dünne Seite)" });
|
||||
if (checks.has_render_blocking_resources) issues.push({ code: "RENDER_BLOCKING", severity: "warning", message: "Render-blockierende Ressourcen gefunden" });
|
||||
if (checks.image_not_optimized) issues.push({ code: "UNOPTIMIZED_IMAGES", severity: "info", message: "Nicht-optimierte Bilder vorhanden" });
|
||||
|
||||
const imagesWithoutAlt = raw.checks?.no_image_alt ? (raw.meta?.images_count || 0) : 0;
|
||||
|
||||
return {
|
||||
url: raw.url,
|
||||
statusCode: raw.status_code,
|
||||
pageTitle: raw.meta?.title || null,
|
||||
metaDescription: raw.meta?.description || null,
|
||||
h1: raw.meta?.htags?.h1?.[0] || null,
|
||||
wordCount: raw.meta?.content?.words_count || 0,
|
||||
loadTime: raw.page_timing?.time_to_interactive || null,
|
||||
links: {
|
||||
internal: raw.meta?.internal_links_count || 0,
|
||||
external: raw.meta?.external_links_count || 0,
|
||||
broken: 0,
|
||||
},
|
||||
images: {
|
||||
total: raw.meta?.images_count || 0,
|
||||
missingAlt: imagesWithoutAlt,
|
||||
},
|
||||
seo: {
|
||||
hasViewport: !raw.checks?.no_viewport_tag,
|
||||
hasCanonical: !!raw.meta?.canonical,
|
||||
isIndexable: !raw.checks?.is_4xx_code && !raw.checks?.is_5xx_code,
|
||||
robotsTxt: raw.meta?.robots || null,
|
||||
ogTitle: raw.meta?.social_media_tags?.og_title || null,
|
||||
ogDescription: raw.meta?.social_media_tags?.og_description || null,
|
||||
},
|
||||
performance: {
|
||||
cls: raw.page_timing?.cumulative_layout_shift || null,
|
||||
lcp: raw.page_timing?.largest_contentful_paint || null,
|
||||
fid: raw.page_timing?.first_input_delay || null,
|
||||
ttfb: raw.page_timing?.waiting_time || null,
|
||||
},
|
||||
issues,
|
||||
};
|
||||
}
|
||||
14
packages/page-audit/src/index.ts
Normal file
14
packages/page-audit/src/index.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
// ============================================================================
|
||||
// @mintel/page-audit — Public API
|
||||
// ============================================================================
|
||||
|
||||
export { PageAuditor } from "./auditor.js";
|
||||
export { DataForSeoClient, normalizePage } from "./dataforseo.js";
|
||||
export { generateAuditReport } from "./report.js";
|
||||
export type {
|
||||
PageAuditData,
|
||||
AuditIssue,
|
||||
DomainAuditResult,
|
||||
AuditReport,
|
||||
AuditConfig,
|
||||
} from "./types.js";
|
||||
149
packages/page-audit/src/report.ts
Normal file
149
packages/page-audit/src/report.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
// ============================================================================
|
||||
// @mintel/page-audit — AI Report Generator
|
||||
// Uses Gemini Pro (via OpenRouter) to synthesize DataForSEO data into
|
||||
// a structured IST-analysis report in German.
|
||||
// ============================================================================
|
||||
|
||||
import type { DomainAuditResult, AuditReport, PageAuditData, AuditIssue } from "./types.js";
|
||||
|
||||
// Base URL of the OpenRouter API (OpenAI-compatible chat-completions endpoint).
const OPENROUTER_BASE = "https://openrouter.ai/api/v1";
// Model slug routed through OpenRouter for report synthesis.
const REPORT_MODEL = "google/gemini-3.1-pro-preview";

/**
 * Generate an AI-powered IST-analysis report from audit data.
 *
 * Builds a German system/user prompt from the aggregated audit summary,
 * sends it to OpenRouter, and parses the model's JSON answer into an
 * AuditReport. Markdown code fences around the JSON are stripped before
 * parsing because not every model honors `response_format`.
 *
 * @param audit - Crawled audit result (domain, pages, aggregated top issues).
 * @param openrouterKey - OpenRouter API key, sent as a Bearer token.
 * @returns The parsed AuditReport.
 * @throws Error when the HTTP request fails or the model output is not valid JSON.
 */
export async function generateAuditReport(
  audit: DomainAuditResult,
  openrouterKey: string,
): Promise<AuditReport> {
  const summary = buildAuditSummary(audit);

  const systemPrompt = `
Du bist ein Senior SEO- und UX-Stratege. Analysiere die technischen Audit-Daten einer Website
und erstelle einen präzisen IST-Analyse-Bericht auf DEUTSCH.

Stil:
- Faktisch, direkt, kein Bullshit
- Konkrete Handlungsempfehlungen, keine vagen Floskeln
- Technik-verständlich für Entscheider (nicht für Entwickler)

Output: JSON (kein Markdown drumherum)
`;

  const userPrompt = `
Website: ${audit.domain}
Seiten gecrawlt: ${audit.totalPages}
Audit-Datum: ${audit.auditedAt}

=== TECHNISCHE AUSWERTUNG ===
${summary}

=== TOP-ISSUES ===
${audit.topIssues.map((i) => `[${i.severity.toUpperCase()}] ${i.message}${i.count ? ` (${i.count}x)` : ""}`).join("\n")}

Erstelle jetzt den IST-Analyse-Report als JSON:
{
  "executiveSummary": string (2-3 Sätze über den aktuellen Zustand der Website),
  "strengths": string[] (max 4, was gut läuft),
  "criticalIssues": string[] (max 5, sofort zu beheben),
  "quickWins": string[] (max 4, einfach umzusetzen mit großer Wirkung),
  "strategicRecommendations": string[] (max 4, mittel-/langfristig),
  "seoScore": number (0-100, realistisch),
  "uxScore": number (0-100, realistisch),
  "performanceScore": number (0-100, realistisch),
  "overallHealth": "critical" | "needs-work" | "good" | "excellent"
}
`;

  // Single non-streaming chat-completion request; json_object nudges the
  // model toward a bare JSON answer.
  const response = await fetch(`${OPENROUTER_BASE}/chat/completions`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${openrouterKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: REPORT_MODEL,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt },
      ],
      response_format: { type: "json_object" },
    }),
  });

  if (!response.ok) {
    throw new Error(`LLM request failed: ${response.status} ${await response.text()}`);
  }

  const json = await response.json();
  // Defensive: an empty/odd completion degrades to "{}" and fails in parsing below.
  const content = json.choices?.[0]?.message?.content || "{}";

  // Clean up markdown JSON wrappers if present
  const cleaned = content.replace(/^```(?:json)?\n?/m, "").replace(/```$/m, "").trim();

  try {
    return JSON.parse(cleaned) as AuditReport;
  } catch {
    // Include a snippet of the raw output so the failure is debuggable.
    throw new Error(`Could not parse AI report: ${cleaned.slice(0, 200)}`);
  }
}
|
||||
|
||||
/**
|
||||
* Build a human-readable text summary of the audit data for the LLM prompt.
|
||||
*/
|
||||
function buildAuditSummary(audit: DomainAuditResult): string {
|
||||
const pages = audit.pages;
|
||||
const brokenPages = pages.filter((p) => p.statusCode >= 400);
|
||||
const noTitle = pages.filter((p) => !p.pageTitle);
|
||||
const noDesc = pages.filter((p) => !p.metaDescription);
|
||||
const noH1 = pages.filter((p) => !p.h1);
|
||||
const notIndexable = pages.filter((p) => !p.seo.isIndexable);
|
||||
const noViewport = pages.filter((p) => !p.seo.hasViewport);
|
||||
const slowPages = pages.filter((p) => p.loadTime !== null && p.loadTime > 3000);
|
||||
const imagesWithoutAlt = pages.reduce((sum, p) => sum + p.images.missingAlt, 0);
|
||||
const totalImages = pages.reduce((sum, p) => sum + p.images.total, 0);
|
||||
|
||||
const avgLoad = pages
|
||||
.filter((p) => p.loadTime !== null)
|
||||
.reduce((sum, p, _, arr) => sum + (p.loadTime || 0) / arr.length, 0);
|
||||
|
||||
const lines = [
|
||||
`Seiten gesamt: ${pages.length}`,
|
||||
`Seiten mit Fehler (4xx/5xx): ${brokenPages.length}`,
|
||||
`Seiten ohne <title>: ${noTitle.length}`,
|
||||
`Seiten ohne Meta-Description: ${noDesc.length}`,
|
||||
`Seiten ohne H1: ${noH1.length}`,
|
||||
`Nicht-indexierbare Seiten: ${notIndexable.length}`,
|
||||
`Seiten ohne Viewport-Meta: ${noViewport.length}`,
|
||||
`Bilder gesamt: ${totalImages}, davon ohne alt-Attribut: ${imagesWithoutAlt}`,
|
||||
`Langsame Seiten (>3s): ${slowPages.length}`,
|
||||
`Ø Ladezeit: ${avgLoad > 0 ? `${(avgLoad / 1000).toFixed(1)}s` : "unbekannt"}`,
|
||||
];
|
||||
|
||||
// Core Web Vitals (from first valid page)
|
||||
const lcpPages = pages.filter((p) => p.performance.lcp !== null);
|
||||
if (lcpPages.length > 0) {
|
||||
const avgLcp = lcpPages.reduce((s, p) => s + (p.performance.lcp || 0), 0) / lcpPages.length;
|
||||
lines.push(`Ø LCP: ${(avgLcp / 1000).toFixed(1)}s (Ziel: <2.5s)`);
|
||||
}
|
||||
|
||||
const clsPages = pages.filter((p) => p.performance.cls !== null);
|
||||
if (clsPages.length > 0) {
|
||||
const avgCls = clsPages.reduce((s, p) => s + (p.performance.cls || 0), 0) / clsPages.length;
|
||||
lines.push(`Ø CLS: ${avgCls.toFixed(3)} (Ziel: <0.1)`);
|
||||
}
|
||||
|
||||
// Top pages by issues
|
||||
const worstPages = [...pages]
|
||||
.sort((a, b) => b.issues.length - a.issues.length)
|
||||
.slice(0, 5);
|
||||
|
||||
if (worstPages.length > 0) {
|
||||
lines.push("\nSeiten mit den meisten Problemen:");
|
||||
for (const page of worstPages) {
|
||||
lines.push(` ${page.url}: ${page.issues.length} Issues (${page.issues.map((i) => i.code).join(", ")})`);
|
||||
}
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
83
packages/page-audit/src/types.ts
Normal file
83
packages/page-audit/src/types.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
// ============================================================================
|
||||
// @mintel/page-audit — Types
|
||||
// ============================================================================
|
||||
|
||||
/** DataForSEO On-Page audit result for a single page (normalized). */
export interface PageAuditData {
  /** Absolute URL of the crawled page. */
  url: string;
  /** HTTP status code the crawler received. */
  statusCode: number;
  /** <title> text, or null when absent. */
  pageTitle: string | null;
  /** Meta description, or null when absent. */
  metaDescription: string | null;
  /** First H1 heading text, or null when absent. */
  h1: string | null;
  /** Visible word count of the page body. */
  wordCount: number;
  loadTime: number | null; // ms
  /** Link counts on the page. */
  links: {
    internal: number;
    external: number;
    broken: number;
  };
  images: {
    total: number;
    // NOTE(review): populated heuristically — may equal `total` when the
    // crawler only reports a page-level "missing alt" flag.
    missingAlt: number;
  };
  /** Indexability- and metadata-related signals. */
  seo: {
    hasViewport: boolean;
    hasCanonical: boolean;
    isIndexable: boolean;
    robotsTxt: string | null;
    ogTitle: string | null;
    ogDescription: string | null;
  };
  /** Core Web Vitals style timings; null when the crawler did not measure them. */
  performance: {
    cls: number | null; // Cumulative Layout Shift
    lcp: number | null; // Largest Contentful Paint (ms)
    fid: number | null; // First Input Delay (ms)
    ttfb: number | null; // Time to First Byte (ms)
  };
  /** Issues detected for this page. */
  issues: AuditIssue[];
}
|
||||
|
||||
/** A single issue found during audit. */
export interface AuditIssue {
  /** Stable machine-readable identifier, e.g. "NO_TITLE". */
  code: string;
  /** Triage level of the finding. */
  severity: "critical" | "warning" | "info";
  /** Human-readable description (German). */
  message: string;
  /** Occurrence count when the issue is aggregated across pages. */
  count?: number;
}
|
||||
|
||||
/** Full crawled audit result for a domain. */
export interface DomainAuditResult {
  /** Domain that was audited. */
  domain: string;
  /** Timestamp of the audit run — TODO confirm format (ISO expected) with callers. */
  auditedAt: string;
  /** Number of pages crawled. */
  totalPages: number;
  /** Per-page normalized results. */
  pages: PageAuditData[];
  /** Aggregated issues sorted by severity */
  topIssues: AuditIssue[];
  /** AI-generated analysis */
  report: AuditReport | null;
  /** Raw DataForSEO task ID for reference */
  dataForSeoTaskId?: string;
}
|
||||
|
||||
/** AI-generated IST analysis report (parsed LLM JSON output, German text). */
export interface AuditReport {
  /** 2-3 sentence summary of the site's current state. */
  executiveSummary: string;
  /** What already works well (max 4 expected). */
  strengths: string[];
  /** Findings to fix immediately (max 5 expected). */
  criticalIssues: string[];
  /** Low-effort, high-impact fixes (max 4 expected). */
  quickWins: string[];
  /** Mid-/long-term recommendations (max 4 expected). */
  strategicRecommendations: string[];
  seoScore: number; // 0-100
  uxScore: number; // 0-100
  performanceScore: number; // 0-100
  /** Overall verdict bucket. */
  overallHealth: "critical" | "needs-work" | "good" | "excellent";
}
|
||||
|
||||
/** Config for running an audit. */
export interface AuditConfig {
  /** DataForSEO API login (basic auth). */
  dataForSeoLogin: string;
  /** DataForSEO API password (basic auth). */
  dataForSeoPassword: string;
  /** OpenRouter key; required unless lightMode is set. */
  openrouterKey?: string;
  /** Directory where result artifacts are written. */
  outputDir?: string;
  /** If true, only analyze — no AI report */
  lightMode?: boolean;
}
|
||||
13
packages/page-audit/tsconfig.json
Normal file
13
packages/page-audit/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"extends": "@mintel/tsconfig/base.json",
|
||||
"compilerOptions": {
|
||||
"module": "ESNext",
|
||||
"target": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"noEmit": true
|
||||
},
|
||||
"include": [
|
||||
"src"
|
||||
]
|
||||
}
|
||||
70
packages/pdf-library/src/components/ConceptPDF.tsx
Normal file
70
packages/pdf-library/src/components/ConceptPDF.tsx
Normal file
@@ -0,0 +1,70 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import { Page as PDFPage, Document as PDFDocument } from "@react-pdf/renderer";
|
||||
import { pdfStyles } from "./pdf/SharedUI.js";
|
||||
import { SimpleLayout } from "./pdf/SimpleLayout.js";
|
||||
|
||||
// Modules
|
||||
import { FrontPageModule } from "./pdf/modules/FrontPageModule.js";
|
||||
import { BriefingModule } from "./pdf/modules/BriefingModule.js";
|
||||
import { SitemapModule } from "./pdf/modules/SitemapModule.js";
|
||||
import { ClosingModule } from "./pdf/modules/CommonModules.js";
|
||||
|
||||
/**
 * Render a ProjectConcept as a multi-page concept PDF:
 * front page, briefing, optional sitemap, closing page.
 *
 * @param concept - Concept payload with auditedFacts, strategy and architecture.
 * @param headerIcon - Optional image source for page headers.
 * @param footerLogo - Optional image source for page footers.
 */
export const ConceptPDF = ({
  concept,
  headerIcon,
  footerLogo,
}: any) => {
  // Long-form German date for the document header, e.g. "12. Mai 2024".
  const date = new Date().toLocaleDateString("de-DE", {
    year: "numeric",
    month: "long",
    day: "numeric",
  });

  // Flatten the ProjectConcept to match what the legacy modules expect
  const flatState = {
    ...concept.auditedFacts,
    briefingSummary: concept.strategy?.briefingSummary || "",
    designVision: concept.strategy?.designVision || "",
    sitemap: concept.architecture?.sitemap || [],
    websiteTopic: concept.architecture?.websiteTopic || concept.auditedFacts?.websiteTopic || "",
  };

  // Static sender details printed by the layout components.
  const companyData = {
    name: "Marc Mintel",
    address1: "Georg-Meistermann-Straße 7",
    address2: "54586 Schüller",
    ustId: "DE367588065",
  };

  // Props shared by every SimpleLayout page.
  const commonProps = {
    state: flatState,
    date,
    headerIcon,
    footerLogo,
    companyData,
  };

  // Sitemap page is only emitted when the concept actually defines one.
  return (
    <PDFDocument title={`Projektkonzept - ${flatState.companyName || "Projekt"}`}>
      <PDFPage size="A4" style={pdfStyles.titlePage}>
        <FrontPageModule state={flatState} headerIcon={headerIcon} date={date} />
      </PDFPage>

      <SimpleLayout {...commonProps}>
        <BriefingModule state={flatState} />
      </SimpleLayout>

      {flatState.sitemap && flatState.sitemap.length > 0 && (
        <SimpleLayout {...commonProps}>
          <SitemapModule state={flatState} />
        </SimpleLayout>
      )}

      <SimpleLayout {...commonProps}>
        <ClosingModule />
      </SimpleLayout>
    </PDFDocument>
  );
};
|
||||
@@ -7,8 +7,6 @@ import { SimpleLayout } from "./pdf/SimpleLayout.js";
|
||||
|
||||
// Modules
|
||||
import { FrontPageModule } from "./pdf/modules/FrontPageModule.js";
|
||||
import { BriefingModule } from "./pdf/modules/BriefingModule.js";
|
||||
import { SitemapModule } from "./pdf/modules/SitemapModule.js";
|
||||
import { EstimationModule } from "./pdf/modules/EstimationModule.js";
|
||||
import { TransparenzModule } from "./pdf/modules/TransparenzModule.js";
|
||||
import { ClosingModule } from "./pdf/modules/CommonModules.js";
|
||||
@@ -64,16 +62,6 @@ export const EstimationPDF = ({
|
||||
<FrontPageModule state={state} headerIcon={headerIcon} date={date} />
|
||||
</PDFPage>
|
||||
|
||||
<SimpleLayout {...commonProps}>
|
||||
<BriefingModule state={state} />
|
||||
</SimpleLayout>
|
||||
|
||||
{state.sitemap && state.sitemap.length > 0 && (
|
||||
<SimpleLayout {...commonProps}>
|
||||
<SitemapModule state={state} />
|
||||
</SimpleLayout>
|
||||
)}
|
||||
|
||||
<SimpleLayout {...commonProps}>
|
||||
<EstimationModule
|
||||
state={state}
|
||||
|
||||
172
packages/pdf-library/src/components/InfoPDF.tsx
Normal file
172
packages/pdf-library/src/components/InfoPDF.tsx
Normal file
@@ -0,0 +1,172 @@
|
||||
"use client";
|
||||
|
||||
import * as React from "react";
|
||||
import {
|
||||
Page as PDFPage,
|
||||
Text as PDFText,
|
||||
View as PDFView,
|
||||
StyleSheet as PDFStyleSheet,
|
||||
Document as PDFDocument,
|
||||
} from "@react-pdf/renderer";
|
||||
import {
|
||||
pdfStyles,
|
||||
DocumentTitle,
|
||||
COLORS,
|
||||
FONT_SIZES,
|
||||
Divider,
|
||||
} from "./pdf/SharedUI.js";
|
||||
import { SimpleLayout } from "./pdf/SimpleLayout.js";
|
||||
|
||||
// Local styles for the InfoPDF document (react-pdf StyleSheet).
const styles = PDFStyleSheet.create({
  // Vertical spacing between the document's content sections.
  section: {
    marginBottom: 24,
  },
  // Emphasized lead paragraph (main text color).
  textLead: {
    fontSize: FONT_SIZES.BODY,
    color: COLORS.TEXT_MAIN,
    lineHeight: 1.6,
    marginBottom: 16,
  },
  // Regular body paragraph (dimmed text color).
  textRegular: {
    fontSize: FONT_SIZES.BODY,
    color: COLORS.TEXT_DIM,
    lineHeight: 1.6,
    marginBottom: 12,
  },
  // Bullet list: row container plus marker and text columns.
  bulletPoint: {
    flexDirection: "row",
    marginBottom: 6,
    paddingLeft: 10,
  },
  bullet: {
    width: 15,
    fontSize: FONT_SIZES.BODY,
    color: COLORS.TEXT_LIGHT,
  },
  bulletText: {
    flex: 1,
    fontSize: FONT_SIZES.BODY,
    color: COLORS.TEXT_DIM,
    lineHeight: 1.6,
  },
  // Pull-quote box with accent border on the left.
  quoteBox: {
    marginTop: 20,
    marginBottom: 20,
    padding: 20,
    backgroundColor: COLORS.GRID,
    borderLeftWidth: 3,
    borderLeftColor: COLORS.CHARCOAL,
  },
  quoteText: {
    fontSize: FONT_SIZES.LABEL,
    fontWeight: "bold",
    color: COLORS.CHARCOAL,
    lineHeight: 1.4,
  },
  // Uppercase section heading.
  headingSmall: {
    fontSize: FONT_SIZES.LABEL,
    fontWeight: "bold",
    color: COLORS.CHARCOAL,
    textTransform: "uppercase",
    letterSpacing: 1,
    marginBottom: 12,
    marginTop: 8,
  },
});
|
||||
|
||||
/**
 * Static one-page "Arbeitsweise & Philosophie" info PDF (German marketing
 * copy). Takes no dynamic data beyond optional header/footer images; all
 * content is hard-coded below.
 *
 * @param headerIcon - Optional image source for the page header.
 * @param footerLogo - Optional image source for the page footer.
 */
export const InfoPDF = ({ headerIcon, footerLogo }: { headerIcon?: string; footerLogo?: string }) => {
  // Static sender details printed by SimpleLayout.
  const companyData = {
    name: "Marc Mintel",
    address1: "Georg-Meistermann-Straße 7",
    address2: "54586 Schüller",
    ustId: "DE367588065",
  };

  // Bank details shown in the layout footer.
  const bankData = {
    name: "N26",
    bic: "NTSBDEB1XXX",
    iban: "DE50 1001 1001 2620 4328 65",
  };

  // Document body: title, three content sections, a pull quote, and a
  // centered sign-off line.
  const content = (
    <PDFView>
      <DocumentTitle
        title="Arbeitsweise & Philosophie"
        subLines={["Digital Architect — Marc Mintel"]}
        isHero={true}
      />

      <PDFView style={styles.section}>
        <PDFText style={styles.headingSmall}>Hintergrund & Motivation</PDFText>
        <PDFText style={styles.textLead}>
          Ich baue Websites und Systeme seit über 15 Jahren. Nicht weil ich Websites so liebe – sondern weil ich es hasse, wenn Dinge nicht funktionieren.
        </PDFText>
        <PDFText style={styles.textRegular}>
          In diesen 15 Jahren habe ich Agenturen von innen gesehen, Konzerne erlebt, Startups aufgebaut und gelernt, wie man Dinge baut, die einfach laufen. Heute mache ich das ohne Agentur-Zwischenschichten: Direkt. Sauber. Verantwortlich.
        </PDFText>
      </PDFView>

      <PDFView style={styles.quoteBox}>
        <PDFText style={styles.quoteText}>
          "Das Problem ist selten Technik. Es ist immer Zuständigkeit. Wenn keiner verantwortlich ist, passiert nichts."
        </PDFText>
      </PDFView>

      <PDFView style={styles.section}>
        <PDFText style={styles.headingSmall}>System-Architektur statt Baukasten</PDFText>
        <PDFText style={styles.textRegular}>
          Als Senior Developer in Umgebungen mit Millionenumsätzen habe ich gelernt: Performance ist nicht optional, Sicherheit kein Nice-to-Have. Deshalb sind meine Lösungen:
        </PDFText>
        <PDFView style={styles.bulletPoint}>
          <PDFText style={styles.bullet}>•</PDFText>
          <PDFText style={styles.bulletText}>Schnell, stabil und "boring" (im besten Sinne).</PDFText>
        </PDFView>
        <PDFView style={styles.bulletPoint}>
          <PDFText style={styles.bullet}>•</PDFText>
          <PDFText style={styles.bulletText}>Wartungsarm und unabhängig von Plugins oder Agenturen.</PDFText>
        </PDFView>
        <PDFView style={styles.bulletPoint}>
          <PDFText style={styles.bullet}>•</PDFText>
          <PDFText style={styles.bulletText}>Technologisch auf Augenhöhe mit Konzern-Standards, ohne bürokratischen Overhead.</PDFText>
        </PDFView>
      </PDFView>

      <PDFView style={styles.section}>
        <PDFText style={styles.headingSmall}>Ihre Vorteile</PDFText>
        <PDFText style={styles.textRegular}>
          Sie bekommen keinen Projektmanager, keinen starren Prozess und kein CMS-Drama.
        </PDFText>
        <PDFView style={{ flexDirection: 'row', marginTop: 10 }}>
          <PDFView style={{ flex: 1, paddingRight: 20 }}>
            <PDFText style={{ fontSize: FONT_SIZES.TINY, color: COLORS.TEXT_LIGHT, marginBottom: 4 }}>KOMMUNIKATION</PDFText>
            <PDFText style={styles.textRegular}>Ein Ansprechpartner. Eine kurze Mail reicht oft aus. Keine endlosen Meetings.</PDFText>
          </PDFView>
          <PDFView style={{ flex: 1 }}>
            <PDFText style={{ fontSize: FONT_SIZES.TINY, color: COLORS.TEXT_LIGHT, marginBottom: 4 }}>VERANTWORTUNG</PDFText>
            <PDFText style={styles.textRegular}>Ich übernehme das Thema komplett, damit es für Sie kein Thema mehr ist.</PDFText>
          </PDFView>
        </PDFView>
      </PDFView>

      <Divider style={{ marginTop: 20, marginBottom: 20, backgroundColor: COLORS.GRID }} />

      <PDFText style={[styles.textRegular, { fontSize: FONT_SIZES.SMALL, textAlign: 'center', color: COLORS.TEXT_LIGHT }]}>
        Marc Mintel — Digital Architect & Senior Software Developer
      </PDFText>
    </PDFView>
  );

  // Single layout page; page numbers disabled for this one-pager.
  return (
    <PDFDocument title="Marc Mintel - Arbeitsweise">
      <SimpleLayout
        companyData={companyData}
        bankData={bankData}
        headerIcon={headerIcon}
        footerLogo={footerLogo}
        showPageNumber={false}
      >
        {content}
      </SimpleLayout>
    </PDFDocument>
  );
};
|
||||
@@ -24,9 +24,10 @@ const styles = StyleSheet.create({
|
||||
borderBottomColor: COLORS.GRID,
|
||||
alignItems: "flex-start",
|
||||
},
|
||||
colPos: { width: "8%" },
|
||||
colDesc: { width: "62%" },
|
||||
colQty: { width: "10%", textAlign: "center" },
|
||||
colPos: { width: "6%" },
|
||||
colDesc: { width: "46%", paddingRight: 10 },
|
||||
colQty: { width: "8%", textAlign: "center" },
|
||||
colUnitPrice: { width: "20%", textAlign: "right", paddingRight: 10 },
|
||||
colPrice: { width: "20%", textAlign: "right" },
|
||||
headerText: {
|
||||
fontSize: FONT_SIZES.TINY,
|
||||
@@ -111,7 +112,8 @@ export const EstimationModule = ({
|
||||
Beschreibung
|
||||
</PDFText>
|
||||
<PDFText style={[styles.headerText, styles.colQty]}>Menge</PDFText>
|
||||
<PDFText style={[styles.headerText, styles.colPrice]}>Betrag</PDFText>
|
||||
<PDFText style={[styles.headerText, styles.colUnitPrice]}>E-Preis</PDFText>
|
||||
<PDFText style={[styles.headerText, styles.colPrice]}>Gesamt</PDFText>
|
||||
</PDFView>
|
||||
{positions.map((item: any, i: number) => (
|
||||
<PDFView key={i} style={styles.tableRow} wrap={false}>
|
||||
@@ -125,6 +127,11 @@ export const EstimationModule = ({
|
||||
</PDFText>
|
||||
</PDFView>
|
||||
<PDFText style={[styles.posText, styles.colQty]}>{item.qty}</PDFText>
|
||||
<PDFText style={[styles.priceText, styles.colUnitPrice, { fontSize: FONT_SIZES.SMALL, color: COLORS.TEXT_MAIN, fontWeight: "normal" }]}>
|
||||
{item.price > 0 && item.qty > 0
|
||||
? `${(item.price / item.qty).toLocaleString("de-DE")} €`
|
||||
: "n. A."}
|
||||
</PDFText>
|
||||
<PDFText style={[styles.priceText, styles.colPrice]}>
|
||||
{item.price > 0
|
||||
? `${item.price.toLocaleString("de-DE")} €`
|
||||
|
||||
@@ -17,64 +17,61 @@ const styles = StyleSheet.create({
|
||||
marginBottom: 24,
|
||||
textAlign: "justify",
|
||||
},
|
||||
sitemapTree: { marginTop: 8 },
|
||||
rootNode: {
|
||||
padding: 12,
|
||||
backgroundColor: COLORS.GRID,
|
||||
marginBottom: 20,
|
||||
borderLeftWidth: 2,
|
||||
borderLeftColor: COLORS.CHARCOAL,
|
||||
sitemapTree: {
|
||||
marginTop: 8,
|
||||
borderLeftWidth: 1,
|
||||
borderLeftColor: COLORS.GRID,
|
||||
marginLeft: 4,
|
||||
paddingLeft: 16,
|
||||
},
|
||||
rootTitle: {
|
||||
fontSize: FONT_SIZES.HEADING,
|
||||
fontSize: FONT_SIZES.LABEL,
|
||||
fontWeight: "bold",
|
||||
color: COLORS.CHARCOAL,
|
||||
letterSpacing: 0.5,
|
||||
textTransform: "uppercase",
|
||||
letterSpacing: 1,
|
||||
marginBottom: 16,
|
||||
marginLeft: -16, // offset the padding
|
||||
},
|
||||
categorySection: { marginBottom: 20 },
|
||||
categorySection: { marginBottom: 16 },
|
||||
categoryHeader: {
|
||||
flexDirection: "row",
|
||||
alignItems: "center",
|
||||
paddingBottom: 6,
|
||||
borderBottomWidth: 1,
|
||||
borderBottomColor: COLORS.BLUEPRINT,
|
||||
marginBottom: 10,
|
||||
marginBottom: 8,
|
||||
},
|
||||
categoryIcon: {
|
||||
width: 8,
|
||||
height: 8,
|
||||
backgroundColor: COLORS.GRID,
|
||||
borderInlineWidth: 1,
|
||||
borderColor: COLORS.DIVIDER,
|
||||
marginRight: 10,
|
||||
width: 6,
|
||||
height: 6,
|
||||
backgroundColor: COLORS.CHARCOAL,
|
||||
marginRight: 8,
|
||||
},
|
||||
categoryTitle: {
|
||||
fontSize: FONT_SIZES.BODY,
|
||||
fontWeight: "bold",
|
||||
color: COLORS.CHARCOAL,
|
||||
textTransform: "uppercase",
|
||||
letterSpacing: 1,
|
||||
},
|
||||
pagesGrid: { flexDirection: "row", flexWrap: "wrap" },
|
||||
pageCard: {
|
||||
width: "48%",
|
||||
marginRight: "2%",
|
||||
marginBottom: 12,
|
||||
padding: 10,
|
||||
borderWidth: 1,
|
||||
borderColor: COLORS.GRID,
|
||||
backgroundColor: "#fafafa",
|
||||
pageRow: {
|
||||
flexDirection: "row",
|
||||
alignItems: "flex-start",
|
||||
marginBottom: 6,
|
||||
paddingLeft: 14,
|
||||
},
|
||||
pageBullet: {
|
||||
fontSize: FONT_SIZES.BODY,
|
||||
color: COLORS.TEXT_LIGHT,
|
||||
marginRight: 8,
|
||||
width: 10,
|
||||
},
|
||||
pageTitle: {
|
||||
fontSize: FONT_SIZES.BODY,
|
||||
fontWeight: "bold",
|
||||
color: COLORS.TEXT_MAIN,
|
||||
marginBottom: 4,
|
||||
fontWeight: "bold",
|
||||
},
|
||||
pageDesc: {
|
||||
fontSize: FONT_SIZES.TINY,
|
||||
fontSize: FONT_SIZES.SMALL,
|
||||
color: COLORS.TEXT_DIM,
|
||||
lineHeight: 1.3,
|
||||
marginLeft: 6,
|
||||
marginTop: 1,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -83,16 +80,13 @@ export const SitemapModule = ({ state }: any) => (
|
||||
<DocumentTitle title="Informationsarchitektur" isHero={true} />
|
||||
<PDFView style={styles.section}>
|
||||
<PDFText style={styles.intro}>
|
||||
Die folgende Struktur definiert die logische Hierarchie und
|
||||
Benutzerführung. Sie dient als Bauplan für die technische Umsetzung und
|
||||
stellt sicher, dass alle relevanten Geschäftsbereiche intuitiv
|
||||
auffindbar sind.
|
||||
Die folgende Baumstruktur definiert die logische Hierarchie und
|
||||
Benutzerführung. Sie dient als kompakter Bauplan für die technische
|
||||
Umsetzung aller relevanten Geschäftsbereiche.
|
||||
</PDFText>
|
||||
|
||||
<PDFView style={styles.sitemapTree}>
|
||||
<PDFView style={styles.rootNode}>
|
||||
<PDFText style={styles.rootTitle}>Seitenstruktur</PDFText>
|
||||
</PDFView>
|
||||
<PDFText style={styles.rootTitle}>/ Root (Startseite)</PDFText>
|
||||
|
||||
{state.sitemap?.map((cat: any, i: number) => (
|
||||
<PDFView key={i} style={styles.categorySection} wrap={false}>
|
||||
@@ -101,18 +95,13 @@ export const SitemapModule = ({ state }: any) => (
|
||||
<PDFText style={styles.categoryTitle}>{cat.category}</PDFText>
|
||||
</PDFView>
|
||||
|
||||
<PDFView style={styles.pagesGrid}>
|
||||
<PDFView>
|
||||
{cat.pages.map((p: any, j: number) => (
|
||||
<PDFView
|
||||
key={j}
|
||||
style={[
|
||||
styles.pageCard,
|
||||
j % 2 === 1 ? { marginRight: 0 } : {},
|
||||
]}
|
||||
>
|
||||
<PDFView key={j} style={styles.pageRow}>
|
||||
<PDFText style={styles.pageBullet}>↳</PDFText>
|
||||
<PDFText style={styles.pageTitle}>{p.title}</PDFText>
|
||||
{p.desc && (
|
||||
<PDFText style={styles.pageDesc}>{p.desc}</PDFText>
|
||||
<PDFText style={styles.pageDesc}> – {p.desc}</PDFText>
|
||||
)}
|
||||
</PDFView>
|
||||
))}
|
||||
|
||||
@@ -81,7 +81,7 @@ export const TransparenzModule = ({ pricing }: any) => {
|
||||
},
|
||||
{
|
||||
l: "Sprachversionen",
|
||||
d: "Skalierung der System-Architektur auf Zweit-Sprachen.",
|
||||
d: "Skalierung der Architektur für weitere Sprachen (+20% Aufschlag auf die Zwischensumme aller vorherigen Positionen).",
|
||||
p: "+20%",
|
||||
},
|
||||
{
|
||||
|
||||
@@ -2,6 +2,8 @@ export * from "./logic/pricing/types.js";
|
||||
export * from "./logic/pricing/constants.js";
|
||||
export * from "./logic/pricing/calculator.js";
|
||||
export * from "./components/EstimationPDF.js";
|
||||
export * from "./components/ConceptPDF.js";
|
||||
export * from "./components/InfoPDF.js";
|
||||
export * from "./components/pdf/SimpleLayout.js";
|
||||
export * from "./components/pdf/SharedUI.js";
|
||||
export * from "./components/pdf/modules/FrontPageModule.js";
|
||||
@@ -12,4 +14,5 @@ export * from "./components/pdf/modules/CommonModules.js";
|
||||
export * from "./components/pdf/modules/BrandingModules.js";
|
||||
export * from "./components/pdf/modules/TransparenzModule.js";
|
||||
export * from "./components/AgbsPDF.js";
|
||||
export * from "./components/InfoPDF.js";
|
||||
export * from "./components/CombinedQuotePDF.js";
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { FormState } from "./types.js";
|
||||
|
||||
export const PRICING = {
|
||||
BASE_WEBSITE: 5440, // Updated to match AI prompt requirement in Pass 1
|
||||
BASE_WEBSITE: 4000, // Foundation server infrastructure setup
|
||||
PAGE: 600,
|
||||
FEATURE: 1500,
|
||||
FUNCTION: 800,
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
export * from "./index.js";
|
||||
export * from "./services/AcquisitionService.js";
|
||||
export * from "./services/PdfEngine.js";
|
||||
|
||||
@@ -1,6 +1,27 @@
|
||||
import { renderToFile } from "@react-pdf/renderer";
|
||||
import { renderToFile, Font } from "@react-pdf/renderer";
|
||||
import { createElement } from "react";
|
||||
|
||||
// Standard Font Registrations to prevent crashes when PDFs use custom web fonts
// NOTE(review): 'Outfit' and 'Inter' are aliased onto the built-in Helvetica
// faces here, so server-rendered PDFs use Helvetica metrics instead of the
// real fonts — confirm this fallback is intentional.
Font.register({
  family: 'Outfit',
  fonts: [
    { src: 'Helvetica' },
    { src: 'Helvetica-Bold', fontWeight: 'bold' },
  ],
});

Font.register({
  family: 'Inter',
  fonts: [
    { src: 'Helvetica' },
    { src: 'Helvetica-Bold', fontWeight: 'bold' },
  ],
});
|
||||
|
||||
import { EstimationPDF } from "../components/EstimationPDF.js";
|
||||
import { ConceptPDF } from "../components/ConceptPDF.js";
|
||||
import { InfoPDF } from "../components/InfoPDF.js";
|
||||
import { AgbsPDF } from "../components/AgbsPDF.js";
|
||||
import { PRICING } from "../logic/pricing/constants.js";
|
||||
import { calculateTotals } from "../logic/pricing/calculator.js";
|
||||
|
||||
@@ -21,4 +42,33 @@ export class PdfEngine {
|
||||
|
||||
return outputPath;
|
||||
}
|
||||
|
||||
/**
 * Render a ProjectConcept as a concept PDF file.
 *
 * @param concept - Concept payload forwarded to the ConceptPDF component.
 * @param outputPath - Destination file path for the rendered PDF.
 * @returns The outputPath, for convenient chaining.
 */
async generateConceptPdf(concept: any, outputPath: string): Promise<string> {
  await renderToFile(
    createElement(ConceptPDF as any, {
      concept,
    } as any) as any,
    outputPath
  );

  return outputPath;
}
|
||||
|
||||
/**
 * Render the static "Arbeitsweise" info document as a PDF file.
 *
 * @param outputPath - Destination file path for the rendered PDF.
 * @param options - Optional header icon and footer logo image sources.
 * @returns The outputPath, for convenient chaining.
 */
async generateInfoPdf(outputPath: string, options: { headerIcon?: string; footerLogo?: string } = {}): Promise<string> {
  await renderToFile(
    createElement(InfoPDF as any, options as any) as any,
    outputPath
  );

  return outputPath;
}
|
||||
|
||||
/**
 * Render the terms-and-conditions (AGB) document as a PDF file.
 *
 * @param outputPath - Destination file path for the rendered PDF.
 * @param options - Header/footer images and document mode
 *   ("estimation" or "full" — semantics defined by AgbsPDF).
 * @returns The outputPath, for convenient chaining.
 */
async generateAgbsPdf(outputPath: string, options: { headerIcon?: string; footerLogo?: string; mode?: "estimation" | "full" } = {}): Promise<string> {
  await renderToFile(
    createElement(AgbsPDF as any, options as any) as any,
    outputPath
  );

  return outputPath;
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"version": "1.8.21",
|
||||
"publishConfig": {
|
||||
"access": "public",
|
||||
"registry": "https://npm.infra.mintel.me"
|
||||
"registry": "https://git.infra.mintel.me/api/packages/mmintel/npm"
|
||||
},
|
||||
"files": [
|
||||
"base.json",
|
||||
|
||||
Reference in New Issue
Block a user