Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
109 lines
4.3 KiB
TypeScript
109 lines
4.3 KiB
TypeScript
// ============================================================================
|
|
// Step 01: Extract — Briefing Fact Extraction (Gemini Flash)
|
|
// ============================================================================
|
|
|
|
import { llmJsonRequest } from "../llm-client.js";
|
|
import type { ConceptState, StepResult, PipelineConfig } from "../types.js";
|
|
import { DEFAULT_MODELS } from "../types.js";
|
|
|
|
/**
 * Static system prompt for the extraction step. It contains no interpolated
 * values, so it is built once at module load instead of on every call.
 */
const EXTRACT_SYSTEM_PROMPT = `
You are a precision fact extractor. Your only job: extract verifiable facts from the BRIEFING.
Output language: GERMAN (strict).
Output format: flat JSON at root level. No nesting except arrays.

### CRITICAL RULES:
1. "employeeCount": take from SITE ANALYSIS if available. Only override if briefing states something more specific.
2. External domains (e.g. "etib-ing.com") have their OWN website. NEVER include them as sub-pages.
3. Videos (Messefilm, Imagefilm) are CONTENT ASSETS, not pages.
4. If existing site already has search, include "search" in functions.
5. DO NOT invent pages not mentioned in briefing or existing navigation.

### CONSERVATIVE RULE:
- simple lists (Jobs, Referenzen, Messen) = pages, NOT features
- Assume "page" as default. Only add "feature" for complex interactive systems.

### OUTPUT FORMAT:
{
"companyName": string,
"companyAddress": string,
"personName": string,
"email": string,
"existingWebsite": string,
"websiteTopic": string, // MAX 3 words
"isRelaunch": boolean,
"employeeCount": string, // from site analysis, e.g. "über 50"
"pages": string[], // ALL pages: ["Startseite", "Über Uns", "Leistungen", ...]
"functions": string[], // search, forms, maps, video, cookie_consent, etc.
"assets": string[], // existing_website, logo, media, photos, videos
"deadline": string,
"targetAudience": string,
"cmsSetup": boolean,
"multilang": boolean
}

BANNED OUTPUT KEYS: "selectedPages", "otherPages", "features", "apiSystems" — use pages[] and functions[] ONLY.
`;

/** The site profile as carried on the pipeline state, with null/undefined removed. */
type ExtractSiteProfile = NonNullable<ConceptState["siteProfile"]>;

/**
 * Renders the site profile into the "EXISTING WEBSITE ANALYSIS" prompt section.
 *
 * @param profile - The site profile from the pipeline state; may be absent.
 * @returns The formatted analysis block, or a fixed placeholder sentence when
 *          no profile is available.
 */
function buildExtractSiteContext(profile: ConceptState["siteProfile"]): string {
  if (!profile) {
    return "No existing website data available.";
  }

  // `employeeCount` is read through a narrow cast (rather than `as any`) so the
  // rest of the profile stays type-checked.
  // NOTE(review): the original code cast to `any` here, which suggests the field
  // is not declared on the profile type — confirm and add it to the type if so.
  const { employeeCount } = profile as ExtractSiteProfile & {
    employeeCount?: string | number;
  };

  // `||` (not `??`) is deliberate throughout: an empty string produced by
  // `join()` on an empty list must fall through to the placeholder as well.
  const navigationItems =
    profile.navigation.map((n) => n.label).join(", ") || "nicht erkannt";
  const existingFeatures = profile.existingFeatures.join(", ") || "keine";
  const services = profile.services.join(" | ") || "keine";
  const externalDomains = profile.externalDomains.join(", ") || "keine";
  const socialLinks =
    Object.entries(profile.socialLinks)
      .map(([k, v]) => `${k}: ${v}`)
      .join(", ") || "keine";

  return `
EXISTING WEBSITE ANALYSIS (FACTS — verifiably crawled, NOT guessed):
- Domain: ${profile.domain}
- Total pages crawled: ${profile.totalPages}
- Navigation items: ${navigationItems}
- Existing features: ${existingFeatures}
- Services / Kompetenzen: ${services}
- Employee count (from website text): ${employeeCount || "nicht genannt"}
- Company name: ${profile.companyInfo.name || "unbekannt"}
- Address: ${profile.companyInfo.address || "unbekannt"}
- Tax ID (USt-ID): ${profile.companyInfo.taxId || "unbekannt"}
- HRB: ${profile.companyInfo.registerNumber || "unbekannt"}
- Managing Director: ${profile.companyInfo.managingDirector || "unbekannt"}
- External related domains (HAVE OWN WEBSITES — DO NOT include as sub-pages!): ${externalDomains}
- Social links: ${socialLinks}
`;
}

/**
 * Turns an arbitrary thrown value into a printable message.
 * Anything can be thrown in JavaScript, so `.message` is only read after
 * confirming the value really is an `Error`.
 */
function describeExtractError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Step 01 of the pipeline: asks the LLM to extract facts from the briefing.
 *
 * Builds a user prompt from the briefing, the optional comments and the
 * optional site profile, sends it together with the static system prompt to
 * the configured `flash` model, and wraps the outcome in a `StepResult`.
 *
 * @param state  - Pipeline state; `briefing`, `comments` and `siteProfile` are read.
 * @param config - Pipeline configuration; `modelsOverride` and `openrouterKey` are read.
 * @returns On success: `{ success: true, data, usage }` where `usage` carries the
 *          step id, model, token counts, cost and wall-clock duration. On failure:
 *          `{ success: false, error }`. Errors thrown by the request are caught and
 *          reported in the result rather than propagated.
 */
export async function executeExtract(
  state: ConceptState,
  config: PipelineConfig,
): Promise<StepResult> {
  // Per-run overrides win over the defaults.
  const models = { ...DEFAULT_MODELS, ...config.modelsOverride };
  const startTime = Date.now();

  // Build site context from the deterministic analyzer
  const siteContext = buildExtractSiteContext(state.siteProfile);

  const userPrompt = `BRIEFING (TRUTH SOURCE):
${state.briefing}

COMMENTS:
${state.comments || "keine"}

${siteContext}`;

  try {
    const { data, usage } = await llmJsonRequest({
      model: models.flash,
      systemPrompt: EXTRACT_SYSTEM_PROMPT,
      userPrompt,
      apiKey: config.openrouterKey,
    });

    return {
      success: true,
      data,
      usage: {
        step: "01-extract",
        model: models.flash,
        promptTokens: usage.promptTokens,
        completionTokens: usage.completionTokens,
        cost: usage.cost,
        durationMs: Date.now() - startTime,
      },
    };
  } catch (err: unknown) {
    return {
      success: false,
      error: `Extract step failed: ${describeExtractError(err)}`,
    };
  }
}
|