chore: overhaul infrastructure and integrate @mintel packages
Some checks failed
🧪 CI (QA) / 🧪 Quality Assurance (push) Failing after 1m3s
Some checks failed
🧪 CI (QA) / 🧪 Quality Assurance (push) Failing after 1m3s
- Restructure to pnpm monorepo (site moved to apps/web)
- Integrate @mintel/tsconfig, @mintel/eslint-config, @mintel/husky-config
- Implement Docker service architecture (Varnish, Directus, Gatekeeper)
- Set up environment-aware Gitea Actions deployment
This commit is contained in:
@@ -1,857 +0,0 @@
|
||||
import { CheerioCrawler, RequestQueue } from 'crawlee';
|
||||
import * as path from 'node:path';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { URL } from 'node:url';
|
||||
import { execSync } from 'node:child_process';
|
||||
import axios from 'axios';
|
||||
import { FileCacheAdapter } from '../src/utils/cache/file-adapter.js';
|
||||
|
||||
import { initialState, PRICING } from '../src/logic/pricing/constants.js';
|
||||
import { calculateTotals } from '../src/logic/pricing/calculator.js';
|
||||
|
||||
/**
 * CLI entry point of the AI estimation script.
 *
 * Steps, in order:
 *  1. Parse CLI arguments (briefing text or `@file`, `--url`, `--comments`/`--notes`,
 *     `--budget`, `--cache-key`, `--json`, `--estimation`/`-E`, `--clear-cache`).
 *  2. Optionally crawl the target URL and distill the crawl via the LLM (both cached for 24h).
 *  3. Obtain the form state from `--json`, from the cache, or from `getAiEstimation`.
 *  4. Persist the state as JSON and invoke `scripts/generate-estimate.ts` to render the PDF.
 *  5. Print accumulated token usage when the AI was actually called.
 *
 * Exits the process with code 1 when `OPENROUTER_KEY` is missing or no input was given.
 */
async function main() {
  const OPENROUTER_KEY = process.env.OPENROUTER_KEY;
  if (!OPENROUTER_KEY) {
    console.error('❌ Error: OPENROUTER_KEY not found in environment.');
    process.exit(1);
  }

  let briefing = '';
  let targetUrl: string | null = null;
  let comments: string | null = null;
  let budget: string | null = null;
  let cacheKey: string | null = null;
  let jsonStatePath: string | null = null;

  const isEstimation = process.argv.includes('--estimation') || process.argv.includes('-E');

  const args = process.argv.slice(2);
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    // `?? null` keeps the variables at `null` when a flag is the last argument
    // and therefore has no value following it.
    if (arg === '--url') {
      targetUrl = args[++i] ?? null;
    } else if (arg === '--comments' || arg === '--notes') {
      comments = args[++i] ?? null;
    } else if (arg === '--budget') {
      budget = args[++i] ?? null;
    } else if (arg === '--cache-key') {
      cacheKey = args[++i] ?? null;
    } else if (arg === '--json') {
      jsonStatePath = args[++i] ?? null;
    } else if (arg === '--estimation' || arg === '-E') {
      // Handled above
    } else if (!arg.startsWith('--')) {
      briefing = arg;
    }
  }

  // "@path" means: read the briefing from that file (path.resolve keeps absolute paths as-is).
  if (briefing && briefing.startsWith('@')) {
    const filePath = path.resolve(process.cwd(), briefing.substring(1));
    briefing = await fs.readFile(filePath, 'utf8');
  }

  // Discovery ONLY if not provided
  if (!targetUrl && briefing) {
    const urlMatch = briefing.match(/https?:\/\/[^\s]+/);
    if (urlMatch) {
      // Drop sentence punctuation that the greedy match picks up ("... see https://x.de/.").
      targetUrl = urlMatch[0].replace(/[)\].,;:!?'">]+$/, '');
      console.log(`🔗 Discovered URL in briefing: ${targetUrl}`);
    }
  }

  if (!briefing && !targetUrl && !comments && !jsonStatePath) {
    console.error('❌ Usage: npm run ai-estimate -- "Briefing text" [--url https://example.com] [--comments "Manual notes"]');
    console.error(' Or: npm run ai-estimate -- @briefing.txt [--url https://example.com]');
    console.error(' Or: npm run ai-estimate -- --json path/to/state.json');
    process.exit(1);
  }

  const cacheDir = path.join(process.cwd(), '.cache');
  const clearCache = process.argv.includes('--clear-cache');
  if (clearCache) {
    console.log('🧹 Clearing cache...');
    // A missing cache directory simply means there is nothing to clear.
    if (existsSync(cacheDir)) {
      const cacheFiles = await fs.readdir(cacheDir);
      for (const file of cacheFiles) {
        if (file.startsWith('ai_est_')) {
          await fs.unlink(path.join(cacheDir, file));
        }
      }
    }
  }

  const cache = new FileCacheAdapter({ prefix: 'ai_est_' });
  const finalCacheKey = cacheKey || `${briefing}_${targetUrl}_${comments}_${budget}`;

  // 1. Crawl if URL provided
  let crawlContext = '';
  if (targetUrl) {
    console.log(`🔍 Crawling ${targetUrl} for context...`);
    const cachedCrawl = await cache.get<string>(`crawl_${targetUrl}`);
    if (cachedCrawl && !clearCache) {
      console.log('📦 Using cached crawl results.');
      crawlContext = cachedCrawl;
    } else {
      try {
        crawlContext = await performCrawl(targetUrl);
      } catch (err: unknown) {
        // A crawl error (e.g. invalid URL) must not abort the run; the
        // "crawl failed" branch below tells the AI not to invent website details.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`⚠️ Crawl error: ${reason}`);
        crawlContext = '';
      }
      // Do not cache an empty (failed) crawl.
      if (crawlContext) {
        await cache.set(`crawl_${targetUrl}`, crawlContext, 86400); // 24h cache
      }
    }
  }

  // 2. Distill Crawl Context (Context Filtering)
  let distilledCrawl = '';
  if (crawlContext) {
    const cachedDistilled = await cache.get<string>(`distilled_${targetUrl}`);
    if (cachedDistilled && !clearCache) {
      distilledCrawl = cachedDistilled;
    } else {
      distilledCrawl = await distillCrawlContext(crawlContext, OPENROUTER_KEY);
      await cache.set(`distilled_${targetUrl}`, distilledCrawl, 86400);
    }
  } else if (targetUrl) {
    distilledCrawl = `WARNING: The crawl of ${targetUrl} failed (ENOTFOUND or timeout).
The AI must NOT hallucinate details about the current website.
Focus ONLY on the BRIEFING provided. If details are missing, mark them as 'unknown'.`;
    console.warn('⚠️ Crawl failed. AI will be notified to avoid hallucinations.');
  }

  // 3. AI Prompting
  console.log('🤖 Consultating Gemini 3 Flash...');
  const cachedAi = !clearCache ? await cache.get<any>(finalCacheKey) : null;
  let formState: any;
  let usage: { prompt: number, completion: number, cost: number } = { prompt: 0, completion: 0, cost: 0 };

  if (jsonStatePath) {
    console.log(`📂 Loading state from JSON: ${jsonStatePath}`);
    const rawJson = await fs.readFile(path.resolve(process.cwd(), jsonStatePath), 'utf8');
    formState = JSON.parse(rawJson);
  } else if (cachedAi) {
    console.log('📦 Using cached AI response.');
    formState = cachedAi;
  } else {
    // Load Context Documents — only needed when the AI is actually called.
    const [principles, techStandards, tone] = await Promise.all([
      fs.readFile(path.resolve(process.cwd(), 'docs/PRINCIPLES.md'), 'utf8'),
      fs.readFile(path.resolve(process.cwd(), 'docs/TECH.md'), 'utf8'),
      fs.readFile(path.resolve(process.cwd(), 'docs/TONE.md'), 'utf8'),
    ]);
    const result = await getAiEstimation(briefing, distilledCrawl, comments, budget, OPENROUTER_KEY, principles, techStandards, tone);
    formState = result.state;
    usage = result.usage;
    await cache.set(finalCacheKey, formState);
  }

  // 4. Save Data & Generate PDF
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const jsonOutDir = path.resolve(process.cwd(), 'out/estimations/json');
  await fs.mkdir(jsonOutDir, { recursive: true });

  // companyName comes from the model / a JSON file: strip path separators and other
  // characters that are unsafe in a file name so it cannot escape jsonOutDir.
  const safeCompanyName = String(formState?.companyName || 'unknown').replace(/[^\p{L}\p{N}._ -]+/gu, '_');
  const finalJsonPath = path.join(jsonOutDir, `${safeCompanyName}_${timestamp}.json`);
  await fs.writeFile(finalJsonPath, JSON.stringify(formState, null, 2));

  // The temp file lives in .cache, which may not exist yet on a fresh checkout.
  await fs.mkdir(cacheDir, { recursive: true });
  const tempJsonPath = path.resolve(cacheDir, `temp_state_${Date.now()}.json`);
  await fs.writeFile(tempJsonPath, JSON.stringify(formState, null, 2));

  console.log(`📦 Saved detailed state to: ${finalJsonPath}`);
  console.log('📄 Generating PDF estimation...');
  try {
    const genArgs = isEstimation ? '--estimation' : '';
    // Quote the path so a working directory containing spaces does not split the argument.
    execSync(`npx tsx ./scripts/generate-estimate.ts --input "${tempJsonPath}" ${genArgs}`, { stdio: 'inherit' });
  } finally {
    // Best-effort cleanup; the same content is kept at finalJsonPath. A cleanup
    // failure must not mask an error thrown by the generator.
    await fs.rm(tempJsonPath, { force: true }).catch(() => undefined);
  }

  console.log('\n✨ AI Estimation Complete!');
  if (usage.prompt > 0) {
    console.log('--------------------------------------------------');
    console.log('📊 ACCUMULATED API USAGE (SUM OF 6 PASSES)');
    console.log(` Model: google/gemini-3-flash-preview`);
    console.log(` Total Prompt: ${usage.prompt.toLocaleString()}`);
    console.log(` Total Completion: ${usage.completion.toLocaleString()}`);
    console.log(` Total Tokens: ${(usage.prompt + usage.completion).toLocaleString()}`);
    console.log(` Total Cost (USD): $${usage.cost.toFixed(6)}`);
    console.log('--------------------------------------------------\n');
  }
}
|
||||
|
||||
/**
 * Condenses a raw website crawl into a few German bullet points via the
 * OpenRouter chat-completions endpoint.
 *
 * @param rawCrawl Raw crawl text; only the first 30,000 characters are sent.
 * @param apiKey OpenRouter API key, sent as a Bearer token.
 * @returns The model's answer, or a fixed German placeholder when `rawCrawl`
 *          is empty or whitespace-only (no request is made in that case).
 * @throws Error when the response carries no text content, and whatever the
 *         HTTP client throws on transport errors.
 */
async function distillCrawlContext(rawCrawl: string, apiKey: string): Promise<string> {
  if (!rawCrawl || rawCrawl.trim().length === 0) return "Keine Crawl-Daten vorhanden.";

  console.log(' ↳ Distilling Crawl Context (Noise Filtering)...');
  const systemPrompt = `
You are a context distiller. Your goal is to strip away HTML noise, legal footers, and generic fluff from a website crawl.
Extract the "Company DNA" in 5-8 bullet points (GERMAN).

### FOCUS ON:
1. Core Business / Services.
2. Unique Selling Points (USPs).
3. Target Audience (if clear).
4. Tech Stack or industry-specific equipment mentioned.
5. Brand tone (e.g. "industrial", "friendly", "technical").

### OUTPUT:
Return ONLY the bullet points. No intro/outro.
`;
  const resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
    model: 'google/gemini-3-flash-preview',
    messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: `RAW_CRAWL:\n${rawCrawl.substring(0, 30000)}` }],
  }, { headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' } });

  // The API can answer 200 with an error payload and no `choices`; fail with a
  // descriptive error instead of a TypeError on `choices[0]`.
  const content: unknown = resp.data?.choices?.[0]?.message?.content;
  if (typeof content !== 'string' || content.length === 0) {
    console.error('❌ Crawl distillation failed. Response:', JSON.stringify(resp.data, null, 2));
    throw new Error('Crawl distillation: No content in response');
  }

  return content;
}
|
||||
|
||||
/**
 * Crawls up to 20 same-origin pages starting at `url` and returns a plain-text
 * digest: a per-type page count followed by one section per page (title, type,
 * h1 headings, nav link texts, up to 5 hex colors, and the body text).
 *
 * @param url Start URL; its origin restricts which links are followed.
 * @returns The digest, or an empty string when no page could be processed, so
 *          callers can tell a failed crawl from a successful one.
 * @throws TypeError when `url` is not a valid absolute URL.
 */
async function performCrawl(url: string): Promise<string> {
  const pages: { url: string, content: string, type: string }[] = [];
  const origin = new URL(url).origin;

  // Ordered keyword rules: the first rule with a keyword contained in the
  // lower-cased path decides the page type.
  const typeRules: [string, string[]][] = [
    ['service', ['service', 'leistung']],
    ['blog', ['blog', 'news', 'aktuelles', 'magazin']],
    ['contact', ['contact', 'kontakt']],
    ['career', ['job', 'karriere', 'career', 'human-resources']],
    ['portfolio', ['portfolio', 'referenz', 'projekt', 'case-study']],
    ['legal', ['legal', 'impressum', 'datenschutz', 'privacy']],
  ];
  const classify = (urlPath: string): string => {
    if (urlPath === '/' || urlPath === '') return 'home';
    const rule = typeRules.find(([, keywords]) => keywords.some((keyword) => urlPath.includes(keyword)));
    return rule ? rule[0] : 'other';
  };

  const crawler = new CheerioCrawler({
    maxRequestsPerCrawl: 20,
    async requestHandler({ $, request, enqueueLinks }) {
      const title = $('title').text();
      const type = classify(new URL(request.url).pathname.toLowerCase());

      const h1s = $('h1').map((_, el) => $(el).text()).get();
      const navLinks = $('nav a').map((_, el) => $(el).text().trim()).get().filter(t => t.length > 0);
      // Collapse whitespace and cap the text at 50,000 characters per page.
      const bodyText = $('body').text().replace(/\s+/g, ' ').substring(0, 50000);
      const html = $.html();
      // 3- or 6-digit hex colors anywhere in the markup; the first 5 distinct ones are kept.
      const hexColors = html.match(/#(?:[0-9a-fA-F]{3}){1,2}\b/g) || [];
      const uniqueColors = Array.from(new Set(hexColors)).slice(0, 5);

      pages.push({
        url: request.url,
        type,
        content: `Title: ${title}\nType: ${type}\nHeadings: ${h1s.join(', ')}\nNav: ${navLinks.join(', ')}\nColors: ${uniqueColors.join(', ')}\nText: ${bodyText}`
      });

      await enqueueLinks({
        limit: 15,
        transformRequestFunction: (req) => {
          const reqUrl = new URL(req.url);
          if (reqUrl.origin !== origin) return false;
          // Skip assets
          if (reqUrl.pathname.match(/\.(pdf|zip|jpg|png|svg|webp)$/i)) return false;
          return req;
        }
      });
    },
  });

  await crawler.run([url]);

  // Nothing was processed (host unreachable, every request failed, ...): report
  // that as "no context" rather than a summary of zero pages.
  if (pages.length === 0) return '';

  const typeCounts = pages.reduce((acc, p) => {
    acc[p.type] = (acc[p.type] || 0) + 1;
    return acc;
  }, {} as Record<string, number>);

  let summary = `\nCrawl Summary: Identified ${pages.length} pages total on ${origin}.\n`;
  summary += Object.entries(typeCounts).map(([type, count]) => `- ${type}: ${count}`).join('\n') + '\n\n';

  return summary + pages.map(p => `--- PAGE: ${p.url} ---\n${p.content}`).join('\n\n');
}
|
||||
|
||||
/**
 * Repairs common defects in LLM-produced JSON text so that `JSON.parse` accepts it.
 *
 * - Removes Markdown code fences (```json / ```), then trims.
 * - Inside string literals: escapes raw newline, carriage return and tab, and
 *   replaces other control characters with a space.
 * - Outside string literals: replaces control characters other than \n and \r
 *   with a space, and drops trailing commas before `]` or `}`.
 *
 * The scan tracks whether it is inside a double-quoted string, so text inside
 * string values (e.g. "a, ]") is never rewritten.
 *
 * @param str Raw model output.
 * @returns The cleaned JSON text (not parsed).
 */
const cleanJson = (str: string) => {
  // Remove markdown code blocks if present
  const unfenced = str.replace(/```json\n?|```/g, '').trim();

  const isControl = (code: number) => code < 0x20 || (code >= 0x7f && code <= 0x9f);

  let cleaned = '';
  let inString = false;
  let escaped = false;

  for (let i = 0; i < unfenced.length; i++) {
    const ch = unfenced.charAt(i);
    const code = unfenced.charCodeAt(i);

    if (inString) {
      if (escaped) {
        // Character following a backslash: part of an escape sequence, copy verbatim.
        cleaned += ch;
        escaped = false;
      } else if (ch === '\\') {
        cleaned += ch;
        escaped = true;
      } else if (ch === '"') {
        cleaned += ch;
        inString = false;
      } else if (ch === '\n') {
        // Raw line breaks/tabs are illegal inside JSON strings: escape them.
        cleaned += '\\n';
      } else if (ch === '\r') {
        cleaned += '\\r';
      } else if (ch === '\t') {
        cleaned += '\\t';
      } else if (isControl(code)) {
        cleaned += ' ';
      } else {
        cleaned += ch;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
      cleaned += ch;
    } else if (ch === ',') {
      // Trailing comma: look past whitespace/control characters for a closing bracket.
      let j = i + 1;
      while (j < unfenced.length && (/\s/.test(unfenced.charAt(j)) || isControl(unfenced.charCodeAt(j)))) j++;
      const next = unfenced.charAt(j);
      if (next === ']' || next === '}') {
        i = j - 1; // skip the comma and the gap; the bracket is handled next iteration
      } else {
        cleaned += ch;
      }
    } else if (ch !== '\n' && ch !== '\r' && isControl(code)) {
      cleaned += ' ';
    } else {
      cleaned += ch;
    }
  }

  return cleaned;
};
|
||||
|
||||
const getAiEstimation = async (briefing: string, distilledCrawl: string, comments: string | null, budget: string | null, apiKey: string, principles: string, techStandards: string, tone: string) => {
|
||||
let usage = { prompt: 0, completion: 0, cost: 0 };
|
||||
const addUsage = (data: any) => {
|
||||
if (data?.usage) {
|
||||
usage.prompt += data.usage.prompt_tokens || 0;
|
||||
usage.completion += data.usage.completion_tokens || 0;
|
||||
// OpenRouter provides 'cost' field in USD (as per documentation)
|
||||
// If missing, we use a fallback calculation for transparency
|
||||
if (data.usage.cost !== undefined) {
|
||||
usage.cost += data.usage.cost;
|
||||
} else {
|
||||
// Fallback: Gemini 3 Flash Flash pricing (~$0.1 / 1M prompt, ~$0.4 / 1M completion)
|
||||
usage.cost += (data.usage.prompt_tokens || 0) * (0.1 / 1000000) + (data.usage.completion_tokens || 0) * (0.4 / 1000000);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// 1. PASS 1: Fact Extraction (Briefing Sensor)
|
||||
console.log(' ↳ Pass 1: Fact Extraction (Briefing Sensor)...');
|
||||
const pass1SystemPrompt = `
|
||||
You are a precision sensor. Analyze the BRIEFING and extract ONLY the raw facts.
|
||||
Tone: Literal, non-interpretive.
|
||||
Output language: GERMAN (Strict).
|
||||
Output format: ROOT LEVEL JSON (No wrapper keys like '0' or 'data').
|
||||
|
||||
### MISSION:
|
||||
Focus 100% on the BRIEFING text provided by the user. Use the DISTILLED_CRAWL only as background context for terms or company details. If there is a conflict, the BRIEFING is the absolute source of truth.
|
||||
|
||||
### PRICING REFERENCE (FOR CALCULATION):
|
||||
- Base Project (Infrastructure + 12 Months Hosting): 5.440 € (MANDATORY START)
|
||||
- Additional Pages: 600 € / stk
|
||||
- System-Modules (Features): 1.500 € / stk
|
||||
- Logic-Functions: 800 € / stk
|
||||
- API Integrations: 800 € / stk
|
||||
- CMS Setup: 1.500 € (optional)
|
||||
- Visual Staging/Interactions: 1.500 € - 2.000 €
|
||||
|
||||
${budget ? `### BUDGET LOGIC (ULTRA-STRICT):
|
||||
1. **Mental Calculation**: Start with 7.000 €. Add items based on the reference above.
|
||||
2. **Hard Ceiling**: If total > ${budget}, you MUST discard lower priority items.
|
||||
3. **Priority**: High-End Design and Core Pages > Features.
|
||||
4. **Restriction**: For ${budget}, do NOT exceed 2 features and 4 extra pages.
|
||||
5. THE TOTAL COST CALCULATED BY THESE RULES MUST BE <= ${budget}.
|
||||
6. Do NOT mention the budget in any string fields.` : ''}
|
||||
|
||||
- ** features **: Items from the FEATURE_REFERENCE.
|
||||
- ** ABSOLUTE CONSERVATIVE RULE **: Only use features if the briefing implies *dynamic complexity* (CMS, filtering, search, database).
|
||||
- Simple keywords like 'Karriere', 'Referenzen', 'Messen' or lists of items MUST be treated as simple pages. Add them to 'otherPages' instead.
|
||||
- If in doubt, categorizing as a PAGE is the mandatory default.
|
||||
- ** otherPages **: Any specific pages mentioned (e.g. 'Historie', 'Team', 'Partner') that are not in the standard list. Use this for static lists of jobs or references too.
|
||||
- ** companyName **: The full legal and brand name (e.g., "E-TIB GmbH"). Use signatures and crawl data.
|
||||
- ** personName **: The name of the primary human contact (e.g., "Danny Joseph"). ** CRITICAL **: Check email signatures and "Mit freundlichen Grüßen" blocks.
|
||||
- ** email **: The email address of the contact person if found in the briefing / signature.
|
||||
- ** existingWebsite **: The primary URL mentioned in the briefing or signature (e.g., "www.e-tib.com").
|
||||
- ** websiteTopic **: A short descriptor of the CORE BUSINESS (e.g., "Kabeltiefbau"). MAX 3 WORDS.
|
||||
- ** isRelaunch **: Set to TRUE if the briefing mentions an existing website, a URL, or if the company is an established entity.
|
||||
- ** CRITICAL LOGIC **: If a URL is mentioned, isRelaunch MUST be TRUE.
|
||||
- For all textual values: USE GERMAN.
|
||||
- ** multilang **: ONLY if the briefing mentions multiple target languages.
|
||||
- ** maps **: If "Google Maps" or location maps are mentioned or implicit.
|
||||
- ** CRITICAL **: Do NOT include "social" in apiSystems unless the user explicitly wants to SYNC / POST content.
|
||||
|
||||
### CATEGORY MAPPING(IDs ONLY):
|
||||
- ** selectedPages **: [Home, About, Services, Contact, Landing, Legal]
|
||||
- ** features **: [blog_news, products, jobs, refs, events]
|
||||
- ** functions **: [search, filter, pdf, forms, members, calendar, multilang, chat]
|
||||
- ** apiSystems **: [crm_erp, payment, marketing, ecommerce, maps, social, analytics]
|
||||
- ** assets **: [existing_website, logo, styleguide, content_concept, media, icons, illustrations, fonts]
|
||||
|
||||
### OUTPUT FORMAT(Strict JSON - ROOT LEVEL):
|
||||
{
|
||||
"companyName": string,
|
||||
"companyAddress": string,
|
||||
"personName": string,
|
||||
"email": string,
|
||||
"existingWebsite": string,
|
||||
"websiteTopic": string,
|
||||
"isRelaunch": boolean,
|
||||
"selectedPages": string[],
|
||||
"features": string[],
|
||||
"functions": string[],
|
||||
"apiSystems": string[],
|
||||
"assets": string[],
|
||||
"deadline": string(GERMAN),
|
||||
"targetAudience": "B2B" | "B2C" | "Internal" | string(GERMAN),
|
||||
"expectedAdjustments": "low" | "medium" | "high" | string(GERMAN),
|
||||
"employeeCount": "ca. 10+" | "ca. 50+" | "ca. 100+" | "ca. 250+" | "ca. 500+" | "ca. 1000+"
|
||||
}
|
||||
`;
|
||||
const pass1UserPrompt = `BRIEFING(TRUTH SOURCE): \n${briefing} \n\nCOMMENTS: \n${comments} \n\nDISTILLED_CRAWL(CONTEXT ONLY): \n${distilledCrawl} `;
|
||||
const p1Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [{ role: 'system', content: pass1SystemPrompt }, { role: 'user', content: pass1UserPrompt }],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey} `, 'Content-Type': 'application/json' } });
|
||||
if (!p1Resp.data.choices?.[0]?.message?.content) {
|
||||
console.error('❌ Pass 1 failed. Response:', JSON.stringify(p1Resp.data, null, 2));
|
||||
throw new Error('Pass 1: No content in response');
|
||||
}
|
||||
const facts = JSON.parse(cleanJson(p1Resp.data.choices[0].message.content));
|
||||
|
||||
// 1.5. PASS 1.5: The Feature Auditor (Skeptical Review)
|
||||
console.log(' ↳ Pass 1.5: The Feature Auditor (Skeptical Review)...');
|
||||
const pass15SystemPrompt = `
|
||||
You are a "Strict Cost Controller". Your mission is to prevent over-billing.
|
||||
Review the extracted FEATURES and the BRIEFING.
|
||||
|
||||
### RULE OF THUMB:
|
||||
- A "Feature" (1.500 €) is ONLY justified for complex, dynamic systems (logic, database, CMS-driven management, advanced filtering).
|
||||
- Simple lists, information sections, or static descriptions (e.g., "Messen", "Team", "Historie", "Jobs" as mere text) are ALWAYS "Pages" (600 €).
|
||||
- If the briefing doesn't explicitly mention "Management System", "Filterable Database", or "Client Login", it is likely a PAGE.
|
||||
|
||||
### MISSION:
|
||||
Analyze each feature in the list. Decide if it should stay a "Feature" or be downgraded to the "otherPages" array.
|
||||
|
||||
### OUTPUT FORMAT:
|
||||
Return only the corrected 'features' and 'otherPages' arrays.
|
||||
{
|
||||
"features": string[],
|
||||
"otherPages": string[]
|
||||
}
|
||||
`;
|
||||
const p15Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [
|
||||
{ role: 'system', content: pass15SystemPrompt },
|
||||
{ role: 'user', content: `EXTRACTED_FEATURES: ${JSON.stringify(facts.features)} \nBRIEFING: \n${briefing}` }
|
||||
],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' } });
|
||||
addUsage(p15Resp.data);
|
||||
const auditResult = JSON.parse(cleanJson(p15Resp.data.choices[0].message.content));
|
||||
|
||||
// Apply Audit: Downgrade features to otherPages
|
||||
facts.features = auditResult.features || [];
|
||||
facts.otherPages = Array.from(new Set([...(facts.otherPages || []), ...(auditResult.otherPages || [])]));
|
||||
|
||||
// 2. PASS 2: Feature Deep-Dive
|
||||
console.log(' ↳ Pass 2: Feature Deep-Dive...');
|
||||
const pass2SystemPrompt = `
|
||||
You are a detail - oriented Solution Architect.
|
||||
For EVERY item selected in Pass 1(pages, features, functions, apiSystems), write a specific justification and technical scope.
|
||||
|
||||
### RULES:
|
||||
1. ** CONCRETE & SPECIFIC **: Do NOT say "Implementation of X". Say "Displaying X with Y filters".
|
||||
2. ** JUSTIFICATION (CRITICAL) **: For every entry in 'featureDetails', explicitly explain WHY this is a complex system (1.500 €) and not just a static page (600 €). If it's just a list of items, it's a PAGE.
|
||||
3. ** NO EFFECTS **: Do not mention "fade-ins", "animations" or "visual styling". Focus on FUNCTION.
|
||||
4. ** ABSOLUTE RULE **: EVERYTHING MUST BE GERMAN.
|
||||
5. ** TRANSPARENCY **: Explain exactly what the USER gets.
|
||||
6. ** API NOTE **: For 'media' or 'video', explicitly state "Upload & Integration" (NO STREAMING).
|
||||
|
||||
### INPUT (from Pass 1):
|
||||
${JSON.stringify(facts, null, 2)}
|
||||
|
||||
### OUTPUT FORMAT(Strict JSON):
|
||||
{
|
||||
"pageDetails": { "Home": string, ... },
|
||||
"featureDetails": { "blog_news": string, ... },
|
||||
"functionDetails": { "search": string, ... },
|
||||
"apiDetails": { "crm_erp": string, ... }
|
||||
}
|
||||
`;
|
||||
const p2Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [{ role: 'system', content: pass2SystemPrompt }, { role: 'user', content: briefing }],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey} `, 'Content-Type': 'application/json' } });
|
||||
addUsage(p2Resp.data);
|
||||
if (!p2Resp.data.choices?.[0]?.message?.content) {
|
||||
console.error('❌ Pass 2 failed. Response:', JSON.stringify(p2Resp.data, null, 2));
|
||||
throw new Error('Pass 2: No content in response');
|
||||
}
|
||||
const details = JSON.parse(cleanJson(p2Resp.data.choices[0].message.content));
|
||||
|
||||
// 3. PASS 3: Strategic Content (Bespoke Strategy)
|
||||
console.log(' ↳ Pass 3: Strategic Content (Bespoke Strategy)...');
|
||||
const pass3SystemPrompt = `
|
||||
You are a high - end Digital Architect.Your goal is to make the CUSTOMER feel 100 % understood.
|
||||
Analyze the BRIEFING and the EXISTING WEBSITE context.
|
||||
|
||||
### TONE & COMMUNICATION PRINCIPLES(STRICT):
|
||||
${tone}
|
||||
|
||||
### OBJECTIVE:
|
||||
3. ** briefingSummary **: Ein sachlicher, tiefgehender Überblick der Unternehmenslage.
|
||||
- ** STIL **: Keine Ich - Form.Keine Marketing - Floskeln.Nutze präzise Fachbegriffe.Sei prägnant und effizient(ca. 70 % der vorherigen Länge).
|
||||
- ** FORM **: EXAKT ZWEI ABSÄTZE.Insgesamt ca. 6 Sätze.
|
||||
- ** INHALT **: Welcher technologische Sprung ist notwendig ? Was ist der Status Quo ? (Bezug zur URL / Briefing).
|
||||
- ** ABSOLUTE REGEL **: Keine Halluzinationen über fehlende Präsenzen bei Relaunches.
|
||||
- ** DATENSCHUTZ **: KEINERLEI namentliche Nennungen von Personen(z.B. "Danny Joseph") in diesen Texten.
|
||||
4. ** designVision **: Ein abstraktes, strategisches Konzept.
|
||||
- ** STIL **: Rein konzeptionell.Keine Umsetzungsschritte.Keinerlei "To-dos".Keine Ich - Form.Sei prägnant.
|
||||
- ** FORM **: EXAKT ZWEI ABSÄTZE.Insgesamt ca. 4 Sätze.
|
||||
- ** DATENSCHUTZ **: KEINERLEI namentliche Nennungen von Personen in diesen Texten.
|
||||
- ** FOKUS **: Welche strategische Wirkung soll erzielt werden ? (Z.B. "Industrielle Souveränität").
|
||||
|
||||
### OUTPUT FORMAT(Strict JSON):
|
||||
{
|
||||
"briefingSummary": string,
|
||||
"designVision": string
|
||||
}
|
||||
`;
|
||||
const p3Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [
|
||||
{ role: 'system', content: pass3SystemPrompt },
|
||||
{ role: 'user', content: `BRIEFING(TRUTH SOURCE): \n${briefing} \n\nEXISTING WEBSITE(CONTEXT): \n${distilledCrawl} \n\nEXTRACTED FACTS: \n${JSON.stringify(facts, null, 2)} ` }
|
||||
],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey} `, 'Content-Type': 'application/json' } });
|
||||
addUsage(p3Resp.data);
|
||||
if (!p3Resp.data.choices?.[0]?.message?.content) {
|
||||
console.error('❌ Pass 3 failed. Response:', JSON.stringify(p3Resp.data, null, 2));
|
||||
throw new Error('Pass 3: No content in response');
|
||||
}
|
||||
const strategy = JSON.parse(cleanJson(p3Resp.data.choices[0].message.content));
|
||||
|
||||
// 4. PASS 4: Information Architecture (Sitemap)
|
||||
console.log(' ↳ Pass 4: Information Architecture...');
|
||||
const pass4SystemPrompt = `
|
||||
You are a Senior UX Architect.Design a hierarchical sitemap following the 'Industrial Logic' principle.
|
||||
EVERYTHING MUST BE IN GERMAN.
|
||||
|
||||
### SITEMAP RULES:
|
||||
1. ** HIERARCHY **: Build a logical tree.Group by category(e.g., "Kern-Präsenz", "Lösungen", "Vertrauen", "Rechtliches").
|
||||
2. ** INTENT **: Each page MUST have a title and a brief functional conversion intent(desc).
|
||||
3. ** COMPREHENSIVENESS **: Ensure all 'selectedPages' and 'features' from Pass 1 are represented.
|
||||
4. ** LANGUAGE **: STRICT GERMAN TITLES.Do NOT use "Home", "About", "Services".Use "Startseite", "Über uns", "Leistungen".
|
||||
5. ** NO IMPLEMENTATION NOTES **: Do NOT add implementation details in parentheses to titles (e.g. NO "Startseite (Hero-Video)", NO "About (Timeline)"). Keep titles clean and abstract.
|
||||
|
||||
### DATA CONTEXT:
|
||||
${JSON.stringify({ facts, strategy }, null, 2)}
|
||||
|
||||
### OUTPUT FORMAT(Strict JSON):
|
||||
{
|
||||
"websiteTopic": string,
|
||||
"sitemap": [{ "category": string, "pages": [{ "title": string, "desc": string }] }]
|
||||
}
|
||||
`;
|
||||
const p4Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [{ role: 'system', content: pass4SystemPrompt }, { role: 'user', content: `BRIEFING(TRUTH SOURCE): \n${briefing} \n\nDISTILLED_CRAWL(CONTEXT): \n${distilledCrawl} ` }],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey} `, 'Content-Type': 'application/json' } });
|
||||
addUsage(p4Resp.data);
|
||||
if (!p4Resp.data.choices?.[0]?.message?.content) {
|
||||
console.error('❌ Pass 4 failed. Response:', JSON.stringify(p4Resp.data, null, 2));
|
||||
throw new Error('Pass 4: No content in response');
|
||||
}
|
||||
const ia = JSON.parse(cleanJson(p4Resp.data.choices[0].message.content));
|
||||
|
||||
// 5. PASS 5: Position Synthesis & Pricing Transparency
|
||||
console.log(' ↳ Pass 5: Position Synthesis...');
|
||||
|
||||
// Determine which positions are actually relevant to avoid hallucinations
|
||||
const requiredPositions = [
|
||||
"Das technische Fundament",
|
||||
facts.selectedPages.length + facts.otherPages.length > 0 ? "Individuelle Seiten" : null,
|
||||
facts.features.length > 0 ? "System-Module (Features)" : null,
|
||||
facts.functions.length > 0 ? "Logik-Funktionen" : null,
|
||||
facts.apiSystems.length > 0 ? "Schnittstellen (API)" : null,
|
||||
facts.cmsSetup ? "Inhaltsverwaltung (CMS)" : null,
|
||||
"Inszenierung & Interaktion", // Always include for high-end strategy
|
||||
facts.multilang ? "Mehrsprachigkeit" : null,
|
||||
"Inhaltliche Initial-Pflege",
|
||||
"Sorglos-Paket (Betrieb & Pflege)"
|
||||
].filter(Boolean);
|
||||
|
||||
const pass5SystemPrompt = `
|
||||
You are a Senior Solution Architect. Your goal is ABSOLUTE TRANSPARENCY and professionalism.
|
||||
Each position in the quote must be perfectly justified and detailed using an objective, technical tone.
|
||||
|
||||
### REQUIRED POSITION TITLES (STRICT - ONLY DESCRIBE THESE):
|
||||
${requiredPositions.map(p => `"${p}"`).join(", ")}
|
||||
|
||||
### MAPPING RULES (STRICT):
|
||||
- ** Das technische Fundament **: Infrastructure, Hosting setup, SEO-Basics, Analytics, Environments.
|
||||
- ** Individuelle Seiten **: Layout / structure for specific pages. ** RULE **: If quantity is high (e.g. > 10), lead with "Umsetzung von [QTY] individuellen Einzelseiten...".
|
||||
- ** System-Module (Features) **: Functional systems like Blog, News, Products, Jobs, References. ** RULE **: Describe exactly 1 thing if qty is 1. If qty is 0, DO NOT DESCRIBE THIS.
|
||||
- ** Logik-Funktionen **: Logic modules like Search, Filter, Forms, PDF-Export.
|
||||
- ** Schnittstellen (API) **: Data Syncs with CRM, ERP, Payment systems.
|
||||
- ** Inhaltsverwaltung (CMS) **: Setup and mapping for CMS.
|
||||
- ** Inszenierung & Interaktion **: Hero-stories, visual effects, configurators.
|
||||
- ** Mehrsprachigkeit **: Architecture scaling for multiple languages.
|
||||
- ** Inhaltliche Initial-Pflege **: Manual data entry / cleanup.
|
||||
- ** Sorglos-Paket (Betrieb & Pflege) **: ** RULE **: Describe as "1 Jahr Sicherung des technischen Betriebs, Instandhaltung, Sicherheits-Updates und Inhalts-Aktualisierungen gemäß AGB Punkt 7a."
|
||||
|
||||
### RULES FOR positionDescriptions(STRICT):
|
||||
1. ** ABSOLUTE RULE: NO FIRST PERSON **: NEVER use "Ich", "Mein", "Wir" or "Unser". Lead with nouns or passive verbs.
|
||||
2. ** QUANTITY PARITY (ULTRA-STRICT) **: The description MUST list EXACTLY the number of items matching the 'qty' for that position. If qty is 3, describe exactly 3 items. If qty is 1, describe exactly 1 item. Do NOT "stuff" additional features into one description.
|
||||
3. ** LOGIC GUARD (CMS) **: If 'cmsSetup' is false in the DATA CONTEXT, you MUST NOT mention "CMS", "Modul", "Management System" or "Inhaltsverwaltung". Use "Statische Seite" or "Darstellung".
|
||||
4. ** STATIC vs DYNAMIC **: If no complex logic was extracted in Pass 2 for a feature, describe it as a technical layout/page, not as a system.
|
||||
5. ** PROFESSIONAL TONE **: Use "Erstellung von...", "Anbindung der...", "Implementierung technischer...", "Bereitstellung von...".
|
||||
6. ** CONCISE & ITEM-BASED **: Use technical, high-density sentences. Name specific industry terms from context.
|
||||
7. ** ITEMIZED SYNTHESIS **: Mention EVERY component selected in Pass 1.
|
||||
8. ** HARD SPECIFICS **: If the briefing mentions "Glasfaser-Trassen" or "Schwerlast-Logistik", IT MUST BE IN THE DESCRIPTION.
|
||||
9. ** INDUSTRIAL AMBITION **: Describe it as a high-end technical solution. Avoid "schöne Website" or marketing fluff.
|
||||
10. ** PAGES **: For "Individuelle Seiten", list the pages. ** ABSOLUTE RULE **: Do NOT add implementation details or technical notes in parentheses (e.g. NO "(Matrix-Struktur)", NO "(Timeline-Modul)"). Use clean titles like "Startseite, Über uns, Leistungen".
|
||||
11. ** LOGIC **: Describe the ACTUAL logic (e.g., "Volltextsuche mit Auto-Complete", not "eine Suche").
|
||||
12. ** KEYS **: Return EXACTLY the keys defined in "POSITION TITLES".
|
||||
13. ** NO AGB **: NEVER mention "AGB" or "Geschäftsbedingungen".
|
||||
|
||||
### EXAMPLES(PASSIVE & TECHNICAL):
|
||||
- ** GOOD **: "Konfiguration der CMS-Infrastruktur zur unabhängigen Verwaltung von Produkt-Katalogen und News-Beiträgen."
|
||||
- ** GOOD **: "Implementierung einer Volltextsuche inkl. Kategorisierungs-Logik für effizientes Auffinden von Projektreferenzen."
|
||||
- ** GOOD **: "Native API-Anbindung an das ERP-System zur Echtzeit-Synchronisation von Bestandsdaten."
|
||||
- ** BAD **: "Ich richte dir das CMS ein."
|
||||
- ** BAD **: "Ich programmiere eine tolle Suche für deine Seite."
|
||||
|
||||
### DATA CONTEXT:
|
||||
${JSON.stringify({ facts, details, strategy, ia }, null, 2)}
|
||||
|
||||
### OUTPUT FORMAT(Strict JSON):
|
||||
{
|
||||
"positionDescriptions": { "Das technische Fundament": string, ... }
|
||||
}
|
||||
`;
|
||||
const p5Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [{ role: 'system', content: pass5SystemPrompt }, { role: 'user', content: briefing }],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey} `, 'Content-Type': 'application/json' } });
|
||||
addUsage(p5Resp.data);
|
||||
if (!p5Resp.data.choices?.[0]?.message?.content) {
|
||||
console.error('❌ Pass 5 failed. Response:', JSON.stringify(p5Resp.data, null, 2));
|
||||
throw new Error('Pass 5: No content in response');
|
||||
}
|
||||
const positionsData = JSON.parse(cleanJson(p5Resp.data.choices[0].message.content));
|
||||
|
||||
// 6. PASS 6: The Industrial Critic
|
||||
console.log(' ↳ Pass 6: The Industrial Critic (Quality Gate)...');
|
||||
const pass6SystemPrompt = `
|
||||
You are the "Industrial Critic".Your goal is to catch quality regressions and ensure the document is bespoke, technical, and professional.
|
||||
Analyze the CURRENT_STATE against the BRIEFING_TRUTH.
|
||||
|
||||
### CRITICAL ERROR CHECKLIST(FAIL IF FOUND):
|
||||
1. ** Hallucination Leakage **: FAIL if names of people(e.g., "Frieder Helmich"), specific software versions, or invented details are used unless they appear EXACTLY in the BRIEFING.
|
||||
- ** CRITICAL **: Forbid "Sie", "Ansprechpartner" or "Unternehmen" for personName if a name IS in the briefing.If none is in briefing, use empty string.
|
||||
2. ** Logic Conflict **: FAIL if isRelaunch is true but briefingSummary claims no website exists.
|
||||
- FAIL if the description in positionDescriptions mentions more items than extracted in facts.
|
||||
3. ** Implementation Fluff **: FAIL if tech - stack details are mentioned(React, etc.).Focus on Concept & Result.
|
||||
4. ** Genericism Check(CRITICAL) **: FAIL if any text sounds like it could apply to ANY company.It MUST mention specific industry details(e.g., "Kabeltiefbau", "Infrastruktur-Zentrum") from the Briefing or Crawl.
|
||||
6. ** Namen-Verbot (STRICT) **: FAIL if any personal names (e.g. "Danny Joseph", "Joseph", etc.) appear in 'briefingSummary' or 'designVision'. Use abstract terms like "Unternehmensführung" or "Management" if necessary.
|
||||
7. ** LOGIC GUARD (CMS) **: If 'cmsSetup' is false in the DATA CONTEXT, FAIL if any 'positionDescriptions' or 'briefingSummary' mentions "CMS", "Content Management System" or "Inhaltsverwaltung".
|
||||
8. ** AGB BAN **: FAIL if "Allgemeine Geschäftsbedingungen" or "AGB" appear anywhere.
|
||||
9. ** Length Check **: Briefing (ca. 6 Sätze) und Vision (ca. 4 Sätze). Kürze Texte, die zu ausschweifend sind, auf das Wesentliche.
|
||||
|
||||
### MISSION:
|
||||
Return updated fields ONLY.Specifically focus on hardening 'positionDescriptions', 'sitemap', 'briefingSummary', and 'designVision'.
|
||||
|
||||
### DATA CONTEXT:
|
||||
${JSON.stringify({ facts, strategy, ia, positionsData }, null, 2)}
|
||||
`;
|
||||
const p6Resp = await axios.post('https://openrouter.ai/api/v1/chat/completions', {
|
||||
model: 'google/gemini-3-flash-preview',
|
||||
messages: [{ role: 'system', content: pass6SystemPrompt }, { role: 'user', content: `BRIEFING_TRUTH: \n${briefing} ` }],
|
||||
response_format: { type: 'json_object' }
|
||||
}, { headers: { 'Authorization': `Bearer ${apiKey} `, 'Content-Type': 'application/json' } });
|
||||
addUsage(p6Resp.data);
|
||||
if (!p6Resp.data.choices?.[0]?.message?.content) {
|
||||
console.error('❌ Pass 6 failed. Response:', JSON.stringify(p6Resp.data, null, 2));
|
||||
throw new Error('Pass 6: No content in response');
|
||||
}
|
||||
const reflection = JSON.parse(cleanJson(p6Resp.data.choices[0].message.content));
|
||||
|
||||
// 6. Reflection Merge Utility
|
||||
/**
 * Overlays the critic pass' corrections ("reflection") onto a copy of `state`.
 *
 * The reflection is first unwrapped: a truthy `"0"` key is always followed, and a
 * sole `state` / `facts` wrapper key is followed as well, because the model tends
 * to nest its answer. Only truthy values other than the literal string "null"
 * overwrite the corresponding key; everything else keeps the value from `state`.
 *
 * @param state      Base state; never mutated, a shallow copy is returned.
 * @param reflection Parsed model output. May be anything JSON can express.
 * @returns A shallow copy of `state` with the accepted reflection fields applied.
 */
const mergeReflection = (state: any, reflection: any) => {
  const result: Record<string, any> = { ...state };

  // Peel off the wrapper objects the model likes to add around its payload.
  const unwrap = (obj: any): any => {
    if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return obj;
    // Always unwrap "0" if it exists, regardless of other keys (AI often nests)
    if (obj['0']) return unwrap(obj['0']);
    const isSoleKey = Object.keys(obj).length === 1;
    if (obj.state && isSoleKey) return unwrap(obj.state);
    if (obj.facts && isSoleKey) return unwrap(obj.facts);
    return obj;
  };

  const cleanedReflection = unwrap(reflection);

  // Valid JSON can also be `null` or a bare primitive. Object.entries would throw
  // on null/undefined and would merge per-character keys for a string, so such a
  // reflection is treated as "no corrections".
  if (cleanedReflection === null || typeof cleanedReflection !== 'object') {
    return result;
  }

  for (const [key, value] of Object.entries(cleanedReflection)) {
    // Falsy values and the literal "null" mean "leave this field unchanged".
    if (value && value !== 'null') {
      result[key] = value;
    }
  }
  return result;
};
|
||||
|
||||
let finalState = mergeReflection({
|
||||
...initialState,
|
||||
...facts,
|
||||
...strategy,
|
||||
...ia,
|
||||
...positionsData
|
||||
}, reflection);
|
||||
|
||||
finalState.statusQuo = facts.isRelaunch ? 'Relaunch' : 'Neuentwicklung';
|
||||
|
||||
// Recipient Mapping
|
||||
if (finalState.personName) finalState.name = finalState.personName;
|
||||
if (finalState.email) finalState.email = finalState.email;
|
||||
|
||||
// Normalization Layer: Map hallucinated German keys back to internal keys
|
||||
const normalizationMap: Record<string, string> = {
|
||||
"Briefing-Zusammenfassung": "briefingSummary",
|
||||
"Design-Vision": "designVision",
|
||||
"Zusammenfassung": "briefingSummary",
|
||||
"Vision": "designVision",
|
||||
"BRIEFING_SUMMARY": "briefingSummary",
|
||||
"DESIGN_VISION": "designVision"
|
||||
};
|
||||
|
||||
Object.entries(normalizationMap).forEach(([gerKey, intKey]) => {
|
||||
if (finalState[gerKey] && !finalState[intKey]) {
|
||||
if (typeof finalState[gerKey] === 'object' && !Array.isArray(finalState[gerKey])) {
|
||||
finalState[intKey] = Object.values(finalState[gerKey]).join('\n\n');
|
||||
} else {
|
||||
finalState[intKey] = finalState[gerKey];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Final Logic Guard: Strictly strip CMS from ALL descriptions and fields if not enabled
|
||||
if (!finalState.cmsSetup) {
|
||||
/**
 * Returns a deep copy of `obj` in which every string value has the terms
 * "CMS", "Content-Management-System" and "Inhaltsverwaltung" (case-insensitive)
 * replaced by "Plattform-Struktur". Object keys are left untouched; arrays and
 * plain objects are walked recursively, all other values are returned as-is.
 */
const stripCMS = (obj: any): any => {
  if (typeof obj === 'string') {
    return obj.replace(/CMS|Content-Management-System|Inhaltsverwaltung/gi, 'Plattform-Struktur');
  }

  if (Array.isArray(obj)) {
    return obj.map((item) => stripCMS(item));
  }

  if (obj === null || typeof obj !== 'object') {
    return obj;
  }

  const cleaned: any = {};
  for (const [field, fieldValue] of Object.entries(obj)) {
    cleaned[field] = stripCMS(fieldValue);
  }
  return cleaned;
};
|
||||
finalState = stripCMS(finalState);
|
||||
}
|
||||
|
||||
// Sitemap Normalization (German keys to internal)
|
||||
if (Array.isArray(finalState.sitemap)) {
|
||||
finalState.sitemap = finalState.sitemap.map((cat: any) => ({
|
||||
category: cat.category || cat.kategorie || cat.Kategorie || cat.title || "Allgemein",
|
||||
pages: (cat.pages || cat.seiten || cat.Seiten || []).map((page: any) => ({
|
||||
title: page.title || page.titel || page.Titel || "Seite",
|
||||
desc: page.desc || page.beschreibung || page.Beschreibung || page.description || ""
|
||||
}))
|
||||
}));
|
||||
}
|
||||
|
||||
// Position Descriptions Normalization (Strict Title Mapping + Index-based Fallback)
|
||||
if (finalState.positionDescriptions) {
|
||||
const normalized: Record<string, string> = {};
|
||||
const rawPositions = finalState.positionDescriptions;
|
||||
|
||||
// 1. Initial cleanup
|
||||
Object.entries(rawPositions).forEach(([key, value]) => {
|
||||
const normalizedValue = typeof value === 'object' ? (value as any).beschreibung || (value as any).description || JSON.stringify(value) : value;
|
||||
normalized[key] = normalizedValue as string;
|
||||
});
|
||||
|
||||
// 2. Index-based matching (Map "10. Foo" to "10. Bar")
|
||||
const standardTitles = [
|
||||
"1. Das technische Fundament",
|
||||
"2. Individuelle Seiten",
|
||||
"3. System-Module (Features)",
|
||||
"4. Logik-Funktionen",
|
||||
"5. Schnittstellen (API)",
|
||||
"6. Inhaltsverwaltung (CMS)",
|
||||
"7. Inszenierung & Interaktion",
|
||||
"8. Mehrsprachigkeit",
|
||||
"9. Inhaltliche Initial-Pflege",
|
||||
"10. Sorglos-Paket (Betrieb & Pflege)"
|
||||
];
|
||||
|
||||
standardTitles.forEach(std => {
|
||||
const prefix = std.split('.')[0] + '.'; // e.g., "10."
|
||||
// Find any key in the AI output that starts with this number
|
||||
const matchingKey = Object.keys(normalized).find(k => k.trim().startsWith(prefix));
|
||||
if (matchingKey && matchingKey !== std) {
|
||||
normalized[std] = normalized[matchingKey];
|
||||
// Keep the old key too just in case, but prioritize the standard one
|
||||
}
|
||||
});
|
||||
|
||||
finalState.positionDescriptions = normalized;
|
||||
}
|
||||
|
||||
// Normalize final state
|
||||
if (Array.isArray(finalState.positionDescriptions)) {
|
||||
const normalized: Record<string, string> = {};
|
||||
finalState.positionDescriptions.forEach((item: any) => {
|
||||
const key = item.feature || item.id || item.title || item.pos;
|
||||
if (key) normalized[key] = item.description || item.desc;
|
||||
});
|
||||
finalState.positionDescriptions = normalized;
|
||||
}
|
||||
|
||||
if (finalState.sitemap && !Array.isArray(finalState.sitemap)) {
|
||||
if (finalState.sitemap.categories) finalState.sitemap = finalState.sitemap.categories;
|
||||
else if (finalState.sitemap.sitemap) finalState.sitemap = finalState.sitemap.sitemap;
|
||||
else {
|
||||
const entries = Object.entries(finalState.sitemap);
|
||||
if (entries.every(([_, v]) => Array.isArray(v))) {
|
||||
finalState.sitemap = entries.map(([category, pages]) => ({ category, pages }));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Final Post-Reflection Budget Sync (Hard Pruning if still over)
|
||||
if (budget) {
|
||||
const targetValue = parseInt(budget.replace(/[^0-9]/g, ''));
|
||||
if (!isNaN(targetValue)) {
|
||||
console.log(`⚖️ Final Budget Audit(${targetValue} € target)...`);
|
||||
let currentTotals = calculateTotals(finalState, PRICING);
|
||||
|
||||
// Step-by-step pruning if too expensive
|
||||
if (currentTotals.totalPrice > targetValue) {
|
||||
console.log(`⚠️ Budget exceeded(${currentTotals.totalPrice} €).Pruning scope to fit ${targetValue} €...`);
|
||||
|
||||
// 1. Remove optional "other" stuff
|
||||
finalState.otherFeatures = [];
|
||||
finalState.otherFunctions = [];
|
||||
finalState.otherTech = [];
|
||||
|
||||
// 2. Remove non-critical functions
|
||||
const funcPriority = ['search', 'filter', 'calendar', 'multilang'];
|
||||
for (const f of funcPriority) {
|
||||
if (currentTotals.totalPrice <= targetValue) break;
|
||||
if (finalState.functions.includes(f)) {
|
||||
finalState.functions = finalState.functions.filter((x: string) => x !== f);
|
||||
currentTotals = calculateTotals(finalState, PRICING);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Remove least critical features if still over
|
||||
const featurePriority = ['events', 'blog_news', 'products'];
|
||||
for (const p of featurePriority) {
|
||||
if (currentTotals.totalPrice <= targetValue) break;
|
||||
if (finalState.features.includes(p)) {
|
||||
finalState.features = finalState.features.filter((f: string) => f !== p);
|
||||
currentTotals = calculateTotals(finalState, PRICING);
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Reduce page count (Selected Pages AND Sitemap)
|
||||
while (currentTotals.totalPrice > targetValue && (finalState.selectedPages.length > 4 || currentTotals.totalPagesCount > 5)) {
|
||||
if (finalState.selectedPages.length > 4) {
|
||||
finalState.selectedPages.pop();
|
||||
}
|
||||
|
||||
// Prune Sitemap to match
|
||||
if (finalState.sitemap && Array.isArray(finalState.sitemap)) {
|
||||
const lastCat = finalState.sitemap[finalState.sitemap.length - 1];
|
||||
if (lastCat && lastCat.pages && lastCat.pages.length > 0) {
|
||||
lastCat.pages.pop();
|
||||
if (lastCat.pages.length === 0) finalState.sitemap.pop();
|
||||
}
|
||||
}
|
||||
currentTotals = calculateTotals(finalState, PRICING);
|
||||
}
|
||||
|
||||
// 5. Final fallback: Remove second feature if still over
|
||||
if (currentTotals.totalPrice > targetValue && finalState.features.length > 1) {
|
||||
finalState.features.pop();
|
||||
currentTotals = calculateTotals(finalState, PRICING);
|
||||
}
|
||||
}
|
||||
console.log(`✅ Final budget audit complete: ${currentTotals.totalPrice} €`);
|
||||
}
|
||||
}
|
||||
|
||||
return { state: finalState, usage };
|
||||
}
|
||||
|
||||
// Script entry point. A rejected main() is logged AND reflected in the exit code,
// so shells and CI jobs do not mistake a failed run for a successful one.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,354 +0,0 @@
|
||||
import { chromium, type Page } from 'playwright';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import fs from 'node:fs';
|
||||
import axios from 'axios';
|
||||
|
||||
// ES modules have no built-in __filename/__dirname; rebuild them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
|
||||
// Directory containing this script; run() resolves the showcase output folder relative to it.
const __dirname = path.dirname(__filename);
|
||||
// Desktop Chrome user-agent string, sent with axios asset downloads and used for the Playwright browser context.
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36';
|
||||
|
||||
/**
 * Makes a slash-separated path safe to use as a local file path.
 *
 * Each segment is cleaned on its own: every character that is not a letter,
 * digit, dot, underscore or hyphen becomes "_". The "/" separators (and thus
 * the directory structure) are preserved.
 *
 * @param rawPath Path such as "host/dir/file name.png".
 * @returns The sanitized path with the same number of segments.
 */
function sanitizePath(rawPath: string) {
  const cleanedSegments: string[] = [];
  for (const segment of rawPath.split('/')) {
    cleanedSegments.push(segment.replace(/[^a-z0-9._-]/gi, '_'));
  }
  return cleanedSegments.join('/');
}
|
||||
|
||||
/**
 * Downloads a remote asset into `assetsDir` and returns the path the cloned page
 * should reference.
 *
 * The local location is derived from the URL's hostname + pathname (query string
 * ignored) via sanitizePath, so assets from different hosts do not collide. If a
 * file already exists at that location it is reused without a new request.
 *
 * @param url       Absolute or protocol-relative ("//host/...") asset URL.
 * @param assetsDir Directory under which the asset tree is stored.
 * @returns "./assets/<relative path>" on success, or null when the URL is not
 *          http(s), the response status is not 200, or anything throws.
 */
async function downloadFile(url: string, assetsDir: string) {
  // Protocol-relative URLs are fetched over https.
  const resolvedUrl = url.startsWith('//') ? `https:${url}` : url;
  if (!resolvedUrl.startsWith('http')) return null;

  try {
    const parsed = new URL(resolvedUrl);
    // Create a collision-resistant local path
    const relPath = sanitizePath(parsed.hostname + parsed.pathname);
    const localRef = `./assets/${relPath}`;
    const dest = path.join(assetsDir, relPath);

    // Already mirrored earlier in this (or a previous) run.
    if (fs.existsSync(dest)) return localRef;

    const res = await axios.get(resolvedUrl, {
      responseType: 'arraybuffer',
      headers: { 'User-Agent': USER_AGENT },
      timeout: 15000,
      // Never throw on HTTP status; the status is checked explicitly below.
      validateStatus: () => true
    });
    if (res.status !== 200) return null;

    const parentDir = path.dirname(dest);
    if (!fs.existsSync(parentDir)) fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(dest, Buffer.from(res.data));
    return localRef;
  } catch {
    return null; // Fail silently, proceed with original URL
  }
}
|
||||
|
||||
/**
 * Mirrors every asset referenced by a stylesheet and rewrites the references so
 * they point at the local copies.
 *
 * Both `url(...)` references and quoted `@import "..."` references are handled.
 * Each reference is resolved against `cssUrl`, downloaded via downloadFile, and
 * replaced by a path relative to the stylesheet's own local location. References
 * that cannot be resolved or downloaded, and `data:` / `blob:` URIs, are left
 * exactly as they were.
 *
 * Rewriting is done per match position. A global text replace would also hit
 * other references that merely contain the same text (e.g. "img.png" inside
 * "big-img.png") and would re-rewrite paths inserted by an earlier replacement.
 *
 * @param cssContent Raw stylesheet text.
 * @param cssUrl     Absolute URL the stylesheet was loaded from.
 * @param assetsDir  Root directory of the local asset mirror.
 * @param urlMap     Mutated: absolute asset URL -> local "./assets/..." path.
 * @param depth      Guard value; content is returned untouched when it exceeds 5.
 *                   This function itself does not recurse.
 * @returns The stylesheet text with mirrored references rewritten.
 */
async function processCssRecursively(
  cssContent: string,
  cssUrl: string,
  assetsDir: string,
  urlMap: Record<string, string>,
  depth = 0,
) {
  if (depth > 5) return cssContent;

  // Capture both standard url(...) and @import "..."
  // Groups: 1 = opening syntax, 2 = the reference itself, 3 = closing syntax.
  const urlRegex = /(url\(["']?|@import\s+["'])([^"'\)]+)(["']?\)?)/gi;

  // Pass 1: download each distinct reference and work out its replacement.
  const rewrites = new Map<string, string>();
  for (const match of cssContent.matchAll(urlRegex)) {
    const originalUrl = match[2];
    if (rewrites.has(originalUrl)) continue;
    if (originalUrl.startsWith('data:') || originalUrl.startsWith('blob:')) continue;

    try {
      const assetUrl = new URL(originalUrl, cssUrl);
      const local = await downloadFile(assetUrl.href, assetsDir);
      if (!local) continue;

      // Route from the folder containing the local CSS file to the local asset.
      const sheetUrl = new URL(cssUrl);
      const cssDir = path.dirname(sanitizePath(sheetUrl.hostname + sheetUrl.pathname));
      const assetPath = sanitizePath(assetUrl.hostname + assetUrl.pathname);
      // CSS always uses forward slashes, whatever the host OS separator is.
      const rel = path.relative(cssDir, assetPath).split(path.sep).join('/');

      rewrites.set(originalUrl, rel);
      urlMap[assetUrl.href] = local;
    } catch {
      // Best effort: an unparsable reference stays untouched in the output.
    }
  }

  // Pass 2: rewrite strictly the reference part of each matched occurrence.
  return cssContent.replace(
    urlRegex,
    (whole: string, prefix: string, ref: string, suffix: string) => {
      const rel = rewrites.get(ref);
      return rel === undefined ? whole : `${prefix}${rel}${suffix}`;
    },
  );
}
|
||||
|
||||
/**
 * CLI entry point: clones a single page into a static, self-contained snapshot.
 *
 * Usage: `npm run clone-page <url>`
 *
 * Steps:
 *  1. Load the page in headless Chromium and record every asset response.
 *  2. Scroll through the page and enlarge the viewport to trigger lazy loaders.
 *  3. Promote lazy-load attributes (data-src etc.) to real attributes in the DOM.
 *  4. Snapshot the HTML, discover further assets by regex, download everything.
 *  5. Rewrite URLs to the local mirror, strip tracking/lazy scripts, inject
 *     stabilising CSS and write the result below ../public/showcase/<domain>/.
 *
 * Exits with code 1 when no URL is given. Any failure during cloning is logged
 * and reported through `process.exitCode = 1`; the browser is always closed.
 */
async function run() {
  const rawUrl = process.argv[2];
  if (!rawUrl) {
    console.error('Usage: npm run clone-page <url>');
    process.exit(1);
  }
  const targetUrl = rawUrl.trim();
  const urlObj = new URL(targetUrl);

  // Setup Output Directories
  // Only a leading "www." is dropped; an unanchored replace would also mangle
  // hostnames that merely contain that text.
  const domainSlug = urlObj.hostname.replace(/^www\./, '');
  const domainDir = path.resolve(__dirname, `../public/showcase/${domainSlug}`);
  const assetsDir = path.join(domainDir, 'assets');
  if (!fs.existsSync(assetsDir)) fs.mkdirSync(assetsDir, { recursive: true });

  let pageSlug = urlObj.pathname.split('/').filter(Boolean).join('-');
  if (!pageSlug) pageSlug = 'index';
  const htmlFilename = `${pageSlug}.html`;

  console.log(`🚀 INDUSTRIAL CLONE: ${targetUrl}`);

  const browser = await chromium.launch({ headless: true });
  // Start with a standard viewport, we will resize widely later
  const context = await browser.newContext({ userAgent: USER_AGENT, viewport: { width: 1920, height: 1080 } });
  const page = await context.newPage();

  const urlMap: Record<string, string> = {};
  const foundAssets = new Set<string>();

  // 1. Live Network Interception
  page.on('response', (response) => {
    const url = response.url();
    if (response.status() === 200) {
      // Capture anything that looks like a static asset
      if (url.match(/\.(css|js|png|jpg|jpeg|gif|svg|woff2?|ttf|otf|mp4|webm|webp|ico)/i)) {
        foundAssets.add(url);
      }
    }
  });

  try {
    console.log('🌐 Loading page (Waiting for Network Idle)...');
    await page.goto(targetUrl, { waitUntil: 'networkidle', timeout: 90000 });

    console.log('🌊 Executing "Scroll Wave" to trigger all lazy loaders naturally...');
    await page.evaluate(async () => {
      await new Promise<void>((resolve) => {
        let totalHeight = 0;
        const distance = 400;
        const timer = setInterval(() => {
          // Re-read on every tick: the page may grow while lazy content loads.
          const scrollHeight = document.body.scrollHeight;
          window.scrollBy(0, distance);
          totalHeight += distance;

          if (totalHeight >= scrollHeight) {
            clearInterval(timer);
            window.scrollTo(0, 0); // Reset to top
            resolve();
          }
        }, 100);
      });
    });

    console.log('📐 Expanding Viewport to "Giant Mode" for final asset capture...');
    const fullHeight = await page.evaluate(() => document.body.scrollHeight);
    await page.setViewportSize({ width: 1920, height: fullHeight + 1000 });

    // Final settlement wait
    await page.waitForTimeout(3000);

    console.log('💧 Final DOM Hydration & Sanitization...');
    // NOTE: this callback runs inside the browser; it must not reference any
    // variable from the surrounding Node scope.
    await page.evaluate(() => {
      // A. Deterministic Attribute Hydration (Generic)
      // Scours every element for attributes that look like asset URLs and promotes them
      const assetPattern = /\.(jpg|jpeg|png|gif|svg|webp|mp4|webm|woff2?|ttf|otf)/i;
      const skippedTags = ['META', 'LINK', 'HEAD', 'SCRIPT', 'STYLE', 'SVG', 'PATH'];

      document.querySelectorAll('*').forEach((el) => {
        // 0. Skip Meta/Head/Script/Style/SVG tags for attribute promotion.
        // tagName is upper-case for HTML elements only; SVG elements report
        // "svg"/"path", so compare case-insensitively.
        const tag = el.tagName.toUpperCase();
        if (skippedTags.includes(tag)) return;

        const node = el as HTMLElement;

        // 1. Force Visibility (Anti-Flicker)
        const computed = window.getComputedStyle(node);
        if (computed.opacity === '0' || computed.visibility === 'hidden') {
          node.style.setProperty('opacity', '1', 'important');
          node.style.setProperty('visibility', 'visible', 'important');
        }

        // 2. Promote Data Attributes
        for (const attr of Array.from(node.attributes)) {
          const name = attr.name.toLowerCase();
          const val = attr.value;
          const looksLikeAsset = assetPattern.test(val) || name.includes('src') || name.includes('image');
          if (!looksLikeAsset) continue;

          // Standard Image/Video/Source promotion
          if (tag === 'IMG') {
            const img = node as HTMLImageElement;
            if (name.includes('srcset')) img.srcset = val;
            else if (!img.src || img.src.includes('data:')) img.src = val;
          }
          if (tag === 'SOURCE') {
            if (name.includes('srcset')) (node as HTMLSourceElement).srcset = val;
          }
          if (tag === 'VIDEO' || tag === 'AUDIO') {
            const media = node as HTMLMediaElement;
            if (!media.src) media.src = val;
          }

          // Background Image Promotion
          if (val.match(/^(https?:\/\/|\/\/|\/)/) && !name.includes('href')) {
            const bg = node.style.backgroundImage;
            if (!bg || bg === 'none') {
              node.style.backgroundImage = `url('${val}')`;
            }
          }
        }
      });

      // B. Ensure basic structural elements are visible post-scroll
      const body = document.body;
      if (body) {
        body.style.setProperty('opacity', '1', 'important');
        body.style.setProperty('visibility', 'visible', 'important');
      }
    });

    console.log('⏳ Waiting for network idle...');
    await page.waitForLoadState('networkidle');

    // 1.5 FINAL SETTLEMENT: Let any scroll-triggered JS finish
    await page.waitForTimeout(1000);

    // 2. Static Snapshot
    const content = await page.content();

    // 3. Post-Snapshot Asset Discovery (Regex)
    // Catches assets that never triggered a network request but exist in the markup
    const regexPatterns = [
      /(?:src|href|url|data-[a-z-]+|srcset)=["']([^"'<>\s]+?\.(?:css|js|png|jpg|jpeg|gif|svg|woff2?|ttf|otf|mp4|webm|webp|ico)(?:\?[^"']*)?)["']/gi,
      // Capture CSS url() inside style blocks
      /url\(["']?([^"'\)]+)["']?\)/gi
    ];

    for (const pattern of regexPatterns) {
      let found;
      while ((found = pattern.exec(content)) !== null) {
        try {
          foundAssets.add(new URL(found[1], targetUrl).href);
        } catch {
          // Not a resolvable URL - ignore this candidate.
        }
      }
    }

    // Specific srcset parsing
    // The prefix is optional so that plain `srcset="..."` is parsed as well as
    // prefixed variants such as `data-srcset` / `imagesrcset`.
    const srcsetRegex = /[a-z0-9-]*srcset=["']([^"']+)["']/gi;
    let srcsetMatch;
    while ((srcsetMatch = srcsetRegex.exec(content)) !== null) {
      srcsetMatch[1].split(',').forEach((rule) => {
        // Each candidate is "<url> <descriptor>"; only the URL matters here.
        const parts = rule.trim().split(/\s+/);
        if (parts[0] && !parts[0].startsWith('data:')) {
          try {
            foundAssets.add(new URL(parts[0], targetUrl).href);
          } catch {
            // Not a resolvable URL - ignore this candidate.
          }
        }
      });
    }

    console.log(`🔍 Processing ${foundAssets.size} discovered assets...`);

    // 4. Download & Map
    for (const url of foundAssets) {
      const local = await downloadFile(url, assetsDir);
      if (!local) continue;

      urlMap[url] = local;
      // Also map the query-less form, which is what markup often contains.
      const clean = url.split('?')[0];
      urlMap[clean] = local;

      // Handle CSS recursively
      if (clean.endsWith('.css')) {
        try {
          const { data } = await axios.get(url, { headers: { 'User-Agent': USER_AGENT } });
          // Process CSS and save it
          const processedCss = await processCssRecursively(data, url, assetsDir, urlMap);
          const cssLocation = new URL(url);
          const relPath = sanitizePath(cssLocation.hostname + cssLocation.pathname);
          fs.writeFileSync(path.join(assetsDir, relPath), processedCss);
        } catch {
          // Best effort: the unprocessed stylesheet saved by downloadFile stays in place.
        }
      }
    }

    console.log('🛠️ Finalizing Static Mirror...');
    let finalContent = content;
    const escapeRegExp = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // A. Apply URL Map Replacements
    // Longer paths first to prevent partial replacement errors
    const sortedUrls = Object.keys(urlMap).sort((a, b) => b.length - a.length);
    if (sortedUrls.length > 0) {
      // Create a massive regex for single-pass replacement
      const masterRegex = new RegExp(`(${sortedUrls.map(escapeRegExp).join('|')})`, 'g');
      finalContent = finalContent.replace(masterRegex, (hit) => urlMap[hit] || hit);
    }

    // B. Global Root-Relative Path Cleanup
    // Catches things like /wp-content/ that weren't distinct assets or were missed
    const commonDirs = ['/wp-content/', '/wp-includes/', '/assets/', '/static/', '/images/'];
    for (const dir of commonDirs) {
      const localDir = `./assets/${urlObj.hostname}${dir}`;
      finalContent = finalContent.split(`"${dir}`).join(`"${localDir}`);
      finalContent = finalContent.split(`'${dir}`).join(`'${localDir}`);
      finalContent = finalContent.split(`(${dir}`).join(`(${localDir}`);
    }

    // C. Domain Nuke
    // Replace any remaining absolute link to the original domain with "./" so the
    // static copy never requests the live site. Mapped assets were already
    // rewritten above; what is left is navigation or uncaptured assets.
    // The pattern is built from the www-less slug so that both the bare domain
    // and its www variant are covered regardless of which one was cloned.
    const domainPattern = new RegExp(`https?://(www\\.)?${escapeRegExp(domainSlug)}[^"']*`, 'gi');
    finalContent = finalContent.replace(domainPattern, () => './');

    // D. Static Stability & Cleanup
    // Remove tracking/analytics/lazy-load scripts that ruin stability
    const unstableMarkers = ['google-analytics', 'gtag', 'fbq', 'lazy', 'tracker'];
    finalContent = finalContent.replace(/<script\b[^>]*>([\s\S]*?)<\/script>/gi, (scriptTag: string, scriptBody: string) => {
      const lower = scriptBody.toLowerCase();
      return unstableMarkers.some((marker) => lower.includes(marker)) ? '' : scriptTag;
    });

    // E. CSS Injections for Stability
    const headEnd = finalContent.indexOf('</head>');
    if (headEnd > -1) {
      const stabilityCss = `
<style>
/* UNIVERSAL CLONE STABILIZATION */
* {
  transition: none !important;
  animation: none !important;
  scroll-behavior: auto !important;
}
[data-aos], .reveal, .lazypath, .lazy-load, [data-src] {
  opacity: 1 !important;
  visibility: visible !important;
  transform: none !important;
  clip-path: none !important;
}

img, video, iframe {
  max-width: 100%;
  display: block;
}
a {
  pointer-events: none;
  cursor: default;
}
</style>`;
      finalContent = finalContent.slice(0, headEnd) + stabilityCss + finalContent.slice(headEnd);
    }

    // Save
    const finalPath = path.join(domainDir, htmlFilename);
    fs.writeFileSync(finalPath, finalContent);
    console.log(`✅ SUCCESS: Cloned to ${finalPath}`);
  } catch (err) {
    console.error('❌ FATAL ERROR:', err);
    // Signal the failure to the caller without skipping the cleanup below.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point. run() can reject before its own try/catch is entered
// (e.g. an invalid URL argument or a failed browser launch); report that
// cleanly instead of leaving an unhandled promise rejection.
run().catch((err: unknown) => {
  console.error('❌ FATAL ERROR:', err);
  process.exitCode = 1;
});
|
||||
@@ -1,244 +0,0 @@
|
||||
// @ts-ignore
|
||||
import scrape from 'website-scraper';
|
||||
// @ts-ignore
|
||||
import PuppeteerPlugin from 'website-scraper-puppeteer';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import fs from 'node:fs';
|
||||
|
||||
// ES modules have no built-in __filename/__dirname; rebuild them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
|
||||
// Directory containing this script; run() resolves the default "../cloned-websites" output folder relative to it.
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * Scraper plugin that derives a resource's local filename from the path part of
 * its URL.
 *
 * Rules applied by the registered 'generateFilename' action:
 *  - a path ending in "/" gets "index.html" appended;
 *  - an extension-less path of an HTML resource gets ".html" appended;
 *  - an extension-less path of a non-HTML resource is kept as it is;
 *  - one leading "/" is removed and the characters : * ? " < > | become "_".
 *
 * Query strings and the hostname are ignored, so every resource lands in one
 * shared tree; identical paths on different hosts would collide.
 */
class CustomFilenameGeneratorPlugin {
  apply(registerAction: any) {
    const toFilename = ({ resource }: any) => {
      const { pathname } = new URL(resource.url);

      // Decide what, if anything, has to be appended to get a usable file name.
      let suffix = '';
      if (pathname.endsWith('/')) {
        suffix = 'index.html';
      } else if (path.extname(pathname) === '' && resource.isHtml()) {
        suffix = '.html';
      }

      // Make the path relative, then replace characters that are not allowed in file names.
      const relative = (pathname + suffix).replace(/^\//, '');
      return { filename: relative.replace(/[:*?"<>|]/g, '_') };
    };

    registerAction('generateFilename', toFilename);
  }
}
|
||||
|
||||
/**
 * CLI entry point: recursively clones a website into a local directory.
 *
 * Usage: `npm run clone-website <URL> [output-dir]`
 *
 * Without an explicit output directory the clone is written to
 * ../cloned-websites/<sanitized hostname> relative to this script.
 * WARNING: an existing output directory is deleted recursively before cloning.
 *
 * Pages of the target host are followed up to depth 5; assets may come from any
 * host and are stored below `_external/<hostname>/`. After scraping, the saved
 * HTML files are passed to sanitizeHtmlFiles.
 *
 * Exits with code 1 when the URL argument is missing or the scrape fails.
 */
async function run() {
  const targetUrl = process.argv[2];
  if (!targetUrl) {
    console.error('Usage: npm run clone-website <URL> [output-dir]');
    process.exit(1);
  }

  const urlObj = new URL(targetUrl);
  const domain = urlObj.hostname;
  const safeDomain = domain.replace(/[^a-z0-9-]/gi, '_');
  const outputDir = process.argv[3]
    ? path.resolve(process.cwd(), process.argv[3])
    : path.resolve(__dirname, '../cloned-websites', safeDomain);

  // The previous clone is removed so the scraper starts from an empty directory.
  if (fs.existsSync(outputDir)) {
    console.log(`Cleaning existing directory: ${outputDir}`);
    fs.rmSync(outputDir, { recursive: true, force: true });
  }

  console.log(`🚀 Starting recursive clone of ${targetUrl}`);
  console.log(`📂 Output: ${outputDir}`);

  const options = {
    urls: [targetUrl],
    directory: outputDir,
    recursive: true,
    maxDepth: 5,
    // Custom filename generation to avoid "https:/" folders
    plugins: [
      new PuppeteerPlugin({
        launchOptions: {
          headless: true,
          args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
        },
        scrollToBottom: { timeout: 10000, viewportN: 10 },
        blockNavigation: false
      }),
      // Progress / error output for every processed resource.
      new class LoggerPlugin {
        apply(registerAction: any) {
          registerAction('onResourceSaved', ({ resource }: any) => {
            console.log(` 💾 Saved: ${resource.url} -> ${resource.filename}`);
          });
          registerAction('onResourceError', ({ resource, error }: any) => {
            console.error(` ❌ Error: ${resource.url} - ${error.message}`);
          });
        }
      },
      // Maps every resource URL to a sanitized, collision-free local filename.
      new class FilenamePlugin {
        apply(registerAction: any) {
          registerAction('generateFilename', ({ resource }: any) => {
            const u = new URL(resource.url);
            const isInternal = u.hostname === domain;
            let filename = u.pathname;

            // normalize
            if (filename.endsWith('/')) {
              filename += 'index.html';
            } else if (!path.extname(filename) && isInternal) {
              // Assume folder if internal link without ext. "Internal" is decided
              // by hostname: a substring test on the whole URL would also match
              // foreign URLs that merely mention the domain (e.g. in a query).
              filename += '/index.html';
            }

            // If it's an external asset, put it in a separate folder
            if (!isInternal) {
              filename = `_external/${u.hostname}${filename}`;
            }

            // Sanitize filename (segment by segment, keeping the directory structure)
            filename = filename
              .split('/')
              .map((part) => part.replace(/[^a-z0-9._-]/gi, '_'))
              .join('/');

            // Remove leading slash
            if (filename.startsWith('/')) filename = filename.substring(1);

            // Handle "Unnamed page" by checking if empty
            if (!filename || filename === 'index.html') return { filename: 'index.html' };

            return { filename };
          });
        }
      }
    ],

    // Decides which discovered URLs are downloaded at all.
    urlFilter: (url: string) => {
      let u: URL;
      try {
        u = new URL(url);
      } catch {
        // A malformed link in the page must not abort the whole scrape.
        return false;
      }

      const isTargetDomain = u.hostname === domain;
      const isGoogleFonts = u.hostname.includes('fonts.googleapis.com') || u.hostname.includes('fonts.gstatic.com');
      // Allow assets from anywhere
      const isAsset = /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|eot|mp4|webm|ico|json|webp)$/i.test(u.pathname);
      // Allow fonts/css from common CDNs if standard extension check fails
      const isCommonAsset = ['/css/', '/js/', '/static/', '/assets/', '/uploads/'].some((dir) => u.pathname.includes(dir));

      return isTargetDomain || isAsset || isCommonAsset || isGoogleFonts;
    },

    // Element/attribute pairs the scraper inspects for resources to download.
    sources: [
      { selector: 'img', attr: 'src' },
      { selector: 'img', attr: 'srcset' },
      { selector: 'source', attr: 'src' },
      { selector: 'source', attr: 'srcset' },
      { selector: 'link[rel="stylesheet"]', attr: 'href' },
      { selector: 'link[rel="preload"]', attr: 'href' },
      { selector: 'link[rel="prefetch"]', attr: 'href' },
      { selector: 'script', attr: 'src' },
      { selector: 'video', attr: 'src' },
      { selector: 'video', attr: 'poster' },
      { selector: 'iframe', attr: 'src' },
      { selector: 'link[rel*="icon"]', attr: 'href' },
      { selector: 'link[rel="manifest"]', attr: 'href' },
      { selector: 'meta[property="og:image"]', attr: 'content' }
    ],

    request: {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
      }
    }
  };

  try {
    // @ts-ignore
    const result = await scrape(options);
    console.log(`\n✅ Successfully cloned ${result.length} resources to ${outputDir}`);

    // Post-processing: Sanitize HTML to remove Next.js hydration scripts
    // This prevents the static site from trying to "hydrate" and breaking images/links
    console.log('🧹 Sanitizing HTML files...');
    sanitizeHtmlFiles(outputDir);

    // Printed as a ready-to-paste shell command for viewing the result.
    console.log(`open "${path.join(outputDir, 'index.html')}"`);
  } catch (error) {
    console.error('❌ Error cloning website:', error);
    process.exit(1);
  }
}
|
||||
|
||||
/**
 * Recursively post-processes every `.html` file below `dir` so the cloned
 * pages work as plain static files.
 *
 * For each HTML file it:
 *  - removes the Next.js `__NEXT_DATA__` payload and the chunk/manifest
 *    scripts so the page does not try to hydrate,
 *  - turns Breeze `breeze-scripts-load` placeholders into real
 *    `<link>`/`<script>` tags,
 *  - injects fallback Google Fonts when no Google Fonts stylesheet is linked,
 *  - injects a small script forcing a single-column layout on pages that
 *    contain a `.products` element.
 *
 * The function is idempotent: running it again over already-sanitized files
 * leaves them untouched (and does not rewrite them on disk).
 *
 * @param dir Directory to walk; sub-directories are processed recursively.
 */
function sanitizeHtmlFiles(dir: string) {
  // Marker attribute used to detect a previously injected layout script.
  const layoutMarker = 'data-clone-layout-fix';

  for (const entry of fs.readdirSync(dir)) {
    const fullPath = path.join(dir, entry);

    if (fs.statSync(fullPath).isDirectory()) {
      sanitizeHtmlFiles(fullPath);
      continue;
    }
    if (!entry.endsWith('.html')) continue;

    const original = fs.readFileSync(fullPath, 'utf8');
    let content = original;

    // Remove Next.js data script
    content = content.replace(/<script id="__NEXT_DATA__"[\s\S]*?<\/script>/gi, '');

    // Remove Next.js chunk scripts (hydration)
    // match <script src="..._next/static/chunks..." ...
    content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/chunks\/[^"]*"[^>]*><\/script>/gi, '');
    content = content.replace(/<script[^>]+src="[^"]*\/_next\/static\/[^"]*Manifest\.js"[^>]*><\/script>/gi, '');

    // Convert Breeze dynamic script/styles into actual tags if possible
    // match <div class="breeze-scripts-load" ...>URL</div>
    content = content.replace(
      /<div[^>]+class="breeze-scripts-load"[^>]*>([^<]+)<\/div>/gi,
      (match: string, rawUrl: string) => {
        // The placeholder text is often padded with whitespace/newlines.
        const url = rawUrl.trim();
        if (!url) return match;
        // Ignore query string / fragment when deciding between CSS and JS
        // (e.g. "style.css?ver=6.4" is still a stylesheet).
        const urlPath = url.replace(/[?#].*$/, '');
        if (urlPath.endsWith('.css')) return `<link rel="stylesheet" href="${url}">`;
        return `<script src="${url}"></script>`;
      }
    );

    // Inject Fonts (Fix for missing dynamic fonts)
    // We inject Inter and Montserrat as safe defaults for industrial/modern sites
    // Check specifically for a stylesheet link to google fonts
    const hasGoogleFontStylesheet = /<link[^>]+rel="stylesheet"[^>]+href="[^"]*fonts\.googleapis\.com/i.test(content);
    if (!hasGoogleFontStylesheet) {
      const fontLink = `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Montserrat:wght@300;400;500;600;700&display=swap">`;
      const styleBlock = `<style>
:root { --main-font: 'Inter', sans-serif; --heading-font: 'Montserrat', sans-serif; }
body, .body-font, p, span, li, a { font-family: var(--main-font) !important; }
h1, h2, h3, h4, h5, h6, .title-font, .heading-font { font-family: var(--heading-font) !important; }
</style>`;
      content = content.replace('</head>', `${fontLink}${styleBlock}</head>`);
    }

    // Force column layout on product pages (only once per file)
    if (content.includes('class="products') && !content.includes(layoutMarker)) {
      const layoutScript = `
<script ${layoutMarker}>
document.addEventListener('DOMContentLoaded', function() {
const products = document.querySelector('.products');
if (products) {
products.classList.remove(...Array.from(products.classList).filter(c => c.startsWith('columns-')));
products.classList.add('columns-1');
products.setAttribute('data-n-desktop-columns', '1');
}
});
</script>`;
      content = content.replace('</body>', `${layoutScript}</body>`);
    }

    // Avoid touching files that did not need any change.
    if (content !== original) {
      fs.writeFileSync(fullPath, content);
    }
  }
}
|
||||
|
||||
// Script entry point: start the clone.
// NOTE(review): `run` is defined earlier in this file (outside this excerpt);
// presumably it is async — confirm, and consider handling a rejected promise here.
run();
|
||||
@@ -1,130 +0,0 @@
|
||||
import { PlaywrightCrawler, RequestQueue } from 'crawlee';
import { execFileSync, execSync } from 'node:child_process';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { fileURLToPath } from 'node:url';
import { URL } from 'node:url';
|
||||
|
||||
// Absolute path of this module, derived from its `import.meta.url`.
const __filename = fileURLToPath(import.meta.url);
|
||||
// Directory containing this script; the output folder is resolved relative to it.
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * The Ultimate Website Cloner
 * Uses Crawlee for discovery and single-file-cli for perfect page capture.
 *
 * CLI arguments:
 *  - `process.argv[2]`: URL to clone (required; the process exits with code 1 if missing).
 *  - `process.argv[3]`: optional output folder name; defaults to the hostname
 *    with dots replaced by dashes.
 *
 * Steps:
 *  1. Wipe and recreate `../cloned-websites/<name>` next to this script.
 *  2. Crawl the site (same-domain links, at most 100 requests) and capture
 *     every visited page with `single-file-cli`.
 *  3. Rewrite `href` attributes that point to the cloned domain into relative
 *     links between the captured files so the clone can be browsed offline.
 */
async function cloneWebsite() {
  const targetUrl = process.argv[2];
  if (!targetUrl) {
    console.error('Please provide a URL as an argument.');
    process.exit(1);
  }

  const urlObj = new URL(targetUrl);
  const domain = urlObj.hostname;
  const outputDirName = process.argv[3] || domain.replace(/\./g, '-');
  const baseOutputDir = path.resolve(__dirname, '../cloned-websites', outputDirName);

  if (fs.existsSync(baseOutputDir)) {
    fs.rmSync(baseOutputDir, { recursive: true, force: true });
  }
  fs.mkdirSync(baseOutputDir, { recursive: true });

  console.log(`🚀 Starting perfect recursive clone of ${targetUrl}...`);
  console.log(`📂 Output: ${baseOutputDir}`);

  // Maps a URL pathname to the (POSIX-style, root-relative) file it is stored in.
  // Extension-less paths are treated as folders containing an index.html.
  const toLocalPath = (pathname: string): string => {
    let rel = pathname;
    if (rel === '/' || rel === '') rel = '/index.html';
    if (!rel.endsWith('.html') && !path.extname(rel)) rel += '/index.html';
    if (rel.startsWith('/')) rel = rel.substring(1);
    return rel;
  };

  const requestQueue = await RequestQueue.open();
  await requestQueue.addRequest({ url: targetUrl });

  const crawler = new PlaywrightCrawler({
    requestQueue,
    maxRequestsPerCrawl: 100,
    maxConcurrency: 3, // SingleFile is resource intensive

    async requestHandler({ request, enqueueLinks, log }) {
      const url = request.url;
      log.info(`Capturing ${url}...`);

      // 1. Determine local path
      const fullPath = path.join(baseOutputDir, toLocalPath(new URL(url).pathname));
      fs.mkdirSync(path.dirname(fullPath), { recursive: true });

      // 2. Use single-file-cli for perfect capture.
      // Links between pages are rewritten by us in step 4 for better control.
      // The URL comes from crawled pages (untrusted), so it is passed as a
      // separate argument instead of being interpolated into a shell string.
      try {
        execFileSync(
          'npx',
          ['single-file-cli', url, fullPath, '--browser-headless=true', '--browser-wait-until=networkidle0'],
          { stdio: 'inherit' }
        );
      } catch (error) {
        // Best effort: a page that cannot be captured must not abort the crawl.
        const reason = error instanceof Error ? error.message : String(error);
        log.error(`Failed to capture ${url} with SingleFile: ${reason}`);
      }

      // 3. Enqueue subpages (discovery)
      await enqueueLinks({
        strategy: 'same-domain',
        transformRequestFunction: (req) => {
          // Skip downloads and static assets; only HTML pages are captured.
          if (/\.(download|pdf|zip|gz|exe|png|jpg|jpeg|gif|svg|css|js)$/i.test(req.url)) return false;
          return req;
        }
      });
    },
  });

  await crawler.run();

  // 4. Post-processing: Rewrite links between the captured files
  console.log('🔗 Rewriting internal links for offline navigation...');
  const allFiles = getFiles(baseOutputDir).filter(f => f.endsWith('.html'));

  for (const file of allFiles) {
    const original = fs.readFileSync(file, 'utf8');
    // hrefs always use forward slashes, so work with POSIX-style paths
    // regardless of the host OS.
    const fileRelToRoot = path.relative(baseOutputDir, file).split(path.sep).join('/');
    const fileDir = path.posix.dirname(fileRelToRoot);

    // Simple but effective regex for internal links
    const rewritten = original.replace(/href="([^"]+)"/g, (match: string, href: string) => {
      // In-page anchors must keep pointing at the current document.
      if (href.startsWith('#')) return match;

      const looksInternal =
        href.startsWith(targetUrl) ||
        href.startsWith('/') ||
        (!href.includes('://') && !href.startsWith('data:'));
      if (!looksInternal) return match;

      let linkUrl: URL;
      try {
        linkUrl = new URL(href, urlObj.href);
      } catch {
        // Not a parseable URL: leave the attribute untouched.
        return match;
      }
      // Also filters out mailto:/tel:/javascript: links (empty hostname).
      if (linkUrl.hostname !== domain) return match;

      const relativeLink = path.posix.relative(fileDir, toLocalPath(linkUrl.pathname));
      // Keep the fragment so links to a section of another page still work.
      return `href="${relativeLink}${linkUrl.hash}"`;
    });

    if (rewritten !== original) {
      fs.writeFileSync(file, rewritten);
    }
  }

  console.log(`\n✅ Done! Perfect clone complete in: ${baseOutputDir}`);
}
|
||||
|
||||
/**
 * Collects the paths of all files below `dir`, descending into sub-directories.
 *
 * @param dir Directory to walk.
 * @param fileList Accumulator the found paths are appended to (a fresh array by default).
 * @returns The accumulator, containing every non-directory entry found.
 */
function getFiles(dir: string, fileList: string[] = []) {
  fs.readdirSync(dir)
    .map((entry) => path.join(dir, entry))
    .forEach((entryPath) => {
      if (!fs.statSync(entryPath).isDirectory()) {
        fileList.push(entryPath);
        return;
      }
      // Directories are walked in place so the listing order is preserved.
      getFiles(entryPath, fileList);
    });

  return fileList;
}
|
||||
|
||||
// Script entry point: run the clone and exit with a non-zero code on any
// error that escaped it.
void cloneWebsite().catch(function reportFatal(reason) {
  console.error('❌ Fatal error:', reason);
  process.exit(1);
});
|
||||
@@ -1,150 +0,0 @@
|
||||
import scrape from 'website-scraper';
|
||||
import PuppeteerPlugin from 'website-scraper-puppeteer';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import fs from 'fs';
|
||||
|
||||
// Absolute path of this module, derived from its `import.meta.url`.
const __filename = fileURLToPath(import.meta.url);
|
||||
// Directory containing this script; the output folder is resolved relative to it.
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * Custom website-scraper plugin to handle Next.js and Mac-specific path issues.
 *
 * It registers two actions:
 *  - `beforeStart`: extends `options.sources` with lazy-loading, preload and
 *    inline-style selectors (entries already configured are not duplicated).
 *  - `generateFilename`: maps Next.js optimized image URLs
 *    (`/_next/image?url=...&w=...`) to `_next/optimized/<name>-<width><ext>`
 *    and renames path segments ending in `.app` to `-app`.
 */
class PortfolioPlugin {
  /**
   * @param registerAction Registration callback handed in by website-scraper;
   *   called as `registerAction(actionName, handler)`.
   */
  apply(registerAction: any) {
    // 1. Add more sources before starting
    registerAction('beforeStart', ({ options }: any) => {
      if (!options.sources) options.sources = [];

      // Note: `data-nimg` is intentionally not listed. It only carries a
      // Next.js marker value (not a URL), so treating it as a resource
      // reference would just produce bogus requests.
      const extraSources = [
        { selector: 'img', attr: 'data-src' },
        { selector: 'img', attr: 'data-srcset' },
        { selector: 'video', attr: 'poster' },
        { selector: 'source', attr: 'data-srcset' },
        { selector: '[style*="background-image"]', attr: 'style' },
        { selector: 'link[as="font"]', attr: 'href' },
        { selector: 'link[as="image"]', attr: 'href' },
        { selector: 'link[as="style"]', attr: 'href' },
        { selector: 'link[as="script"]', attr: 'href' },
      ];

      for (const source of extraSources) {
        const alreadyConfigured = options.sources.some(
          (existing: any) => existing.selector === source.selector && existing.attr === source.attr
        );
        if (!alreadyConfigured) options.sources.push(source);
      }
    });

    // 2. Sanitize filenames and handle Next.js optimized images
    registerAction('generateFilename', ({ resource, filename }: any) => {
      const url = resource.getUrl();
      let result = filename;

      // Handle Next.js optimized images: /_next/image?url=...&w=...
      if (url.includes('/_next/image')) {
        try {
          const urlParams = new URL(url).searchParams;
          const originalUrl = urlParams.get('url');
          if (originalUrl) {
            const cleanPath = originalUrl.split('?')[0];
            const ext = path.extname(cleanPath) || '.webp';
            const name = path.basename(cleanPath, ext);
            const width = urlParams.get('w') || 'auto';
            result = `_next/optimized/${name}-${width}${ext}`;
          }
        } catch {
          // Deliberate best effort: if the URL cannot be parsed, keep the
          // filename proposed by the scraper.
        }
      }

      // CRITICAL MAC FIX: Replace .app with -app in all paths to prevent hidden Application Bundles
      // We split by / to ensure we only replace .app at the end of a directory name or filename
      result = result
        .split('/')
        .map((segment: string) => (segment.endsWith('.app') ? segment.replace(/\.app$/, '-app') : segment))
        .join('/');

      return { filename: result };
    });
  }
}
|
||||
|
||||
/**
 * Clones a website into `../cloned-websites/<name>` using website-scraper
 * with the Puppeteer plugin.
 *
 * CLI arguments:
 *  - `process.argv[2]`: URL to clone (required; exits with code 1 if missing).
 *  - `process.argv[3]`: optional output folder name; defaults to the hostname
 *    with dots replaced by dashes. A trailing `.app` is renamed to `-app`.
 *
 * Any existing output directory is removed first. The process exits with
 * code 1 when scraping fails.
 */
async function cloneWebsite() {
  const url = process.argv[2];
  if (!url) {
    console.error('Please provide a URL as an argument.');
    process.exit(1);
  }

  const domain = new URL(url).hostname;
  let outputDirName = process.argv[3] || domain.replace(/\./g, '-');

  // Sanitize top-level folder name for Mac
  if (outputDirName.endsWith('.app')) {
    outputDirName = outputDirName.replace(/\.app$/, '-app');
  }

  const outputDir = path.resolve(__dirname, '../cloned-websites', outputDirName);

  if (fs.existsSync(outputDir)) {
    fs.rmSync(outputDir, { recursive: true, force: true });
  }

  // True when `host` is `suffix` itself or one of its sub-domains.
  const hostMatches = (host: string, suffix: string): boolean =>
    host === suffix || host.endsWith(`.${suffix}`);

  console.log(`Cloning ${url} to ${outputDir}...`);

  try {
    await scrape({
      urls: [url],
      directory: outputDir,
      recursive: true,
      maxRecursiveDepth: 5,
      requestConcurrency: 10,
      plugins: [
        new PuppeteerPlugin({
          launchOptions: { headless: true, args: ['--no-sandbox'] },
          gotoOptions: { waitUntil: 'networkidle0', timeout: 60000 },
          scrollToBottom: { timeout: 20000, viewportN: 20 },
        }),
        new PortfolioPlugin()
      ],
      sources: [
        { selector: 'img', attr: 'src' },
        { selector: 'img', attr: 'srcset' },
        { selector: 'img', attr: 'data-src' },
        { selector: 'img', attr: 'data-srcset' },
        { selector: 'link[rel="stylesheet"]', attr: 'href' },
        { selector: 'link[rel*="icon"]', attr: 'href' },
        { selector: 'script', attr: 'src' },
        { selector: 'link[rel="preload"]', attr: 'href' },
        { selector: 'link[rel="prefetch"]', attr: 'href' },
        { selector: 'link[rel="modulepreload"]', attr: 'href' },
        { selector: 'link[rel="apple-touch-icon"]', attr: 'href' },
        { selector: 'link[rel="mask-icon"]', attr: 'href' },
        { selector: 'source', attr: 'src' },
        { selector: 'source', attr: 'srcset' },
        { selector: 'video', attr: 'src' },
        { selector: 'video', attr: 'poster' },
        { selector: 'audio', attr: 'src' },
        { selector: 'iframe', attr: 'src' },
        { selector: 'meta[property="og:image"]', attr: 'content' },
        { selector: 'meta[name="twitter:image"]', attr: 'content' },
        { selector: '[style]', attr: 'style' },
      ],
      urlFilter: (link: string) => {
        // Never try to download non-fetchable schemes.
        if (link.startsWith('data:') || link.startsWith('mailto:') || link.startsWith('tel:')) {
          return false;
        }

        // Decide by the link's actual host. A substring test on the whole URL
        // would also accept third-party pages that merely mention the domain
        // (e.g. social share links carrying it in a query parameter).
        let host: string;
        try {
          // Relative and protocol-relative links are resolved against the start URL.
          host = new URL(link, url).hostname;
        } catch {
          return false; // not a URL that could be fetched
        }

        const isAsset = /\.(js|css|jpg|jpeg|png|gif|svg|webp|woff|woff2|ttf|eot|otf|mp4|webm|mov|ogg|pdf|ico)(\?.*)?$/i.test(link);
        const isNextAsset = link.includes('/_next/');
        const isSameDomain = hostMatches(host, domain);
        const isGoogleTagManager = hostMatches(host, 'googletagmanager.com');
        const isAnalytics = hostMatches(host, 'analytics.mintel.me');
        const isVercelApp = hostMatches(host, 'vercel.app');

        return isAsset || isNextAsset || isSameDomain || isGoogleTagManager || isAnalytics || isVercelApp;
      },
      filenameGenerator: 'bySiteStructure',
      subdirectories: [
        { directory: 'img', extensions: ['.jpg', '.png', '.svg', '.webp', '.gif', '.ico'] },
        { directory: 'js', extensions: ['.js'] },
        { directory: 'css', extensions: ['.css'] },
        { directory: 'fonts', extensions: ['.woff', '.woff2', '.ttf', '.eot', '.otf'] },
        { directory: 'videos', extensions: ['.mp4', '.webm', '.mov', '.ogg'] },
      ],
    });

    console.log('✅ Website cloned successfully!');
    console.log(`Location: ${outputDir}`);
  } catch (error) {
    console.error('❌ Error cloning website:', error);
    process.exit(1);
  }
}
|
||||
|
||||
// Script entry point. Errors thrown outside the scraper's own try/catch
// (e.g. an invalid URL argument) would otherwise surface as an unhandled
// promise rejection, so report them and exit with a failure code.
cloneWebsite().catch((error) => {
  console.error('❌ Fatal error:', error);
  process.exit(1);
});
|
||||
@@ -1,131 +0,0 @@
|
||||
import * as fs from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import * as readline from 'node:readline/promises';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { createElement } from 'react';
|
||||
import { renderToFile } from '@react-pdf/renderer';
|
||||
import { calculatePositions, calculateTotals } from '../src/logic/pricing/calculator.js';
|
||||
import { CombinedQuotePDF } from '../src/components/CombinedQuotePDF.js';
|
||||
import { initialState, PRICING } from '../src/logic/pricing/constants.js';
|
||||
import { getTechDetails, getPrinciples } from '../src/logic/content-provider.js';
|
||||
|
||||
// Absolute path of this module, derived from its `import.meta.url`.
const __filename = fileURLToPath(import.meta.url);
|
||||
// Directory containing this script.
// NOTE(review): not referenced in the visible part of this script (paths are
// resolved from process.cwd()) — confirm whether it is still needed.
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * CLI entry point of the quote generator: builds the pricing state and
 * renders it to a PDF file.
 *
 * Supported arguments:
 *  - `--interactive` / `-I`: run the interactive wizard on top of the state.
 *  - `--estimation` / `-E`: render the estimation only (no AGBs).
 *  - `--input <file>` / `-i <file>`: JSON file merged over the initial state.
 */
async function main() {
  const cliArgs = process.argv.slice(2);
  const hasFlag = (long: string, short: string) => cliArgs.includes(long) || cliArgs.includes(short);

  const isInteractive = hasFlag('--interactive', '-I');
  const isEstimationOnly = hasFlag('--estimation', '-E');

  // The input path is the argument right after the first --input / -i flag.
  const inputFlagIndex = cliArgs.findIndex((arg) => arg === '--input' || arg === '-i');
  const inputPath = inputFlagIndex === -1 ? undefined : cliArgs[inputFlagIndex + 1];

  let state = { ...initialState };

  if (inputPath) {
    const absoluteInput = path.resolve(process.cwd(), inputPath);
    const diskState = JSON.parse(fs.readFileSync(absoluteInput, 'utf8'));
    state = { ...state, ...diskState };
  }

  if (isInteractive) {
    state = await runWizard(state);
  }

  // Final confirmation of data needed for PDF
  if (!(state.name && state.email)) {
    console.warn('⚠️ Missing recipient name or email. Document might look incomplete.');
  }

  const { totalPrice, monthlyPrice, totalPagesCount } = calculateTotals(state, PRICING);

  const finalOutputPath = generateDefaultPath(state);
  const outputDir = path.dirname(finalOutputPath);
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  // Resolve assets for the PDF
  const assetsDir = path.resolve(process.cwd(), 'src/assets');

  console.log(`🚀 Generating PDF: ${finalOutputPath}`);

  const estimationProps = {
    state,
    totalPrice,
    monthlyPrice,
    totalPagesCount,
    pricing: PRICING,
    headerIcon: path.join(assetsDir, 'logo/Icon White Transparent.png'),
    footerLogo: path.join(assetsDir, 'logo/Logo Black Transparent.png')
  };

  const documentProps = {
    estimationProps,
    techDetails: getTechDetails(),
    principles: getPrinciples(),
    mode: isEstimationOnly ? 'estimation' : 'full',
    showAgbs: !isEstimationOnly // AGBS only for full quotes
  };

  await renderToFile(createElement(CombinedQuotePDF as any, documentProps) as any, finalOutputPath);

  console.log('✅ Done!');
}
|
||||
|
||||
/**
 * Interactive wizard that asks for the core quote data on stdin/stdout.
 *
 * Every text question shows the current value as default; pressing Enter keeps
 * it. For the project type, an empty or invalid selection keeps the current
 * `state.projectType` when it is one of the offered options, otherwise the
 * first option is used.
 *
 * @param state Quote state; it is mutated in place.
 * @returns The same `state` object with the answers applied.
 */
async function runWizard(state: any) {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });

  const ask = async (q: string, def?: string) => {
    const answer = await rl.question(`${q}${def ? ` [${def}]` : ''}: `);
    return answer || def || '';
  };

  const selectOne = async (q: string, options: { id: string, label: string }[], currentId?: string) => {
    console.log(`\n${q}:`);
    options.forEach((opt, i) => console.log(`${i + 1}) ${opt.label}`));
    const answer = await rl.question('Selection (number): ');
    const idx = Number.parseInt(answer, 10) - 1;
    // Keep a previously chosen value (e.g. loaded via --input) instead of
    // silently resetting it to the first option.
    const fallback = options.find((opt) => opt.id === currentId) ?? options[0];
    return options[idx]?.id ?? fallback.id;
  };

  try {
    console.log('\n--- Mintel Quote Generator Wizard ---\n');

    state.name = await ask('Recipient Name', state.name);
    state.email = await ask('Recipient Email', state.email);
    state.companyName = await ask('Company Name', state.companyName);

    state.projectType = await selectOne(
      'Project Type',
      [
        { id: 'website', label: 'Website' },
        { id: 'web-app', label: 'Web App' }
      ],
      state.projectType
    );

    if (state.projectType === 'website') {
      state.websiteTopic = await ask('Website Topic', state.websiteTopic);
      // Simplified for now, in a real tool we'd loop through all options
    }

    return state;
  } finally {
    // Always release stdin, even when a prompt fails, so the process can exit.
    rl.close();
  }
}
|
||||
|
||||
|
||||
/**
 * Builds the default output path for a generated PDF:
 * `<cwd>/out/estimations/<YYYY-MM>/<YYYY-MM-DD>_<HH-mm-ss>_<company>_<projectType>.pdf`.
 *
 * Date and time are both taken from the local clock so the two parts of the
 * name can never disagree around midnight.
 *
 * @param state Quote state; `companyName` (or `name`) and `projectType` are used.
 * @returns Absolute path of the PDF to write.
 */
function generateDefaultPath(state: any) {
  const now = new Date();
  const pad = (value: number) => String(value).padStart(2, '0');

  const month = `${now.getFullYear()}-${pad(now.getMonth() + 1)}`;
  const day = `${month}-${pad(now.getDate())}`;
  // Include minutes and seconds so consecutive runs get distinct names.
  const time = [now.getHours(), now.getMinutes(), now.getSeconds()].map(pad).join('-');

  const company = String(state.companyName || state.name || 'Unknown').replace(/[^a-z0-9]/gi, '_');
  // Avoid a literal "undefined" in the file name when no project type is set.
  const projectType = String(state.projectType || 'unknown').replace(/[^a-z0-9-]/gi, '_');

  return path.join(process.cwd(), 'out', 'estimations', month, `${day}_${time}_${company}_${projectType}.pdf`);
}
|
||||
|
||||
// Script entry point: run the generator and exit with a non-zero code on failure.
void main().catch(function reportFailure(reason) {
  console.error('❌ Error:', reason);
  process.exit(1);
});
|
||||
@@ -1,85 +0,0 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
 * Verify that the embed components, the demo post and the blog listing entry
 * exist on disk. All paths are resolved from the current working directory.
 *
 * The script only reports; it never throws for a missing file.
 */

import { existsSync, readFileSync } from 'fs';
import { join } from 'path';

console.log('🔍 Verifying Embed Components...\n');

// Test 1: Check that every component file actually exists
const componentNames = ['YouTubeEmbed', 'TwitterEmbed', 'GenericEmbed'];
let componentsOk = true;
for (const name of componentNames) {
  const fileName = `${name}.astro`;
  if (existsSync(join(process.cwd(), 'src', 'components', fileName))) {
    console.log(`✅ ${fileName} exists`);
  } else {
    componentsOk = false;
    console.log(`❌ ${fileName} is missing`);
  }
}

// Test 2: Check demo post accessibility
let demoExists = false;
let demoOk = false;
try {
  const demoPath = join(process.cwd(), 'src', 'pages', 'blog', 'embed-demo.astro');
  demoExists = existsSync(demoPath);

  if (!demoExists) {
    console.log('❌ Demo post not found at src/pages/blog/embed-demo.astro');
  } else {
    const content = readFileSync(demoPath, 'utf-8');

    // Check if demo has proper structure
    const hasImports = componentNames.every((name) => content.includes(`import ${name}`));
    // Match the opening tag without the closing bracket so that components
    // used with props (e.g. <GenericEmbed url="...">) are recognised too.
    const hasUsage = componentNames.every((name) => content.includes(`<${name}`));

    if (hasImports && hasUsage) {
      demoOk = true;
      console.log('✅ Demo post has correct imports and usage');
    } else {
      console.log('❌ Demo post missing imports or usage');
    }

    // Check if it has BaseLayout
    if (content.includes('BaseLayout')) {
      console.log('✅ Demo post uses BaseLayout');
    } else {
      console.log('❌ Demo post missing BaseLayout');
    }
  }
} catch (error) {
  console.log('❌ Demo post check error:', error);
}

// Test 3: Check blogPosts array
let listedInBlog = false;
try {
  const blogPostsPath = join(process.cwd(), 'src', 'data', 'blogPosts.ts');
  const content = readFileSync(blogPostsPath, 'utf-8');

  // Check if embed-demo needs to be added
  listedInBlog = content.includes('embed-demo');
  if (!listedInBlog) {
    console.log('⚠️ embed-demo not in blogPosts array - this is why it won\'t show in blog list');
    console.log(' But it should still be accessible at /blog/embed-demo directly');
  } else {
    console.log('✅ embed-demo found in blogPosts array');
  }
} catch (error) {
  console.log('❌ blogPosts check error:', error);
}

// Summary: only claim what the checks above actually established.
console.log('\n' + '='.repeat(60));
console.log('📋 SUMMARY:');
console.log(
  componentsOk
    ? '• Components are created and structured correctly'
    : '• One or more component files are missing'
);
console.log(
  demoExists
    ? '• Demo post exists at src/pages/blog/embed-demo.astro'
    : '• Demo post is missing at src/pages/blog/embed-demo.astro'
);
console.log(
  demoOk
    ? '• Demo post has all required imports and usage'
    : '• Demo post is missing required imports or usage'
);
if (!listedInBlog) {
  console.log('\n🔧 TO FIX BLOG LISTING:');
  console.log('Add embed-demo to src/data/blogPosts.ts array');
}
console.log('\n🚀 TO TEST COMPONENTS:');
console.log('Visit: http://localhost:4321/blog/embed-demo');
console.log('If that 404s, the demo post needs to be added to blogPosts.ts');
|
||||
Reference in New Issue
Block a user