import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import crypto from 'crypto';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/** CSV page exports to merge; paths that do not exist are skipped with a warning. */
const CSV_PATHS = [
  '/Users/marcmintel/Downloads/pages.csv',
  '/Users/marcmintel/Downloads/pages(1).csv',
  '/Users/marcmintel/Downloads/pages(2).csv',
];
const JSON_OUTPUT_PATH = path.join(__dirname, '../data/umami-import-merged.json');
const SQL_OUTPUT_PATH = path.join(__dirname, '../data/umami-import-new.sql');
const WEBSITE_ID = '59a7db94-0100-4c7e-98ef-99f45b17f9c3';
const HOSTNAME = 'klz-cables.com';

// Spread data across the whole period since early 2025 launch.
const START_DATE = new Date('2025-01-01T08:00:00Z');
const END_DATE = new Date('2026-02-13T20:00:00Z');
const DAY_MS = 24 * 60 * 60 * 1000;

/** One parsed CSV record, keyed by header name; columns missing from a short row are `undefined`. */
type CsvRow = Record<string, string | undefined>;

/** Per-URL figures merged from all CSV files. */
interface PageStats {
  views: number;
  visitors: number;
  title: string;
}

/**
 * Splits a single CSV line into trimmed field values.
 *
 * Commas inside double-quoted sections do not split the field, the enclosing
 * quotes are removed, and a doubled quote (`""`) inside a quoted section is
 * emitted as one literal `"`.
 */
function parseCsvLine(line: string): string[] {
  const fields: string[] = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: keep one quote, skip its twin.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      fields.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }
  fields.push(current.trim());
  return fields;
}

/**
 * Parses CSV text into one object per data line, keyed by the header line.
 *
 * Blank lines are skipped and both `\n` and `\r\n` line endings are accepted.
 * Parsing is line based, so a quoted field containing a line break is not
 * supported.
 *
 * @param content Raw CSV text; the first line is treated as the header.
 * @returns The data rows; an empty array when there is no data line.
 */
function parseCSV(content: string): CsvRow[] {
  const lines = content.split(/\r?\n/);
  // trim() inside parseCsvLine also strips a leading byte-order mark.
  const headers = parseCsvLine(lines[0] ?? '');
  const rows: CsvRow[] = [];

  for (const line of lines.slice(1)) {
    if (!line.trim()) continue;
    const fields = parseCsvLine(line);
    const row: CsvRow = {};
    headers.forEach((header, index) => {
      row[header] = fields[index];
    });
    rows.push(row);
  }
  return rows;
}

/**
 * Reduces a URL to a path that starts with `/`.
 *
 * Absolute `http(s)://` URLs are reduced to their pathname. Anything else is
 * kept as is, with a leading slash added when missing. Empty input yields `/`.
 */
function normalizeURL(url: string | undefined): string {
  if (!url) return '/';
  if (/^https?:\/\//i.test(url)) {
    try {
      return new URL(url).pathname;
    } catch {
      // Not a parseable absolute URL: fall through and treat it as a path.
    }
  }
  return url.startsWith('/') ? url : `/${url}`;
}

/** Parses a non-negative integer count; missing, malformed or negative input yields 0. */
function parseCount(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}

/** Renders a value as a single-quoted SQL string literal, doubling embedded single quotes. */
function sqlString(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/** Formats a date as `YYYY-MM-DD HH:MM:SS` in UTC. */
function toSqlTimestamp(date: Date): string {
  return date.toISOString().slice(0, 19).replace('T', ' ');
}

/**
 * Reads every existing CSV file and merges its rows per normalized URL.
 *
 * When a URL occurs more than once, the largest views and visitors values win
 * and the first non-empty title is kept.
 */
function aggregatePageStats(csvPaths: readonly string[]): Map<string, PageStats> {
  const pages = new Map<string, PageStats>();

  for (const csvPath of csvPaths) {
    if (!fs.existsSync(csvPath)) {
      console.warn(`File not found: ${csvPath}`);
      continue;
    }

    for (const row of parseCSV(fs.readFileSync(csvPath, 'utf-8'))) {
      const url = normalizeURL(row.URL);
      const views = parseCount(row.Views);
      const visitors = parseCount(row.Visitors);
      const title = row.Title ?? '';

      const existing = pages.get(url);
      if (!existing) {
        pages.set(url, { views, visitors, title });
        continue;
      }
      existing.views = Math.max(existing.views, views);
      existing.visitors = Math.max(existing.visitors, visitors);
      if (!existing.title && title) {
        existing.title = title;
      }
    }
  }
  return pages;
}

/**
 * Picks a random timestamp between `startTs` and `endTs` by rejection sampling.
 *
 * A candidate is accepted with a probability that is the product of a growth
 * factor (0.2 at the start rising to 1.0 at the end), a weekend factor (0.3 on
 * Saturday and Sunday) and a sine-based monthly factor. All calendar maths is
 * done in UTC so the result does not depend on the machine's time zone. The
 * accepted date keeps its UTC calendar day and gets a random hour in 08-19 UTC.
 *
 * @throws RangeError when `endTs` is not later than `startTs`.
 */
function getRandomWeightedDate(startTs: number, endTs: number): Date {
  const spanMs = endTs - startTs;
  if (!(spanMs > 0)) {
    // Without this guard the weight becomes NaN and the loop never terminates.
    throw new RangeError('endTs must be later than startTs');
  }

  while (true) {
    const offsetMs = Math.random() * spanMs;
    const date = new Date(startTs + offsetMs);

    // 1. Growth Factor (0.2 at start to 1.0 at end)
    const growthWeight = 0.2 + (offsetMs / spanMs) * 0.8;

    // 2. Weekend Factor (30% traffic on weekends)
    const dayOfWeek = date.getUTCDay();
    const weekendWeight = dayOfWeek === 0 || dayOfWeek === 6 ? 0.3 : 1.0;

    // 3. Seasonality (simple sine wave)
    const seasonWeight = 0.8 + Math.sin((date.getUTCMonth() / 12) * Math.PI * 2) * 0.2;

    if (Math.random() < growthWeight * weekendWeight * seasonWeight) {
      // Business hours mostly; the calendar day stays inside the period.
      date.setUTCHours(Math.floor(Math.random() * 12) + 8);
      date.setUTCMinutes(Math.floor(Math.random() * 60));
      return date;
    }
  }
}

/**
 * Merges the CSV page exports and writes synthetic page-view data for import.
 *
 * Two files are produced: a JSON array of events, and a SQL script that runs
 * in one transaction, first deleting `website_event` and `session` rows of the
 * target period and then inserting the generated sessions and events. Nothing
 * is written when the input yields no page views, so a cleanup-only script
 * that would delete data without replacing it is never produced.
 */
async function mergeData(): Promise<void> {
  console.log('Reading CSVs...');
  const pages = aggregatePageStats(CSV_PATHS);

  const startTs = START_DATE.getTime();
  const endTs = END_DATE.getTime();
  // The cleanup window covers the full first and last UTC day of the period.
  const cleanupFrom = `${START_DATE.toISOString().slice(0, 10)} 00:00:00`;
  const cleanupTo = `${END_DATE.toISOString().slice(0, 10)} 23:59:59`;

  const jsonEvents: Array<Record<string, unknown>> = [];
  const sqlStatements: string[] = [
    'BEGIN;',
    '-- Cleanup previous artificial imports (Full Year 2025 and 2026 until now)',
    `DELETE FROM website_event WHERE website_id = ${sqlString(WEBSITE_ID)} AND created_at >= '${cleanupFrom}' AND created_at <= '${cleanupTo}' AND hostname = ${sqlString(HOSTNAME)};`,
    `DELETE FROM session WHERE website_id = ${sqlString(WEBSITE_ID)} AND created_at >= '${cleanupFrom}' AND created_at <= '${cleanupTo}';`,
    '',
  ];

  console.log(`Processing ${pages.size} aggregated URLs...`);

  for (const [url, { views, visitors, title }] of pages) {
    if (views === 0) continue;

    // One session per visitor, and at least one so every view has a session.
    const sessions: Array<{ sessionId: string; visitId: string; date: Date }> = [];
    const sessionCount = Math.max(visitors, 1);
    for (let v = 0; v < sessionCount; v++) {
      const sessionId = crypto.randomUUID();
      const visitId = crypto.randomUUID();
      const date = getRandomWeightedDate(startTs, endTs);
      sessions.push({ sessionId, visitId, date });

      sqlStatements.push(
        `INSERT INTO session (session_id, website_id, browser, os, device, screen, language, country, created_at) VALUES ('${sessionId}', ${sqlString(WEBSITE_ID)}, 'Chrome', 'Windows', 'desktop', '1920x1080', 'en', 'DE', '${toSqlTimestamp(date)}') ON CONFLICT (session_id) DO NOTHING;`,
      );
    }

    // Distribute views round-robin across these sessions.
    for (let i = 0; i < views; i++) {
      const session = sessions[i % sessions.length];
      const eventId = crypto.randomUUID();
      // Event date should be close to session date (within 30 mins).
      const eventDate = new Date(session.date.getTime() + Math.random() * 1000 * 60 * 30);

      // JSON Format
      jsonEvents.push({
        website_id: WEBSITE_ID,
        hostname: HOSTNAME,
        path: url,
        referrer: '',
        event_name: null,
        pageview: true,
        session: true,
        duration: Math.floor(Math.random() * 120) + 10,
        created_at: eventDate.toISOString(),
      });

      // SQL Format
      sqlStatements.push(
        `INSERT INTO website_event (event_id, website_id, session_id, created_at, url_path, url_query, referrer_path, referrer_query, referrer_domain, page_title, event_type, event_name, visit_id, hostname) VALUES ('${eventId}', ${sqlString(WEBSITE_ID)}, '${session.sessionId}', '${toSqlTimestamp(eventDate)}', ${sqlString(url)}, '', '', '', '', ${sqlString(title)}, 1, NULL, '${session.visitId}', ${sqlString(HOSTNAME)});`,
      );
    }
  }

  if (jsonEvents.length === 0) {
    console.warn('No page views found in the CSV input; nothing was written.');
    return;
  }
  sqlStatements.push('COMMIT;');

  for (const outputPath of [JSON_OUTPUT_PATH, SQL_OUTPUT_PATH]) {
    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  }

  console.log(`Writing ${jsonEvents.length} events to ${JSON_OUTPUT_PATH}...`);
  fs.writeFileSync(JSON_OUTPUT_PATH, JSON.stringify(jsonEvents, null, 2));

  console.log(`Writing SQL statements to ${SQL_OUTPUT_PATH}...`);
  fs.writeFileSync(SQL_OUTPUT_PATH, `${sqlStatements.join('\n')}\n`);

  console.log('✅ Refined Restoration Script complete!');
}

mergeData().catch((error: unknown) => {
  console.error(error);
  // Report the failure through the exit status as well as the log.
  process.exitCode = 1;
});