206 lines
7.2 KiB
TypeScript
206 lines
7.2 KiB
TypeScript
import fs from 'fs';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
import crypto from 'crypto';
|
|
|
|
// ES modules have no built-in __filename/__dirname, so derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/** CSV exports that are merged when no input files are given on the command line. */
const DEFAULT_CSV_PATHS = [
  '/Users/marcmintel/Downloads/pages.csv',
  '/Users/marcmintel/Downloads/pages(1).csv',
  '/Users/marcmintel/Downloads/pages(2).csv',
];

/**
 * CSV files to merge. Any command-line arguments are treated as CSV paths and
 * replace the defaults; without arguments the default list above is used, so
 * running the script with no arguments behaves exactly as before.
 */
const cliCsvPaths = process.argv.slice(2);
const CSV_PATHS = cliCsvPaths.length > 0 ? cliCsvPaths : DEFAULT_CSV_PATHS;

/** Destination of the generated JSON event list (relative to this script's directory). */
const JSON_OUTPUT_PATH = path.join(__dirname, '../data/umami-import-merged.json');

/** Destination of the generated SQL statements (relative to this script's directory). */
const SQL_OUTPUT_PATH = path.join(__dirname, '../data/umami-import-new.sql');

/** Value written to the `website_id` column/field of every generated row. */
const WEBSITE_ID = '59a7db94-0100-4c7e-98ef-99f45b17f9c3';

/** Value written to the `hostname` column/field of every generated event. */
const HOSTNAME = 'klz-cables.com';
|
|
|
|
/**
 * Splits one CSV record into its fields.
 *
 * Commas inside double-quoted sections do not separate fields, and a doubled
 * quote (`""`) inside a quoted section yields one literal `"` (RFC 4180).
 * Every field is trimmed of surrounding whitespace.
 */
function splitCsvLine(line: string): string[] {
  const fields: string[] = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted field: keep one literal quote.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      fields.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }
  fields.push(current.trim());
  return fields;
}

/**
 * Parses CSV text into one object per data row, keyed by the header names
 * from the first line.
 *
 * - Accepts LF, CRLF and CR line endings; blank lines are skipped.
 * - The header row and the data rows use the same quote-aware splitting.
 * - A row with fewer fields than headers leaves the missing keys `undefined`.
 * - Quoted fields spanning several lines are not supported (parsing is line based).
 *
 * @param content Raw CSV text.
 * @returns The parsed rows; an empty array when there is no header or no data.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers read arbitrary column names
function parseCSV(content: string): Record<string, any>[] {
  const lines = content.split(/\r\n|\n|\r/);
  const headerLine = lines[0] ?? '';
  if (!headerLine.trim()) return [];

  const headers = splitCsvLine(headerLine);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const data: Record<string, any>[] = [];

  for (const line of lines.slice(1)) {
    if (!line.trim()) continue;

    const values = splitCsvLine(line);
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const row: Record<string, any> = {};
    headers.forEach((header, index) => {
      row[header] = values[index];
    });
    data.push(row);
  }

  return data;
}
|
|
|
|
/**
 * Reduces a URL or path from the CSV export to a site-relative path.
 *
 * - Empty or whitespace-only input becomes `/`.
 * - Absolute `http://` / `https://` URLs are reduced to their pathname
 *   (query string and fragment are dropped).
 * - Everything else is treated as a path and gets a leading `/` if missing.
 *   This includes relative paths that merely start with "http" and absolute
 *   URLs that fail to parse, so the result always starts with `/`.
 *
 * @param url Raw value of the URL column.
 * @returns A path beginning with `/`.
 */
function normalizeURL(url: string): string {
  if (!url) return '/';
  const trimmed = url.trim();
  if (!trimmed) return '/';

  if (/^https?:\/\//i.test(trimmed)) {
    try {
      return new URL(trimmed).pathname || '/';
    } catch {
      // Not a parseable absolute URL: fall through and treat it as a plain path.
    }
  }

  return trimmed.startsWith('/') ? trimmed : `/${trimmed}`;
}
|
|
|
|
/**
 * Merges the CSV exports listed in `CSV_PATHS` and writes synthetic import data.
 *
 * Steps:
 * 1. Read every existing CSV and aggregate rows per normalized URL. When a URL
 *    appears in several files, the maximum `Views` / `Visitors` value is kept
 *    and the first non-empty `Title` wins.
 * 2. For each URL with at least one view, create one session per visitor
 *    (at least one) at a weighted-random time inside the target period.
 * 3. Spread the URL's views round-robin over those sessions, placing each
 *    event within 30 minutes after its session start.
 * 4. Write all events as JSON to `JSON_OUTPUT_PATH` and all statements
 *    (cleanup DELETEs, session INSERTs, event INSERTs) to `SQL_OUTPUT_PATH`.
 *
 * All date arithmetic is done in UTC so the result does not depend on the
 * machine's time zone, and every generated timestamp stays inside the window
 * that the cleanup statements delete, which keeps re-runs from piling up rows.
 *
 * @returns A promise that resolves once both output files have been written.
 * @throws Propagates file-system errors from reading inputs or writing outputs.
 */
async function mergeData(): Promise<void> {
  const DAY_MS = 24 * 60 * 60 * 1000;

  /** Escapes a value for use inside a single-quoted SQL string literal. */
  const escapeSqlLiteral = (value: string): string => value.replace(/'/g, "''");

  /** Formats a date as `YYYY-MM-DD HH:MM:SS` in UTC. */
  const toSqlTimestamp = (date: Date): string =>
    date.toISOString().replace('T', ' ').split('.')[0];

  console.log('Reading CSVs...');
  const aggregatedData: Record<string, { views: number; visitors: number; title: string }> = {};

  for (const csvPath of CSV_PATHS) {
    if (!fs.existsSync(csvPath)) {
      console.warn(`File not found: ${csvPath}`);
      continue;
    }
    const csvContent = fs.readFileSync(csvPath, 'utf-8');
    const csvData = parseCSV(csvContent);

    for (const row of csvData) {
      const url = normalizeURL(row.URL);
      // Explicit radix; anything unparsable counts as 0.
      const views = parseInt(row.Views, 10) || 0;
      const visitors = parseInt(row.Visitors, 10) || 0;
      const title: string = row.Title || '';

      const existing = aggregatedData[url];
      if (!existing) {
        aggregatedData[url] = { views, visitors, title };
      } else {
        // The same URL in several exports: keep the largest counts seen.
        existing.views = Math.max(existing.views, views);
        existing.visitors = Math.max(existing.visitors, visitors);
        if (!existing.title && title) {
          existing.title = title;
        }
      }
    }
  }

  const jsonEvents: Array<Record<string, unknown>> = [];
  const sqlStatements: string[] = [];

  // Spread data across the whole period since early 2025 launch
  const START_DATE = new Date('2025-01-01T08:00:00Z');
  const END_DATE = new Date('2026-02-13T20:00:00Z');
  const startTs = START_DATE.getTime();
  const endTs = END_DATE.getTime();
  const totalDays = Math.ceil((endTs - startTs) / DAY_MS);

  // Cleanup for the target period
  // NOTE(review): the session DELETE is not restricted by hostname, so it removes
  // every session of this website inside the window, not only imported ones.
  sqlStatements.push(
    [
      '-- Cleanup previous artificial imports (Full Year 2025 and 2026 until now)',
      `DELETE FROM website_event WHERE website_id = '${WEBSITE_ID}' AND created_at >= '2025-01-01 00:00:00' AND created_at <= '2026-02-13 23:59:59' AND hostname = '${HOSTNAME}';`,
      `DELETE FROM session WHERE website_id = '${WEBSITE_ID}' AND created_at >= '2025-01-01 00:00:00' AND created_at <= '2026-02-13 23:59:59';`,
      '',
    ].join('\n'),
  );

  /**
   * Picks a random timestamp between START_DATE and END_DATE by rejection
   * sampling: a random day is drawn and accepted with a probability equal to
   * the product of the growth, weekend and seasonality weights below.
   */
  function getRandomWeightedDate(): Date {
    while (true) {
      // Whole-day index, so the chosen calendar day never lies past the last day.
      const dayIndex = Math.floor(Math.random() * totalDays);
      const date = new Date(startTs + dayIndex * DAY_MS);

      // 1. Growth Factor (0.2 at start to 1.0 at end)
      const growthWeight = 0.2 + (dayIndex / totalDays) * 0.8;

      // 2. Weekend Factor (weekend days get 30% of a weekday's weight)
      const dayOfWeek = date.getUTCDay();
      const weekendWeight = dayOfWeek === 0 || dayOfWeek === 6 ? 0.3 : 1.0;

      // 3. Seasonality (simple sine wave over the month number)
      const month = date.getUTCMonth();
      const seasonWeight = 0.8 + Math.sin((month / 12) * Math.PI * 2) * 0.2;

      const combinedWeight = growthWeight * weekendWeight * seasonWeight;
      if (Math.random() >= combinedWeight) continue;

      // Random time of day between 08:00:00 and 19:59:59 UTC.
      date.setUTCHours(
        Math.floor(Math.random() * 12) + 8,
        Math.floor(Math.random() * 60),
        Math.floor(Math.random() * 60),
        0,
      );

      // Guard the period boundaries so generated rows stay inside the cleanup window.
      const ts = date.getTime();
      if (ts < startTs || ts > endTs) continue;

      return date;
    }
  }

  const urls = Object.keys(aggregatedData);
  console.log(`Processing ${urls.length} aggregated URLs...`);

  for (const url of urls) {
    const { views, visitors, title } = aggregatedData[url];
    if (views === 0) continue;

    // One session per visitor; a URL with views but no visitor count gets one session.
    const sessionData: Array<{ sessionId: string; visitId: string; date: Date }> = [];
    for (let v = 0; v < (visitors || 1); v++) {
      const sessionId = crypto.randomUUID();
      const visitId = crypto.randomUUID();
      const sessionDate = getRandomWeightedDate();

      sessionData.push({ sessionId, visitId, date: sessionDate });

      sqlStatements.push(
        [
          'INSERT INTO session (session_id, website_id, browser, os, device, screen, language, country, created_at)',
          `VALUES ('${sessionId}', '${WEBSITE_ID}', 'Chrome', 'Windows', 'desktop', '1920x1080', 'en', 'DE', '${toSqlTimestamp(sessionDate)}')`,
          'ON CONFLICT (session_id) DO NOTHING;',
        ].join('\n'),
      );
    }

    // Distribute views round-robin across these sessions
    for (let i = 0; i < views; i++) {
      const session = sessionData[i % sessionData.length];
      const eventId = crypto.randomUUID();

      // Event date should be close to session date (within 30 minutes after it)
      const eventDate = new Date(session.date.getTime() + Math.random() * 1000 * 60 * 30);
      const timestamp = eventDate.toISOString();

      // JSON Format
      jsonEvents.push({
        website_id: WEBSITE_ID,
        hostname: HOSTNAME,
        path: url,
        referrer: '',
        event_name: null,
        pageview: true,
        session: true,
        duration: Math.floor(Math.random() * 120) + 10,
        created_at: timestamp,
      });

      // SQL Format: url and title come from the CSV files, so both are escaped.
      sqlStatements.push(
        [
          'INSERT INTO website_event (event_id, website_id, session_id, created_at, url_path, url_query, referrer_path, referrer_query, referrer_domain, page_title, event_type, event_name, visit_id, hostname)',
          `VALUES ('${eventId}', '${WEBSITE_ID}', '${session.sessionId}', '${toSqlTimestamp(eventDate)}', '${escapeSqlLiteral(url)}', '', '', '', '', '${escapeSqlLiteral(title)}', 1, NULL, '${session.visitId}', '${HOSTNAME}');`,
        ].join('\n'),
      );
    }
  }

  // Make sure the output directories exist before writing.
  fs.mkdirSync(path.dirname(JSON_OUTPUT_PATH), { recursive: true });
  fs.mkdirSync(path.dirname(SQL_OUTPUT_PATH), { recursive: true });

  console.log(`Writing ${jsonEvents.length} events to ${JSON_OUTPUT_PATH}...`);
  fs.writeFileSync(JSON_OUTPUT_PATH, JSON.stringify(jsonEvents, null, 2));

  console.log(`Writing SQL statements to ${SQL_OUTPUT_PATH}...`);
  fs.writeFileSync(SQL_OUTPUT_PATH, sqlStatements.join('\n'));

  console.log('✅ Refined Restoration Script complete!');
}
|
|
|
|
// Script entry point: run the merge and report failures with a non-zero exit
// status so shells and CI pipelines can detect that the import data was not written.
mergeData().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|