#!/usr/bin/env node const fs = require('fs'); const path = require('path'); const { glob } = require('glob'); /** * Clean MDX files by removing unnecessary HTML wrappers and data attributes */ async function cleanMDXFiles() { const mdxFiles = await glob('data/blog/**/*.mdx'); console.log(`Found ${mdxFiles.length} MDX files to clean...`); for (const filePath of mdxFiles) { try { let content = fs.readFileSync(filePath, 'utf-8'); const originalContent = content; // Remove all data-start and data-end attributes content = content.replace(/\s*data-start="[^"]*"/g, ''); content = content.replace(/\s*data-end="[^"]*"/g, ''); // Remove all class="" attributes content = content.replace(/\s*class="[^"]*"/g, ''); // Remove all dir="auto" attributes content = content.replace(/\s*dir="auto"/g, ''); // Remove all data-message-* attributes content = content.replace(/\s*data-message-[^=]*="[^"]*"/g, ''); // Remove unnecessary wrapper divs (gizmo, flex containers, etc.) content = content.replace(/
]*>/g, ''); content = content.replace(/
]*>/g, ''); content = content.replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, ''); content = content.replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, ''); // Clean up h2 tags - remove attributes but keep content content = content.replace(/]*>(.*?)<\/h2>/g, '## $1'); // Clean up p tags - remove attributes but keep content content = content.replace(/]*>(.*?)<\/p>/g, (match, p1) => { // If it's just whitespace, skip it if (!p1.trim()) return ''; return p1.trim() + '\n'; }); // Clean up li tags content = content.replace(/]*>\s*]*>(.*?)<\/p>\s*<\/li>/g, '- $1'); content = content.replace(/]*>(.*?)<\/li>/g, '- $1'); // Clean up ul tags content = content.replace(/]*>/g, ''); content = content.replace(/<\/ul>/g, ''); // Clean up strong tags - remove data attributes content = content.replace(/]*>(.*?)<\/strong>/g, '**$1**'); // Clean up anchor tags - remove data attributes and nofollow content = content.replace(/]*rel="noopener noreferrer nofollow"[^>]*>(.*?)<\/a>/g, '[$2]($1)'); content = content.replace(/]*>(.*?)<\/a>/g, '[$2]($1)'); // Convert Visual Link Preview HTML to component content = content.replace( /