119 lines
4.7 KiB
JavaScript
119 lines
4.7 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const { glob } = require('glob');
|
|
|
|
/**
 * Clean every MDX file matched by `data/blog/**\/*.mdx`: remove unnecessary
 * HTML wrappers and data attributes, turn simple HTML tags into Markdown, and
 * add `excerpt` / `category` frontmatter fields when they are missing.
 *
 * Files are rewritten in place, and only when the cleaned text differs from
 * what was read. An error while processing one file is logged and the loop
 * continues with the next file.
 *
 * @returns {Promise<void>} Resolves after every matched file was processed;
 *   rejects if the `glob` call itself rejects.
 */
async function cleanMDXFiles() {
  const mdxFiles = await glob('data/blog/**/*.mdx');

  console.log(`Found ${mdxFiles.length} MDX files to clean...`);

  for (const filePath of mdxFiles) {
    try {
      const originalContent = fs.readFileSync(filePath, 'utf-8');
      const content = cleanMDXContent(originalContent);

      // Only write if content changed
      if (content !== originalContent) {
        fs.writeFileSync(filePath, content, 'utf-8');
        console.log(`✓ Cleaned: ${filePath}`);
      } else {
        console.log(`- Skipped (no changes): ${filePath}`);
      }
    } catch (error) {
      console.error(`✗ Error processing ${filePath}:`, error.message);
    }
  }

  console.log('\nDone!');
}

/**
 * Run the whole clean-up pipeline on the text of a single MDX file.
 *
 * Order matters: every step that recognises markup by its `class="…"`
 * attribute (link previews, wrapper divs) has to run before class attributes
 * are stripped, and link previews have to be converted before generic `<a>`
 * tags are rewritten to Markdown links.
 *
 * @param {string} source - Raw file content.
 * @returns {string} Cleaned file content.
 */
function cleanMDXContent(source) {
  let content = stripDataAttributes(source);
  content = convertLinkPreviews(content);
  content = removeWrapperMarkup(content);
  content = convertHtmlToMarkdown(content);

  // Remove empty lines (more than 2 consecutive)
  content = content.replace(/\n{3,}/g, '\n\n');

  return ensureFrontmatterDefaults(content);
}

/**
 * Remove `data-start`, `data-end`, `dir="auto"` and `data-message-*`
 * attributes together with the whitespace in front of them.
 *
 * @param {string} source
 * @returns {string}
 */
function stripDataAttributes(source) {
  return source
    .replace(/\s*data-start="[^"]*"/g, '')
    .replace(/\s*data-end="[^"]*"/g, '')
    .replace(/\s*dir="auto"/g, '')
    .replace(/\s*data-message-[^=]*="[^"]*"/g, '');
}

/**
 * Convert Visual Link Preview HTML (`vlp-link-container` divs) into
 * `<VisualLinkPreview />` components.
 *
 * @param {string} source
 * @returns {string}
 */
function convertLinkPreviews(source) {
  // The block texts are captured with [^<]* and may therefore contain a
  // double quote, which would terminate the attribute value early.
  const toAttributeValue = (value) => value.trim().replace(/"/g, '&quot;');

  return source.replace(
    /<div class="vlp-link-container[^>]*>[\s\S]*?<a href="([^"]*)"[^>]*title="([^"]*)"[^>]*>[\s\S]*?<img[^>]*src="([^"]*)"[^>]*>[\s\S]*?<div class="vlp-block-0[^>]*>([^<]*)<\/div>[\s\S]*?<div class="vlp-block-1[^>]*>([^<]*)<\/div>[\s\S]*?<\/div>/g,
    (match, url, title, image, blockTitle, summary) => {
      // Prefer the visible block title; fall back to the anchor's title attribute.
      const heading = toAttributeValue(blockTitle) || title;
      return `\n<VisualLinkPreview \n url="${url}"\n title="${heading}"\n summary="${toAttributeValue(summary)}"\n image="${image}"\n/>\n`;
    },
  );
}

/**
 * Remove wrapper divs (gizmo, flex containers, etc.), then all remaining
 * `class` attributes, then runs of closing `</div>` tags.
 *
 * @param {string} source
 * @returns {string}
 */
function removeWrapperMarkup(source) {
  return source
    // Everything from the outer wrapper up to and including the opening
    // "markdown prose" div is dropped. These two patterns depend on the
    // class attribute, so they must run before it is stripped below.
    .replace(/<div class="flex-shrink-0[^>]*>[\s\S]*?<div class="markdown prose[^>]*>/g, '')
    .replace(/<div class="flex max-w-full[^>]*>[\s\S]*?<div class="markdown prose[^>]*>/g, '')
    // Remove all class="" attributes
    .replace(/\s*class="[^"]*"/g, '')
    // Heuristic: runs of nine, then four, consecutive closing divs are removed.
    .replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, '')
    .replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, '');
}

/**
 * Rewrite `<h2>`, `<p>`, `<li>`, `<ul>`, `<strong>` and `<a href>` elements as
 * Markdown. Except for the whitespace around a `<p>` nested in an `<li>`, an
 * element is only converted when its opening and closing tag are on the same
 * line (`.` does not match newlines).
 *
 * Tag names are matched exactly (`<p>` or `<p attr…>`), so `<pre>`, `<path>`
 * or `<link>` are left alone.
 *
 * @param {string} source
 * @returns {string}
 */
function convertHtmlToMarkdown(source) {
  // Emit a list item, adding a line break only when the item is not already
  // the last thing on its line, so adjacent items never run together.
  const toListItem = (match, inner, offset, whole) => {
    const next = whole[offset + match.length];
    return next === undefined || next === '\n' ? `- ${inner}` : `- ${inner}\n`;
  };

  return source
    // Clean up h2 tags - remove attributes but keep content
    .replace(/<h2(?:\s[^>]*)?>(.*?)<\/h2>/g, '## $1')
    // List items wrapping exactly one paragraph. This has to run before the
    // generic <p> rule: that rule appends a newline, after which the item
    // can no longer be matched.
    .replace(/<li(?:\s[^>]*)?>\s*<p(?:\s[^>]*)?>((?:(?!<\/p>).)*)<\/p>\s*<\/li>/g, toListItem)
    // Clean up p tags - remove attributes but keep content
    .replace(/<p(?:\s[^>]*)?>(.*?)<\/p>/g, (match, inner) => {
      const text = inner.trim();
      // If it's just whitespace, skip it
      return text ? `${text}\n` : '';
    })
    // Remaining single-line list items
    .replace(/<li(?:\s[^>]*)?>(.*?)<\/li>/g, toListItem)
    // Clean up ul tags
    .replace(/<ul(?:\s[^>]*)?>/g, '')
    .replace(/<\/ul>/g, '')
    // Clean up strong tags
    .replace(/<strong(?:\s[^>]*)?>(.*?)<\/strong>/g, '**$1**')
    // Clean up anchor tags; any attribute after href (rel, target, …) is dropped
    .replace(/<a href="([^"]*)"[^>]*>(.*?)<\/a>/g, '[$2]($1)');
}

/**
 * Ensure the frontmatter has `excerpt` and `category` fields. Content that
 * does not start with a `---` frontmatter block is returned unchanged.
 *
 * @param {string} source
 * @returns {string}
 */
function ensureFrontmatterDefaults(source) {
  const frontmatterMatch = source.match(/^---\n([\s\S]*?)\n---/);
  if (!frontmatterMatch) {
    return source;
  }

  let frontmatter = frontmatterMatch[1];
  // The regex is anchored at the start, so the match begins at index 0.
  const body = source.slice(frontmatterMatch[0].length);

  // Keys are looked up at the start of a line, so `subcategory:` or a value
  // that merely contains "excerpt:" is not mistaken for the key.
  if (!/^excerpt\s*:/m.test(frontmatter)) {
    const excerpt = extractExcerpt(body);
    if (excerpt) {
      // JSON.stringify yields a double-quoted scalar, which keeps the
      // frontmatter parseable when the text contains ": " or quotes.
      frontmatter += `\nexcerpt: ${JSON.stringify(excerpt)}`;
    }
  }

  // Add category if missing (use default)
  if (!/^category\s*:/m.test(frontmatter)) {
    frontmatter += `\ncategory: Kabel Technologie`;
  }

  // Plain concatenation instead of String#replace with a replacement string:
  // `$$`, `$&` or `$1` inside the frontmatter must be written back verbatim.
  return `---\n${frontmatter}\n---${body}`;
}

/**
 * Build an excerpt from the first paragraph of the body: strip `#`, `*`, `[`
 * and `]`, join lines with spaces, and cut to at most 200 characters.
 *
 * @param {string} body - File content after the closing frontmatter fence.
 * @returns {string} The excerpt, or an empty string when nothing usable is found.
 */
function extractExcerpt(body) {
  const [firstParagraph] = body.trim().split(/\n{2,}/);
  return firstParagraph
    .replace(/[#*\[\]]/g, '')
    .replace(/\n/g, ' ')
    .trim()
    .substring(0, 200)
    .trimEnd();
}
|
|
|
|
// Script entry point: start the clean-up and log any rejection of the
// returned promise with console.error.
cleanMDXFiles().catch((error) => {
  console.error(error);
});
|