wip
This commit is contained in:
118
scripts/clean-mdx.js
Normal file
118
scripts/clean-mdx.js
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { glob } = require('glob');
|
||||
|
||||
/**
 * Clean MDX files by removing unnecessary HTML wrappers and data attributes.
 *
 * Collects the blog MDX files via `glob`, runs each one through
 * `cleanMdxContent`, and rewrites only the files whose content changed.
 * A failure on one file is logged and does not stop the remaining files.
 *
 * @returns {Promise<void>} Resolves once every matched file has been handled.
 */
async function cleanMDXFiles() {
  const mdxFiles = await glob('data/blog/**/*.mdx');

  console.log(`Found ${mdxFiles.length} MDX files to clean...`);

  for (const filePath of mdxFiles) {
    try {
      const originalContent = fs.readFileSync(filePath, 'utf-8');
      const content = cleanMdxContent(originalContent);

      // Only write if content changed
      if (content !== originalContent) {
        fs.writeFileSync(filePath, content, 'utf-8');
        console.log(`✓ Cleaned: ${filePath}`);
      } else {
        console.log(`- Skipped (no changes): ${filePath}`);
      }
    } catch (error) {
      console.error(`✗ Error processing ${filePath}:`, error.message);
    }
  }

  console.log('\nDone!');
}

// Escape a value so it can sit inside a double-quoted JSX/MDX attribute.
function escapeMdxAttribute(value) {
  return String(value).replace(/"/g, '&quot;');
}

// Apply every HTML-to-Markdown cleanup step to one document and return the result.
function cleanMdxContent(source) {
  let content = source;

  // The rewrites below key on class names and on the original <a>/<img>
  // markup, so they must run BEFORE attributes are stripped and before
  // anchors are turned into markdown links.

  // Remove unnecessary wrapper divs (gizmo, flex containers, etc.)
  content = content.replace(/<div class="flex-shrink-0[^>]*>[\s\S]*?<div class="markdown prose[^>]*>/g, '');
  content = content.replace(/<div class="flex max-w-full[^>]*>[\s\S]*?<div class="markdown prose[^>]*>/g, '');

  // Convert Visual Link Preview HTML to component
  content = content.replace(
    /<div class="vlp-link-container[^>]*>[\s\S]*?<a href="([^"]*)"[^>]*title="([^"]*)"[^>]*>[\s\S]*?<img[^>]*src="([^"]*)"[^>]*>[\s\S]*?<div class="vlp-block-0[^>]*>([^<]*)<\/div>[\s\S]*?<div class="vlp-block-1[^>]*>([^<]*)<\/div>[\s\S]*?<\/div>/g,
    (match, url, title, image, blockTitle, summary) => {
      // Text captured from element bodies may contain double quotes.
      const safeUrl = escapeMdxAttribute(url);
      const safeTitle = escapeMdxAttribute(blockTitle || title);
      const safeSummary = escapeMdxAttribute(summary);
      const safeImage = escapeMdxAttribute(image);
      return `\n<VisualLinkPreview \n url="${safeUrl}"\n title="${safeTitle}"\n summary="${safeSummary}"\n image="${safeImage}"\n/>\n`;
    }
  );

  // Remove all data-start and data-end attributes
  content = content.replace(/\s*data-start="[^"]*"/g, '');
  content = content.replace(/\s*data-end="[^"]*"/g, '');

  // Remove all class="" attributes
  content = content.replace(/\s*class="[^"]*"/g, '');

  // Remove all dir="auto" attributes
  content = content.replace(/\s*dir="auto"/g, '');

  // Remove all data-message-* attributes
  content = content.replace(/\s*data-message-[^=]*="[^"]*"/g, '');

  // Remove the runs of closing tags left behind by the wrapper divs
  content = content.replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, '');
  content = content.replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, '');

  // Clean up h2 tags - remove attributes but keep content
  content = content.replace(/<h2[^>]*>(.*?)<\/h2>/g, '## $1');

  // List items that wrap a single paragraph must be handled before the
  // generic <p> rule; otherwise the newline that rule appends keeps the
  // single-line <li> pattern from ever matching.
  content = content.replace(/<li\b[^>]*>\s*<p\b[^>]*>((?:(?!<\/p>).)*)<\/p>\s*<\/li>/g, '- $1');

  // Clean up p tags - remove attributes but keep content
  content = content.replace(/<p\b[^>]*>(.*?)<\/p>/g, (match, inner) => {
    const text = inner.trim();
    // If it's just whitespace, skip it
    return text ? `${text}\n` : '';
  });

  // Clean up remaining li tags
  content = content.replace(/<li\b[^>]*>(.*?)<\/li>/g, '- $1');

  // Clean up ul tags
  content = content.replace(/<ul[^>]*>/g, '');
  content = content.replace(/<\/ul>/g, '');

  // Clean up strong tags - remove data attributes
  content = content.replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**');

  // Clean up anchor tags - remove data attributes and nofollow
  content = content.replace(/<a href="([^"]*)"[^>]*rel="noopener noreferrer nofollow"[^>]*>(.*?)<\/a>/g, '[$2]($1)');
  content = content.replace(/<a href="([^"]*)"[^>]*>(.*?)<\/a>/g, '[$2]($1)');

  // Remove empty lines (more than 2 consecutive)
  content = content.replace(/\n{3,}/g, '\n\n');

  return ensureFrontmatterDefaults(content);
}

// Add `excerpt` and `category` to the frontmatter block when they are missing.
function ensureFrontmatterDefaults(content) {
  const frontmatterPattern = /^---\n([\s\S]*?)\n---/;
  const match = content.match(frontmatterPattern);
  if (!match) {
    return content;
  }

  let frontmatter = match[1];

  // Add excerpt if missing, taken from the first non-empty paragraph of the body
  if (!/^excerpt\s*:/m.test(frontmatter)) {
    const body = content.slice(match[0].length);
    const firstParagraph = body
      .split(/\n{2,}/)
      .map((paragraph) => paragraph.trim())
      .find((paragraph) => paragraph !== '');

    if (firstParagraph) {
      const excerpt = firstParagraph
        .replace(/[#*\[\]]/g, '')
        .replace(/\n/g, ' ')
        .trim()
        .substring(0, 200);

      if (excerpt) {
        // JSON string syntax is a valid YAML double-quoted scalar, so colons,
        // quotes and leading special characters cannot break the frontmatter.
        frontmatter += `\nexcerpt: ${JSON.stringify(excerpt)}`;
      }
    }
  }

  // Add category if missing (use default)
  if (!/^category\s*:/m.test(frontmatter)) {
    frontmatter += `\ncategory: Kabel Technologie`;
  }

  // A replacer function keeps `$&`, `$1`, `$$` ... inside the frontmatter literal.
  return content.replace(frontmatterPattern, () => `---\n${frontmatter}\n---`);
}
|
||||
|
||||
// Run the cleanup. An unexpected failure is reported and reflected in the
// exit code so callers (CI, npm scripts) can detect it.
cleanMDXFiles().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
136
scripts/fetch-posts.js
Normal file
136
scripts/fetch-posts.js
Normal file
@@ -0,0 +1,136 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
// Posts endpoint queried by fetchPosts. The query string asks for 100 posts
// per page and includes `_embed`; generateMdx reads the featured image from
// each post's `_embedded` data.
const API_URL = 'https://klz-cables.com/wp-json/wp/v2/posts?per_page=100&_embed';
|
||||
|
||||
/**
 * Fetch every post exposed by the WordPress REST endpoint in `API_URL`.
 *
 * The endpoint is paginated: the first response reports the number of pages
 * in the `X-WP-TotalPages` header, and the remaining pages are then requested
 * one by one. When the header is missing, only the first page is read.
 *
 * @returns {Promise<object[]>} All posts, in the order the API returned them.
 * @throws {Error} When a request fails or a page is not a JSON array.
 */
async function fetchPosts() {
  console.log('Fetching posts...');

  const posts = [];
  let totalPages = 1;

  for (let page = 1; page <= totalPages; page += 1) {
    const pageUrl = new URL(API_URL);
    pageUrl.searchParams.set('page', String(page));

    const response = await fetch(pageUrl);
    if (!response.ok) {
      // statusText can be empty (e.g. over HTTP/2), so include the numeric status.
      throw new Error(`Failed to fetch posts: ${response.status} ${response.statusText}`.trim());
    }

    const reportedPages = Number.parseInt(response.headers.get('X-WP-TotalPages') ?? '', 10);
    if (Number.isInteger(reportedPages) && reportedPages > totalPages) {
      totalPages = reportedPages;
    }

    const batch = await response.json();
    if (!Array.isArray(batch)) {
      throw new Error(`Failed to fetch posts: unexpected response shape on page ${page}`);
    }
    posts.push(...batch);
  }

  console.log(`Fetched ${posts.length} posts.`);
  return posts;
}
|
||||
|
||||
/**
 * Convert rendered WordPress post HTML into markup suitable for an MDX body.
 *
 * Steps: normalize typographic quotes and ampersands, drop page-builder
 * shortcodes (turning `split_line_heading` into `<h2>`), replace Visual Link
 * Preview cards with a `<VisualLinkPreview … />` component, strip `data-*`
 * attributes, unwrap all `<div>` elements and remove empty paragraphs.
 *
 * @param {string} content - Rendered post HTML.
 * @returns {string} The cleaned markup, trimmed.
 */
function cleanContent(content) {
  // Decode HTML entities first to make regex easier. Both the entity form and
  // the literal character are normalized; ampersands are decoded last so that
  // an escaped entity such as `&amp;#8221;` is not decoded twice.
  let cleaned = content
    .replace(/&#8221;|&#8220;|&#8243;|[”“″]/g, '"')
    .replace(/&#8217;|’/g, "'")
    .replace(/&#0?38;|&amp;/g, '&');

  // Remove vc_row and vc_column wrappers
  cleaned = cleaned.replace(/\[\/?vc_row.*?\]/g, '');
  cleaned = cleaned.replace(/\[\/?vc_column.*?\]/g, '');

  // Remove vc_column_text wrapper but keep content
  cleaned = cleaned.replace(/\[vc_column_text.*?\]/g, '');
  cleaned = cleaned.replace(/\[\/vc_column_text\]/g, '');

  // Convert split_line_heading to h2
  cleaned = cleaned.replace(/\[split_line_heading[^\]]*text_content="([^"]+)"[^\]]*\](?:\[\/split_line_heading\])?/g, '<h2>$1</h2>');

  // Remove other shortcodes
  cleaned = cleaned.replace(/\[image_with_animation.*?\]/g, '');
  cleaned = cleaned.replace(/\[divider.*?\]/g, '');
  cleaned = cleaned.replace(/\[nectar_global_section.*?\]/g, '');

  // Use Cheerio for HTML manipulation
  const $ = cheerio.load(cleaned, { xmlMode: false, decodeEntities: false });

  // The placeholder is handed to Cheerio as an HTML string and ends up inside
  // double-quoted JSX attributes, so quotes and angle brackets must be escaped.
  const escapeAttribute = (value) =>
    String(value ?? '')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

  // Convert VisualLinkPreview
  $('.vlp-link-container').each((i, el) => {
    const $el = $(el);
    const url = $el.find('a.vlp-link').attr('href');
    const title = $el.find('.vlp-link-title').text().trim() || $el.find('a.vlp-link').attr('title');
    const image = $el.find('.vlp-link-image img').attr('src');
    const summary = $el.find('.vlp-link-summary').text().trim();

    if (url && title) {
      // We use a placeholder to avoid Cheerio messing up the React component syntax
      const component = `__VISUAL_LINK_PREVIEW_START__ url="${escapeAttribute(url)}" title="${escapeAttribute(title)}" image="${escapeAttribute(image)}" summary="${escapeAttribute(summary)}" __VISUAL_LINK_PREVIEW_END__`;
      $el.replaceWith(component);
    }
  });

  // Remove data attributes (iterate over a snapshot of the names, since
  // removeAttr mutates the attribute map being inspected)
  $('*').each((i, el) => {
    for (const name of Object.keys(el.attribs ?? {})) {
      if (name.startsWith('data-')) {
        $(el).removeAttr(name);
      }
    }
  });

  // Unwrap divs (remove div tags but keep content). Process in reverse
  // document order so nested divs are unwrapped before their ancestors:
  // replacing an outer div first would re-parse its HTML and leave the
  // already-selected inner nodes detached, so the inner divs would survive.
  $('div')
    .toArray()
    .reverse()
    .forEach((el) => {
      const $div = $(el);
      $div.replaceWith($div.html() ?? '');
    });

  // Remove empty paragraphs
  $('p').each((i, el) => {
    const $p = $(el);
    if ($p.text().trim() === '' && $p.children().length === 0) {
      $p.remove();
    }
  });

  let output = $('body').html() || '';

  // Restore VisualLinkPreview
  output = output.replace(/__VISUAL_LINK_PREVIEW_START__/g, '<VisualLinkPreview').replace(/__VISUAL_LINK_PREVIEW_END__/g, '/>');

  return output.trim();
}
|
||||
|
||||
/**
 * Build the MDX document (YAML frontmatter plus cleaned body) for one post.
 *
 * @param {object} post - Post object as returned by the REST API. Reads
 *   `title.rendered`, `date`, `lang`, `content.rendered` and, when present,
 *   `_embedded['wp:featuredmedia'][0].source_url`.
 * @returns {string} The complete MDX file contents.
 */
function generateMdx(post) {
  // Normalize typographic quotes and ampersands (entity or literal form);
  // ampersands go last so an escaped entity is not decoded twice.
  const title = post.title.rendered
    .replace(/&#8221;|&#8220;|[”“]/g, '"')
    .replace(/&#8217;|’/g, "'")
    .replace(/&#0?38;|&amp;/g, '&');
  const { date } = post;
  const lang = post.lang || 'en'; // Default to en if not specified

  // Falls back to an empty value instead of printing "undefined" when the
  // embedded media entry has no source_url.
  const featuredImage = post._embedded?.['wp:featuredmedia']?.[0]?.source_url ?? '';

  const content = cleanContent(post.content.rendered);

  // JSON.stringify yields a valid YAML double-quoted scalar, so quotes and
  // backslashes inside the title cannot break the frontmatter.
  return `---
title: ${JSON.stringify(title)}
date: '${date}'
featuredImage: ${featuredImage}
locale: ${lang}
---

${content}
`;
}
|
||||
|
||||
/**
 * Fetch all posts and write each one to `data/blog/<lang>/<slug>.mdx`.
 *
 * Posts without a `lang` are stored under `en`. Any error is logged and
 * turned into a non-zero exit code instead of being rethrown, so the returned
 * promise never rejects.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const posts = await fetchPosts();

    for (const post of posts) {
      const lang = post.lang || 'en';
      const mdxContent = generateMdx(post);

      // `recursive: true` makes mkdirSync a no-op for existing directories,
      // so no separate existence check is needed.
      const dir = path.join('data/blog', lang);
      fs.mkdirSync(dir, { recursive: true });

      const filePath = path.join(dir, `${post.slug}.mdx`);
      fs.writeFileSync(filePath, mdxContent);
      console.log(`Saved ${filePath}`);
    }

    console.log('Done.');
  } catch (error) {
    console.error('Error:', error);
    // Let callers (CI, npm scripts) see that the import failed.
    process.exitCode = 1;
  }
}
|
||||
|
||||
// Script entry point. main() catches and logs its own errors, so the returned
// promise is intentionally not awaited here.
main();
|
||||
Reference in New Issue
Block a user