wip

2026-01-17 00:32:38 +01:00
parent 3dd4f7f121
commit 4c7f0e36bf
69 changed files with 8234 additions and 7556 deletions
--- a/scripts/clean-mdx.js
+++ b/scripts/clean-mdx.js
@@ -0,0 +1,118 @@
+#!/usr/bin/env node
+
+const fs = require('fs');
+const path = require('path');
+const { glob } = require('glob');
+
+/**
+ * Convert leftover exported HTML in one MDX document into Markdown/MDX.
+ *
+ * Pass order matters: the Visual Link Preview and wrapper-div patterns key on
+ * `class="…"` (and on raw `<a>` tags), so they must run before class
+ * attributes are stripped and before anchors are rewritten. List items are
+ * converted before paragraphs so `<li><p>…</p></li>` is still intact when the
+ * list rule sees it.
+ *
+ * @param {string} content - Raw MDX file content.
+ * @returns {string} Content with the HTML wrappers and attributes removed.
+ */
+function cleanMdxContent(content) {
+  let cleaned = content;
+
+  // Remove all data-start and data-end attributes
+  cleaned = cleaned.replace(/\s*data-start="[^"]*"/g, '');
+  cleaned = cleaned.replace(/\s*data-end="[^"]*"/g, '');
+
+  // Remove all dir="auto" attributes
+  cleaned = cleaned.replace(/\s*dir="auto"/g, '');
+
+  // Remove all data-message-* attributes
+  cleaned = cleaned.replace(/\s*data-message-[^=]*="[^"]*"/g, '');
+
+  // Convert Visual Link Preview HTML to component (needs class attributes,
+  // <a> and closing </div> tags untouched, so it runs first)
+  cleaned = cleaned.replace(
+    /<div class="vlp-link-container[^>]*>[\s\S]*?<a href="([^"]*)"[^>]*title="([^"]*)"[^>]*>[\s\S]*?<img[^>]*src="([^"]*)"[^>]*>[\s\S]*?<div class="vlp-block-0[^>]*>([^<]*)<\/div>[\s\S]*?<div class="vlp-block-1[^>]*>([^<]*)<\/div>[\s\S]*?<\/div>/g,
+    (match, url, title, image, blockTitle, summary) => {
+      return `\n<VisualLinkPreview \n  url="${url}"\n  title="${blockTitle || title}"\n  summary="${summary}"\n  image="${image}"\n/>\n`;
+    }
+  );
+
+  // Remove unnecessary wrapper divs (gizmo, flex containers, etc.)
+  cleaned = cleaned.replace(/<div class="flex-shrink-0[^>]*>[\s\S]*?<div class="markdown prose[^>]*>/g, '');
+  cleaned = cleaned.replace(/<div class="flex max-w-full[^>]*>[\s\S]*?<div class="markdown prose[^>]*>/g, '');
+  cleaned = cleaned.replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, '');
+  cleaned = cleaned.replace(/<\/div>\s*<\/div>\s*<\/div>\s*<\/div>/g, '');
+
+  // Remove all class="" attributes (only after the class-based rules above)
+  cleaned = cleaned.replace(/\s*class="[^"]*"/g, '');
+
+  // Clean up h2 tags - remove attributes but keep content
+  cleaned = cleaned.replace(/<h2[^>]*>(.*?)<\/h2>/g, '## $1');
+
+  // Clean up li tags (before p tags: the p rule appends a newline that would
+  // stop the single-line li patterns from matching)
+  cleaned = cleaned.replace(/<li[^>]*>\s*<p[^>]*>(.*?)<\/p>\s*<\/li>/g, '- $1');
+  cleaned = cleaned.replace(/<li[^>]*>(.*?)<\/li>/g, '- $1');
+
+  // Clean up p tags - remove attributes but keep content
+  cleaned = cleaned.replace(/<p[^>]*>(.*?)<\/p>/g, (match, p1) => {
+    // If it's just whitespace, skip it
+    if (!p1.trim()) return '';
+    return p1.trim() + '\n';
+  });
+
+  // Clean up ul tags
+  cleaned = cleaned.replace(/<ul[^>]*>/g, '');
+  cleaned = cleaned.replace(/<\/ul>/g, '');
+
+  // Clean up strong tags - remove data attributes
+  cleaned = cleaned.replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**');
+
+  // Clean up anchor tags - remove data attributes and nofollow
+  cleaned = cleaned.replace(/<a href="([^"]*)"[^>]*rel="noopener noreferrer nofollow"[^>]*>(.*?)<\/a>/g, '[$2]($1)');
+  cleaned = cleaned.replace(/<a href="([^"]*)"[^>]*>(.*?)<\/a>/g, '[$2]($1)');
+
+  // Remove empty lines (more than 2 consecutive)
+  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
+
+  return cleaned;
+}
+
+/**
+ * Ensure the frontmatter block has `excerpt` and `category` entries.
+ *
+ * The excerpt is taken from the first paragraph after a `---` line followed by
+ * a blank line, stripped of `#`, `*`, `[` and `]`, and cut to 200 characters.
+ * Content without a leading `---` frontmatter block is returned unchanged.
+ *
+ * @param {string} content - MDX content, possibly starting with frontmatter.
+ * @returns {string} Content with the completed frontmatter.
+ */
+function ensureFrontmatterDefaults(content) {
+  const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
+  if (!frontmatterMatch) {
+    return content;
+  }
+
+  let frontmatter = frontmatterMatch[1];
+
+  // Add excerpt if missing
+  if (!frontmatter.includes('excerpt:')) {
+    // Try to extract first paragraph as excerpt
+    const firstParagraph = content.match(/---\n\n(.*?)\n\n/s);
+    if (firstParagraph) {
+      const excerpt = firstParagraph[1]
+        .replace(/[#*\[\]]/g, '')
+        .replace(/\n/g, ' ')
+        .trim()
+        .substring(0, 200);
+      // JSON.stringify yields a double-quoted scalar, so colons, quotes or a
+      // leading dash in the excerpt cannot break the YAML.
+      frontmatter += `\nexcerpt: ${JSON.stringify(excerpt)}`;
+    }
+  }
+
+  // Add category if missing (use default)
+  if (!frontmatter.includes('category:')) {
+    frontmatter += `\ncategory: Kabel Technologie`;
+  }
+
+  // A replacer function keeps `$1`, `$&`, `$$` in the frontmatter literal;
+  // a replacement string would interpret them as substitution patterns.
+  return content.replace(/^---\n[\s\S]*?\n---/, () => `---\n${frontmatter}\n---`);
+}
+
+/**
+ * Clean MDX files by removing unnecessary HTML wrappers and data attributes.
+ *
+ * Reads every file matching `data/blog/**/*.mdx`, cleans it, and rewrites it
+ * only when the content changed. An error on one file is logged and does not
+ * stop the remaining files from being processed.
+ *
+ * @returns {Promise<void>} Resolves once every file has been processed.
+ */
+async function cleanMDXFiles() {
+  const mdxFiles = await glob('data/blog/**/*.mdx');
+
+  console.log(`Found ${mdxFiles.length} MDX files to clean...`);
+
+  for (const filePath of mdxFiles) {
+    try {
+      const originalContent = fs.readFileSync(filePath, 'utf-8');
+      const content = ensureFrontmatterDefaults(cleanMdxContent(originalContent));
+
+      // Only write if content changed
+      if (content !== originalContent) {
+        fs.writeFileSync(filePath, content, 'utf-8');
+        console.log(`✓ Cleaned: ${filePath}`);
+      } else {
+        console.log(`- Skipped (no changes): ${filePath}`);
+      }
+    } catch (error) {
+      console.error(`✗ Error processing ${filePath}:`, error.message);
+    }
+  }
+
+  console.log('\nDone!');
+}
+
+// Run the cleanup; report an unexpected failure with a non-zero exit code so
+// callers (CI, npm scripts) can tell that the run did not succeed.
+cleanMDXFiles().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
--- a/scripts/fetch-posts.js
+++ b/scripts/fetch-posts.js
@@ -0,0 +1,136 @@
+const fs = require('fs');
+const path = require('path');
+const cheerio = require('cheerio');
+
+// WordPress REST endpoint for posts: 100 posts per page, with `_embed` so each
+// post carries its embedded resources (used for the featured image).
+const API_URL = 'https://klz-cables.com/wp-json/wp/v2/posts?per_page=100&_embed';
+
+/**
+ * Fetch all posts from the WordPress REST API.
+ *
+ * The endpoint is paginated; the page count is read from the
+ * `X-WP-TotalPages` response header so posts beyond the first page are not
+ * silently dropped. If the header is absent, only the first page is fetched.
+ *
+ * @returns {Promise<object[]>} The post objects from every page, in order.
+ * @throws {Error} When any page request returns a non-OK status.
+ */
+async function fetchPosts() {
+  console.log('Fetching posts...');
+  const posts = [];
+  let totalPages = 1;
+
+  for (let page = 1; page <= totalPages; page += 1) {
+    // Append `page` to the raw URL: going through URLSearchParams would
+    // re-serialize the bare `_embed` flag as `_embed=`.
+    const response = await fetch(`${API_URL}&page=${page}`);
+    if (!response.ok) {
+      throw new Error(`Failed to fetch posts: ${response.statusText}`);
+    }
+
+    const reportedPages = Number.parseInt(response.headers.get('X-WP-TotalPages'), 10);
+    if (Number.isInteger(reportedPages) && reportedPages > totalPages) {
+      totalPages = reportedPages;
+    }
+
+    const pagePosts = await response.json();
+    posts.push(...pagePosts);
+  }
+
+  console.log(`Fetched ${posts.length} posts.`);
+  return posts;
+}
+
+/**
+ * Turn rendered WordPress post HTML into MDX-friendly markup.
+ *
+ * Decodes a handful of HTML entities, strips page-builder shortcodes, converts
+ * `.vlp-link-container` blocks to `<VisualLinkPreview … />`, removes `data-*`
+ * attributes, unwraps all `<div>` elements and drops empty paragraphs.
+ *
+ * @param {string} content - Rendered post HTML (`post.content.rendered`).
+ * @returns {string} The cleaned, trimmed markup.
+ */
+function cleanContent(content) {
+  let cleaned = content;
+
+  // Decode HTML entities first to make regex easier. `&amp;` is decoded last
+  // so that an escaped entity such as `&amp;#8221;` is not decoded twice.
+  cleaned = cleaned
+    .replace(/&#8221;/g, '"')
+    .replace(/&#8220;/g, '"')
+    .replace(/&#8217;/g, "'")
+    .replace(/&#8243;/g, '"')
+    .replace(/&amp;/g, '&');
+
+  // Remove vc_row and vc_column wrappers
+  cleaned = cleaned.replace(/\[\/?vc_row.*?\]/g, '');
+  cleaned = cleaned.replace(/\[\/?vc_column.*?\]/g, '');
+
+  // Remove vc_column_text wrapper but keep content
+  cleaned = cleaned.replace(/\[vc_column_text.*?\]/g, '');
+  cleaned = cleaned.replace(/\[\/vc_column_text\]/g, '');
+
+  // Convert split_line_heading to h2
+  cleaned = cleaned.replace(/\[split_line_heading[^\]]*text_content="([^"]+)"[^\]]*\](?:\[\/split_line_heading\])?/g, '<h2>$1</h2>');
+
+  // Remove other shortcodes
+  cleaned = cleaned.replace(/\[image_with_animation.*?\]/g, '');
+  cleaned = cleaned.replace(/\[divider.*?\]/g, '');
+  cleaned = cleaned.replace(/\[nectar_global_section.*?\]/g, '');
+
+  // Use Cheerio for HTML manipulation
+  const $ = cheerio.load(cleaned, { xmlMode: false, decodeEntities: false });
+
+  // Attribute values end up inside double quotes in the generated component,
+  // so a literal `"` (e.g. from the entity decoding above) must be escaped.
+  const escapeAttr = (value) => String(value).replace(/"/g, '&quot;');
+
+  // Convert VisualLinkPreview
+  $('.vlp-link-container').each((i, el) => {
+    const $el = $(el);
+    const url = $el.find('a.vlp-link').attr('href');
+    const title = $el.find('.vlp-link-title').text().trim() || $el.find('a.vlp-link').attr('title');
+    const image = $el.find('.vlp-link-image img').attr('src');
+    const summary = $el.find('.vlp-link-summary').text().trim();
+
+    if (url && title) {
+      // We use a placeholder to avoid Cheerio messing up the React component syntax
+      const component = `__VISUAL_LINK_PREVIEW_START__ url="${escapeAttr(url)}" title="${escapeAttr(title)}" image="${escapeAttr(image || '')}" summary="${escapeAttr(summary || '')}" __VISUAL_LINK_PREVIEW_END__`;
+      $el.replaceWith(component);
+    }
+  });
+
+  // Remove data attributes
+  $('*').each((i, el) => {
+    const attribs = el.attribs;
+    for (const name in attribs) {
+      if (name.startsWith('data-')) {
+        $(el).removeAttr(name);
+      }
+    }
+  });
+
+  // Unwrap divs (remove div tags but keep content). Walk the matches in
+  // reverse document order so nested divs are unwrapped before their
+  // ancestors; replacing an ancestor first re-parses its HTML into new nodes
+  // and would leave the inner divs in the output.
+  $('div')
+    .toArray()
+    .reverse()
+    .forEach((el) => {
+      $(el).replaceWith($(el).html());
+    });
+
+  // Remove empty paragraphs
+  $('p').each((i, el) => {
+    if ($(el).text().trim() === '' && $(el).children().length === 0) {
+      $(el).remove();
+    }
+  });
+
+  let output = $('body').html() || '';
+
+  // Restore VisualLinkPreview
+  output = output.replace(/__VISUAL_LINK_PREVIEW_START__/g, '<VisualLinkPreview').replace(/__VISUAL_LINK_PREVIEW_END__/g, '/>');
+
+  return output.trim();
+}
+
+/**
+ * Build the MDX document (frontmatter + cleaned body) for one post.
+ *
+ * @param {object} post - Post object as returned by the posts endpoint; reads
+ *   `title.rendered`, `date`, `lang`, `content.rendered` and the embedded
+ *   `wp:featuredmedia` entry when present.
+ * @returns {string} The MDX file content.
+ */
+function generateMdx(post) {
+  // `&amp;` is decoded last so an escaped entity is not decoded twice.
+  const title = post.title.rendered
+    .replace(/&#8221;/g, '"')
+    .replace(/&#8220;/g, '"')
+    .replace(/&#8217;/g, "'")
+    .replace(/&amp;/g, '&');
+  const date = post.date;
+  const lang = post.lang || 'en'; // Default to en if not specified
+
+  let featuredImage = '';
+  if (post._embedded && post._embedded['wp:featuredmedia'] && post._embedded['wp:featuredmedia'][0]) {
+    featuredImage = post._embedded['wp:featuredmedia'][0].source_url;
+  }
+
+  const content = cleanContent(post.content.rendered);
+
+  // JSON.stringify emits a double-quoted scalar with `"` and `\` escaped, so
+  // quotes decoded into the title above cannot terminate the YAML string.
+  return `---
+title: ${JSON.stringify(title)}
+date: '${date}'
+featuredImage: ${featuredImage}
+locale: ${lang}
+---
+
+${content}
+`;
+}
+
+/**
+ * Fetch every post and write it to `data/blog/<lang>/<slug>.mdx`.
+ *
+ * Any failure is logged and reported through a non-zero exit code; the
+ * returned promise itself never rejects.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  try {
+    const posts = await fetchPosts();
+
+    for (const post of posts) {
+      const lang = post.lang || 'en';
+      const mdxContent = generateMdx(post);
+
+      // `recursive: true` makes mkdirSync a no-op when the directory exists,
+      // so no separate existence check is needed.
+      const dir = path.join('data/blog', lang);
+      fs.mkdirSync(dir, { recursive: true });
+
+      const filePath = path.join(dir, `${post.slug}.mdx`);
+      fs.writeFileSync(filePath, mdxContent);
+      console.log(`Saved ${filePath}`);
+    }
+    console.log('Done.');
+  } catch (error) {
+    console.error('Error:', error);
+    // Signal the failure to the caller instead of exiting with status 0.
+    process.exitCode = 1;
+  }
+}
+
+main();