feat: content engine

2026-02-22 02:39:27 +01:00
parent a9adb2eff7
commit 3a1a88db89
11 changed files with 942 additions and 172 deletions
--- a/packages/content-engine/src/generator.ts
+++ b/packages/content-engine/src/generator.ts
@@ -1,5 +1,5 @@
 import OpenAI from "openai";
-import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
+import { ResearchAgent, type Fact, type SocialPost } from "@mintel/journaling";
 import { MemeGenerator, MemeSuggestion } from "@mintel/meme-generator";
 import * as fs from "node:fs/promises";
 import * as path from "node:path";
@@ -237,11 +237,21 @@ REGELN:
        console.log(`   → ${factInsertions.length} fact enrichments planned`);
      }

-      // ----- STEP 1.5: Social Media Search -----
-      console.log("📱 Identifying real social media posts...");
-      const socialPosts = await this.researchAgent.findSocialPosts(
-        content.substring(0, 200),
-      );
+      // ----- STEP 1.5: Social Media Extraction (no LLM — regex only) -----
+      console.log("📱 Extracting existing social media embeds...");
+      const socialPosts = this.researchAgent.extractSocialPosts(content);
+
+      // If none exist, fetch real ones via Serper API
+      if (socialPosts.length === 0) {
+        console.log(
+          "   → None found. Fetching real social posts via Serper API...",
+        );
+        const newPosts = await this.researchAgent.fetchRealSocialPosts(
+          content.slice(0, 500),
+        );
+        socialPosts.push(...newPosts);
+      }
+
      if (socialPosts.length > 0) {
        console.log(
          `📝 Planning placement for ${socialPosts.length} social media posts...`,
@@ -593,7 +603,7 @@ RULES:
  - youtube -> <YouTubeEmbed videoId="ID" />
  - twitter -> <TwitterEmbed tweetId="ID" theme="light" />
  - linkedin -> <LinkedInEmbed urn="ID" />
- Add a 1-sentence intro paragraph above the embed to contextualize it.
+- Add a 1-sentence intro paragraph above the embed to contextualize it naturally in the flow of the text (e.g. "Wie Experte XY im folgenden Video detailliert erklärt:"). This context is MANDATORY. Do not just drop the Component without text reference.

 CONTEXT:
 ${context.slice(0, 3000)}
@@ -842,6 +852,11 @@ Tone: ${tone}.
 Facts: ${factsContext}
 ${componentsContext}

+BLOG POST BEST PRACTICES (MANDATORY):
+- DEVIL'S ADVOCATE: Füge zwingend eine kurze kritische Sektion ein (z.B. mit \`<ComparisonRow>\` oder \`<IconList>\`), in der du offen die Nachteile/Kosten/Haken deiner eigenen Lösung ansprichst ("Der Haken an der Sache...").
+- FAQ GENERATOR: Am absoluten Ende des Artikels erstellst du zwingend eine Markdown-Liste mit den 3 wichtigsten Fragen (FAQ) und Antworten (jeweils 2 Sätze) für Google Rich Snippets.
+- Nutze wo passend die obigen React-Komponenten für ein hochwertiges Layout.
+
 Format as Markdown. Start with # H1.
 For places where a diagram would help, insert: <!-- DIAGRAM_PLACEHOLDER: Concept Name -->
 Return ONLY raw content.`,
@@ -891,6 +906,7 @@ RULES:
 - CRITICAL: Generate ONLY ONE single connected graph. Do NOT generate multiple independent graphs or isolated subgraphs in the same Mermaid block.
 - No nested subgraphs. Keep instructions short.
 - Use double-quoted labels for nodes: A["Label"]
+- VERY CRITICAL: DO NOT use curly braces '{}' or brackets '[]' inside labels unless they are wrapped in double quotes (e.g. A["Text {with braces}"]). 
 - VERY CRITICAL: DO NOT use any HTML tags (no <br>, no <br/>, no <b>, etc).
 - VERY CRITICAL: DO NOT use special characters like '&', '<', '>', or double-quotes inside the label strings. They break the mermaid parser in our environment.
 - Return ONLY the raw mermaid code. No markdown blocks, no backticks.
--- a/packages/content-engine/src/orchestrator.ts
+++ b/packages/content-engine/src/orchestrator.ts
@@ -1,11 +1,13 @@
 import OpenAI from "openai";
-import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
+import { ResearchAgent, type Fact, type SocialPost } from "@mintel/journaling";
+import { ThumbnailGenerator } from "@mintel/thumbnail-generator";
 import { ComponentDefinition } from "./generator";
 import * as fs from "node:fs/promises";
 import * as path from "node:path";

 export interface OrchestratorConfig {
  apiKey: string;
+  replicateApiKey?: string;
  model?: string;
 }

@@ -14,6 +16,7 @@ export interface OptimizationTask {
  projectContext: string;
  availableComponents?: ComponentDefinition[];
  instructions?: string;
+  internalLinks?: { title: string; slug: string }[];
 }

 export interface OptimizeFileOptions {
@@ -24,6 +27,7 @@ export interface OptimizeFileOptions {
 export class AiBlogPostOrchestrator {
  private openai: OpenAI;
  private researchAgent: ResearchAgent;
+  private thumbnailGenerator?: ThumbnailGenerator;
  private model: string;

  constructor(config: OrchestratorConfig) {
@@ -37,6 +41,11 @@ export class AiBlogPostOrchestrator {
      },
    });
    this.researchAgent = new ResearchAgent(config.apiKey);
+    if (config.replicateApiKey) {
+      this.thumbnailGenerator = new ThumbnailGenerator({
+        replicateApiKey: config.replicateApiKey,
+      });
+    }
  }

  /**
@@ -76,9 +85,15 @@ export class AiBlogPostOrchestrator {

    const content = await fs.readFile(absPath, "utf8");

-    const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
-    const frontmatter = fmMatch ? fmMatch[0] : "";
-    const body = fmMatch ? content.slice(frontmatter.length).trim() : content;
+    // Idea 4: We no longer split frontmatter and body. We pass the whole file
+    // to the LLM so it can optimize the SEO title and description.
+
+    // Idea 1: Build Internal Link Graph
+    const blogDir = path.dirname(absPath);
+    const internalLinks = await this.buildInternalLinkGraph(
+      blogDir,
+      path.basename(absPath),
+    );

    console.log(`📖 Loading context from: ${options.contextDir}`);
    const projectContext = await this.loadContext(options.contextDir);
@@ -89,50 +104,199 @@ export class AiBlogPostOrchestrator {
    }

    const optimizedContent = await this.optimizeDocument({
-      content: body,
+      content: content,
      projectContext,
      availableComponents: options.availableComponents,
+      internalLinks: internalLinks, // pass to orchestrator
    });

-    const finalOutput = frontmatter
-      ? `${frontmatter}\n\n${optimizedContent}`
-      : optimizedContent;
+    // Idea 4b: Extract the potentially updated title to rename the file (SEO Slug)
+    const newFmMatch = optimizedContent.match(/^---\s*\n([\s\S]*?)\n---/);
+    let finalPath = absPath;
+    let finalSlug = path.basename(absPath, ".mdx");

-    await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
-    await fs.writeFile(absPath, finalOutput);
-    console.log(`✅ Saved optimized file to: ${absPath}`);
+    if (newFmMatch && newFmMatch[1]) {
+      const titleMatch = newFmMatch[1].match(/title:\s*["']([^"']+)["']/);
+      if (titleMatch && titleMatch[1]) {
+        const newTitle = titleMatch[1];
+        // Generate SEO Slug
+        finalSlug = newTitle
+          .toLowerCase()
+          .replace(/ä/g, "ae")
+          .replace(/ö/g, "oe")
+          .replace(/ü/g, "ue")
+          .replace(/ß/g, "ss")
+          .replace(/[^a-z0-9]+/g, "-")
+          .replace(/^-+|-+$/g, "");
+
+        const newAbsPath = path.join(path.dirname(absPath), `${finalSlug}.mdx`);
+        if (newAbsPath !== absPath) {
+          console.log(
+            `🔄 SEO Title changed! Renaming file to: ${finalSlug}.mdx`,
+          );
+          // Delete the old file whenever the slug derived from the new title differs from the current filename
+          try {
+            await fs.unlink(absPath);
+          } catch (e) {
+            /* ignore */
+          }
+          finalPath = newAbsPath;
+        }
+      }
+    }
+
+    // Idea 5: Automatic Thumbnails
+    let finalContent = optimizedContent;
+
+    // Skip if a quoted `thumbnail:` value already appears anywhere in the content (the check is not limited to the frontmatter)
+    const hasExistingThumbnail = /thumbnail:\s*["'][^"']+["']/.test(
+      finalContent,
+    );
+
+    if (this.thumbnailGenerator && !hasExistingThumbnail) {
+      console.log("🎨 Phase 5: Generating visual thumbnail...");
+      try {
+        const visualPrompt = await this.generateVisualPrompt(finalContent);
+        // Assumes process.cwd() is the monorepo root: the public dir is resolved as <cwd>/apps/web/public
+        const webPublicDir = path.resolve(process.cwd(), "apps/web/public");
+        const thumbnailRelPath = `/blog/${finalSlug}.png`;
+        const thumbnailAbsPath = path.join(
+          webPublicDir,
+          "blog",
+          `${finalSlug}.png`,
+        );
+
+        await this.thumbnailGenerator.generateImage(
+          visualPrompt,
+          thumbnailAbsPath,
+        );
+
+        // Update frontmatter with thumbnail (SEO: we also want it as a hero)
+        if (finalContent.includes("thumbnail:")) {
+          finalContent = finalContent.replace(
+            /thumbnail:\s*["'].*?["']/,
+            `thumbnail: "${thumbnailRelPath}"`,
+          );
+        } else {
+          finalContent = finalContent.replace(
+            /(title:\s*["'].*?["'])/,
+            `$1\nthumbnail: "${thumbnailRelPath}"`,
+          );
+        }
+      } catch (e) {
+        console.warn("⚠️ Thumbnail generation failed, skipping:", e);
+      }
+    }
+
+    await fs.writeFile(finalPath, finalContent);
+    console.log(`✅ Saved optimized file to: ${finalPath}`);
+  }
+
+  private async generateVisualPrompt(content: string): Promise<string> {
+    const response = await this.openai.chat.completions.create({
+      model: this.model,
+      messages: [
+        {
+          role: "system",
+          content: `You are a Visual Discovery Agent for an architectural design system.
+Review the provided blog post and create a 1-sentence abstract visual description for an image generator (like Flux).
+
+THEME: Technical blueprint / structural illustration.
+STYLE: Clean lines, geometric shapes, monochrome base with one highlighter accent color (green, pink, or yellow). 
+NO TEXT. NO PEOPLE. NO REALISTIC PHOTOS.
+FOCUS: The core metaphor or technical concept of the article.
+
+Example output: "A complex network of glowing fiber optic nodes forming a recursive pyramid structure, technical blue lineart style."`,
+        },
+        { role: "user", content: content.slice(0, 5000) },
+      ],
+      max_tokens: 100,
+    });
+    return (
+      response.choices[0].message.content ||
+      "Technical architectural blueprint of a digital system"
+    );
+  }
+
+  private async buildInternalLinkGraph(
+    blogDir: string,
+    currentFile: string,
+  ): Promise<{ title: string; slug: string }[]> {
+    try {
+      const files = await fs.readdir(blogDir);
+      const mdxFiles = files.filter(
+        (f) => f.endsWith(".mdx") && f !== currentFile,
+      );
+      const graph: { title: string; slug: string }[] = [];
+
+      for (const file of mdxFiles) {
+        const fileContent = await fs.readFile(path.join(blogDir, file), "utf8");
+        const titleMatch = fileContent.match(/title:\s*["']([^"']+)["']/);
+        if (titleMatch && titleMatch[1]) {
+          graph.push({
+            title: titleMatch[1],
+            slug: `/blog/${file.replace(".mdx", "")}`,
+          });
+        }
+      }
+      return graph;
+    } catch (e) {
+      console.warn("Could not build internal link graph", e);
+      return [];
+    }
  }

  /**
   * Executes the 3-step optimization pipeline:
   * 1. Fakten recherchieren
-   * 2. Social Posts recherchieren
+   * 2. Bestehende Social Posts extrahieren (kein LLM — nur Regex)
   * 3. AI anweisen daraus Artikel zu erstellen
   */
  async optimizeDocument(task: OptimizationTask): Promise<string> {
    console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);

-    // 1. Fakten recherchieren
-    console.log("1️⃣ Recherchiere Fakten...");
+    // 1. Fakten & Konkurrenz recherchieren
+    console.log("1️⃣ Recherchiere Fakten und analysiere Konkurrenz...");
    const researchTopics = await this.identifyTopics(task.content);
    const facts: Fact[] = [];
-    for (const topic of researchTopics) {
-      const topicFacts = await this.researchAgent.researchTopic(topic);
-      facts.push(...topicFacts);
-    }
+    const competitorInsights: string[] = [];

-    // 2. Social Posts recherchieren
-    console.log(
-      "2️⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
-    );
-    // Use the first 2000 chars to find relevant social posts
-    const socialPosts = await this.researchAgent.findSocialPosts(
-      task.content.substring(0, 2000),
+    // Parallelize competitor research and fact research
+    await Promise.all(
+      researchTopics.map(async (topic) => {
+        const [topicFacts, insights] = await Promise.all([
+          this.researchAgent.researchTopic(topic),
+          this.researchAgent.researchCompetitors(topic),
+        ]);
+        facts.push(...topicFacts);
+        competitorInsights.push(...insights);
+      }),
    );

+    // 2. Bestehende Social Posts aus dem Content extrahieren (deterministisch, kein LLM)
+    console.log("2️⃣ Extrahiere bestehende Social Media Embeds aus Content...");
+    const socialPosts = this.researchAgent.extractSocialPosts(task.content);
+
+    // Wenn keine vorhanden sind, besorge echte von der Serper API
+    if (socialPosts.length === 0) {
+      console.log(
+        "   → Keine bestehenden Posts gefunden. Suche neue über Serper API...",
+      );
+      const realPosts = await this.researchAgent.fetchRealSocialPosts(
+        task.content.slice(0, 500),
+      );
+      socialPosts.push(...realPosts);
+    }
+
    // 3. AI anweisen daraus Artikel zu erstellen
    console.log("3️⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
-    return await this.compileArticle(task, facts, socialPosts);
+    return await this.compileArticle(
+      task,
+      facts,
+      competitorInsights,
+      socialPosts,
+      task.internalLinks || [],
+    );
  }

  private async identifyTopics(content: string): Promise<string[]> {
@@ -170,22 +334,55 @@ Return ONLY the JSON.`,
  private async compileArticle(
    task: OptimizationTask,
    facts: Fact[],
+    competitorInsights: string[],
    socialPosts: SocialPost[],
+    internalLinks: { title: string; slug: string }[],
    retryCount = 0,
  ): Promise<string> {
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");

-    const socialText = socialPosts
-      .map(
-        (p, i) =>
-          `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
-      )
-      .join("\n");
+    let socialText = `CRITICAL RULE: NO VERIFIED SOCIAL MEDIA POSTS FOUND. You MUST NOT use <YouTubeEmbed />, <TwitterEmbed />, or <LinkedInEmbed /> under ANY circumstances in this article. DO NOT hallucinate IDs.`;
+
+    if (socialPosts.length > 0) {
+      const allowedTags: string[] = [];
+      if (socialPosts.some((p) => p.platform === "youtube"))
+        allowedTags.push('<YouTubeEmbed videoId="..." />');
+      if (socialPosts.some((p) => p.platform === "twitter"))
+        allowedTags.push('<TwitterEmbed tweetId="..." />');
+      if (socialPosts.some((p) => p.platform === "linkedin"))
+        allowedTags.push('<LinkedInEmbed url="..." />');
+
+      socialText = `Social Media Posts to embed (use ONLY these tags, do not use others: ${allowedTags.join(", ")}):\n${socialPosts.map((p) => `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`).join("\n")}\nCRITICAL: Do not invent any IDs that are not explicitly listed in the list above.`;
+    }

    const componentsText = (task.availableComponents || [])
-      .map((c) => `<${c.name}>: ${c.description}\n  Example: ${c.usageExample}`)
+      .filter((c) => {
+        if (
+          c.name === "YouTubeEmbed" &&
+          !socialPosts.some((p) => p.platform === "youtube")
+        )
+          return false;
+        if (
+          c.name === "TwitterEmbed" &&
+          !socialPosts.some((p) => p.platform === "twitter")
+        )
+          return false;
+        if (
+          c.name === "LinkedInEmbed" &&
+          !socialPosts.some((p) => p.platform === "linkedin")
+        )
+          return false;
+        return true;
+      })
+      .map((c) => {
+        // Ensure LinkedInEmbed usage example consistently uses 'url'
+        if (c.name === "LinkedInEmbed") {
+          return `<${c.name}>: ${c.description}\n  Example: <LinkedInEmbed url="https://www.linkedin.com/posts/..." />`;
+        }
+        return `<${c.name}>: ${c.description}\n  Example: ${c.usageExample}`;
+      })
      .join("\n\n");

    const response = await this.openai.chat.completions.create({
@@ -202,30 +399,47 @@ CONTEXT & RULES:
 Project Context / Tone:
 ${task.projectContext}

-Facts to weave in:
-${factsText || "None"}
+FACTS TO INTEGRATE:
+${factsText || "No new facts needed."}

-Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
-${socialText || "None"}
+COMPETITOR BENCHMARK (TOP RANKING ARTICLES):
+Here are snippets from the top 5 ranking Google articles for this topic. Read them carefully and ensure our article covers these topics but is fundamentally BETTER, deeper, and more authoritative:
+${competitorInsights.length > 0 ? competitorInsights.join("\n") : "No competitor insights found."}

-Available MDX Components you can use contextually:
-${componentsText || "None"}
+AVAILABLE UI COMPONENTS:
+${componentsText}
+
+SOCIAL MEDIA POSTS:
+${socialText}
+
+INTERNAL LINKING GRAPH:
+Hier sind unsere existierenden Blog-Posts (Titel und URL-Slug). Finde 2-3 passende Stellen im Text, um organisch mit regulärem Markdown (\`[passender Text]([slug])\`) auf diese Posts zu verlinken. Nutze KEIN <ExternalLink> für B2B-interne Links.
+${internalLinks.length > 0 ? internalLinks.map((l) => `- "${l.title}" -> ${l.slug}`).join("\n") : "Keine internen Links verfügbar."}

 Special Instructions from User:
 ${task.instructions || "None"}

 BLOG POST BEST PRACTICES (MANDATORY):
+- DEVIL'S ADVOCATE: Füge zwingend eine kurze kritische Sektion ein (z.B. mit \`<ComparisonRow>\` oder \`<IconList>\`), in der du offen die Nachteile/Kosten/Haken deiner eigenen Lösung ansprichst ("Der Haken an der Sache..."). Das baut Vertrauen bei B2B Entscheidenden auf.
+- FAQ GENERATOR: Am absoluten Ende des Artikels erstellst du zwingend eine Markdown-Liste mit den 3 wichtigsten Fragen (FAQ) und Antworten (jeweils 2 Sätze) für Google Rich Snippets. Nutze dazu das \`<FAQSection>\` Component oder normales Markdown.
+- SUBTLE CTAs: Webe 1-2 subtile CTAs für High-End Website Entwicklung ein (Beispiel: \`<Button href="/contact" variant="outline" size="normal">Performance-Check anfragen</Button>\` oder \`<Button href="/contact">Digitale Architektur anfragen</Button>\`). Platziere diese zwingend organisch nach Abschnitten mit hohem Mehrwert.
+- Zitat-Varianten: Wenn du Organisationen oder Studien zitierst, nutze \`<ArticleQuote isCompany={true} ... />\`. Für Personen lass \`isCompany\` weg.
 - Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
 - Füge ein sauberes '<TableOfContents />' ein.
 - Verwende unsere Komponenten stilvoll für Visualisierungen.
 - Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).
+- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab.
+
+STRICT MDX OUTPUT RULES:
+1. ONLY use the exact components defined above.
+2. For Social Media Embeds, you MUST ONLY use the EXACT IDs provided in the list above. Do NOT invent IDs.
+3. If ANY verified social media posts are provided, you MUST integrate at least one naturally with a contextual sentence.
+4. Keep the original content blocks and headings as much as possible, just improve flow.
+5. FRONTMATTER SEO (Idea 4): Ich übergebe dir die KOMPLETTE Datei inklusive Markdown-Frontmatter (--- ... ---). Du MUSST das Frontmatter ebenfalls zurückgeben! Optimiere darin den \`title\` und die \`description\` maximal für B2B SEO. Lasse die anderen Keys im Frontmatter (date, tags) unangetastet.

 CRITICAL GUIDELINES (NEVER BREAK THESE):
-1. ONLY return the content for the BODY of the MDX file.
-2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
-3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
-4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
+1. THE OUTPUT MUST START WITH YAML FRONTMATTER AND END WITH THE MDX BODY.
+2. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
 5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
 6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
 7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
@@ -239,7 +453,7 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
    });

    let rawContent = response.choices[0].message.content || task.content;
-    rawContent = this.cleanResponse(rawContent);
+    rawContent = this.cleanResponse(rawContent, socialPosts);

    // Validation Layer: Check Mermaid syntax
    if (retryCount < 2 && rawContent.includes("<Mermaid>")) {
@@ -266,7 +480,9 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
            content: `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${task.content}`,
          },
          facts,
+          competitorInsights,
          socialPosts,
+          internalLinks,
          retryCount + 1,
        );
      }
@@ -320,11 +536,7 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
    }
  }

-  /**
-   * Post-processing to ensure the AI didn't include "help" text,
-   * duplicate frontmatter, or markdown wrappers.
-   */
-  private cleanResponse(content: string): string {
+  private cleanResponse(content: string, socialPosts: SocialPost[]): string {
    let cleaned = content.trim();

    // 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
@@ -334,16 +546,52 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
        .replace(/\n?```\s*$/, "");
    }

-    // 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it)
-    // Look for the --- delimiters and remove the block if it exists
-    const fmRegex = /^---\s*\n([\s\S]*?)\n---\s*\n?/;
-    const match = cleaned.match(fmRegex);
-    if (match) {
-      console.log(
-        "♻️  Stripping redundant frontmatter detected in AI response...",
-      );
-      cleaned = cleaned.replace(fmRegex, "").trim();
-    }
+    // 2. Frontmatter is no longer stripped here, because the LLM is now asked to return it.
+    // NOTE: nothing here verifies that frontmatter is actually present; a response without it is passed along unchanged.
+
+    // 3. Strip any social embeds the AI hallucinated (IDs not in our extracted set)
+    const knownYtIds = new Set(
+      socialPosts.filter((p) => p.platform === "youtube").map((p) => p.embedId),
+    );
+    const knownTwIds = new Set(
+      socialPosts.filter((p) => p.platform === "twitter").map((p) => p.embedId),
+    );
+    const knownLiIds = new Set(
+      socialPosts
+        .filter((p) => p.platform === "linkedin")
+        .map((p) => p.embedId),
+    );
+
+    cleaned = cleaned.replace(
+      /<YouTubeEmbed[^>]*videoId="([^"]+)"[^>]*\/>/gi,
+      (tag, id) => {
+        if (knownYtIds.has(id)) return tag;
+        console.log(
+          `🛑 Stripped hallucinated YouTubeEmbed with videoId="${id}"`,
+        );
+        return "";
+      },
+    );
+
+    cleaned = cleaned.replace(
+      /<TwitterEmbed[^>]*tweetId="([^"]+)"[^>]*\/>/gi,
+      (tag, id) => {
+        if (knownTwIds.has(id)) return tag;
+        console.log(
+          `🛑 Stripped hallucinated TwitterEmbed with tweetId="${id}"`,
+        );
+        return "";
+      },
+    );
+
+    cleaned = cleaned.replace(
+      /<LinkedInEmbed[^>]*(?:url|urn)="([^"]+)"[^>]*\/>/gi,
+      (tag, id) => {
+        if (knownLiIds.has(id)) return tag;
+        console.log(`🛑 Stripped hallucinated LinkedInEmbed with id="${id}"`);
+        return "";
+      },
+    );

    return cleaned;
  }