// at-mintel/packages/content-engine/src/orchestrator.ts

import OpenAI from "openai";
import { ResearchAgent, type Fact, type SocialPost } from "@mintel/journaling";
import { ThumbnailGenerator } from "@mintel/thumbnail-generator";
import { ComponentDefinition } from "./generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";

/**
 * Construction options for {@link AiBlogPostOrchestrator}.
 */
export interface OrchestratorConfig {
  /**
   * API key passed both to the OpenAI client (which is pointed at the
   * OpenRouter base URL) and to the `ResearchAgent`.
   */
  apiKey: string;
  /**
   * Replicate API key. A `ThumbnailGenerator` is only created when this is
   * set; without it the thumbnail phase of `optimizeFile` is skipped.
   */
  replicateApiKey?: string;
  /**
   * Model id used for article compilation and visual prompt generation.
   * Falls back to "google/gemini-3-flash-preview" when omitted or empty.
   */
  model?: string;
}

/**
 * Input for a single `optimizeDocument` run.
 */
export interface OptimizationTask {
  /** The draft to optimize; sent to the model as the user message. */
  content: string;
  /** Project context / tone guidelines interpolated into the system prompt. */
  projectContext: string;
  /**
   * UI components the model may use. Social embed components are left out of
   * the prompt when no post for the matching platform is available.
   */
  availableComponents?: ComponentDefinition[];
  /** Extra editor instructions; the prompt shows "None" when omitted. */
  instructions?: string;
  /** Existing posts (title + URL slug) offered to the model for internal linking. */
  internalLinks?: { title: string; slug: string }[];
  /** Forwarded to `ResearchAgent.fetchRealSocialPosts` when the draft has no embeds. */
  customSources?: string[];
}

/**
 * Options for `AiBlogPostOrchestrator.optimizeFile`.
 */
export interface OptimizeFileOptions {
  /**
   * Directory whose `.md` / `.txt` files are loaded as project context.
   * Resolved against `process.cwd()`.
   */
  contextDir: string;
  /** UI components passed through to the optimization task. */
  availableComponents?: ComponentDefinition[];
  /**
   * When true, the file is renamed to a slug derived from the optimized
   * frontmatter title; otherwise the original file name is kept.
   */
  shouldRename?: boolean;
}

/**
 * Orchestrates the AI-assisted optimization of MDX blog posts.
 *
 * The pipeline researches facts and competitor content, collects verified
 * social media posts, asks the model to rewrite the draft (including its
 * frontmatter), validates the result and optionally renames the file and
 * attaches a generated thumbnail.
 */
export class AiBlogPostOrchestrator {
  /** Model used when the config does not specify one. */
  private static readonly DEFAULT_MODEL = "google/gemini-3-flash-preview";

  /** Fast model for the small structured helper calls (slug, topics, Mermaid check). */
  private static readonly FAST_MODEL = "google/gemini-3-flash-preview";

  /** A failed validation triggers at most this many additional compile attempts. */
  private static readonly MAX_COMPILE_RETRIES = 3;

  /** Meme template ids the model is allowed to use in <ArticleMeme>. */
  private static readonly MEME_TEMPLATES: readonly string[] = [
    "db", // Distracted Boyfriend
    "gb", // Galaxy Brain
    "fine", // This is Fine
    "ds", // Daily Struggle
    "gru", // Gru's Plan
    "cmm", // Change My Mind
    "astronaut", // Always Has Been (ahb)
    "disastergirl",
    "pigeon", // Is this a pigeon?
    "rollsafe",
    "slap", // Will Smith
    "exit", // Left Exit 12
    "mordor",
    "panik-kalm-panik",
    "woman-cat", // Woman yelling at cat
    "grumpycat",
    "sadfrog",
    "stonks",
    "same", // They're the same picture
    "spongebob",
  ];

  private readonly openai: OpenAI;
  private readonly researchAgent: ResearchAgent;
  private readonly thumbnailGenerator?: ThumbnailGenerator;
  private readonly model: string;

  /**
   * @param config API keys and optional model override. A thumbnail generator
   *   is only created when `replicateApiKey` is provided.
   */
  constructor(config: OrchestratorConfig) {
    // `||` (not `??`) on purpose: an empty model string also falls back to the default.
    this.model = config.model || AiBlogPostOrchestrator.DEFAULT_MODEL;
    this.openai = new OpenAI({
      apiKey: config.apiKey,
      baseURL: "https://openrouter.ai/api/v1",
      defaultHeaders: {
        "HTTP-Referer": "https://mintel.me",
        "X-Title": "Mintel AI Blog Post Orchestrator",
      },
    });
    this.researchAgent = new ResearchAgent(config.apiKey);
    if (config.replicateApiKey) {
      this.thumbnailGenerator = new ThumbnailGenerator({
        replicateApiKey: config.replicateApiKey,
      });
    }
  }

  /**
   * Converts free text into a lowercase, hyphen-separated URL slug.
   * German umlauts and ß are transliterated, other diacritics are stripped.
   * May return an empty string when the text has no ASCII letters or digits.
   */
  private static slugify(text: string): string {
    return text
      .normalize("NFC") // make sure umlauts are single code points before replacing
      .toLowerCase()
      .replace(/ä/g, "ae")
      .replace(/ö/g, "oe")
      .replace(/ü/g, "ue")
      .replace(/ß/g, "ss")
      .normalize("NFD")
      .replace(/[\u0300-\u036f]/g, "") // drop remaining combining marks (é -> e)
      .replace(/[^a-z0-9]+/g, "-")
      .replace(/^-+|-+$/g, "");
  }

  /**
   * Returns the value of the first quoted `title:` key that starts a line.
   * The closing quote must match the opening one, so apostrophes inside a
   * double-quoted title do not truncate it.
   */
  private static extractQuotedTitle(text: string): string | undefined {
    const match = /^[ \t]*title:\s*(["'])(.+?)\1/m.exec(text);
    return match?.[2];
  }

  /**
   * Sets the `thumbnail` frontmatter key: replaces an existing quoted value,
   * or inserts a new line directly after the `title:` line.
   */
  private static applyThumbnail(
    content: string,
    thumbnailRelPath: string,
  ): string {
    const thumbnailEntry = `thumbnail: "${thumbnailRelPath}"`;
    // Function replacers are used so "$" in the path is never treated as a
    // replacement pattern.
    if (content.includes("thumbnail:")) {
      return content.replace(
        /thumbnail:\s*["'].*?["']/,
        () => thumbnailEntry,
      );
    }
    // Insert after the whole title line instead of after "the first quote",
    // which used to split titles containing an apostrophe.
    return content.replace(
      /^([ \t]*)(title:[^\r\n]*)/m,
      (_line: string, indent: string, titleEntry: string) =>
        `${indent}${titleEntry}\n${indent}${thumbnailEntry}`,
    );
  }

  /** Resolves to true when `target` is accessible on disk. */
  private async pathExists(target: string): Promise<boolean> {
    try {
      await fs.access(target);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reusable context loader. Loads all .md and .txt files from a directory
   * (sorted by name) into a single string.
   *
   * @param dirPath Directory, resolved against `process.cwd()`.
   * @returns The concatenated file contents, or "" when the directory cannot
   *   be read. A single unreadable file is skipped with a warning.
   */
  async loadContext(dirPath: string): Promise<string> {
    try {
      const resolvedDir = path.resolve(process.cwd(), dirPath);
      const entries = await fs.readdir(resolvedDir);
      const textFiles = entries.filter((f) => /\.(md|txt)$/i.test(f)).sort();

      // Promise.all keeps the sorted order of the sections.
      const sections = await Promise.all(
        textFiles.map(async (file) => {
          try {
            const text = await fs.readFile(
              path.join(resolvedDir, file),
              "utf8",
            );
            return `=== ${file} ===\n${text.trim()}`;
          } catch (e) {
            console.warn(`⚠️ Could not read context file ${file}: ${e}`);
            return undefined;
          }
        }),
      );

      return sections
        .filter((section): section is string => section !== undefined)
        .join("\n\n");
    } catch (e) {
      console.warn(`⚠️ Could not load context from ${dirPath}: ${e}`);
      return "";
    }
  }

  /**
   * Reads a file, loads context, optimizes the whole document (frontmatter
   * included), optionally renames it and attaches a thumbnail, then writes
   * the result.
   *
   * @param targetFile Path to the .mdx file (absolute or relative to cwd).
   * @param options Context directory, available components, rename flag.
   * @throws When the file cannot be read or written, or when the optimization
   *   pipeline rejects.
   */
  async optimizeFile(
    targetFile: string,
    options: OptimizeFileOptions,
  ): Promise<void> {
    const absPath = path.isAbsolute(targetFile)
      ? targetFile
      : path.resolve(process.cwd(), targetFile);
    console.log(`📄 Processing File: ${path.basename(absPath)}`);

    const content = await fs.readFile(absPath, "utf8");

    // Idea 4: We no longer split frontmatter and body. We pass the whole file
    // to the LLM so it can optimize the SEO title and description.

    // Idea 1: Build Internal Link Graph
    const internalLinks = await this.buildInternalLinkGraph(
      path.dirname(absPath),
      path.basename(absPath),
    );

    console.log(`📖 Loading context from: ${options.contextDir}`);
    const projectContext = await this.loadContext(options.contextDir);
    if (!projectContext) {
      console.warn(
        "⚠️ No project context loaded. AI might miss specific guidelines.",
      );
    }

    const optimizedContent = await this.optimizeDocument({
      content,
      projectContext,
      availableComponents: options.availableComponents,
      internalLinks,
    });

    // Idea 4b: Extract the potentially updated title to rename the file (SEO Slug)
    const frontmatter = optimizedContent.match(
      /^---\s*\n([\s\S]*?)\n---/,
    )?.[1];
    let finalPath = absPath;
    let finalSlug = path.basename(absPath, ".mdx");

    if (options.shouldRename && frontmatter) {
      const newTitle = AiBlogPostOrchestrator.extractQuotedTitle(frontmatter);
      const candidateSlug = newTitle
        ? AiBlogPostOrchestrator.slugify(newTitle)
        : "";

      // An empty slug would produce a file literally named ".mdx".
      if (candidateSlug) {
        const newAbsPath = path.join(
          path.dirname(absPath),
          `${candidateSlug}.mdx`,
        );
        if (newAbsPath === absPath) {
          finalSlug = candidateSlug;
        } else if (await this.pathExists(newAbsPath)) {
          // Never overwrite a different, already existing post.
          console.warn(
            `⚠️ Rename skipped: ${candidateSlug}.mdx already exists. Keeping ${path.basename(absPath)}.`,
          );
        } else {
          console.log(
            `🔄 SEO Title changed! Renaming file to: ${candidateSlug}.mdx`,
          );
          finalSlug = candidateSlug;
          finalPath = newAbsPath;
        }
      }
    } else if (frontmatter) {
      console.log(
        `ℹ️ Rename skipped (permalink stability active). If you want to rename, use --rename.`,
      );
    }

    // Idea 5: Automatic Thumbnails
    let finalContent = optimizedContent;

    // Skip if a non-empty quoted thumbnail is already present
    const hasExistingThumbnail = /thumbnail:\s*["'][^"']+["']/.test(
      finalContent,
    );

    if (this.thumbnailGenerator && !hasExistingThumbnail) {
      console.log("🎨 Phase 5: Generating/Linking visual thumbnail...");
      try {
        const webPublicDir = path.resolve(process.cwd(), "apps/web/public");
        const thumbnailRelPath = `/blog/${finalSlug}.png`;
        const thumbnailAbsPath = path.join(
          webPublicDir,
          "blog",
          `${finalSlug}.png`,
        );

        if (await this.pathExists(thumbnailAbsPath)) {
          console.log(
            `⏭️ Thumbnail already exists on disk, skipping generation: ${thumbnailAbsPath}`,
          );
        } else {
          const visualPrompt = await this.generateVisualPrompt(finalContent);
          await this.thumbnailGenerator.generateImage(
            visualPrompt,
            thumbnailAbsPath,
          );
        }

        // Update frontmatter with thumbnail
        finalContent = AiBlogPostOrchestrator.applyThumbnail(
          finalContent,
          thumbnailRelPath,
        );
      } catch (e) {
        // Thumbnails are best-effort; the article is still written.
        console.warn("⚠️ Thumbnail processing failed, skipping:", e);
      }
    }

    await fs.writeFile(finalPath, finalContent);
    console.log(`✅ Saved optimized file to: ${finalPath}`);

    // Remove the old file only after the new one is safely on disk, so a
    // failed write can never lose the post.
    if (finalPath !== absPath) {
      try {
        await fs.unlink(absPath);
      } catch (e) {
        console.warn(`⚠️ Could not remove old file ${absPath}:`, e);
      }
    }
  }

  /**
   * Asks the fast model for an SEO slug and sanitizes the answer.
   *
   * @param content Article content; only the first 3000 characters are sent.
   * @param title Optional title hint ("Unknown" when omitted).
   * @param instructions Optional editor instructions appended to the request.
   * @returns A sanitized slug, or "new-post" when the answer is missing,
   *   unparsable or sanitizes to an empty string.
   */
  async generateSlug(
    content: string,
    title?: string,
    instructions?: string,
  ): Promise<string> {
    const response = await this.openai.chat.completions.create({
      model: AiBlogPostOrchestrator.FAST_MODEL,
      messages: [
        {
          role: "system",
          content: `You generate SEO-optimized URL slugs for B2B blog posts based on the provided content.
Return ONLY a JSON object with a single string field "slug".
Example: {"slug": "how-to-optimize-react-performance"}
Rules: Use lowercase letters, numbers, and hyphens only. No special characters. Keep it concise (2-5 words).`,
        },
        {
          role: "user",
          content: `Title: ${title || "Unknown"}\n\nContent:\n${content.slice(0, 3000)}...${instructions ? `\n\nEDITOR INSTRUCTIONS:\nPlease strictly follow these instructions from the editor when generating the slug:\n${instructions}` : ""}`,
        },
      ],
      response_format: { type: "json_object" },
    });

    try {
      const parsed: unknown = JSON.parse(
        response.choices[0]?.message?.content || '{"slug": ""}',
      );
      const rawSlug = (parsed as { slug?: unknown } | null)?.slug;
      const slug =
        typeof rawSlug === "string"
          ? AiBlogPostOrchestrator.slugify(rawSlug)
          : "";
      return slug || "new-post";
    } catch {
      return "new-post";
    }
  }

  /**
   * Produces a one-sentence abstract image description for the thumbnail
   * generator.
   *
   * @param content Article content; only the first 5000 characters are sent.
   * @param instructions Optional editor instructions appended to the request.
   * @returns The model's description, or a generic blueprint prompt when the
   *   model returns nothing.
   */
  public async generateVisualPrompt(
    content: string,
    instructions?: string,
  ): Promise<string> {
    const response = await this.openai.chat.completions.create({
      model: this.model,
      messages: [
        {
          role: "system",
          content: `You are a Visual Discovery Agent for an architectural design system.
Review the provided blog post and create a 1-sentence abstract visual description for an image generator (like Flux).

THEME: Technical blueprint / structural illustration.
STYLE: Clean lines, geometric shapes, monochrome base with one highlighter accent color (green, pink, or yellow).
NO TEXT. NO PEOPLE. NO REALISTIC PHOTOS.
FOCUS: The core metaphor or technical concept of the article.

Example output: "A complex network of glowing fiber optic nodes forming a recursive pyramid structure, technical blue lineart style."`,
        },
        {
          role: "user",
          content: `${content.slice(0, 5000)}${instructions ? `\n\nEDITOR INSTRUCTIONS:\nPlease strictly follow these instructions from the editor when generating the visual prompt:\n${instructions}` : ""}`,
        },
      ],
      max_tokens: 100,
    });
    return (
      response.choices[0]?.message?.content ||
      "Technical architectural blueprint of a digital system"
    );
  }

  /**
   * Collects title + URL slug of every other .mdx file in `blogDir`.
   *
   * @param blogDir Directory containing the blog posts.
   * @param currentFile File name to exclude (the post being optimized).
   * @returns Link targets in directory order; [] when the directory cannot be
   *   read. Files that cannot be read or have no quoted title are skipped.
   */
  private async buildInternalLinkGraph(
    blogDir: string,
    currentFile: string,
  ): Promise<{ title: string; slug: string }[]> {
    try {
      const files = await fs.readdir(blogDir);
      const mdxFiles = files.filter(
        (f) => f.endsWith(".mdx") && f !== currentFile,
      );

      const entries = await Promise.all(
        mdxFiles.map(async (file) => {
          try {
            const fileContent = await fs.readFile(
              path.join(blogDir, file),
              "utf8",
            );
            const title =
              AiBlogPostOrchestrator.extractQuotedTitle(fileContent);
            if (!title) return undefined;
            // basename strips only the trailing extension; a plain
            // replace(".mdx", "") would remove the first occurrence instead.
            return { title, slug: `/blog/${path.basename(file, ".mdx")}` };
          } catch (e) {
            console.warn(`Could not read ${file} for internal link graph`, e);
            return undefined;
          }
        }),
      );

      return entries.filter(
        (entry): entry is { title: string; slug: string } =>
          entry !== undefined,
      );
    } catch (e) {
      console.warn("Could not build internal link graph", e);
      return [];
    }
  }

  /**
   * Executes the 3-step optimization pipeline:
   * 1. Research facts and competitor content
   * 2. Extract existing social posts (no LLM, regex only)
   * 3. Instruct the AI to compile the article from that material
   *
   * @param task The draft plus all context for the rewrite.
   * @returns The optimized MDX document including frontmatter.
   */
  async optimizeDocument(task: OptimizationTask): Promise<string> {
    console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);

    // 1. Research facts and competitors
    console.log("1️⃣ Recherchiere Fakten und analysiere Konkurrenz...");
    const researchTopics = await this.identifyTopics(task.content);

    // Topics and both kinds of research run in parallel. Results are merged
    // in topic order so the prompt is deterministic regardless of which
    // request finishes first.
    const perTopic = await Promise.all(
      researchTopics.map(async (topic) => {
        const [topicFacts, insights] = await Promise.all([
          this.researchAgent.researchTopic(topic),
          this.researchAgent.researchCompetitors(topic),
        ]);
        return { topicFacts, insights };
      }),
    );
    const facts: Fact[] = perTopic.flatMap((result) => result.topicFacts);
    const competitorInsights: string[] = perTopic.flatMap(
      (result) => result.insights,
    );

    // 2. Extract existing social posts from the content (deterministic, no LLM)
    console.log("2️⃣ Extrahiere bestehende Social Media Embeds aus Content...");
    const socialPosts = this.researchAgent.extractSocialPosts(task.content);

    // If there are none, fetch real ones via the Serper API
    if (socialPosts.length === 0) {
      console.log(
        "   → Keine bestehenden Posts gefunden. Suche neue über Serper API...",
      );
      const realPosts = await this.researchAgent.fetchRealSocialPosts(
        task.content.slice(0, 500),
        task.customSources,
      );
      socialPosts.push(...realPosts);
    }

    // 3. Instruct the AI to compile the article
    console.log("3️⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
    return await this.compileArticle(
      task,
      facts,
      competitorInsights,
      socialPosts,
      task.internalLinks || [],
    );
  }

  /**
   * Asks the fast model for 1-2 topics worth researching.
   *
   * @param content Article content; only the first 4000 characters are sent.
   * @returns Non-empty topic strings, or [] when the answer cannot be parsed
   *   or has an unexpected shape.
   */
  private async identifyTopics(content: string): Promise<string[]> {
    const response = await this.openai.chat.completions.create({
      model: AiBlogPostOrchestrator.FAST_MODEL, // fast structured model for topic extraction
      messages: [
        {
          role: "system",
          content: `Analyze the following blog post and identify 1 to 2 key topics or claims that would benefit from statistical data or external verification.
Return JSON: { "topics": ["topic 1", "topic 2"] }
Return ONLY the JSON.`,
        },
        {
          role: "user",
          content: content.slice(0, 4000),
        },
      ],
      response_format: { type: "json_object" },
    });

    try {
      const raw = response.choices[0]?.message?.content || '{"topics": []}';
      // Some models wrap JSON in a fenced code block despite the instruction.
      const cleaned = raw
        .trim()
        .replace(/^```(?:json)?\s*\n?/, "")
        .replace(/\n?```\s*$/, "");
      const parsed: unknown = JSON.parse(cleaned);
      const topics = (parsed as { topics?: unknown } | null)?.topics;
      // The JSON is model output: accept only an array of non-empty strings.
      if (!Array.isArray(topics)) return [];
      return topics.filter(
        (topic): topic is string =>
          typeof topic === "string" && topic.trim().length > 0,
      );
    } catch (e) {
      console.warn("⚠️ Failed to parse research topics", e);
      return [];
    }
  }

  /**
   * Builds the system prompt for the article rewrite from the research
   * results, the allowed components and the verified social posts.
   */
  private buildSystemPrompt(
    task: OptimizationTask,
    facts: Fact[],
    competitorInsights: string[],
    socialPosts: SocialPost[],
    internalLinks: { title: string; slug: string }[],
    forcedMeme: string,
  ): string {
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");

    const platforms = new Set<string>(socialPosts.map((p) => p.platform));

    let socialText = `CRITICAL RULE: NO VERIFIED SOCIAL MEDIA POSTS FOUND. You MUST NOT use <YouTubeEmbed />, <TwitterEmbed />, or <LinkedInEmbed /> under ANY circumstances in this article. DO NOT hallucinate IDs.`;

    if (socialPosts.length > 0) {
      const allowedTags: string[] = [];
      if (platforms.has("youtube"))
        allowedTags.push('<YouTubeEmbed videoId="..." />');
      if (platforms.has("twitter"))
        allowedTags.push('<TwitterEmbed tweetId="..." />');
      if (platforms.has("linkedin"))
        allowedTags.push('<LinkedInEmbed url="..." />');

      socialText = `Social Media Posts to embed (use ONLY these tags, do not use others: ${allowedTags.join(", ")}):\n${socialPosts.map((p) => `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`).join("\n")}\nCRITICAL: Do not invent any IDs that are not explicitly listed in the list above.`;
    }

    // An embed component is only offered when a verified post for its
    // platform exists, so the model is not tempted to invent IDs.
    const embedPlatformByComponent = new Map<string, string>([
      ["YouTubeEmbed", "youtube"],
      ["TwitterEmbed", "twitter"],
      ["LinkedInEmbed", "linkedin"],
    ]);

    const componentsText = (task.availableComponents || [])
      .filter((c) => {
        const requiredPlatform = embedPlatformByComponent.get(c.name);
        return requiredPlatform === undefined
          ? true
          : platforms.has(requiredPlatform);
      })
      .map((c) => {
        // Ensure LinkedInEmbed usage example consistently uses 'url'
        if (c.name === "LinkedInEmbed") {
          return `<${c.name}>: ${c.description}\n  Example: <LinkedInEmbed url="https://www.linkedin.com/posts/..." />`;
        }
        return `<${c.name}>: ${c.description}\n  Example: ${c.usageExample}`;
      })
      .join("\n\n");

    return `You are an expert MDX Editor and Digital Architect.

YOUR TASK:
Take the given draft blog post and rewrite/enhance it into a final, error-free MDX file. Maintain the author's original German text, meaning, and tone, but enrich it gracefully.

CONTEXT & RULES:
Project Context / Tone:
${task.projectContext}

FACTS TO INTEGRATE:
${factsText || "No new facts needed."}

COMPETITOR BENCHMARK (TOP RANKING ARTICLES):
Here are snippets from the top 5 ranking Google articles for this topic. Read them carefully and ensure our article covers these topics but is fundamentally BETTER, deeper, and more authoritative:
${competitorInsights.length > 0 ? competitorInsights.join("\n") : "No competitor insights found."}

AVAILABLE UI COMPONENTS:
${componentsText}

SOCIAL MEDIA POSTS:
${socialText}

INTERNAL LINKING GRAPH:
Hier sind unsere existierenden Blog-Posts (Titel und URL-Slug). Finde 2-3 passende Stellen im Text, um organisch mit regulärem Markdown (\`[passender Text]([slug])\`) auf diese Posts zu verlinken. Nutze KEIN <ExternalLink> für B2B-interne Links.
${internalLinks.length > 0 ? internalLinks.map((l) => `- "${l.title}" -> ${l.slug}`).join("\n") : "Keine internen Links verfügbar."}

Special Instructions from User:
${task.instructions || "None"}

BLOG POST BEST PRACTICES (MANDATORY):
- DEVIL'S ADVOCATE: Füge zwingend eine kurze kritische Sektion ein (z.B. mit \`<ComparisonRow>\` oder \`<IconList>\`), in der du offen die Nachteile/Kosten/Haken deiner eigenen Lösung ansprichst ("Der Haken an der Sache..."). Das baut Vertrauen bei B2B Entscheidenden auf.
- FAQ GENERATOR: Am absoluten Ende des Artikels erstellst du zwingend eine Markdown-Liste mit den 3 wichtigsten Fragen (FAQ) und Antworten (jeweils 2 Sätze) für Google Rich Snippets. Nutze dazu das FAQSection Component oder normales Markdown.
- SUBTLE CTAs: Webe 1-2 subtile CTAs für High-End Website Entwicklung ein. Nutze ZWINGEND die Komponente [LeadMagnet] für diese Zwecke anstelle von einfachen Buttons. [LeadMagnet] bietet mehr Kontext und Vertrauen. Beispiel: <LeadMagnet title="Performance-Check anfragen" description="Wir analysieren Ihre Core Web Vitals und decken Umsatzpotenziale auf." buttonText="Jetzt analysieren lassen" href="/contact" variant="performance" />. Die Texte im LeadMagnet müssen absolut überzeugend, hochprofessionell und B2B-fokussiert sein (KEIN Robotik-Marketing-Sprech).
- MEME DIVERSITY: Du MUSST ZWINGEND für jedes Meme (sofern passend) abwechslungsreiche Templates nutzen. Um dies zu garantieren, wurde für diesen Artikel das folgende Template ausgewählt: '${forcedMeme}'. Du MUSST EXAKT DIESES TEMPLATE NUTZEN. Versuche nicht, es durch ein Standard-Template wie 'drake' zu ersetzen!
- Zitat-Varianten: Wenn du Organisationen oder Studien zitierst, nutze ArticleQuote (mit isCompany=true für Firmen). Für Personen lass isCompany weg.
- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
- Verwende unsere Komponenten stilvoll für Visualisierungen.
- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab.
- ORIGINAL LANGUAGE QUOTES: Übersetze NIEMALS Zitate (z.B. in ArticleQuote). Behalte das Original (z.B. Englisch), wenn du Studien von Deloitte, McKinsey oder Aussagen von CEOs zitierst. Das erhöht die Authentizität im B2B-Mittelstand.
- CONTENT PRUNING: Wenn das dir übergebene MDX bereits interaktive Komponenten (z.B. \`<YouTubeEmbed>\`) enthält, die **nicht** oder **nicht mehr** zum inhaltlichen Fokus passen (z.B. irrelevante Videos oder platzhalter-ähnliche Snippets), MUSST du diese radikal **entfernen**. Behalte keine halluzinierten oder unpassenden Medien, nur weil sie schon da waren.

STRICT MDX OUTPUT RULES:
1. ONLY use the exact components defined above.
2. For Social Media Embeds, you MUST ONLY use the EXACT IDs provided in the list above. Do NOT invent IDs.
3. If ANY verified social media posts are provided, you MUST integrate at least one naturally with a contextual sentence.
4. Keep the original content blocks and headings as much as possible, just improve flow.
5. FRONTMATTER SEO (Idea 4): Ich übergebe dir die KOMPLETTE Datei inklusive Markdown-Frontmatter (--- ... ---). Du MUSST das Frontmatter ebenfalls zurückgeben! Optimiere darin den \`title\` und die \`description\` maximal für B2B SEO. Lasse die anderen Keys im Frontmatter (date, tags) unangetastet.

CRITICAL GUIDELINES (NEVER BREAK THESE):
1. THE OUTPUT MUST START WITH YAML FRONTMATTER AND END WITH THE MDX BODY.
2. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
8. Do NOT hallucinate links or facts. Use only what is provided.`;
  }

  /**
   * Runs the autonomous validation layer on a compiled article.
   *
   * @returns Feedback text describing every problem found, or "" when the
   *   article passed all checks.
   */
  private async collectValidationErrors(content: string): Promise<string> {
    const memeTemplates = AiBlogPostOrchestrator.MEME_TEMPLATES;
    let errorFeedback = "";

    // 1. Validate Meme Templates
    const memeRegex = /<ArticleMeme[^>]+template=["']([^"']+)["'][^>]*>/g;
    const invalidMemes = [...content.matchAll(memeRegex)]
      .map((match) => match[1])
      .filter((template) => !memeTemplates.includes(template));
    if (invalidMemes.length > 0) {
      errorFeedback += `\n- You hallucinated invalid meme templates: ${invalidMemes.join(", ")}. You MUST ONLY use templates from this exact list: ${memeTemplates.join(", ")}. DO NOT INVENT TEMPLATES.\n`;
    }

    // 2. Validate Mermaid Syntax
    const mermaidBlocks = this.extractMermaidBlocks(content);
    if (mermaidBlocks.length > 0) {
      console.log("🔍 Validating Mermaid syntax in AI response...");
      // validateMermaidSyntax never rejects, so Promise.all cannot fail here.
      const results = await Promise.all(
        mermaidBlocks.map((block) => this.validateMermaidSyntax(block)),
      );
      results.forEach((result, index) => {
        if (!result.valid) {
          errorFeedback += `\n- Invalid Mermaid block:\n${mermaidBlocks[index]}\nError context: ${result.error}\n`;
        }
      });
    }

    return errorFeedback;
  }

  /**
   * Asks the model to rewrite the draft and retries with error feedback while
   * the result fails validation.
   *
   * @param task The optimization task; `task.content` is the original draft.
   * @param facts Researched facts to integrate.
   * @param competitorInsights Snippets from competing articles.
   * @param socialPosts Verified social posts; embeds with other IDs are stripped.
   * @param internalLinks Existing posts offered for internal linking.
   * @param retryCount Number of retries already used (normally left at 0).
   * @returns The cleaned article. After the last retry the most recent
   *   attempt is returned even if it still has validation errors.
   */
  private async compileArticle(
    task: OptimizationTask,
    facts: Fact[],
    competitorInsights: string[],
    socialPosts: SocialPost[],
    internalLinks: { title: string; slug: string }[],
    retryCount = 0,
  ): Promise<string> {
    const maxRetries = AiBlogPostOrchestrator.MAX_COMPILE_RETRIES;
    const memeTemplates = AiBlogPostOrchestrator.MEME_TEMPLATES;
    const forcedMeme =
      memeTemplates[Math.floor(Math.random() * memeTemplates.length)];

    const systemPrompt = this.buildSystemPrompt(
      task,
      facts,
      competitorInsights,
      socialPosts,
      internalLinks,
      forcedMeme,
    );

    // The original draft is kept separate from the retry feedback so that
    // (a) feedback from earlier attempts does not pile up inside
    // "Original Draft", and (b) the empty-response fallback can never return
    // the error wrapper as article text.
    const originalDraft = task.content;
    let userContent = originalDraft;

    for (let attempt = retryCount; ; attempt += 1) {
      const response = await this.openai.chat.completions.create({
        model: this.model,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: userContent },
        ],
      });

      const rawContent = this.cleanResponse(
        response.choices[0]?.message?.content || originalDraft,
        socialPosts,
      );

      // --- Autonomous Validation Layer ---
      const errorFeedback = await this.collectValidationErrors(rawContent);
      if (!errorFeedback) {
        return rawContent;
      }

      if (attempt >= maxRetries) {
        console.warn(
          `⚠️ Validation errors remain after ${maxRetries} retries; returning the last attempt as-is.`,
        );
        return rawContent;
      }

      console.log(
        `❌ Validation errors detected. Retrying compilation (Attempt ${attempt + 1}/${maxRetries})...`,
      );
      userContent = `CRITICAL ERROR IN PREVIOUS ATTEMPT:\nYour generated MDX contained the following errors that MUST be fixed:\n${errorFeedback}\n\nPlease rewrite the MDX and FIX these errors. Pay strict attention to the rules.\n\nOriginal Draft:\n${originalDraft}`;
    }
  }

  /**
   * Returns the trimmed inner text of every non-empty
   * `<Mermaid ...>...</Mermaid>` block. Opening tags may carry attributes;
   * self-closing tags are ignored.
   */
  private extractMermaidBlocks(content: string): string[] {
    // (?<!\/)> rejects a self-closing "<Mermaid ... />" as an opening tag.
    const blockPattern =
      /<Mermaid(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/Mermaid>/g;
    return [...content.matchAll(blockPattern)]
      .map((match) => match[1].trim())
      .filter((block) => block.length > 0);
  }

  /**
   * Asks the fast model whether a Mermaid graph is syntactically valid.
   *
   * @param graph Mermaid source without the surrounding tags.
   * @returns `{ valid: false, error }` when the model answers "INVALID ...";
   *   `{ valid: true }` otherwise, including when the check itself fails.
   */
  private async validateMermaidSyntax(
    graph: string,
  ): Promise<{ valid: boolean; error?: string }> {
    // Fast LLM validation to catch common syntax errors like unbalanced quotes or HTML entities
    try {
      const validationResponse = await this.openai.chat.completions.create({
        model: AiBlogPostOrchestrator.FAST_MODEL,
        messages: [
          {
            role: "system",
            content:
              'You are a strict Mermaid.js compiler. Analyze the given Mermaid syntax. If it is 100% valid and will render without exceptions, reply ONLY with "VALID". If it has syntax errors (e.g., HTML inside labels, unescaped quotes, unclosed brackets), reply ONLY with "INVALID" followed by a short explanation of the exact error.',
          },
          {
            role: "user",
            content: graph,
          },
        ],
      });

      const reply =
        validationResponse.choices[0]?.message?.content?.trim() || "VALID";
      if (reply.startsWith("INVALID")) {
        return { valid: false, error: reply };
      }
      return { valid: true };
    } catch (e) {
      console.error("Syntax validation LLM call failed, passing through:", e);
      return { valid: true }; // Fallback to passing if validator fails
    }
  }

  /**
   * Removes every `<tagName ... />` (or empty paired tag) whose id attribute
   * is not one of the verified IDs for `platform`. Both quote styles and
   * empty ids are handled.
   *
   * @param idAttributes Regex alternation of attribute names carrying the id.
   * @param describe Builds the log line for a stripped embed.
   */
  private stripUnverifiedEmbeds(
    content: string,
    socialPosts: SocialPost[],
    platform: SocialPost["platform"],
    tagName: string,
    idAttributes: string,
    describe: (id: string) => string,
  ): string {
    const knownIds = new Set(
      socialPosts.filter((p) => p.platform === platform).map((p) => p.embedId),
    );
    // \s before the attribute name keeps e.g. imageUrl="..." from matching url.
    // \1 requires the closing quote to match the opening one.
    const embedPattern = new RegExp(
      `<${tagName}[^>]*?\\s(?:${idAttributes})\\s*=\\s*(["'])((?:(?!\\1)[^>])*)\\1[^>]*?(?:\\/>|>\\s*<\\/${tagName}\\s*>)`,
      "gi",
    );
    return content.replace(
      embedPattern,
      (tag: string, _quote: string, id: string) => {
        if (knownIds.has(id)) return tag;
        console.log(describe(id));
        return "";
      },
    );
  }

  /**
   * Normalizes the raw model answer: removes a wrapping code fence and strips
   * social embeds whose IDs are not in the verified set.
   *
   * @param content Raw model output.
   * @param socialPosts Verified posts; only their embed IDs survive.
   */
  private cleanResponse(content: string, socialPosts: SocialPost[]): string {
    let cleaned = content.trim();

    // 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
    if (cleaned.startsWith("```")) {
      cleaned = cleaned
        .replace(/^```[a-zA-Z]*\n?/, "")
        .replace(/\n?```\s*$/, "");
    }

    // 2. Frontmatter is intentionally kept: the model is asked to return it.
    // Output without frontmatter is passed along unchanged.

    // 3. Strip any social embeds the AI hallucinated (IDs not in our extracted set)
    cleaned = this.stripUnverifiedEmbeds(
      cleaned,
      socialPosts,
      "youtube",
      "YouTubeEmbed",
      "videoId",
      (id) => `🛑 Stripped hallucinated YouTubeEmbed with videoId="${id}"`,
    );
    cleaned = this.stripUnverifiedEmbeds(
      cleaned,
      socialPosts,
      "twitter",
      "TwitterEmbed",
      "tweetId",
      (id) => `🛑 Stripped hallucinated TwitterEmbed with tweetId="${id}"`,
    );
    cleaned = this.stripUnverifiedEmbeds(
      cleaned,
      socialPosts,
      "linkedin",
      "LinkedInEmbed",
      "url|urn",
      (id) => `🛑 Stripped hallucinated LinkedInEmbed with id="${id}"`,
    );

    return cleaned;
  }
}