feat: content engine

This commit is contained in:
2026-02-21 19:08:06 +01:00
parent 3f1c37813a
commit a50b8d6393
32 changed files with 2816 additions and 189 deletions

View File

@@ -0,0 +1,974 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { MemeGenerator, MemeSuggestion } from "@mintel/meme-generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
/** A reusable MDX/JSX component the AI may reference when writing content. */
export interface ComponentDefinition {
  /** JSX tag name without angle brackets, e.g. "StatsGrid". */
  name: string;
  /** Short human-readable description of what the component renders. */
  description: string;
  /** Literal JSX snippet showing correct usage; passed verbatim into prompts. */
  usageExample: string;
}

/** Options for generating a brand-new blog post from scratch. */
export interface BlogPostOptions {
  topic: string;
  /** Writing tone; generatePost defaults this to "professional yet witty". */
  tone?: string;
  targetAudience?: string;
  includeMemes?: boolean;
  includeDiagrams?: boolean;
  /** When true (generatePost's default), research facts before drafting. */
  includeResearch?: boolean;
  availableComponents?: ComponentDefinition[];
}

/** Options for the additive optimization pipeline (optimizePost). */
export interface OptimizationOptions {
  /** Run fact research + social-post search and plan their insertions. */
  enhanceFacts?: boolean;
  addMemes?: boolean;
  addDiagrams?: boolean;
  availableComponents?: ComponentDefinition[];
  projectContext?: string;
  /** Target audience description for all AI prompts */
  targetAudience?: string;
  /** Tone/persona description for all AI prompts */
  tone?: string;
  /** Prompt for DALL-E 3 style generation */
  memeStylePrompt?: string;
  /** Path to the docs folder (e.g. apps/web/docs) for full persona/tone context */
  docsPath?: string;
}

/** Result bundle returned by both generatePost and optimizePost. */
export interface GeneratedPost {
  title: string;
  /** Final MDX body (no frontmatter). */
  content: string;
  /** All facts gathered during research, whether or not they were inserted. */
  research: Fact[];
  memes: MemeSuggestion[];
  /** Raw Mermaid source for every valid diagram generated. */
  diagrams: string[];
}

/** One planned content addition: new MDX placed after a section index. */
interface Insertion {
  /** Zero-based index into the section array; content goes after this section. */
  afterSection: number;
  /** Raw MDX/JSX to insert. */
  content: string;
}
// Model configuration: specialized models for different tasks
const MODELS = {
  // Structured JSON output: research planning, diagram/meme placement
  STRUCTURED: "google/gemini-2.5-flash",
  // Lightweight routing decisions
  ROUTING: "google/gemini-2.5-flash",
  // Long-form German prose, fact/component planning, final MDX rewrite
  CONTENT: "google/gemini-2.5-pro",
  // Mermaid diagram generation - User requested Pro
  DIAGRAM: "google/gemini-2.5-pro",
} as const;
/**
 * Parse a JSON string that may be wrapped in markdown code fences.
 * Some models wrap output in ```json ... ``` despite response_format
 * requesting plain JSON; the fences are stripped before parsing.
 * On parse failure a warning is logged and the fallback is returned.
 */
function safeParseJSON(raw: string, fallback: any = {}): any {
  let text = raw.trim();
  if (text.startsWith("```")) {
    // Drop an opening ``` / ```json fence and the trailing ``` fence.
    text = text.replace(/^```(?:json)?\s*\n?/, "").replace(/\n?```\s*$/, "");
  }
  try {
    return JSON.parse(text);
  } catch (err) {
    console.warn(
      "⚠️ Failed to parse JSON response, using fallback:",
      (err as Error).message,
    );
    return fallback;
  }
}
export class ContentGenerator {
  // OpenRouter-backed OpenAI client used for all chat completions.
  private openai: OpenAI;
  // Delegate for fact research and social-post discovery.
  private researchAgent: ResearchAgent;
  // Delegate for meme idea generation.
  private memeGenerator: MemeGenerator;

  /**
   * @param apiKey OpenRouter API key, shared by the chat client and both agents.
   */
  constructor(apiKey: string) {
    this.openai = new OpenAI({
      apiKey,
      // All requests go through OpenRouter, not api.openai.com.
      baseURL: "https://openrouter.ai/api/v1",
      defaultHeaders: {
        "HTTP-Referer": "https://mintel.me",
        "X-Title": "Mintel Content Engine",
      },
    });
    this.researchAgent = new ResearchAgent(apiKey);
    this.memeGenerator = new MemeGenerator(apiKey);
  }
// =========================================================================
// generatePost — for new posts (unchanged from original)
// =========================================================================
async generatePost(options: BlogPostOptions): Promise<GeneratedPost> {
const {
topic,
tone = "professional yet witty",
includeResearch = true,
availableComponents = [],
} = options;
console.log(`🚀 Starting content generation for: "${topic}"`);
let facts: Fact[] = [];
if (includeResearch) {
console.log("📚 Gathering research...");
facts = await this.researchAgent.researchTopic(topic);
}
console.log("📝 Creating outline...");
const outline = await this.createOutline(topic, facts, tone);
console.log("✍️ Drafting content...");
let content = await this.draftContent(
topic,
outline,
facts,
tone,
availableComponents,
);
const diagrams: string[] = [];
if (options.includeDiagrams) {
content = await this.processDiagramPlaceholders(content, diagrams);
}
const memes: MemeSuggestion[] = [];
if (options.includeMemes) {
const memeIdeas = await this.memeGenerator.generateMemeIdeas(
content.slice(0, 4000),
);
memes.push(...memeIdeas);
}
return { title: outline.title, content, research: facts, memes, diagrams };
}
  // =========================================================================
  // generateTldr — Creates a TL;DR block for the given content
  // =========================================================================
  /**
   * Generate a German "TL;DR" MDX block for the given article.
   * Only the first 3000 characters are sent as context.
   * @returns The raw MDX snippet, or "" when the model returns nothing.
   */
  async generateTldr(content: string): Promise<string> {
    // Cap context to keep the prompt small; a TL;DR only needs the opening.
    const context = content.slice(0, 3000);
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `Du bist ein kompromissloser Digital Architect.
Erstelle ein "TL;DR" für diesen Artikel.
REGELN:
- 3 knackige Bulletpoints
- TON: Sarkastisch, direkt, provokant ("Finger in die Wunde")
- Fokussiere auf den wirtschaftlichen Schaden von schlechter Tech
- Formatiere als MDX-Komponente:
<div className="my-8 p-6 bg-slate-50 border-l-4 border-blue-600 rounded-r-xl">
<H3>TL;DR: Warum Ihr Geld verbrennt</H3>
<ul className="list-disc pl-5 space-y-2 mb-0">
<li>Punkt 1</li>
<li>Punkt 2</li>
<li>Punkt 3</li>
</ul>
</div>`,
        },
        {
          role: "user",
          content: context,
        },
      ],
    });
    return response.choices[0].message.content?.trim() ?? "";
  }
  // =========================================================================
  // optimizePost — ADDITIVE architecture (never rewrites original content)
  // =========================================================================
  /**
   * Optimize an existing post without rewriting the author's text.
   * All enhancements are planned as Insertion objects against numbered
   * sections (facts, social embeds, components, diagrams, memes), applied
   * programmatically, then a single agentic rewrite pass polishes syntax
   * and deduplicates components.
   * @param content Original MDX body (without frontmatter).
   * @param options Flags selecting which enhancement steps run.
   */
  async optimizePost(
    content: string,
    options: OptimizationOptions,
  ): Promise<GeneratedPost> {
    console.log("🚀 Optimizing existing content (additive mode)...");
    // Load docs context if provided
    let docsContext = "";
    if (options.docsPath) {
      docsContext = await this.loadDocsContext(options.docsPath);
      console.log(`📖 Loaded ${docsContext.length} chars of docs context`);
    }
    // Project context + docs context are concatenated for every prompt.
    const fullContext = [options.projectContext || "", docsContext]
      .filter(Boolean)
      .join("\n\n---\n\n");
    // Split content into numbered sections for programmatic insertion
    const sections = this.splitIntoSections(content);
    console.log(`📋 Content has ${sections.length} sections`);
    const insertions: Insertion[] = [];
    const facts: Fact[] = [];
    const diagrams: string[] = [];
    const memes: MemeSuggestion[] = [];
    // Build a numbered content map for LLM reference (read-only)
    const sectionMap = this.buildSectionMap(sections);
    // ----- STEP 1: Research -----
    if (options.enhanceFacts) {
      console.log("🔍 Identifying research topics...");
      const researchTopics = await this.identifyResearchTopics(
        content,
        fullContext,
      );
      console.log(`📚 Researching: ${researchTopics.join(", ")}`);
      // Sequential on purpose: one research call at a time.
      for (const topic of researchTopics) {
        const topicFacts = await this.researchAgent.researchTopic(topic);
        facts.push(...topicFacts);
      }
      if (facts.length > 0) {
        console.log(`📝 Planning fact insertions for ${facts.length} facts...`);
        const factInsertions = await this.planFactInsertions(
          sectionMap,
          sections,
          facts,
          fullContext,
        );
        insertions.push(...factInsertions);
        console.log(`${factInsertions.length} fact enrichments planned`);
      }
      // ----- STEP 1.5: Social Media Search -----
      // NOTE(review): only the first 200 chars are used to find social posts;
      // the orchestrator uses 2000 — confirm this small window is intended.
      console.log("📱 Identifying real social media posts...");
      const socialPosts = await this.researchAgent.findSocialPosts(
        content.substring(0, 200),
      );
      if (socialPosts.length > 0) {
        console.log(
          `📝 Planning placement for ${socialPosts.length} social media posts...`,
        );
        const socialInsertions = await this.planSocialMediaInsertions(
          sectionMap,
          sections,
          socialPosts,
          fullContext,
        );
        insertions.push(...socialInsertions);
        console.log(
          `${socialInsertions.length} social embeddings planned`,
        );
      }
    }
    // ----- STEP 2: Component suggestions -----
    if (options.availableComponents && options.availableComponents.length > 0) {
      console.log("🧩 Planning component additions...");
      const componentInsertions = await this.planComponentInsertions(
        sectionMap,
        sections,
        options.availableComponents,
        fullContext,
      );
      insertions.push(...componentInsertions);
      console.log(
        `${componentInsertions.length} component additions planned`,
      );
    }
    // ----- STEP 3: Diagram generation -----
    if (options.addDiagrams) {
      console.log("📊 Planning diagrams...");
      const diagramPlans = await this.planDiagramInsertions(
        sectionMap,
        sections,
        fullContext,
      );
      for (const plan of diagramPlans) {
        const mermaidCode = await this.generateMermaid(plan.concept);
        // generateMermaid returns "" when the model output fails validation.
        if (!mermaidCode) {
          console.warn(` ⏭️ Skipping invalid diagram for: "${plan.concept}"`);
          continue;
        }
        diagrams.push(mermaidCode);
        // Slug the concept into a stable id for the <Mermaid> component.
        const diagramId = plan.concept
          .toLowerCase()
          .replace(/\s+/g, "-")
          .replace(/[^a-z0-9-]/g, "")
          .slice(0, 40);
        insertions.push({
          afterSection: plan.afterSection,
          content: `<div className="my-8">\n <Mermaid id="${diagramId}" title="${plan.concept}" showShare={true}>\n${mermaidCode}\n </Mermaid>\n</div>`,
        });
      }
      console.log(
        `${diagramPlans.length} diagrams planned, ${diagrams.length} valid`,
      );
    }
    // ----- STEP 4: Meme placement (memegen.link via ArticleMeme) -----
    if (options.addMemes) {
      console.log("✨ Generating meme ideas...");
      let memeIdeas = await this.memeGenerator.generateMemeIdeas(
        content.slice(0, 4000),
      );
      // User requested to explicitly limit memes to max 1 per page to prevent duplication
      if (memeIdeas.length > 1) {
        memeIdeas = [memeIdeas[0]];
      }
      memes.push(...memeIdeas);
      if (memeIdeas.length > 0) {
        console.log(
          `🎨 Planning meme placement for ${memeIdeas.length} memes...`,
        );
        const memePlacements = await this.planMemePlacements(
          sectionMap,
          sections,
          memeIdeas,
        );
        for (let i = 0; i < memeIdeas.length; i++) {
          const meme = memeIdeas[i];
          // Only place memes whose planned position is a valid section index.
          if (
            memePlacements[i] !== undefined &&
            memePlacements[i] >= 0 &&
            memePlacements[i] < sections.length
          ) {
            const captionsStr = meme.captions.join("|");
            insertions.push({
              afterSection: memePlacements[i],
              content: `<div className="my-8">\n <ArticleMeme template="${meme.template}" captions="${captionsStr}" />\n</div>`,
            });
          }
        }
        console.log(`${memeIdeas.length} memes placed`);
      }
    }
    // ----- Enforce visual spacing (no consecutive visualizations) -----
    // Mutates `insertions` in place (sorts and may shift positions).
    this.enforceVisualSpacing(insertions, sections);
    // ----- Apply all insertions to original content -----
    console.log(
      `\n🔧 Applying ${insertions.length} insertions to original content...`,
    );
    let optimizedContent = this.applyInsertions(sections, insertions);
    // ----- FINAL AGENTIC REWRITE (Replaces dumb regex scripts) -----
    console.log(
      `\n🧠 Agentic Rewrite: Polishing MDX, fixing syntax, and deduplicating...`,
    );
    const finalRewrite = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `You are an expert MDX Editor. Your task is to take a draft blog post and output the FINAL, error-free MDX code.
CRITICAL RULES:
1. DEDUPLICATION: Ensure there is MAX ONE <ArticleMeme> in the entire post. Remove any duplicates or outdated memes. Ensure there is MAX ONE TL;DR section. Ensure there are no duplicate components.
2. TEXT-TO-COMPONENT RATIO: Ensure there are at least 3-4 paragraphs of normal text between any two visual components (<Mermaid>, <ArticleMeme>, <StatsGrid>, <BoldNumber>, etc.). If they are clumped together, spread them out or delete the less important ones.
3. SYNTAX: Fix any broken Mermaid/MDX syntax (e.g. unclosed tags, bad quotes).
4. FIDELITY: Preserve the author's original German text, meaning, and tone. Smooth out transitions into the components.
5. NO HALLUCINATION: Do not invent new URLs or facts. Keep the data provided in the draft.
6. OUTPUT: Return ONLY the raw MDX content. No markdown code blocks (\`\`\`mdx), no preamble. Just the raw code file.`,
        },
        {
          role: "user",
          content: optimizedContent,
        },
      ],
    });
    // Fall back to the pre-rewrite content when the model returns nothing.
    optimizedContent =
      finalRewrite.choices[0].message.content?.trim() || optimizedContent;
    // Strip any residual markdown formatting fences just in case
    if (optimizedContent.startsWith("```")) {
      optimizedContent = optimizedContent
        .replace(/^```[a-zA-Z]*\n/, "")
        .replace(/\n```$/, "");
    }
    return {
      title: "Optimized Content",
      content: optimizedContent,
      research: facts,
      memes,
      diagrams,
    };
  }
// =========================================================================
// ADDITIVE HELPERS — these return JSON instructions, never rewrite content
// =========================================================================
private splitIntoSections(content: string): string[] {
// Split on double newlines (paragraph/block boundaries in MDX)
return content.split(/\n\n+/);
}
private applyInsertions(sections: string[], insertions: Insertion[]): string {
// Sort by section index DESCENDING to avoid index shifting
const sorted = [...insertions].sort(
(a, b) => b.afterSection - a.afterSection,
);
const result = [...sections];
for (const ins of sorted) {
const idx = Math.min(ins.afterSection + 1, result.length);
result.splice(idx, 0, ins.content);
}
return result.join("\n\n");
}
  /**
   * Enforce visual spacing between planned visual components so the post
   * never becomes a wall of visualizations.
   *
   * Mutates `insertions` in place: sorts by afterSection ascending, then
   * pushes any visual insertion that follows another visual insertion too
   * closely forward by MIN_VISUAL_GAP sections (clamped to the last
   * section index). Shifts cascade, since each comparison uses the
   * possibly-already-shifted previous position.
   */
  private enforceVisualSpacing(
    insertions: Insertion[],
    sections: string[],
  ): void {
    // JSX tag prefixes that count as "visual" components.
    const visualPatterns = [
      "<Mermaid",
      "<ArticleMeme",
      "<StatsGrid",
      "<StatsDisplay",
      "<BoldNumber",
      "<MetricBar",
      "<ComparisonRow",
      "<PremiumComparisonChart",
      "<DiagramFlow",
      "<DiagramPie",
      "<DiagramGantt",
      "<DiagramState",
      "<DiagramSequence",
      "<DiagramTimeline",
      "<Carousel",
      "<WebVitalsScore",
      "<WaterfallChart",
    ];
    const isVisual = (content: string) =>
      visualPatterns.some((p) => content.includes(p));
    // Sort by section ascending
    insertions.sort((a, b) => a.afterSection - b.afterSection);
    // Minimum gap of 10 sections between visual components (= ~6-8 text paragraphs)
    // User requested a better text-to-component ratio (not 1:1)
    const MIN_VISUAL_GAP = 10;
    for (let i = 1; i < insertions.length; i++) {
      // NOTE(review): only directly adjacent insertions are compared — a
      // non-visual insertion between two visuals skips the gap check for
      // that pair. Confirm this is the intended behavior.
      if (
        isVisual(insertions[i].content) &&
        isVisual(insertions[i - 1].content)
      ) {
        const gap = insertions[i].afterSection - insertions[i - 1].afterSection;
        if (gap < MIN_VISUAL_GAP) {
          const newPos = Math.min(
            insertions[i - 1].afterSection + MIN_VISUAL_GAP,
            sections.length - 1,
          );
          insertions[i].afterSection = newPos;
        }
      }
    }
  }
private buildSectionMap(sections: string[]): string {
return sections
.map((s, i) => {
const preview = s.trim().replace(/\n/g, " ").slice(0, 120);
return `[${i}] ${preview}${s.length > 120 ? "…" : ""}`;
})
.join("\n");
}
private async loadDocsContext(docsPath: string): Promise<string> {
try {
const files = await fs.readdir(docsPath);
const mdFiles = files.filter((f) => f.endsWith(".md")).sort();
const contents: string[] = [];
for (const file of mdFiles) {
const filePath = path.join(docsPath, file);
const text = await fs.readFile(filePath, "utf8");
contents.push(`=== ${file} ===\n${text.trim()}`);
}
return contents.join("\n\n");
} catch (e) {
console.warn(`⚠️ Could not load docs from ${docsPath}: ${e}`);
return "";
}
}
  // --- Fact insertion planning (MODELS.CONTENT — precise content understanding) ---
  /**
   * Ask the content model to plan NEW fact paragraphs (max 5) to insert
   * after specific sections. Existing content is never modified.
   * Only structurally valid, in-range insertions are returned.
   */
  private async planFactInsertions(
    sectionMap: string,
    sections: string[],
    facts: Fact[],
    context: string,
  ): Promise<Insertion[]> {
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `You enrich a German blog post by ADDING new paragraphs with researched facts.
RULES:
- Do NOT rewrite or modify any existing content
- Only produce NEW <Paragraph> blocks to INSERT after a specific section number
- Maximum 5 insertions (only the most impactful facts)
- Match the post's tone and style (see context below)
- Use the post's JSX components: <Paragraph>, <Marker> for emphasis
- Cite sources using ExternalLink: <ExternalLink href="URL">Source: Name</ExternalLink>
- Write in German, active voice, Ich-Form where appropriate
CONTEXT (tone, style, persona):
${context.slice(0, 3000)}
EXISTING SECTIONS (read-only — do NOT modify these):
${sectionMap}
FACTS TO INTEGRATE:
${factsText}
Return JSON:
{ "insertions": [{ "afterSection": 3, "content": "<Paragraph>\\n Fact-enriched paragraph text. [Source: Name]\\n</Paragraph>" }] }
Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"insertions": []}',
      { insertions: [] },
    );
    // Keep only well-formed insertions with in-range section indices.
    return (result.insertions || []).filter(
      (i: any) =>
        typeof i.afterSection === "number" &&
        i.afterSection >= 0 &&
        i.afterSection < sections.length &&
        typeof i.content === "string",
    );
  }
  // --- Social Media insertion planning ---
  /**
   * Plan 1-2 social-media embed insertions (YouTube/Twitter/LinkedIn) from
   * already-researched posts. Returns [] when no posts are supplied; only
   * structurally valid, in-range insertions are returned.
   */
  private async planSocialMediaInsertions(
    sectionMap: string,
    sections: string[],
    posts: SocialPost[],
    context: string,
  ): Promise<Insertion[]> {
    if (!posts || posts.length === 0) return [];
    const postsText = posts
      .map(
        (p, i) =>
          `[${i}] Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
      )
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `You enhance a German blog post by embedding relevant social media posts (YouTube, Twitter, LinkedIn).
RULES:
- Do NOT rewrite any existing content
- Return exactly 1 or 2 high-impact insertions
- Choose the best fitting post(s) from the provided list
- Use the correct component based on the platform:
- youtube -> <YouTubeEmbed videoId="ID" />
- twitter -> <TwitterEmbed tweetId="ID" theme="light" />
- linkedin -> <LinkedInEmbed urn="ID" />
- Add a 1-sentence intro paragraph above the embed to contextualize it.
CONTEXT:
${context.slice(0, 3000)}
SOCIAL POSTS AVAILABLE TO EMBED:
${postsText}
EXISTING SECTIONS:
${sectionMap}
Return JSON:
{ "insertions": [{ "afterSection": 4, "content": "<Paragraph>Wie Experten passend bemerken:</Paragraph>\\n\\n<TwitterEmbed tweetId=\\"123456\\" theme=\\"light\\" />" }] }
Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"insertions": []}',
      { insertions: [] },
    );
    // Keep only well-formed insertions with in-range section indices.
    return (result.insertions || []).filter(
      (i: any) =>
        typeof i.afterSection === "number" &&
        i.afterSection >= 0 &&
        i.afterSection < sections.length &&
        typeof i.content === "string",
    );
  }
// --- Component insertion planning (Claude Sonnet — understands JSX context) ---
private async planComponentInsertions(
sectionMap: string,
sections: string[],
components: ComponentDefinition[],
context: string,
): Promise<Insertion[]> {
const fullContent = sections.join("\n\n");
const componentsText = components
.map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
.join("\n\n");
const usedComponents = components
.filter((c) => fullContent.includes(`<${c.name}`))
.map((c) => c.name);
const response = await this.openai.chat.completions.create({
model: MODELS.CONTENT,
messages: [
{
role: "system",
content: `You enhance a German blog post by ADDING interactive UI components.
STRICT BALANCE RULES:
- Maximum 34 component additions total
- There MUST be at least 34 text paragraphs between any two visual components
- Visual components MUST NEVER appear directly after each other
- Each unique component type should only appear ONCE (e.g., only one WebVitalsScore, one WaterfallChart)
- Multiple MetricBar or ComparisonRow in sequence are OK (they form a group)
CONTENT RULES:
- Do NOT rewrite any existing content — only ADD new component blocks
- Do NOT add components already present: ${usedComponents.join(", ") || "none"}
- Statistics MUST have comparison context (before/after, competitor vs us) — never standalone numbers
- All BoldNumber components MUST include source and sourceUrl props
- All ArticleQuote components MUST include source and sourceUrl; add "(übersetzt)" if translated
- MetricBar value must be a real number > 0, not placeholder zeros
- Carousel items array must have at least 2 items with substantive content
- Use exact JSX syntax from the examples
CONTEXT:
${context.slice(0, 3000)}
EXISTING SECTIONS (read-only):
${sectionMap}
AVAILABLE COMPONENTS:
${componentsText}
Return JSON:
{ "insertions": [{ "afterSection": 5, "content": "<StatsDisplay value=\\"100\\" label=\\"PageSpeed Score\\" subtext=\\"Kein Kompromiss.\\" />" }] }
Return ONLY the JSON.`,
},
],
response_format: { type: "json_object" },
});
const result = safeParseJSON(
response.choices[0].message.content || '{"insertions": []}',
{ insertions: [] },
);
return (result.insertions || []).filter(
(i: any) =>
typeof i.afterSection === "number" &&
i.afterSection >= 0 &&
i.afterSection < sections.length &&
typeof i.content === "string",
);
}
  // --- Diagram planning (Gemini Flash — structured output) ---
  /**
   * Ask the structured model for up to 2 diagram concepts with target
   * positions. Only {afterSection, concept} plans are returned here; the
   * Mermaid source is generated later per concept by generateMermaid().
   */
  private async planDiagramInsertions(
    sectionMap: string,
    sections: string[],
    context: string,
  ): Promise<{ afterSection: number; concept: string }[]> {
    const fullContent = sections.join("\n\n");
    // When the post already has diagrams, ask only for NEW concepts.
    const hasDiagrams =
      fullContent.includes("<Mermaid") || fullContent.includes("<Diagram");
    const response = await this.openai.chat.completions.create({
      model: MODELS.STRUCTURED,
      messages: [
        {
          role: "system",
          content: `Analyze this German blog post and suggest 1-2 Mermaid diagrams.
${hasDiagrams ? "The post already has diagrams. Only suggest NEW concepts not already visualized." : ""}
${context.slice(0, 1500)}
SECTIONS:
${sectionMap}
Return JSON:
{ "diagrams": [{ "afterSection": 5, "concept": "Descriptive concept name" }] }
Maximum 2 diagrams. Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"diagrams": []}',
      { diagrams: [] },
    );
    // NOTE(review): unlike the other planners, `concept` is not validated to
    // be a string here; downstream only interpolates it into templates.
    return (result.diagrams || []).filter(
      (d: any) =>
        typeof d.afterSection === "number" &&
        d.afterSection >= 0 &&
        d.afterSection < sections.length,
    );
  }
  // --- Meme placement planning (Gemini Flash — structural positioning) ---
  /**
   * Ask the structured model for one section index per meme, in the same
   * order as the memes list. Returned indices are NOT validated here; the
   * caller (optimizePost) bounds-checks each placement before use.
   * Note: `sections` is currently unused — kept for signature parity with
   * the other planners.
   */
  private async planMemePlacements(
    sectionMap: string,
    sections: string[],
    memes: MemeSuggestion[],
  ): Promise<number[]> {
    const memesText = memes
      .map((m, i) => `${i}: "${m.template}" — ${m.captions.join(" / ")}`)
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.STRUCTURED,
      messages: [
        {
          role: "system",
          content: `Place ${memes.length} memes at appropriate positions in this blog post.
Rules: Space them out evenly, place between thematic sections, never at position 0 (the very start).
SECTIONS:
${sectionMap}
MEMES:
${memesText}
Return JSON: { "placements": [sectionNumber, sectionNumber, ...] }
One section number per meme, in the same order as the memes list. Return ONLY JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"placements": []}',
      { placements: [] },
    );
    return result.placements || [];
  }
  // =========================================================================
  // SHARED HELPERS
  // =========================================================================
  /**
   * Create a title + section outline for a new post.
   * @returns Parsed {title, sections}; empty values on parse failure.
   */
  private async createOutline(
    topic: string,
    facts: Fact[],
    tone: string,
  ): Promise<{ title: string; sections: string[] }> {
    const factsContext = facts
      .map((f) => `- ${f.statement} (${f.source})`)
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.STRUCTURED,
      messages: [
        {
          role: "system",
          content: `Create a blog post outline on "${topic}".
Tone: ${tone}.
Incorporating these facts:
${factsContext}
Return JSON: { "title": "Catchy Title", "sections": ["Introduction", "Section 1", "Conclusion"] }
Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    return safeParseJSON(
      response.choices[0].message.content || '{"title": "", "sections": []}',
      { title: "", sections: [] },
    );
  }
  /**
   * Draft the full post body from the outline, facts, and component list.
   * The model is told to mark diagram spots with
   * <!-- DIAGRAM_PLACEHOLDER: Concept --> for later expansion.
   * Note: `topic` is currently unused (the outline carries the title) —
   * kept for signature stability.
   */
  private async draftContent(
    topic: string,
    outline: { title: string; sections: string[] },
    facts: Fact[],
    tone: string,
    components: ComponentDefinition[],
  ): Promise<string> {
    const factsContext = facts
      .map((f) => `- ${f.statement} (Source: ${f.source})`)
      .join("\n");
    // Optional component catalog appended to the prompt when available.
    const componentsContext =
      components.length > 0
        ? `\n\nAvailable Components:\n` +
          components
            .map(
              (c) =>
                `- <${c.name}>: ${c.description}\n Example: ${c.usageExample}`,
            )
            .join("\n")
        : "";
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `Write a blog post based on this outline:
Title: ${outline.title}
Sections: ${outline.sections.join(", ")}
Tone: ${tone}.
Facts: ${factsContext}
${componentsContext}
Format as Markdown. Start with # H1.
For places where a diagram would help, insert: <!-- DIAGRAM_PLACEHOLDER: Concept Name -->
Return ONLY raw content.`,
        },
      ],
    });
    return response.choices[0].message.content || "";
  }
private async processDiagramPlaceholders(
content: string,
diagrams: string[],
): Promise<string> {
const matches = content.matchAll(/<!-- DIAGRAM_PLACEHOLDER: (.+?) -->/g);
let processedContent = content;
for (const match of Array.from(matches)) {
const concept = match[1];
const diagram = await this.generateMermaid(concept);
diagrams.push(diagram);
const diagramId = concept
.toLowerCase()
.replace(/\s+/g, "-")
.replace(/[^a-z0-9-]/g, "")
.slice(0, 40);
const mermaidJsx = `\n<div className="my-8">\n <Mermaid id="${diagramId}" title="${concept}" showShare={true}>\n${diagram}\n </Mermaid>\n</div>\n`;
processedContent = processedContent.replace(
`<!-- DIAGRAM_PLACEHOLDER: ${concept} -->`,
mermaidJsx,
);
}
return processedContent;
}
  /**
   * Generate Mermaid source for a concept and validate it.
   * @returns Raw Mermaid code, or "" when the output fails validation
   *          (wrong first keyword or suspiciously short).
   */
  private async generateMermaid(concept: string): Promise<string> {
    const response = await this.openai.chat.completions.create({
      model: MODELS.DIAGRAM,
      messages: [
        {
          role: "system",
          content: `Generate a Mermaid.js diagram for: "${concept}".
RULES:
- Use clear labels in German where appropriate
- Keep it EXTREMELY SIMPLE AND COMPACT: strictly max 3-4 nodes for a tiny visual footprint.
- Prefer vertical layouts (TD) over horizontal (LR) to prevent wide overflowing graphs.
- CRITICAL: Generate ONLY ONE single connected graph. Do NOT generate multiple independent graphs or isolated subgraphs in the same Mermaid block.
- No nested subgraphs. Keep instructions short.
- Use double-quoted labels for nodes: A["Label"]
- VERY CRITICAL: DO NOT use any HTML tags (no <br>, no <br/>, no <b>, etc).
- VERY CRITICAL: DO NOT use special characters like '&', '<', '>', or double-quotes inside the label strings. They break the mermaid parser in our environment.
- Return ONLY the raw mermaid code. No markdown blocks, no backticks.
- The first line MUST be a valid mermaid diagram type: graph, flowchart, sequenceDiagram, pie, gantt, stateDiagram, timeline`,
        },
      ],
    });
    // Strip code fences the model may add despite being told not to.
    const code =
      response.choices[0].message.content
        ?.replace(/```mermaid/g, "")
        .replace(/```/g, "")
        .trim() || "";
    // Validate: must start with a valid mermaid keyword
    // (accepts a slightly wider set than the prompt advertises).
    const validStarts = [
      "graph",
      "flowchart",
      "sequenceDiagram",
      "pie",
      "gantt",
      "stateDiagram",
      "timeline",
      "classDiagram",
      "erDiagram",
    ];
    // Keyword check is case-insensitive on the first line only.
    const firstLine = code.split("\n")[0]?.trim().toLowerCase() || "";
    const isValid = validStarts.some((keyword) =>
      firstLine.startsWith(keyword),
    );
    if (!isValid || code.length < 10) {
      console.warn(
        `⚠️ Mermaid: Invalid diagram generated for "${concept}", skipping`,
      );
      return "";
    }
    return code;
  }
  /**
   * Ask the structured model for 3 research-worthy topics/claims from the
   * post (first 4000 chars). Non-string topics are stringified so callers
   * always receive string[]. Rethrows on API failure after logging.
   */
  private async identifyResearchTopics(
    content: string,
    context: string,
  ): Promise<string[]> {
    try {
      // Debug logging around the API round-trip.
      console.log("Sending request to OpenRouter...");
      const response = await this.openai.chat.completions.create({
        model: MODELS.STRUCTURED,
        messages: [
          {
            role: "system",
            content: `Analyze the following blog post and identify 3 key topics or claims that would benefit from statistical data or external verification.
Return relevant, specific research queries (not too broad).
Context: ${context.slice(0, 1500)}
Return JSON: { "topics": ["topic 1", "topic 2", "topic 3"] }
Return ONLY the JSON.`,
          },
          {
            role: "user",
            content: content.slice(0, 4000),
          },
        ],
        response_format: { type: "json_object" },
      });
      console.log("Got response from OpenRouter");
      const parsed = safeParseJSON(
        response.choices[0].message.content || '{"topics": []}',
        { topics: [] },
      );
      // Guard against the model returning non-string topic entries.
      return (parsed.topics || []).map((t: any) =>
        typeof t === "string" ? t : JSON.stringify(t),
      );
    } catch (e: any) {
      console.error("Error in identifyResearchTopics:", e);
      throw e;
    }
  }
}

View File

@@ -0,0 +1,2 @@
// Barrel file: re-export the content engine's public API.
export * from "./generator";
export * from "./orchestrator";

View File

@@ -0,0 +1,350 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { ComponentDefinition } from "./generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
/** Constructor configuration for AiBlogPostOrchestrator. */
export interface OrchestratorConfig {
  /** OpenRouter API key used for both chat completions and research. */
  apiKey: string;
  /** Override for the rewrite model; defaults to "google/gemini-3-flash-preview". */
  model?: string;
}

/** One optimization job: the draft body plus all contextual inputs. */
export interface OptimizationTask {
  /** Post body (MDX, without frontmatter). */
  content: string;
  /** Persona/tone/guideline text injected into the rewrite prompt. */
  projectContext: string;
  availableComponents?: ComponentDefinition[];
  // NOTE(review): `instructions` is not consumed by any code visible here —
  // confirm where (or whether) it is used.
  instructions?: string;
}

/** Options for optimizeFile: context directory and available components. */
export interface OptimizeFileOptions {
  /** Directory of .md/.txt guideline files, resolved against process.cwd(). */
  contextDir: string;
  availableComponents?: ComponentDefinition[];
}
/**
 * Orchestrates the "research facts → find social posts → agentic rewrite"
 * pipeline for a single blog post file.
 */
export class AiBlogPostOrchestrator {
  // OpenRouter-backed client for chat completions.
  private openai: OpenAI;
  // Delegate for fact research and social-post discovery.
  private researchAgent: ResearchAgent;
  // Model slug used for the final article rewrite (compileArticle).
  private model: string;

  constructor(config: OrchestratorConfig) {
    // NOTE(review): default slug "google/gemini-3-flash-preview" differs from
    // the 2.5 models used elsewhere in this package — confirm it is valid.
    this.model = config.model || "google/gemini-3-flash-preview";
    this.openai = new OpenAI({
      apiKey: config.apiKey,
      // All requests go through OpenRouter, not api.openai.com.
      baseURL: "https://openrouter.ai/api/v1",
      defaultHeaders: {
        "HTTP-Referer": "https://mintel.me",
        "X-Title": "Mintel AI Blog Post Orchestrator",
      },
    });
    this.researchAgent = new ResearchAgent(config.apiKey);
  }
/**
* Reusable context loader. Loads all .md and .txt files from a directory into a single string.
*/
async loadContext(dirPath: string): Promise<string> {
try {
const resolvedDir = path.resolve(process.cwd(), dirPath);
const files = await fs.readdir(resolvedDir);
const textFiles = files.filter((f) => /\.(md|txt)$/i.test(f)).sort();
const contents: string[] = [];
for (const file of textFiles) {
const filePath = path.join(resolvedDir, file);
const text = await fs.readFile(filePath, "utf8");
contents.push(`=== ${file} ===\n${text.trim()}`);
}
return contents.join("\n\n");
} catch (e) {
console.warn(`⚠️ Could not load context from ${dirPath}: ${e}`);
return "";
}
}
  /**
   * Reads a file, extracts frontmatter, loads context, optimizes the body,
   * and writes it back in place. A backup of the original file content is
   * written to "<file>.bak" before overwriting.
   */
  async optimizeFile(
    targetFile: string,
    options: OptimizeFileOptions,
  ): Promise<void> {
    const absPath = path.isAbsolute(targetFile)
      ? targetFile
      : path.resolve(process.cwd(), targetFile);
    console.log(`📄 Processing File: ${path.basename(absPath)}`);
    const content = await fs.readFile(absPath, "utf8");
    // Frontmatter = leading "---\n...\n---" block; kept verbatim and
    // re-attached in front of the optimized body.
    const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
    const frontmatter = fmMatch ? fmMatch[0] : "";
    const body = fmMatch ? content.slice(frontmatter.length).trim() : content;
    console.log(`📖 Loading context from: ${options.contextDir}`);
    const projectContext = await this.loadContext(options.contextDir);
    if (!projectContext) {
      console.warn(
        "⚠️ No project context loaded. AI might miss specific guidelines.",
      );
    }
    const optimizedContent = await this.optimizeDocument({
      content: body,
      projectContext,
      availableComponents: options.availableComponents,
    });
    const finalOutput = frontmatter
      ? `${frontmatter}\n\n${optimizedContent}`
      : optimizedContent;
    await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
    await fs.writeFile(absPath, finalOutput);
    console.log(`✅ Saved optimized file to: ${absPath}`);
  }
  /**
   * Executes the 3-step optimization pipeline:
   * 1. Fakten recherchieren (research facts)
   * 2. Social Posts recherchieren (find social posts)
   * 3. AI anweisen daraus Artikel zu erstellen (compile the final article)
   * @returns The rewritten MDX body.
   */
  async optimizeDocument(task: OptimizationTask): Promise<string> {
    console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);
    // 1. Fakten recherchieren
    console.log("1⃣ Recherchiere Fakten...");
    const researchTopics = await this.identifyTopics(task.content);
    const facts: Fact[] = [];
    // Sequential on purpose: one research call at a time.
    for (const topic of researchTopics) {
      const topicFacts = await this.researchAgent.researchTopic(topic);
      facts.push(...topicFacts);
    }
    // 2. Social Posts recherchieren
    console.log(
      "2⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
    );
    // Use the first 2000 chars to find relevant social posts
    const socialPosts = await this.researchAgent.findSocialPosts(
      task.content.substring(0, 2000),
    );
    // 3. AI anweisen daraus Artikel zu erstellen
    console.log("3⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
    return await this.compileArticle(task, facts, socialPosts);
  }
private async identifyTopics(content: string): Promise<string[]> {
const response = await this.openai.chat.completions.create({
model: "google/gemini-2.5-flash", // fast structured model for topic extraction
messages: [
{
role: "system",
content: `Analyze the following blog post and identify 1 to 2 key topics or claims that would benefit from statistical data or external verification.
Return JSON: { "topics": ["topic 1", "topic 2"] }
Return ONLY the JSON.`,
},
{
role: "user",
content: content.slice(0, 4000),
},
],
response_format: { type: "json_object" },
});
try {
const raw = response.choices[0].message.content || '{"topics": []}';
const cleaned = raw
.trim()
.replace(/^```(?:json)?\s*\n?/, "")
.replace(/\n?```\s*$/, "");
const parsed = JSON.parse(cleaned);
return parsed.topics || [];
} catch (e) {
console.warn("⚠️ Failed to parse research topics", e);
return [];
}
}
  /**
   * Stage 3 of the optimization pipeline: asks the main model to rewrite the
   * draft into a final MDX body, weaving in the researched facts, social
   * embeds, and available MDX components. If the response contains <Mermaid>
   * blocks with invalid syntax, the method recurses (at most twice) with the
   * validator's error feedback prepended to the draft.
   *
   * @param task - Draft content plus project context / components / instructions.
   * @param facts - Researched facts (with sources) to weave into the article.
   * @param socialPosts - Social posts to embed via the MDX embed components.
   * @param retryCount - Recursion depth for Mermaid-validation retries (max 2).
   * @returns The cleaned MDX body (no frontmatter, no markdown wrappers).
   */
  private async compileArticle(
    task: OptimizationTask,
    facts: Fact[],
    socialPosts: SocialPost[],
    retryCount = 0,
  ): Promise<string> {
    // Render facts/posts/components as plain-text lists for the prompt.
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");
    const socialText = socialPosts
      .map(
        (p, i) =>
          `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
      )
      .join("\n");
    const componentsText = (task.availableComponents || [])
      .map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
      .join("\n\n");
    const response = await this.openai.chat.completions.create({
      model: this.model,
      messages: [
        {
          role: "system",
          content: `You are an expert MDX Editor and Digital Architect.
YOUR TASK:
Take the given draft blog post and rewrite/enhance it into a final, error-free MDX file. Maintain the author's original German text, meaning, and tone, but enrich it gracefully.
CONTEXT & RULES:
Project Context / Tone:
${task.projectContext}
Facts to weave in:
${factsText || "None"}
Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
${socialText || "None"}
Available MDX Components you can use contextually:
${componentsText || "None"}
Special Instructions from User:
${task.instructions || "None"}
BLOG POST BEST PRACTICES (MANDATORY):
- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
- Füge ein sauberes '<TableOfContents />' ein.
- Verwende unsere Komponenten stilvoll für Visualisierungen.
- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).
CRITICAL GUIDELINES (NEVER BREAK THESE):
1. ONLY return the content for the BODY of the MDX file.
2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
8. Do NOT hallucinate links or facts. Use only what is provided.`,
        },
        {
          role: "user",
          content: task.content,
        },
      ],
    });
    // Fall back to the unmodified draft if the model returned no content.
    let rawContent = response.choices[0].message.content || task.content;
    rawContent = this.cleanResponse(rawContent);
    // Validation layer: check Mermaid syntax in the response; on failure,
    // retry compilation (at most 2 attempts) with the errors as feedback.
    if (retryCount < 2 && rawContent.includes("<Mermaid>")) {
      console.log("🔍 Validating Mermaid syntax in AI response...");
      const mermaidBlocks = this.extractMermaidBlocks(rawContent);
      let hasError = false;
      let errorFeedback = "";
      for (const block of mermaidBlocks) {
        const validationResult = await this.validateMermaidSyntax(block);
        if (!validationResult.valid) {
          hasError = true;
          errorFeedback += `\nInvalid Mermaid block:\n${block}\nError context: ${validationResult.error}\n\n`;
        }
      }
      if (hasError) {
        console.log(
          `❌ Invalid Mermaid syntax detected. Retrying compilation (Attempt ${retryCount + 1}/2)...`,
        );
        // Recurse with the validator errors prepended so the model can fix
        // them; the original draft is included again as the rewrite basis.
        return this.compileArticle(
          {
            ...task,
            content: `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${task.content}`,
          },
          facts,
          socialPosts,
          retryCount + 1,
        );
      }
    }
    return rawContent;
  }
private extractMermaidBlocks(content: string): string[] {
const blocks: string[] = [];
// Regex to match <Mermaid>...</Mermaid> blocks across multiple lines
const regex = /<Mermaid>([\s\S]*?)<\/Mermaid>/g;
let match;
while ((match = regex.exec(content)) !== null) {
if (match[1]) {
blocks.push(match[1].trim());
}
}
return blocks;
}
private async validateMermaidSyntax(
graph: string,
): Promise<{ valid: boolean; error?: string }> {
// Fast LLM validation to catch common syntax errors like unbalanced quotes or HTML entities
try {
const validationResponse = await this.openai.chat.completions.create({
model: "google/gemini-3-flash-preview", // Switch from gpt-4o-mini to user requested model
messages: [
{
role: "system",
content:
'You are a strict Mermaid.js compiler. Analyze the given Mermaid syntax. If it is 100% valid and will render without exceptions, reply ONLY with "VALID". If it has syntax errors (e.g., HTML inside labels, unescaped quotes, unclosed brackets), reply ONLY with "INVALID" followed by a short explanation of the exact error.',
},
{
role: "user",
content: graph,
},
],
});
const reply =
validationResponse.choices[0].message.content?.trim() || "VALID";
if (reply.startsWith("INVALID")) {
return { valid: false, error: reply };
}
return { valid: true };
} catch (e) {
console.error("Syntax validation LLM call failed, passing through:", e);
return { valid: true }; // Fallback to passing if validator fails
}
}
/**
* Post-processing to ensure the AI didn't include "help" text,
* duplicate frontmatter, or markdown wrappers.
*/
private cleanResponse(content: string): string {
let cleaned = content.trim();
// 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
if (cleaned.startsWith("```")) {
cleaned = cleaned
.replace(/^```[a-zA-Z]*\n?/, "")
.replace(/\n?```\s*$/, "");
}
// 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it)
// Look for the --- delimiters and remove the block if it exists
const fmRegex = /^---\s*\n([\s\S]*?)\n---\s*\n?/;
const match = cleaned.match(fmRegex);
if (match) {
console.log(
"♻️ Stripping redundant frontmatter detected in AI response...",
);
cleaned = cleaned.replace(fmRegex, "").trim();
}
return cleaned;
}
}