feat: content engine

2026-02-21 19:08:06 +01:00
parent 3f1c37813a
commit a50b8d6393
32 changed files with 2816 additions and 189 deletions
--- a/packages/content-engine/src/orchestrator.ts
+++ b/packages/content-engine/src/orchestrator.ts
@@ -0,0 +1,350 @@
+import OpenAI from "openai";
+import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
+import { ComponentDefinition } from "./generator";
+import * as fs from "node:fs/promises";
+import * as path from "node:path";
+
+/** Construction options for AiBlogPostOrchestrator. */
+export interface OrchestratorConfig {
+  /** API key handed to both the OpenRouter-backed OpenAI client and the ResearchAgent. */
+  apiKey: string;
+  /** Model id used for the article rewrite; when omitted, "google/gemini-3-flash-preview" is used. */
+  model?: string;
+}
+
+/** Input for a single run of the optimization pipeline. */
+export interface OptimizationTask {
+  /** Draft body of the blog post; sent to the model as the user message. */
+  content: string;
+  /** Project/tone guidelines interpolated into the system prompt. */
+  projectContext: string;
+  /** MDX components described to the model (name, description, usage example). */
+  availableComponents?: ComponentDefinition[];
+  /** Extra instructions from the user; the prompt shows "None" when omitted. */
+  instructions?: string;
+}
+
+/** Options for AiBlogPostOrchestrator.optimizeFile. */
+export interface OptimizeFileOptions {
+  /** Directory whose .md/.txt files are loaded as project context; resolved against process.cwd(). */
+  contextDir: string;
+  /** Forwarded unchanged as OptimizationTask.availableComponents. */
+  availableComponents?: ComponentDefinition[];
+}
+
+/**
+ * Rewrites a German blog post draft into a final MDX body using LLM calls
+ * routed through OpenRouter.
+ *
+ * The pipeline identifies research topics, collects facts and social posts via
+ * the research agent, and then asks the configured model to compile the
+ * article. Generated `<Mermaid>` blocks are checked by a second LLM call and
+ * the compilation is retried (at most twice) when a block is reported invalid.
+ */
+export class AiBlogPostOrchestrator {
+  private openai: OpenAI;
+  private researchAgent: ResearchAgent;
+  private model: string;
+
+  /**
+   * @param config API key (used for both the OpenAI client and the research
+   *   agent) and an optional model id for the article rewrite.
+   */
+  constructor(config: OrchestratorConfig) {
+    this.model = config.model || "google/gemini-3-flash-preview";
+    this.openai = new OpenAI({
+      apiKey: config.apiKey,
+      baseURL: "https://openrouter.ai/api/v1",
+      defaultHeaders: {
+        "HTTP-Referer": "https://mintel.me",
+        "X-Title": "Mintel AI Blog Post Orchestrator",
+      },
+    });
+    this.researchAgent = new ResearchAgent(config.apiKey);
+  }
+
+  /**
+   * Reusable context loader. Concatenates all `.md` and `.txt` files of a
+   * directory (sorted by file name) into one string, each prefixed with a
+   * `=== <file> ===` header.
+   *
+   * @param dirPath Directory to read; resolved against `process.cwd()`.
+   * @returns The combined text, or `""` when the directory or a file cannot be
+   *   read (a warning is logged instead of throwing).
+   */
+  async loadContext(dirPath: string): Promise<string> {
+    try {
+      const resolvedDir = path.resolve(process.cwd(), dirPath);
+      const files = await fs.readdir(resolvedDir);
+      const textFiles = files.filter((f) => /\.(md|txt)$/i.test(f)).sort();
+      const contents: string[] = [];
+
+      for (const file of textFiles) {
+        const filePath = path.join(resolvedDir, file);
+        const text = await fs.readFile(filePath, "utf8");
+        contents.push(`=== ${file} ===\n${text.trim()}`);
+      }
+
+      return contents.join("\n\n");
+    } catch (e) {
+      console.warn(`⚠️ Could not load context from ${dirPath}: ${e}`);
+      return "";
+    }
+  }
+
+  /**
+   * Reads a file, splits off its frontmatter, loads the project context,
+   * optimizes the body and writes the result back in place.
+   *
+   * The previous file content is saved next to the target as `<file>.bak`;
+   * an existing backup from an earlier run is overwritten.
+   *
+   * @param targetFile Path of the file to rewrite; relative paths resolve
+   *   against `process.cwd()`.
+   * @param options Context directory and optional component definitions.
+   */
+  async optimizeFile(
+    targetFile: string,
+    options: OptimizeFileOptions,
+  ): Promise<void> {
+    const absPath = path.isAbsolute(targetFile)
+      ? targetFile
+      : path.resolve(process.cwd(), targetFile);
+    console.log(`📄 Processing File: ${path.basename(absPath)}`);
+
+    const content = await fs.readFile(absPath, "utf8");
+
+    // The frontmatter block (delimiters included) is kept verbatim and
+    // re-attached after the rewrite; only the body is sent to the model.
+    const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
+    const frontmatter = fmMatch ? fmMatch[0] : "";
+    const body = fmMatch ? content.slice(frontmatter.length).trim() : content;
+
+    console.log(`📖 Loading context from: ${options.contextDir}`);
+    const projectContext = await this.loadContext(options.contextDir);
+    if (!projectContext) {
+      console.warn(
+        "⚠️ No project context loaded. AI might miss specific guidelines.",
+      );
+    }
+
+    const optimizedContent = await this.optimizeDocument({
+      content: body,
+      projectContext,
+      availableComponents: options.availableComponents,
+    });
+
+    const finalOutput = frontmatter
+      ? `${frontmatter}\n\n${optimizedContent}`
+      : optimizedContent;
+
+    await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
+    await fs.writeFile(absPath, finalOutput);
+    console.log(`✅ Saved optimized file to: ${absPath}`);
+  }
+
+  /**
+   * Executes the 3-step optimization pipeline:
+   * 1. Research facts
+   * 2. Research social posts
+   * 3. Instruct the AI to compile the article from them
+   *
+   * @param task Draft, project context and optional components/instructions.
+   * @returns The rewritten MDX body (without frontmatter).
+   */
+  async optimizeDocument(task: OptimizationTask): Promise<string> {
+    console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);
+
+    // 1. Research facts
+    console.log("1️⃣ Recherchiere Fakten...");
+    const researchTopics = await this.identifyTopics(task.content);
+    const facts: Fact[] = [];
+    for (const topic of researchTopics) {
+      const topicFacts = await this.researchAgent.researchTopic(topic);
+      facts.push(...topicFacts);
+    }
+
+    // 2. Research social posts
+    console.log(
+      "2️⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
+    );
+    // Use the first 2000 chars to find relevant social posts
+    const socialPosts = await this.researchAgent.findSocialPosts(
+      task.content.substring(0, 2000),
+    );
+
+    // 3. Instruct the AI to compile the article from the research
+    console.log("3️⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
+    return await this.compileArticle(task, facts, socialPosts);
+  }
+
+  /**
+   * Asks a fast model for 1-2 topics of the draft worth researching.
+   * Only the first 4000 characters of the draft are sent.
+   *
+   * @returns The topic strings; `[]` when the reply cannot be parsed.
+   */
+  private async identifyTopics(content: string): Promise<string[]> {
+    const response = await this.openai.chat.completions.create({
+      model: "google/gemini-2.5-flash", // fast structured model for topic extraction
+      messages: [
+        {
+          role: "system",
+          content: `Analyze the following blog post and identify 1 to 2 key topics or claims that would benefit from statistical data or external verification.
+Return JSON: { "topics": ["topic 1", "topic 2"] }
+Return ONLY the JSON.`,
+        },
+        {
+          role: "user",
+          content: content.slice(0, 4000),
+        },
+      ],
+      response_format: { type: "json_object" },
+    });
+
+    return this.parseTopics(response.choices[0]?.message?.content);
+  }
+
+  /**
+   * Parses the topic-extraction reply into a list of topic strings.
+   *
+   * The reply is model output, so its shape is validated: anything other than
+   * an object with a `topics` array yields `[]`, and entries that are not
+   * non-blank strings are dropped. Without this, a string-valued `topics`
+   * would be iterated character by character by the caller.
+   *
+   * @param raw Raw message content; an optional code fence is tolerated.
+   */
+  private parseTopics(raw: string | null | undefined): string[] {
+    try {
+      const cleaned = (raw || '{"topics": []}')
+        .trim()
+        .replace(/^```(?:json)?\s*\n?/, "")
+        .replace(/\n?```\s*$/, "");
+      const parsed: unknown = JSON.parse(cleaned);
+      const topics =
+        typeof parsed === "object" && parsed !== null
+          ? (parsed as { topics?: unknown }).topics
+          : undefined;
+      if (!Array.isArray(topics)) {
+        return [];
+      }
+      return topics.filter(
+        (topic): topic is string =>
+          typeof topic === "string" && topic.trim() !== "",
+      );
+    } catch (e) {
+      console.warn("⚠️ Failed to parse research topics", e);
+      return [];
+    }
+  }
+
+  /**
+   * Asks the configured model to rewrite the draft and validates any
+   * `<Mermaid>` blocks in the answer.
+   *
+   * When a block is reported invalid, the request is repeated with the error
+   * feedback plus the original draft. Attempts are numbered from `retryCount`;
+   * once the counter reaches 2 the answer is returned without validation.
+   * An empty model answer always falls back to the original draft.
+   *
+   * @param task Draft and prompt inputs.
+   * @param facts Researched facts listed in the prompt.
+   * @param socialPosts Social posts offered for embedding.
+   * @param retryCount Number of retries already spent (default 0).
+   * @returns The cleaned MDX body.
+   */
+  private async compileArticle(
+    task: OptimizationTask,
+    facts: Fact[],
+    socialPosts: SocialPost[],
+    retryCount = 0,
+  ): Promise<string> {
+    const systemPrompt = this.buildSystemPrompt(task, facts, socialPosts);
+
+    // Keep the untouched draft: it is the fallback for empty answers and the
+    // "Original Draft" of every retry prompt, so feedback never nests and can
+    // never be returned as the article.
+    const originalDraft = task.content;
+    let userMessage = originalDraft;
+
+    for (let attempt = retryCount; ; attempt++) {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        messages: [
+          { role: "system", content: systemPrompt },
+          { role: "user", content: userMessage },
+        ],
+      });
+
+      const article = this.cleanResponse(
+        response.choices[0]?.message?.content || originalDraft,
+      );
+
+      // Validation Layer: Check Mermaid syntax (skipped once retries are spent)
+      if (attempt >= 2 || !article.includes("<Mermaid>")) {
+        return article;
+      }
+
+      console.log("🔍 Validating Mermaid syntax in AI response...");
+      let errorFeedback = "";
+      for (const block of this.extractMermaidBlocks(article)) {
+        const validationResult = await this.validateMermaidSyntax(block);
+        if (!validationResult.valid) {
+          errorFeedback += `\nInvalid Mermaid block:\n${block}\nError context: ${validationResult.error}\n\n`;
+        }
+      }
+
+      if (!errorFeedback) {
+        return article;
+      }
+
+      console.log(
+        `❌ Invalid Mermaid syntax detected. Retrying compilation (Attempt ${attempt + 1}/2)...`,
+      );
+      userMessage = `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${originalDraft}`;
+    }
+  }
+
+  /**
+   * Builds the system prompt for the article rewrite from the project
+   * context, researched facts, social posts, components and instructions.
+   * Empty sections are rendered as "None".
+   */
+  private buildSystemPrompt(
+    task: OptimizationTask,
+    facts: Fact[],
+    socialPosts: SocialPost[],
+  ): string {
+    const factsText = facts
+      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
+      .join("\n");
+
+    const socialText = socialPosts
+      .map((p) => `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`)
+      .join("\n");
+
+    const componentsText = (task.availableComponents || [])
+      .map((c) => `<${c.name}>: ${c.description}\n  Example: ${c.usageExample}`)
+      .join("\n\n");
+
+    return `You are an expert MDX Editor and Digital Architect. 
+
+YOUR TASK:
+Take the given draft blog post and rewrite/enhance it into a final, error-free MDX file. Maintain the author's original German text, meaning, and tone, but enrich it gracefully.
+
+CONTEXT & RULES:
+Project Context / Tone:
+${task.projectContext}
+
+Facts to weave in:
+${factsText || "None"}
+
+Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
+${socialText || "None"}
+
+Available MDX Components you can use contextually:
+${componentsText || "None"}
+
+Special Instructions from User:
+${task.instructions || "None"}
+
+BLOG POST BEST PRACTICES (MANDATORY):
+- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
+- Füge ein sauberes '<TableOfContents />' ein.
+- Verwende unsere Komponenten stilvoll für Visualisierungen.
+- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
+- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).
+
+CRITICAL GUIDELINES (NEVER BREAK THESE):
+1. ONLY return the content for the BODY of the MDX file.
+2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
+3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
+4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
+5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
+6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
+7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
+8. Do NOT hallucinate links or facts. Use only what is provided.`;
+  }
+
+  /**
+   * Returns the trimmed inner text of every `<Mermaid>...</Mermaid>` block
+   * (attribute-less opening tag only). Blocks that are empty after trimming
+   * are skipped.
+   */
+  private extractMermaidBlocks(content: string): string[] {
+    // Matches <Mermaid>...</Mermaid> blocks across multiple lines
+    const regex = /<Mermaid>([\s\S]*?)<\/Mermaid>/g;
+    return [...content.matchAll(regex)]
+      .map((match) => (match[1] ?? "").trim())
+      .filter((block) => block !== "");
+  }
+
+  /**
+   * Asks an LLM to act as a Mermaid syntax checker for one diagram.
+   *
+   * This is a heuristic check, not a real parser. A reply that starts with
+   * "INVALID" (any letter case, leading punctuation ignored) marks the block
+   * invalid; every other reply, an empty reply, or a failed request counts as
+   * valid so that the validator can never block the pipeline.
+   *
+   * @param graph Mermaid source without the surrounding tag.
+   */
+  private async validateMermaidSyntax(
+    graph: string,
+  ): Promise<{ valid: boolean; error?: string }> {
+    // Fast LLM validation to catch common syntax errors like unbalanced quotes or HTML entities
+    try {
+      const validationResponse = await this.openai.chat.completions.create({
+        model: "google/gemini-3-flash-preview", // fixed validator model, independent of this.model
+        messages: [
+          {
+            role: "system",
+            content:
+              'You are a strict Mermaid.js compiler. Analyze the given Mermaid syntax. If it is 100% valid and will render without exceptions, reply ONLY with "VALID". If it has syntax errors (e.g., HTML inside labels, unescaped quotes, unclosed brackets), reply ONLY with "INVALID" followed by a short explanation of the exact error.',
+          },
+          {
+            role: "user",
+            content: graph,
+          },
+        ],
+      });
+
+      const reply =
+        validationResponse.choices[0]?.message?.content?.trim() || "VALID";
+      // Models do not always follow the exact casing or add markdown emphasis.
+      if (/^\W*INVALID\b/i.test(reply)) {
+        return { valid: false, error: reply };
+      }
+      return { valid: true };
+    } catch (e) {
+      console.error("Syntax validation LLM call failed, passing through:", e);
+      return { valid: true }; // Fallback to passing if validator fails
+    }
+  }
+
+  /**
+   * Post-processing to ensure the AI didn't include markdown wrappers or a
+   * duplicate frontmatter block.
+   *
+   * @param content Raw model answer.
+   * @returns The trimmed body without a surrounding code fence and without a
+   *   leading frontmatter block.
+   */
+  private cleanResponse(content: string): string {
+    let cleaned = content.trim();
+
+    // 1. Strip Markdown Wrappers (e.g. ```mdx ... ```), then re-trim so a
+    //    frontmatter block directly inside the fence is still detected.
+    if (cleaned.startsWith("```")) {
+      cleaned = cleaned
+        .replace(/^```[a-zA-Z]*\n?/, "")
+        .replace(/\n?```\s*$/, "")
+        .trim();
+    }
+
+    // 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it).
+    //    Both delimiters must sit on their own line, and the block must look
+    //    like metadata; otherwise a body that opens with a `---` thematic
+    //    break would lose everything up to the next `---`.
+    const fmRegex = /^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/;
+    const match = fmRegex.exec(cleaned);
+    if (match && this.looksLikeFrontmatter(match[1] ?? "")) {
+      console.log(
+        "♻️  Stripping redundant frontmatter detected in AI response...",
+      );
+      cleaned = cleaned.slice(match[0].length).trim();
+    }
+
+    return cleaned;
+  }
+
+  /**
+   * Heuristic: a block between `---` lines is treated as frontmatter when it
+   * is blank or its first non-empty line is a `key:` entry.
+   */
+  private looksLikeFrontmatter(block: string): boolean {
+    const firstLine = block.split(/\r?\n/).find((line) => line.trim() !== "");
+    if (firstLine === undefined) {
+      return true;
+    }
+    return /^(?:"[^"]+"|'[^']+'|[A-Za-z_][\w.-]*)[ \t]*:(?:\s|$)/.test(
+      firstLine,
+    );
+  }
+}