feat: content engine

This commit is contained in:
2026-02-21 19:08:06 +01:00
parent 3f1c37813a
commit a50b8d6393
32 changed files with 2816 additions and 189 deletions

View File

@@ -0,0 +1,350 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { ComponentDefinition } from "./generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
export interface OrchestratorConfig {
apiKey: string;
model?: string;
}
export interface OptimizationTask {
content: string;
projectContext: string;
availableComponents?: ComponentDefinition[];
instructions?: string;
}
export interface OptimizeFileOptions {
contextDir: string;
availableComponents?: ComponentDefinition[];
}
export class AiBlogPostOrchestrator {
private openai: OpenAI;
private researchAgent: ResearchAgent;
private model: string;
constructor(config: OrchestratorConfig) {
this.model = config.model || "google/gemini-3-flash-preview";
this.openai = new OpenAI({
apiKey: config.apiKey,
baseURL: "https://openrouter.ai/api/v1",
defaultHeaders: {
"HTTP-Referer": "https://mintel.me",
"X-Title": "Mintel AI Blog Post Orchestrator",
},
});
this.researchAgent = new ResearchAgent(config.apiKey);
}
/**
* Reusable context loader. Loads all .md and .txt files from a directory into a single string.
*/
async loadContext(dirPath: string): Promise<string> {
try {
const resolvedDir = path.resolve(process.cwd(), dirPath);
const files = await fs.readdir(resolvedDir);
const textFiles = files.filter((f) => /\.(md|txt)$/i.test(f)).sort();
const contents: string[] = [];
for (const file of textFiles) {
const filePath = path.join(resolvedDir, file);
const text = await fs.readFile(filePath, "utf8");
contents.push(`=== ${file} ===\n${text.trim()}`);
}
return contents.join("\n\n");
} catch (e) {
console.warn(`⚠️ Could not load context from ${dirPath}: ${e}`);
return "";
}
}
/**
* Reads a file, extracts frontmatter, loads context, optimizes body, and writes it back.
*/
async optimizeFile(
targetFile: string,
options: OptimizeFileOptions,
): Promise<void> {
const absPath = path.isAbsolute(targetFile)
? targetFile
: path.resolve(process.cwd(), targetFile);
console.log(`📄 Processing File: ${path.basename(absPath)}`);
const content = await fs.readFile(absPath, "utf8");
const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
const frontmatter = fmMatch ? fmMatch[0] : "";
const body = fmMatch ? content.slice(frontmatter.length).trim() : content;
console.log(`📖 Loading context from: ${options.contextDir}`);
const projectContext = await this.loadContext(options.contextDir);
if (!projectContext) {
console.warn(
"⚠️ No project context loaded. AI might miss specific guidelines.",
);
}
const optimizedContent = await this.optimizeDocument({
content: body,
projectContext,
availableComponents: options.availableComponents,
});
const finalOutput = frontmatter
? `${frontmatter}\n\n${optimizedContent}`
: optimizedContent;
await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
await fs.writeFile(absPath, finalOutput);
console.log(`✅ Saved optimized file to: ${absPath}`);
}
/**
* Executes the 3-step optimization pipeline:
* 1. Fakten recherchieren
* 2. Social Posts recherchieren
* 3. AI anweisen daraus Artikel zu erstellen
*/
async optimizeDocument(task: OptimizationTask): Promise<string> {
console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);
// 1. Fakten recherchieren
console.log("1⃣ Recherchiere Fakten...");
const researchTopics = await this.identifyTopics(task.content);
const facts: Fact[] = [];
for (const topic of researchTopics) {
const topicFacts = await this.researchAgent.researchTopic(topic);
facts.push(...topicFacts);
}
// 2. Social Posts recherchieren
console.log(
"2⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
);
// Use the first 2000 chars to find relevant social posts
const socialPosts = await this.researchAgent.findSocialPosts(
task.content.substring(0, 2000),
);
// 3. AI anweisen daraus Artikel zu erstellen
console.log("3⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
return await this.compileArticle(task, facts, socialPosts);
}
private async identifyTopics(content: string): Promise<string[]> {
const response = await this.openai.chat.completions.create({
model: "google/gemini-2.5-flash", // fast structured model for topic extraction
messages: [
{
role: "system",
content: `Analyze the following blog post and identify 1 to 2 key topics or claims that would benefit from statistical data or external verification.
Return JSON: { "topics": ["topic 1", "topic 2"] }
Return ONLY the JSON.`,
},
{
role: "user",
content: content.slice(0, 4000),
},
],
response_format: { type: "json_object" },
});
try {
const raw = response.choices[0].message.content || '{"topics": []}';
const cleaned = raw
.trim()
.replace(/^```(?:json)?\s*\n?/, "")
.replace(/\n?```\s*$/, "");
const parsed = JSON.parse(cleaned);
return parsed.topics || [];
} catch (e) {
console.warn("⚠️ Failed to parse research topics", e);
return [];
}
}
private async compileArticle(
task: OptimizationTask,
facts: Fact[],
socialPosts: SocialPost[],
retryCount = 0,
): Promise<string> {
const factsText = facts
.map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
.join("\n");
const socialText = socialPosts
.map(
(p, i) =>
`Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
)
.join("\n");
const componentsText = (task.availableComponents || [])
.map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
.join("\n\n");
const response = await this.openai.chat.completions.create({
model: this.model,
messages: [
{
role: "system",
content: `You are an expert MDX Editor and Digital Architect.
YOUR TASK:
Take the given draft blog post and rewrite/enhance it into a final, error-free MDX file. Maintain the author's original German text, meaning, and tone, but enrich it gracefully.
CONTEXT & RULES:
Project Context / Tone:
${task.projectContext}
Facts to weave in:
${factsText || "None"}
Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
${socialText || "None"}
Available MDX Components you can use contextually:
${componentsText || "None"}
Special Instructions from User:
${task.instructions || "None"}
BLOG POST BEST PRACTICES (MANDATORY):
- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
- Füge ein sauberes '<TableOfContents />' ein.
- Verwende unsere Komponenten stilvoll für Visualisierungen.
- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).
CRITICAL GUIDELINES (NEVER BREAK THESE):
1. ONLY return the content for the BODY of the MDX file.
2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
8. Do NOT hallucinate links or facts. Use only what is provided.`,
},
{
role: "user",
content: task.content,
},
],
});
let rawContent = response.choices[0].message.content || task.content;
rawContent = this.cleanResponse(rawContent);
// Validation Layer: Check Mermaid syntax
if (retryCount < 2 && rawContent.includes("<Mermaid>")) {
console.log("🔍 Validating Mermaid syntax in AI response...");
const mermaidBlocks = this.extractMermaidBlocks(rawContent);
let hasError = false;
let errorFeedback = "";
for (const block of mermaidBlocks) {
const validationResult = await this.validateMermaidSyntax(block);
if (!validationResult.valid) {
hasError = true;
errorFeedback += `\nInvalid Mermaid block:\n${block}\nError context: ${validationResult.error}\n\n`;
}
}
if (hasError) {
console.log(
`❌ Invalid Mermaid syntax detected. Retrying compilation (Attempt ${retryCount + 1}/2)...`,
);
return this.compileArticle(
{
...task,
content: `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${task.content}`,
},
facts,
socialPosts,
retryCount + 1,
);
}
}
return rawContent;
}
private extractMermaidBlocks(content: string): string[] {
const blocks: string[] = [];
// Regex to match <Mermaid>...</Mermaid> blocks across multiple lines
const regex = /<Mermaid>([\s\S]*?)<\/Mermaid>/g;
let match;
while ((match = regex.exec(content)) !== null) {
if (match[1]) {
blocks.push(match[1].trim());
}
}
return blocks;
}
private async validateMermaidSyntax(
graph: string,
): Promise<{ valid: boolean; error?: string }> {
// Fast LLM validation to catch common syntax errors like unbalanced quotes or HTML entities
try {
const validationResponse = await this.openai.chat.completions.create({
model: "google/gemini-3-flash-preview", // Switch from gpt-4o-mini to user requested model
messages: [
{
role: "system",
content:
'You are a strict Mermaid.js compiler. Analyze the given Mermaid syntax. If it is 100% valid and will render without exceptions, reply ONLY with "VALID". If it has syntax errors (e.g., HTML inside labels, unescaped quotes, unclosed brackets), reply ONLY with "INVALID" followed by a short explanation of the exact error.',
},
{
role: "user",
content: graph,
},
],
});
const reply =
validationResponse.choices[0].message.content?.trim() || "VALID";
if (reply.startsWith("INVALID")) {
return { valid: false, error: reply };
}
return { valid: true };
} catch (e) {
console.error("Syntax validation LLM call failed, passing through:", e);
return { valid: true }; // Fallback to passing if validator fails
}
}
/**
* Post-processing to ensure the AI didn't include "help" text,
* duplicate frontmatter, or markdown wrappers.
*/
private cleanResponse(content: string): string {
let cleaned = content.trim();
// 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
if (cleaned.startsWith("```")) {
cleaned = cleaned
.replace(/^```[a-zA-Z]*\n?/, "")
.replace(/\n?```\s*$/, "");
}
// 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it)
// Look for the --- delimiters and remove the block if it exists
const fmRegex = /^---\s*\n([\s\S]*?)\n---\s*\n?/;
const match = cleaned.match(fmRegex);
if (match) {
console.log(
"♻️ Stripping redundant frontmatter detected in AI response...",
);
cleaned = cleaned.replace(fmRegex, "").trim();
}
return cleaned;
}
}