feat: content engine
This commit is contained in:
350
packages/content-engine/src/orchestrator.ts
Normal file
350
packages/content-engine/src/orchestrator.ts
Normal file
@@ -0,0 +1,350 @@
|
||||
import OpenAI from "openai";
|
||||
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
|
||||
import { ComponentDefinition } from "./generator";
|
||||
import * as fs from "node:fs/promises";
|
||||
import * as path from "node:path";
|
||||
|
||||
/**
 * Construction options for {@link AiBlogPostOrchestrator}.
 */
export interface OrchestratorConfig {
  /** API key handed to both the OpenAI-compatible client and the research agent. */
  apiKey: string;

  /** Model identifier used for the article rewrite; the orchestrator supplies a default when omitted. */
  model?: string;
}
|
||||
|
||||
/**
 * Input for a single document optimization run.
 */
export interface OptimizationTask {
  /** Draft body to optimize (sent to the model as the user message). */
  content: string;

  /** Project context / tone guidelines that are embedded into the system prompt. */
  projectContext: string;

  /** MDX components the model is told it may use; none are advertised when omitted. */
  availableComponents?: ComponentDefinition[];

  /** Free-form extra instructions that are embedded into the system prompt. */
  instructions?: string;
}
|
||||
|
||||
/**
 * Options for optimizing a file on disk.
 */
export interface OptimizeFileOptions {
  /** Directory whose .md / .txt files are loaded as project context. */
  contextDir: string;

  /** MDX components forwarded to the optimization task. */
  availableComponents?: ComponentDefinition[];
}
|
||||
|
||||
/**
 * Drives the rewrite of a blog post draft into an MDX article:
 * it extracts research topics from the draft, gathers facts and social posts
 * through the research agent, and asks a chat model to compile the final body.
 */
export class AiBlogPostOrchestrator {
  private openai: OpenAI;
  private researchAgent: ResearchAgent;
  private model: string;

  /**
   * @param config API key and optional model override. The same API key is
   *   passed to the chat client and to the research agent.
   */
  constructor(config: OrchestratorConfig) {
    this.model = config.model || "google/gemini-3-flash-preview";
    this.openai = new OpenAI({
      apiKey: config.apiKey,
      baseURL: "https://openrouter.ai/api/v1",
      defaultHeaders: {
        "HTTP-Referer": "https://mintel.me",
        "X-Title": "Mintel AI Blog Post Orchestrator",
      },
    });
    this.researchAgent = new ResearchAgent(config.apiKey);
  }

  /**
   * Reusable context loader. Loads all .md and .txt files from a directory
   * (sorted by file name) into a single string, each file prefixed with a
   * `=== name ===` header.
   *
   * @param dirPath Directory path, resolved against the current working directory.
   * @returns The concatenated context, or an empty string when the directory
   *   cannot be read (best effort: the failure is logged, not thrown).
   */
  async loadContext(dirPath: string): Promise<string> {
    try {
      const resolvedDir = path.resolve(process.cwd(), dirPath);
      const entries = await fs.readdir(resolvedDir);
      const textFiles = entries.filter((f) => /\.(md|txt)$/i.test(f)).sort();

      // Files are independent, so read them concurrently; Promise.all keeps
      // the sorted order of the results.
      const sections = await Promise.all(
        textFiles.map(async (file) => {
          const text = await fs.readFile(path.join(resolvedDir, file), "utf8");
          return `=== ${file} ===\n${text.trim()}`;
        }),
      );

      return sections.join("\n\n");
    } catch (e) {
      console.warn(`⚠️ Could not load context from ${dirPath}: ${e}`);
      return "";
    }
  }

  /**
   * Reads a file, extracts frontmatter, loads context, optimizes the body, and
   * writes the result back. The previous file content is saved next to the
   * target as `<file>.bak` before it is overwritten.
   *
   * @param targetFile Absolute path, or path relative to the current working directory.
   * @param options Context directory and optional component catalogue.
   */
  async optimizeFile(
    targetFile: string,
    options: OptimizeFileOptions,
  ): Promise<void> {
    const absPath = path.isAbsolute(targetFile)
      ? targetFile
      : path.resolve(process.cwd(), targetFile);
    console.log(`📄 Processing File: ${path.basename(absPath)}`);

    const content = await fs.readFile(absPath, "utf8");

    // The frontmatter block (including its --- delimiters) is kept verbatim
    // and never sent to the model.
    const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
    const frontmatter = fmMatch ? fmMatch[0] : "";
    const body = fmMatch ? content.slice(frontmatter.length).trim() : content;

    console.log(`📖 Loading context from: ${options.contextDir}`);
    const projectContext = await this.loadContext(options.contextDir);
    if (!projectContext) {
      console.warn(
        "⚠️ No project context loaded. AI might miss specific guidelines.",
      );
    }

    const optimizedContent = await this.optimizeDocument({
      content: body,
      projectContext,
      availableComponents: options.availableComponents,
    });

    const finalOutput = frontmatter
      ? `${frontmatter}\n\n${optimizedContent}`
      : optimizedContent;

    await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
    await fs.writeFile(absPath, finalOutput);
    console.log(`✅ Saved optimized file to: ${absPath}`);
  }

  /**
   * Executes the 3-step optimization pipeline:
   * 1. Research facts
   * 2. Research social posts
   * 3. Instruct the AI to create the article from them
   *
   * @param task Draft content plus context for the rewrite.
   * @returns The optimized MDX body.
   */
  async optimizeDocument(task: OptimizationTask): Promise<string> {
    console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);

    // 1. Research facts
    console.log("1️⃣ Recherchiere Fakten...");
    const researchTopics = await this.identifyTopics(task.content);
    const facts: Fact[] = [];
    for (const topic of researchTopics) {
      const topicFacts = await this.researchAgent.researchTopic(topic);
      facts.push(...topicFacts);
    }

    // 2. Research social posts
    console.log(
      "2️⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
    );
    // Use the first 2000 chars to find relevant social posts
    const socialPosts = await this.researchAgent.findSocialPosts(
      task.content.substring(0, 2000),
    );

    // 3. Instruct the AI to create the article from them
    console.log("3️⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
    return await this.compileArticle(task, facts, socialPosts);
  }

  /**
   * Asks the model for the key topics of the draft that deserve external research.
   *
   * @param content Draft text; only the first 4000 characters are sent.
   * @returns The non-empty string topics from the model's JSON reply, or an
   *   empty array when the reply cannot be parsed or has an unexpected shape.
   */
  private async identifyTopics(content: string): Promise<string[]> {
    const response = await this.openai.chat.completions.create({
      model: "google/gemini-2.5-flash", // fast structured model for topic extraction
      messages: [
        {
          role: "system",
          content: `Analyze the following blog post and identify 1 to 2 key topics or claims that would benefit from statistical data or external verification.
Return JSON: { "topics": ["topic 1", "topic 2"] }
Return ONLY the JSON.`,
        },
        {
          role: "user",
          content: content.slice(0, 4000),
        },
      ],
      response_format: { type: "json_object" },
    });

    try {
      const raw = response.choices[0]?.message?.content || '{"topics": []}';
      // Some models wrap the JSON in a ``` fence despite the instructions.
      const cleaned = raw
        .trim()
        .replace(/^```(?:json)?\s*\n?/, "")
        .replace(/\n?```\s*$/, "");
      const parsed: unknown = JSON.parse(cleaned);

      // The reply is untrusted model output: accept only an array and keep
      // only usable strings. Returning e.g. a bare string here would make the
      // caller iterate it character by character.
      const topics =
        typeof parsed === "object" && parsed !== null
          ? (parsed as { topics?: unknown }).topics
          : undefined;
      if (!Array.isArray(topics)) {
        return [];
      }
      return topics.filter(
        (t): t is string => typeof t === "string" && t.trim().length > 0,
      );
    } catch (e) {
      console.warn("⚠️ Failed to parse research topics", e);
      return [];
    }
  }

  /**
   * Asks the model to compile the final MDX body and, when the result contains
   * `<Mermaid>` blocks, validates them and re-prompts with the validation errors.
   *
   * @param task Draft and context; `task.content` is always treated as the original draft.
   * @param facts Researched facts to weave in.
   * @param socialPosts Social posts the model may embed.
   * @param retryCount Number of attempts already used; Mermaid validation is
   *   skipped once two retries have been consumed.
   * @returns The cleaned model output, or the cleaned original draft when the
   *   model returns no content.
   */
  private async compileArticle(
    task: OptimizationTask,
    facts: Fact[],
    socialPosts: SocialPost[],
    retryCount = 0,
  ): Promise<string> {
    const maxRetries = 2;
    const systemPrompt = this.buildSystemPrompt(task, facts, socialPosts);

    // The user message changes between attempts, the original draft does not.
    // Keeping them apart prevents the retry prompt from nesting into itself
    // and from being returned as the article when the model replies empty.
    let userContent = task.content;
    let attempt = retryCount;

    for (;;) {
      const response = await this.openai.chat.completions.create({
        model: this.model,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: userContent },
        ],
      });

      const rawContent = this.cleanResponse(
        response.choices[0]?.message?.content || task.content,
      );

      // Validation Layer: Check Mermaid syntax
      if (attempt >= maxRetries || !rawContent.includes("<Mermaid>")) {
        return rawContent;
      }

      console.log("🔍 Validating Mermaid syntax in AI response...");
      const errorFeedback = await this.collectMermaidErrors(rawContent);
      if (!errorFeedback) {
        return rawContent;
      }

      console.log(
        `❌ Invalid Mermaid syntax detected. Retrying compilation (Attempt ${attempt + 1}/2)...`,
      );
      userContent = `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${task.content}`;
      attempt += 1;
    }
  }

  /** Builds the system prompt for the article rewrite from the task, facts and social posts. */
  private buildSystemPrompt(
    task: OptimizationTask,
    facts: Fact[],
    socialPosts: SocialPost[],
  ): string {
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");

    const socialText = socialPosts
      .map((p) => `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`)
      .join("\n");

    const componentsText = (task.availableComponents || [])
      .map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
      .join("\n\n");

    return `You are an expert MDX Editor and Digital Architect.

YOUR TASK:
Take the given draft blog post and rewrite/enhance it into a final, error-free MDX file. Maintain the author's original German text, meaning, and tone, but enrich it gracefully.

CONTEXT & RULES:
Project Context / Tone:
${task.projectContext}

Facts to weave in:
${factsText || "None"}

Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
${socialText || "None"}

Available MDX Components you can use contextually:
${componentsText || "None"}

Special Instructions from User:
${task.instructions || "None"}

BLOG POST BEST PRACTICES (MANDATORY):
- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
- Füge ein sauberes '<TableOfContents />' ein.
- Verwende unsere Komponenten stilvoll für Visualisierungen.
- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).

CRITICAL GUIDELINES (NEVER BREAK THESE):
1. ONLY return the content for the BODY of the MDX file.
2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
8. Do NOT hallucinate links or facts. Use only what is provided.`;
  }

  /**
   * Validates every `<Mermaid>` block in `content` and returns the combined
   * error feedback; the result is an empty string when all blocks pass.
   */
  private async collectMermaidErrors(content: string): Promise<string> {
    let errorFeedback = "";
    for (const block of this.extractMermaidBlocks(content)) {
      const validationResult = await this.validateMermaidSyntax(block);
      if (!validationResult.valid) {
        errorFeedback += `\nInvalid Mermaid block:\n${block}\nError context: ${validationResult.error}\n\n`;
      }
    }
    return errorFeedback;
  }

  /**
   * Returns the trimmed inner text of every attribute-less
   * `<Mermaid>...</Mermaid>` element in `content`; empty elements are skipped.
   */
  private extractMermaidBlocks(content: string): string[] {
    // Lazy match across multiple lines so adjacent blocks stay separate.
    const blockPattern = /<Mermaid>([\s\S]*?)<\/Mermaid>/g;
    return Array.from(content.matchAll(blockPattern), (m) => m[1])
      .filter((inner) => Boolean(inner))
      .map((inner) => inner.trim());
  }

  /**
   * Asks a model to judge whether a Mermaid graph is syntactically valid.
   * This is a heuristic check, not a real Mermaid parse.
   *
   * @param graph Mermaid source (without the surrounding `<Mermaid>` tags).
   * @returns `{ valid: false, error }` when the model reports the graph as
   *   invalid; `{ valid: true }` otherwise, including when the validation
   *   call itself fails (fail-open so the pipeline is not blocked).
   */
  private async validateMermaidSyntax(
    graph: string,
  ): Promise<{ valid: boolean; error?: string }> {
    // Fast LLM validation to catch common syntax errors like unbalanced quotes or HTML entities
    try {
      const validationResponse = await this.openai.chat.completions.create({
        model: "google/gemini-3-flash-preview", // fixed fast model, independent of the configured rewrite model
        messages: [
          {
            role: "system",
            content:
              'You are a strict Mermaid.js compiler. Analyze the given Mermaid syntax. If it is 100% valid and will render without exceptions, reply ONLY with "VALID". If it has syntax errors (e.g., HTML inside labels, unescaped quotes, unclosed brackets), reply ONLY with "INVALID" followed by a short explanation of the exact error.',
          },
          {
            role: "user",
            content: graph,
          },
        ],
      });

      const reply =
        validationResponse.choices[0]?.message?.content?.trim() || "VALID";
      // Models do not always follow the exact casing or add markdown emphasis
      // ("Invalid: ...", "**INVALID**"); treat all of those as a rejection.
      if (/^\W*INVALID\b/i.test(reply)) {
        return { valid: false, error: reply };
      }
      return { valid: true };
    } catch (e) {
      console.error("Syntax validation LLM call failed, passing through:", e);
      return { valid: true }; // Fallback to passing if validator fails
    }
  }

  /**
   * Post-processing to ensure the AI didn't include "help" text,
   * duplicate frontmatter, or markdown wrappers.
   *
   * @param content Raw model output.
   * @returns The trimmed content without a surrounding ``` fence and without a
   *   leading `---` delimited block.
   */
  private cleanResponse(content: string): string {
    let cleaned = content.trim();

    // 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
    if (cleaned.startsWith("```")) {
      cleaned = cleaned
        .replace(/^```[a-zA-Z]*\n?/, "")
        .replace(/\n?```\s*$/, "");
    }

    // 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it)
    // Look for the --- delimiters and remove the block if it exists
    // NOTE(review): a body that legitimately opens with a `---` thematic break
    // followed by a later `---` would also be matched here.
    const fmRegex = /^---\s*\n([\s\S]*?)\n---\s*\n?/;
    if (fmRegex.test(cleaned)) {
      console.log(
        "♻️ Stripping redundant frontmatter detected in AI response...",
      );
      cleaned = cleaned.replace(fmRegex, "").trim();
    }

    return cleaned;
  }
}
|
||||
Reference in New Issue
Block a user