feat: content engine

This commit is contained in:
2026-02-21 19:08:06 +01:00
parent 3f1c37813a
commit a50b8d6393
32 changed files with 2816 additions and 189 deletions

View File

@@ -0,0 +1,974 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { MemeGenerator, MemeSuggestion } from "@mintel/meme-generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
/** A reusable MDX/JSX component the AI may reference when writing content. */
export interface ComponentDefinition {
  /** JSX tag name without angle brackets, e.g. "StatsGrid". */
  name: string;
  /** Short human-readable description of what the component renders. */
  description: string;
  /** Literal JSX snippet showing correct usage; passed verbatim into prompts. */
  usageExample: string;
}

/** Options for generating a brand-new blog post from scratch. */
export interface BlogPostOptions {
  topic: string;
  /** Writing tone; generatePost defaults this to "professional yet witty". */
  tone?: string;
  targetAudience?: string;
  includeMemes?: boolean;
  includeDiagrams?: boolean;
  /** When true (generatePost's default), research facts before drafting. */
  includeResearch?: boolean;
  availableComponents?: ComponentDefinition[];
}

/** Options for the additive optimization pipeline (optimizePost). */
export interface OptimizationOptions {
  /** Run fact research + social-post search and plan their insertions. */
  enhanceFacts?: boolean;
  addMemes?: boolean;
  addDiagrams?: boolean;
  availableComponents?: ComponentDefinition[];
  projectContext?: string;
  /** Target audience description for all AI prompts */
  targetAudience?: string;
  /** Tone/persona description for all AI prompts */
  tone?: string;
  /** Prompt for DALL-E 3 style generation */
  memeStylePrompt?: string;
  /** Path to the docs folder (e.g. apps/web/docs) for full persona/tone context */
  docsPath?: string;
}

/** Result bundle returned by both generatePost and optimizePost. */
export interface GeneratedPost {
  title: string;
  /** Final MDX body (no frontmatter). */
  content: string;
  /** All facts gathered during research, whether or not they were inserted. */
  research: Fact[];
  memes: MemeSuggestion[];
  /** Raw Mermaid source for every valid diagram generated. */
  diagrams: string[];
}

/** One planned content addition: new MDX placed after a section index. */
interface Insertion {
  /** Zero-based index into the section array; content goes after this section. */
  afterSection: number;
  /** Raw MDX/JSX to insert. */
  content: string;
}
// Model configuration: specialized models for different tasks
const MODELS = {
  // Structured JSON output: research planning, diagram/meme placement
  STRUCTURED: "google/gemini-2.5-flash",
  // Lightweight routing decisions
  ROUTING: "google/gemini-2.5-flash",
  // Long-form German prose, fact/component planning, final MDX rewrite
  CONTENT: "google/gemini-2.5-pro",
  // Mermaid diagram generation - User requested Pro
  DIAGRAM: "google/gemini-2.5-pro",
} as const;
/**
 * Parse a JSON string that may be wrapped in markdown code fences.
 * Some models wrap output in ```json ... ``` despite response_format
 * requesting plain JSON; the fences are stripped before parsing.
 * On parse failure a warning is logged and the fallback is returned.
 */
function safeParseJSON(raw: string, fallback: any = {}): any {
  let text = raw.trim();
  if (text.startsWith("```")) {
    // Drop an opening ``` / ```json fence and the trailing ``` fence.
    text = text.replace(/^```(?:json)?\s*\n?/, "").replace(/\n?```\s*$/, "");
  }
  try {
    return JSON.parse(text);
  } catch (err) {
    console.warn(
      "⚠️ Failed to parse JSON response, using fallback:",
      (err as Error).message,
    );
    return fallback;
  }
}
export class ContentGenerator {
  // OpenRouter-backed OpenAI client used for all chat completions.
  private openai: OpenAI;
  // Delegate for fact research and social-post discovery.
  private researchAgent: ResearchAgent;
  // Delegate for meme idea generation.
  private memeGenerator: MemeGenerator;

  /**
   * @param apiKey OpenRouter API key, shared by the chat client and both agents.
   */
  constructor(apiKey: string) {
    this.openai = new OpenAI({
      apiKey,
      // All requests go through OpenRouter, not api.openai.com.
      baseURL: "https://openrouter.ai/api/v1",
      defaultHeaders: {
        "HTTP-Referer": "https://mintel.me",
        "X-Title": "Mintel Content Engine",
      },
    });
    this.researchAgent = new ResearchAgent(apiKey);
    this.memeGenerator = new MemeGenerator(apiKey);
  }
// =========================================================================
// generatePost — for new posts (unchanged from original)
// =========================================================================
async generatePost(options: BlogPostOptions): Promise<GeneratedPost> {
const {
topic,
tone = "professional yet witty",
includeResearch = true,
availableComponents = [],
} = options;
console.log(`🚀 Starting content generation for: "${topic}"`);
let facts: Fact[] = [];
if (includeResearch) {
console.log("📚 Gathering research...");
facts = await this.researchAgent.researchTopic(topic);
}
console.log("📝 Creating outline...");
const outline = await this.createOutline(topic, facts, tone);
console.log("✍️ Drafting content...");
let content = await this.draftContent(
topic,
outline,
facts,
tone,
availableComponents,
);
const diagrams: string[] = [];
if (options.includeDiagrams) {
content = await this.processDiagramPlaceholders(content, diagrams);
}
const memes: MemeSuggestion[] = [];
if (options.includeMemes) {
const memeIdeas = await this.memeGenerator.generateMemeIdeas(
content.slice(0, 4000),
);
memes.push(...memeIdeas);
}
return { title: outline.title, content, research: facts, memes, diagrams };
}
  // =========================================================================
  // generateTldr — Creates a TL;DR block for the given content
  // =========================================================================
  /**
   * Generate a German "TL;DR" MDX block for the given article.
   * Only the first 3000 characters are sent as context.
   * @returns The raw MDX snippet, or "" when the model returns nothing.
   */
  async generateTldr(content: string): Promise<string> {
    // Cap context to keep the prompt small; a TL;DR only needs the opening.
    const context = content.slice(0, 3000);
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `Du bist ein kompromissloser Digital Architect.
Erstelle ein "TL;DR" für diesen Artikel.
REGELN:
- 3 knackige Bulletpoints
- TON: Sarkastisch, direkt, provokant ("Finger in die Wunde")
- Fokussiere auf den wirtschaftlichen Schaden von schlechter Tech
- Formatiere als MDX-Komponente:
<div className="my-8 p-6 bg-slate-50 border-l-4 border-blue-600 rounded-r-xl">
<H3>TL;DR: Warum Ihr Geld verbrennt</H3>
<ul className="list-disc pl-5 space-y-2 mb-0">
<li>Punkt 1</li>
<li>Punkt 2</li>
<li>Punkt 3</li>
</ul>
</div>`,
        },
        {
          role: "user",
          content: context,
        },
      ],
    });
    return response.choices[0].message.content?.trim() ?? "";
  }
  // =========================================================================
  // optimizePost — ADDITIVE architecture (never rewrites original content)
  // =========================================================================
  /**
   * Optimize an existing post without rewriting the author's text.
   * All enhancements are planned as Insertion objects against numbered
   * sections (facts, social embeds, components, diagrams, memes), applied
   * programmatically, then a single agentic rewrite pass polishes syntax
   * and deduplicates components.
   * @param content Original MDX body (without frontmatter).
   * @param options Flags selecting which enhancement steps run.
   */
  async optimizePost(
    content: string,
    options: OptimizationOptions,
  ): Promise<GeneratedPost> {
    console.log("🚀 Optimizing existing content (additive mode)...");
    // Load docs context if provided
    let docsContext = "";
    if (options.docsPath) {
      docsContext = await this.loadDocsContext(options.docsPath);
      console.log(`📖 Loaded ${docsContext.length} chars of docs context`);
    }
    // Project context + docs context are concatenated for every prompt.
    const fullContext = [options.projectContext || "", docsContext]
      .filter(Boolean)
      .join("\n\n---\n\n");
    // Split content into numbered sections for programmatic insertion
    const sections = this.splitIntoSections(content);
    console.log(`📋 Content has ${sections.length} sections`);
    const insertions: Insertion[] = [];
    const facts: Fact[] = [];
    const diagrams: string[] = [];
    const memes: MemeSuggestion[] = [];
    // Build a numbered content map for LLM reference (read-only)
    const sectionMap = this.buildSectionMap(sections);
    // ----- STEP 1: Research -----
    if (options.enhanceFacts) {
      console.log("🔍 Identifying research topics...");
      const researchTopics = await this.identifyResearchTopics(
        content,
        fullContext,
      );
      console.log(`📚 Researching: ${researchTopics.join(", ")}`);
      // Sequential on purpose: one research call at a time.
      for (const topic of researchTopics) {
        const topicFacts = await this.researchAgent.researchTopic(topic);
        facts.push(...topicFacts);
      }
      if (facts.length > 0) {
        console.log(`📝 Planning fact insertions for ${facts.length} facts...`);
        const factInsertions = await this.planFactInsertions(
          sectionMap,
          sections,
          facts,
          fullContext,
        );
        insertions.push(...factInsertions);
        console.log(`${factInsertions.length} fact enrichments planned`);
      }
      // ----- STEP 1.5: Social Media Search -----
      // NOTE(review): only the first 200 chars are used to find social posts;
      // the orchestrator uses 2000 — confirm this small window is intended.
      console.log("📱 Identifying real social media posts...");
      const socialPosts = await this.researchAgent.findSocialPosts(
        content.substring(0, 200),
      );
      if (socialPosts.length > 0) {
        console.log(
          `📝 Planning placement for ${socialPosts.length} social media posts...`,
        );
        const socialInsertions = await this.planSocialMediaInsertions(
          sectionMap,
          sections,
          socialPosts,
          fullContext,
        );
        insertions.push(...socialInsertions);
        console.log(
          `${socialInsertions.length} social embeddings planned`,
        );
      }
    }
    // ----- STEP 2: Component suggestions -----
    if (options.availableComponents && options.availableComponents.length > 0) {
      console.log("🧩 Planning component additions...");
      const componentInsertions = await this.planComponentInsertions(
        sectionMap,
        sections,
        options.availableComponents,
        fullContext,
      );
      insertions.push(...componentInsertions);
      console.log(
        `${componentInsertions.length} component additions planned`,
      );
    }
    // ----- STEP 3: Diagram generation -----
    if (options.addDiagrams) {
      console.log("📊 Planning diagrams...");
      const diagramPlans = await this.planDiagramInsertions(
        sectionMap,
        sections,
        fullContext,
      );
      for (const plan of diagramPlans) {
        const mermaidCode = await this.generateMermaid(plan.concept);
        // generateMermaid returns "" when the model output fails validation.
        if (!mermaidCode) {
          console.warn(` ⏭️ Skipping invalid diagram for: "${plan.concept}"`);
          continue;
        }
        diagrams.push(mermaidCode);
        // Slug the concept into a stable id for the <Mermaid> component.
        const diagramId = plan.concept
          .toLowerCase()
          .replace(/\s+/g, "-")
          .replace(/[^a-z0-9-]/g, "")
          .slice(0, 40);
        insertions.push({
          afterSection: plan.afterSection,
          content: `<div className="my-8">\n <Mermaid id="${diagramId}" title="${plan.concept}" showShare={true}>\n${mermaidCode}\n </Mermaid>\n</div>`,
        });
      }
      console.log(
        `${diagramPlans.length} diagrams planned, ${diagrams.length} valid`,
      );
    }
    // ----- STEP 4: Meme placement (memegen.link via ArticleMeme) -----
    if (options.addMemes) {
      console.log("✨ Generating meme ideas...");
      let memeIdeas = await this.memeGenerator.generateMemeIdeas(
        content.slice(0, 4000),
      );
      // User requested to explicitly limit memes to max 1 per page to prevent duplication
      if (memeIdeas.length > 1) {
        memeIdeas = [memeIdeas[0]];
      }
      memes.push(...memeIdeas);
      if (memeIdeas.length > 0) {
        console.log(
          `🎨 Planning meme placement for ${memeIdeas.length} memes...`,
        );
        const memePlacements = await this.planMemePlacements(
          sectionMap,
          sections,
          memeIdeas,
        );
        for (let i = 0; i < memeIdeas.length; i++) {
          const meme = memeIdeas[i];
          // Only place memes whose planned position is a valid section index.
          if (
            memePlacements[i] !== undefined &&
            memePlacements[i] >= 0 &&
            memePlacements[i] < sections.length
          ) {
            const captionsStr = meme.captions.join("|");
            insertions.push({
              afterSection: memePlacements[i],
              content: `<div className="my-8">\n <ArticleMeme template="${meme.template}" captions="${captionsStr}" />\n</div>`,
            });
          }
        }
        console.log(`${memeIdeas.length} memes placed`);
      }
    }
    // ----- Enforce visual spacing (no consecutive visualizations) -----
    // Mutates `insertions` in place (sorts and may shift positions).
    this.enforceVisualSpacing(insertions, sections);
    // ----- Apply all insertions to original content -----
    console.log(
      `\n🔧 Applying ${insertions.length} insertions to original content...`,
    );
    let optimizedContent = this.applyInsertions(sections, insertions);
    // ----- FINAL AGENTIC REWRITE (Replaces dumb regex scripts) -----
    console.log(
      `\n🧠 Agentic Rewrite: Polishing MDX, fixing syntax, and deduplicating...`,
    );
    const finalRewrite = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `You are an expert MDX Editor. Your task is to take a draft blog post and output the FINAL, error-free MDX code.
CRITICAL RULES:
1. DEDUPLICATION: Ensure there is MAX ONE <ArticleMeme> in the entire post. Remove any duplicates or outdated memes. Ensure there is MAX ONE TL;DR section. Ensure there are no duplicate components.
2. TEXT-TO-COMPONENT RATIO: Ensure there are at least 3-4 paragraphs of normal text between any two visual components (<Mermaid>, <ArticleMeme>, <StatsGrid>, <BoldNumber>, etc.). If they are clumped together, spread them out or delete the less important ones.
3. SYNTAX: Fix any broken Mermaid/MDX syntax (e.g. unclosed tags, bad quotes).
4. FIDELITY: Preserve the author's original German text, meaning, and tone. Smooth out transitions into the components.
5. NO HALLUCINATION: Do not invent new URLs or facts. Keep the data provided in the draft.
6. OUTPUT: Return ONLY the raw MDX content. No markdown code blocks (\`\`\`mdx), no preamble. Just the raw code file.`,
        },
        {
          role: "user",
          content: optimizedContent,
        },
      ],
    });
    // Fall back to the pre-rewrite content when the model returns nothing.
    optimizedContent =
      finalRewrite.choices[0].message.content?.trim() || optimizedContent;
    // Strip any residual markdown formatting fences just in case
    if (optimizedContent.startsWith("```")) {
      optimizedContent = optimizedContent
        .replace(/^```[a-zA-Z]*\n/, "")
        .replace(/\n```$/, "");
    }
    return {
      title: "Optimized Content",
      content: optimizedContent,
      research: facts,
      memes,
      diagrams,
    };
  }
// =========================================================================
// ADDITIVE HELPERS — these return JSON instructions, never rewrite content
// =========================================================================
private splitIntoSections(content: string): string[] {
// Split on double newlines (paragraph/block boundaries in MDX)
return content.split(/\n\n+/);
}
private applyInsertions(sections: string[], insertions: Insertion[]): string {
// Sort by section index DESCENDING to avoid index shifting
const sorted = [...insertions].sort(
(a, b) => b.afterSection - a.afterSection,
);
const result = [...sections];
for (const ins of sorted) {
const idx = Math.min(ins.afterSection + 1, result.length);
result.splice(idx, 0, ins.content);
}
return result.join("\n\n");
}
  /**
   * Enforce visual spacing between planned visual components so the post
   * never becomes a wall of visualizations.
   *
   * Mutates `insertions` in place: sorts by afterSection ascending, then
   * pushes any visual insertion that follows another visual insertion too
   * closely forward by MIN_VISUAL_GAP sections (clamped to the last
   * section index). Shifts cascade, since each comparison uses the
   * possibly-already-shifted previous position.
   */
  private enforceVisualSpacing(
    insertions: Insertion[],
    sections: string[],
  ): void {
    // JSX tag prefixes that count as "visual" components.
    const visualPatterns = [
      "<Mermaid",
      "<ArticleMeme",
      "<StatsGrid",
      "<StatsDisplay",
      "<BoldNumber",
      "<MetricBar",
      "<ComparisonRow",
      "<PremiumComparisonChart",
      "<DiagramFlow",
      "<DiagramPie",
      "<DiagramGantt",
      "<DiagramState",
      "<DiagramSequence",
      "<DiagramTimeline",
      "<Carousel",
      "<WebVitalsScore",
      "<WaterfallChart",
    ];
    const isVisual = (content: string) =>
      visualPatterns.some((p) => content.includes(p));
    // Sort by section ascending
    insertions.sort((a, b) => a.afterSection - b.afterSection);
    // Minimum gap of 10 sections between visual components (= ~6-8 text paragraphs)
    // User requested a better text-to-component ratio (not 1:1)
    const MIN_VISUAL_GAP = 10;
    for (let i = 1; i < insertions.length; i++) {
      // NOTE(review): only directly adjacent insertions are compared — a
      // non-visual insertion between two visuals skips the gap check for
      // that pair. Confirm this is the intended behavior.
      if (
        isVisual(insertions[i].content) &&
        isVisual(insertions[i - 1].content)
      ) {
        const gap = insertions[i].afterSection - insertions[i - 1].afterSection;
        if (gap < MIN_VISUAL_GAP) {
          const newPos = Math.min(
            insertions[i - 1].afterSection + MIN_VISUAL_GAP,
            sections.length - 1,
          );
          insertions[i].afterSection = newPos;
        }
      }
    }
  }
private buildSectionMap(sections: string[]): string {
return sections
.map((s, i) => {
const preview = s.trim().replace(/\n/g, " ").slice(0, 120);
return `[${i}] ${preview}${s.length > 120 ? "…" : ""}`;
})
.join("\n");
}
private async loadDocsContext(docsPath: string): Promise<string> {
try {
const files = await fs.readdir(docsPath);
const mdFiles = files.filter((f) => f.endsWith(".md")).sort();
const contents: string[] = [];
for (const file of mdFiles) {
const filePath = path.join(docsPath, file);
const text = await fs.readFile(filePath, "utf8");
contents.push(`=== ${file} ===\n${text.trim()}`);
}
return contents.join("\n\n");
} catch (e) {
console.warn(`⚠️ Could not load docs from ${docsPath}: ${e}`);
return "";
}
}
  // --- Fact insertion planning (MODELS.CONTENT — precise content understanding) ---
  /**
   * Ask the content model to plan NEW fact paragraphs (max 5) to insert
   * after specific sections. Existing content is never modified.
   * Only structurally valid, in-range insertions are returned.
   */
  private async planFactInsertions(
    sectionMap: string,
    sections: string[],
    facts: Fact[],
    context: string,
  ): Promise<Insertion[]> {
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `You enrich a German blog post by ADDING new paragraphs with researched facts.
RULES:
- Do NOT rewrite or modify any existing content
- Only produce NEW <Paragraph> blocks to INSERT after a specific section number
- Maximum 5 insertions (only the most impactful facts)
- Match the post's tone and style (see context below)
- Use the post's JSX components: <Paragraph>, <Marker> for emphasis
- Cite sources using ExternalLink: <ExternalLink href="URL">Source: Name</ExternalLink>
- Write in German, active voice, Ich-Form where appropriate
CONTEXT (tone, style, persona):
${context.slice(0, 3000)}
EXISTING SECTIONS (read-only — do NOT modify these):
${sectionMap}
FACTS TO INTEGRATE:
${factsText}
Return JSON:
{ "insertions": [{ "afterSection": 3, "content": "<Paragraph>\\n Fact-enriched paragraph text. [Source: Name]\\n</Paragraph>" }] }
Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"insertions": []}',
      { insertions: [] },
    );
    // Keep only well-formed insertions with in-range section indices.
    return (result.insertions || []).filter(
      (i: any) =>
        typeof i.afterSection === "number" &&
        i.afterSection >= 0 &&
        i.afterSection < sections.length &&
        typeof i.content === "string",
    );
  }
  // --- Social Media insertion planning ---
  /**
   * Plan 1-2 social-media embed insertions (YouTube/Twitter/LinkedIn) from
   * already-researched posts. Returns [] when no posts are supplied; only
   * structurally valid, in-range insertions are returned.
   */
  private async planSocialMediaInsertions(
    sectionMap: string,
    sections: string[],
    posts: SocialPost[],
    context: string,
  ): Promise<Insertion[]> {
    if (!posts || posts.length === 0) return [];
    const postsText = posts
      .map(
        (p, i) =>
          `[${i}] Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
      )
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `You enhance a German blog post by embedding relevant social media posts (YouTube, Twitter, LinkedIn).
RULES:
- Do NOT rewrite any existing content
- Return exactly 1 or 2 high-impact insertions
- Choose the best fitting post(s) from the provided list
- Use the correct component based on the platform:
- youtube -> <YouTubeEmbed videoId="ID" />
- twitter -> <TwitterEmbed tweetId="ID" theme="light" />
- linkedin -> <LinkedInEmbed urn="ID" />
- Add a 1-sentence intro paragraph above the embed to contextualize it.
CONTEXT:
${context.slice(0, 3000)}
SOCIAL POSTS AVAILABLE TO EMBED:
${postsText}
EXISTING SECTIONS:
${sectionMap}
Return JSON:
{ "insertions": [{ "afterSection": 4, "content": "<Paragraph>Wie Experten passend bemerken:</Paragraph>\\n\\n<TwitterEmbed tweetId=\\"123456\\" theme=\\"light\\" />" }] }
Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"insertions": []}',
      { insertions: [] },
    );
    // Keep only well-formed insertions with in-range section indices.
    return (result.insertions || []).filter(
      (i: any) =>
        typeof i.afterSection === "number" &&
        i.afterSection >= 0 &&
        i.afterSection < sections.length &&
        typeof i.content === "string",
    );
  }
// --- Component insertion planning (Claude Sonnet — understands JSX context) ---
private async planComponentInsertions(
sectionMap: string,
sections: string[],
components: ComponentDefinition[],
context: string,
): Promise<Insertion[]> {
const fullContent = sections.join("\n\n");
const componentsText = components
.map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
.join("\n\n");
const usedComponents = components
.filter((c) => fullContent.includes(`<${c.name}`))
.map((c) => c.name);
const response = await this.openai.chat.completions.create({
model: MODELS.CONTENT,
messages: [
{
role: "system",
content: `You enhance a German blog post by ADDING interactive UI components.
STRICT BALANCE RULES:
- Maximum 34 component additions total
- There MUST be at least 34 text paragraphs between any two visual components
- Visual components MUST NEVER appear directly after each other
- Each unique component type should only appear ONCE (e.g., only one WebVitalsScore, one WaterfallChart)
- Multiple MetricBar or ComparisonRow in sequence are OK (they form a group)
CONTENT RULES:
- Do NOT rewrite any existing content — only ADD new component blocks
- Do NOT add components already present: ${usedComponents.join(", ") || "none"}
- Statistics MUST have comparison context (before/after, competitor vs us) — never standalone numbers
- All BoldNumber components MUST include source and sourceUrl props
- All ArticleQuote components MUST include source and sourceUrl; add "(übersetzt)" if translated
- MetricBar value must be a real number > 0, not placeholder zeros
- Carousel items array must have at least 2 items with substantive content
- Use exact JSX syntax from the examples
CONTEXT:
${context.slice(0, 3000)}
EXISTING SECTIONS (read-only):
${sectionMap}
AVAILABLE COMPONENTS:
${componentsText}
Return JSON:
{ "insertions": [{ "afterSection": 5, "content": "<StatsDisplay value=\\"100\\" label=\\"PageSpeed Score\\" subtext=\\"Kein Kompromiss.\\" />" }] }
Return ONLY the JSON.`,
},
],
response_format: { type: "json_object" },
});
const result = safeParseJSON(
response.choices[0].message.content || '{"insertions": []}',
{ insertions: [] },
);
return (result.insertions || []).filter(
(i: any) =>
typeof i.afterSection === "number" &&
i.afterSection >= 0 &&
i.afterSection < sections.length &&
typeof i.content === "string",
);
}
  // --- Diagram planning (Gemini Flash — structured output) ---
  /**
   * Ask the structured model for up to 2 diagram concepts with target
   * positions. Only {afterSection, concept} plans are returned here; the
   * Mermaid source is generated later per concept by generateMermaid().
   */
  private async planDiagramInsertions(
    sectionMap: string,
    sections: string[],
    context: string,
  ): Promise<{ afterSection: number; concept: string }[]> {
    const fullContent = sections.join("\n\n");
    // When the post already has diagrams, ask only for NEW concepts.
    const hasDiagrams =
      fullContent.includes("<Mermaid") || fullContent.includes("<Diagram");
    const response = await this.openai.chat.completions.create({
      model: MODELS.STRUCTURED,
      messages: [
        {
          role: "system",
          content: `Analyze this German blog post and suggest 1-2 Mermaid diagrams.
${hasDiagrams ? "The post already has diagrams. Only suggest NEW concepts not already visualized." : ""}
${context.slice(0, 1500)}
SECTIONS:
${sectionMap}
Return JSON:
{ "diagrams": [{ "afterSection": 5, "concept": "Descriptive concept name" }] }
Maximum 2 diagrams. Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"diagrams": []}',
      { diagrams: [] },
    );
    // NOTE(review): unlike the other planners, `concept` is not validated to
    // be a string here; downstream only interpolates it into templates.
    return (result.diagrams || []).filter(
      (d: any) =>
        typeof d.afterSection === "number" &&
        d.afterSection >= 0 &&
        d.afterSection < sections.length,
    );
  }
  // --- Meme placement planning (Gemini Flash — structural positioning) ---
  /**
   * Ask the structured model for one section index per meme, in the same
   * order as the memes list. Returned indices are NOT validated here; the
   * caller (optimizePost) bounds-checks each placement before use.
   * Note: `sections` is currently unused — kept for signature parity with
   * the other planners.
   */
  private async planMemePlacements(
    sectionMap: string,
    sections: string[],
    memes: MemeSuggestion[],
  ): Promise<number[]> {
    const memesText = memes
      .map((m, i) => `${i}: "${m.template}" — ${m.captions.join(" / ")}`)
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.STRUCTURED,
      messages: [
        {
          role: "system",
          content: `Place ${memes.length} memes at appropriate positions in this blog post.
Rules: Space them out evenly, place between thematic sections, never at position 0 (the very start).
SECTIONS:
${sectionMap}
MEMES:
${memesText}
Return JSON: { "placements": [sectionNumber, sectionNumber, ...] }
One section number per meme, in the same order as the memes list. Return ONLY JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    const result = safeParseJSON(
      response.choices[0].message.content || '{"placements": []}',
      { placements: [] },
    );
    return result.placements || [];
  }
  // =========================================================================
  // SHARED HELPERS
  // =========================================================================
  /**
   * Create a title + section outline for a new post.
   * @returns Parsed {title, sections}; empty values on parse failure.
   */
  private async createOutline(
    topic: string,
    facts: Fact[],
    tone: string,
  ): Promise<{ title: string; sections: string[] }> {
    const factsContext = facts
      .map((f) => `- ${f.statement} (${f.source})`)
      .join("\n");
    const response = await this.openai.chat.completions.create({
      model: MODELS.STRUCTURED,
      messages: [
        {
          role: "system",
          content: `Create a blog post outline on "${topic}".
Tone: ${tone}.
Incorporating these facts:
${factsContext}
Return JSON: { "title": "Catchy Title", "sections": ["Introduction", "Section 1", "Conclusion"] }
Return ONLY the JSON.`,
        },
      ],
      response_format: { type: "json_object" },
    });
    return safeParseJSON(
      response.choices[0].message.content || '{"title": "", "sections": []}',
      { title: "", sections: [] },
    );
  }
  /**
   * Draft the full post body from the outline, facts, and component list.
   * The model is told to mark diagram spots with
   * <!-- DIAGRAM_PLACEHOLDER: Concept --> for later expansion.
   * Note: `topic` is currently unused (the outline carries the title) —
   * kept for signature stability.
   */
  private async draftContent(
    topic: string,
    outline: { title: string; sections: string[] },
    facts: Fact[],
    tone: string,
    components: ComponentDefinition[],
  ): Promise<string> {
    const factsContext = facts
      .map((f) => `- ${f.statement} (Source: ${f.source})`)
      .join("\n");
    // Optional component catalog appended to the prompt when available.
    const componentsContext =
      components.length > 0
        ? `\n\nAvailable Components:\n` +
          components
            .map(
              (c) =>
                `- <${c.name}>: ${c.description}\n Example: ${c.usageExample}`,
            )
            .join("\n")
        : "";
    const response = await this.openai.chat.completions.create({
      model: MODELS.CONTENT,
      messages: [
        {
          role: "system",
          content: `Write a blog post based on this outline:
Title: ${outline.title}
Sections: ${outline.sections.join(", ")}
Tone: ${tone}.
Facts: ${factsContext}
${componentsContext}
Format as Markdown. Start with # H1.
For places where a diagram would help, insert: <!-- DIAGRAM_PLACEHOLDER: Concept Name -->
Return ONLY raw content.`,
        },
      ],
    });
    return response.choices[0].message.content || "";
  }
private async processDiagramPlaceholders(
content: string,
diagrams: string[],
): Promise<string> {
const matches = content.matchAll(/<!-- DIAGRAM_PLACEHOLDER: (.+?) -->/g);
let processedContent = content;
for (const match of Array.from(matches)) {
const concept = match[1];
const diagram = await this.generateMermaid(concept);
diagrams.push(diagram);
const diagramId = concept
.toLowerCase()
.replace(/\s+/g, "-")
.replace(/[^a-z0-9-]/g, "")
.slice(0, 40);
const mermaidJsx = `\n<div className="my-8">\n <Mermaid id="${diagramId}" title="${concept}" showShare={true}>\n${diagram}\n </Mermaid>\n</div>\n`;
processedContent = processedContent.replace(
`<!-- DIAGRAM_PLACEHOLDER: ${concept} -->`,
mermaidJsx,
);
}
return processedContent;
}
  /**
   * Generate Mermaid source for a concept and validate it.
   * @returns Raw Mermaid code, or "" when the output fails validation
   *          (wrong first keyword or suspiciously short).
   */
  private async generateMermaid(concept: string): Promise<string> {
    const response = await this.openai.chat.completions.create({
      model: MODELS.DIAGRAM,
      messages: [
        {
          role: "system",
          content: `Generate a Mermaid.js diagram for: "${concept}".
RULES:
- Use clear labels in German where appropriate
- Keep it EXTREMELY SIMPLE AND COMPACT: strictly max 3-4 nodes for a tiny visual footprint.
- Prefer vertical layouts (TD) over horizontal (LR) to prevent wide overflowing graphs.
- CRITICAL: Generate ONLY ONE single connected graph. Do NOT generate multiple independent graphs or isolated subgraphs in the same Mermaid block.
- No nested subgraphs. Keep instructions short.
- Use double-quoted labels for nodes: A["Label"]
- VERY CRITICAL: DO NOT use any HTML tags (no <br>, no <br/>, no <b>, etc).
- VERY CRITICAL: DO NOT use special characters like '&', '<', '>', or double-quotes inside the label strings. They break the mermaid parser in our environment.
- Return ONLY the raw mermaid code. No markdown blocks, no backticks.
- The first line MUST be a valid mermaid diagram type: graph, flowchart, sequenceDiagram, pie, gantt, stateDiagram, timeline`,
        },
      ],
    });
    // Strip code fences the model may add despite being told not to.
    const code =
      response.choices[0].message.content
        ?.replace(/```mermaid/g, "")
        .replace(/```/g, "")
        .trim() || "";
    // Validate: must start with a valid mermaid keyword
    // (accepts a slightly wider set than the prompt advertises).
    const validStarts = [
      "graph",
      "flowchart",
      "sequenceDiagram",
      "pie",
      "gantt",
      "stateDiagram",
      "timeline",
      "classDiagram",
      "erDiagram",
    ];
    // Keyword check is case-insensitive on the first line only.
    const firstLine = code.split("\n")[0]?.trim().toLowerCase() || "";
    const isValid = validStarts.some((keyword) =>
      firstLine.startsWith(keyword),
    );
    if (!isValid || code.length < 10) {
      console.warn(
        `⚠️ Mermaid: Invalid diagram generated for "${concept}", skipping`,
      );
      return "";
    }
    return code;
  }
  /**
   * Ask the structured model for 3 research-worthy topics/claims from the
   * post (first 4000 chars). Non-string topics are stringified so callers
   * always receive string[]. Rethrows on API failure after logging.
   */
  private async identifyResearchTopics(
    content: string,
    context: string,
  ): Promise<string[]> {
    try {
      // Debug logging around the API round-trip.
      console.log("Sending request to OpenRouter...");
      const response = await this.openai.chat.completions.create({
        model: MODELS.STRUCTURED,
        messages: [
          {
            role: "system",
            content: `Analyze the following blog post and identify 3 key topics or claims that would benefit from statistical data or external verification.
Return relevant, specific research queries (not too broad).
Context: ${context.slice(0, 1500)}
Return JSON: { "topics": ["topic 1", "topic 2", "topic 3"] }
Return ONLY the JSON.`,
          },
          {
            role: "user",
            content: content.slice(0, 4000),
          },
        ],
        response_format: { type: "json_object" },
      });
      console.log("Got response from OpenRouter");
      const parsed = safeParseJSON(
        response.choices[0].message.content || '{"topics": []}',
        { topics: [] },
      );
      // Guard against the model returning non-string topic entries.
      return (parsed.topics || []).map((t: any) =>
        typeof t === "string" ? t : JSON.stringify(t),
      );
    } catch (e: any) {
      console.error("Error in identifyResearchTopics:", e);
      throw e;
    }
  }
}

View File

@@ -0,0 +1,2 @@
// Barrel file: re-export the content engine's public API.
export * from "./generator";
export * from "./orchestrator";

View File

@@ -0,0 +1,350 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { ComponentDefinition } from "./generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
/** Constructor configuration for AiBlogPostOrchestrator. */
export interface OrchestratorConfig {
  /** OpenRouter API key used for both chat completions and research. */
  apiKey: string;
  /** Override for the rewrite model; defaults to "google/gemini-3-flash-preview". */
  model?: string;
}

/** One optimization job: the draft body plus all contextual inputs. */
export interface OptimizationTask {
  /** Post body (MDX, without frontmatter). */
  content: string;
  /** Persona/tone/guideline text injected into the rewrite prompt. */
  projectContext: string;
  availableComponents?: ComponentDefinition[];
  // NOTE(review): `instructions` is not consumed by any code visible here —
  // confirm where (or whether) it is used.
  instructions?: string;
}

/** Options for optimizeFile: context directory and available components. */
export interface OptimizeFileOptions {
  /** Directory of .md/.txt guideline files, resolved against process.cwd(). */
  contextDir: string;
  availableComponents?: ComponentDefinition[];
}
/**
 * Orchestrates the "research facts → find social posts → agentic rewrite"
 * pipeline for a single blog post file.
 */
export class AiBlogPostOrchestrator {
  // OpenRouter-backed client for chat completions.
  private openai: OpenAI;
  // Delegate for fact research and social-post discovery.
  private researchAgent: ResearchAgent;
  // Model slug used for the final article rewrite (compileArticle).
  private model: string;

  constructor(config: OrchestratorConfig) {
    // NOTE(review): default slug "google/gemini-3-flash-preview" differs from
    // the 2.5 models used elsewhere in this package — confirm it is valid.
    this.model = config.model || "google/gemini-3-flash-preview";
    this.openai = new OpenAI({
      apiKey: config.apiKey,
      // All requests go through OpenRouter, not api.openai.com.
      baseURL: "https://openrouter.ai/api/v1",
      defaultHeaders: {
        "HTTP-Referer": "https://mintel.me",
        "X-Title": "Mintel AI Blog Post Orchestrator",
      },
    });
    this.researchAgent = new ResearchAgent(config.apiKey);
  }
/**
* Reusable context loader. Loads all .md and .txt files from a directory into a single string.
*/
async loadContext(dirPath: string): Promise<string> {
try {
const resolvedDir = path.resolve(process.cwd(), dirPath);
const files = await fs.readdir(resolvedDir);
const textFiles = files.filter((f) => /\.(md|txt)$/i.test(f)).sort();
const contents: string[] = [];
for (const file of textFiles) {
const filePath = path.join(resolvedDir, file);
const text = await fs.readFile(filePath, "utf8");
contents.push(`=== ${file} ===\n${text.trim()}`);
}
return contents.join("\n\n");
} catch (e) {
console.warn(`⚠️ Could not load context from ${dirPath}: ${e}`);
return "";
}
}
  /**
   * Reads a file, extracts frontmatter, loads context, optimizes the body,
   * and writes it back in place. A backup of the original file content is
   * written to "<file>.bak" before overwriting.
   */
  async optimizeFile(
    targetFile: string,
    options: OptimizeFileOptions,
  ): Promise<void> {
    const absPath = path.isAbsolute(targetFile)
      ? targetFile
      : path.resolve(process.cwd(), targetFile);
    console.log(`📄 Processing File: ${path.basename(absPath)}`);
    const content = await fs.readFile(absPath, "utf8");
    // Frontmatter = leading "---\n...\n---" block; kept verbatim and
    // re-attached in front of the optimized body.
    const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
    const frontmatter = fmMatch ? fmMatch[0] : "";
    const body = fmMatch ? content.slice(frontmatter.length).trim() : content;
    console.log(`📖 Loading context from: ${options.contextDir}`);
    const projectContext = await this.loadContext(options.contextDir);
    if (!projectContext) {
      console.warn(
        "⚠️ No project context loaded. AI might miss specific guidelines.",
      );
    }
    const optimizedContent = await this.optimizeDocument({
      content: body,
      projectContext,
      availableComponents: options.availableComponents,
    });
    const finalOutput = frontmatter
      ? `${frontmatter}\n\n${optimizedContent}`
      : optimizedContent;
    await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
    await fs.writeFile(absPath, finalOutput);
    console.log(`✅ Saved optimized file to: ${absPath}`);
  }
  /**
   * Executes the 3-step optimization pipeline:
   * 1. Fakten recherchieren (research facts)
   * 2. Social Posts recherchieren (find social posts)
   * 3. AI anweisen daraus Artikel zu erstellen (compile the final article)
   * @returns The rewritten MDX body.
   */
  async optimizeDocument(task: OptimizationTask): Promise<string> {
    console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);
    // 1. Fakten recherchieren
    console.log("1⃣ Recherchiere Fakten...");
    const researchTopics = await this.identifyTopics(task.content);
    const facts: Fact[] = [];
    // Sequential on purpose: one research call at a time.
    for (const topic of researchTopics) {
      const topicFacts = await this.researchAgent.researchTopic(topic);
      facts.push(...topicFacts);
    }
    // 2. Social Posts recherchieren
    console.log(
      "2⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
    );
    // Use the first 2000 chars to find relevant social posts
    const socialPosts = await this.researchAgent.findSocialPosts(
      task.content.substring(0, 2000),
    );
    // 3. AI anweisen daraus Artikel zu erstellen
    console.log("3⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
    return await this.compileArticle(task, facts, socialPosts);
  }
private async identifyTopics(content: string): Promise<string[]> {
const response = await this.openai.chat.completions.create({
model: "google/gemini-2.5-flash", // fast structured model for topic extraction
messages: [
{
role: "system",
content: `Analyze the following blog post and identify 1 to 2 key topics or claims that would benefit from statistical data or external verification.
Return JSON: { "topics": ["topic 1", "topic 2"] }
Return ONLY the JSON.`,
},
{
role: "user",
content: content.slice(0, 4000),
},
],
response_format: { type: "json_object" },
});
try {
const raw = response.choices[0].message.content || '{"topics": []}';
const cleaned = raw
.trim()
.replace(/^```(?:json)?\s*\n?/, "")
.replace(/\n?```\s*$/, "");
const parsed = JSON.parse(cleaned);
return parsed.topics || [];
} catch (e) {
console.warn("⚠️ Failed to parse research topics", e);
return [];
}
}
  /**
   * Stage 3 of the optimization pipeline: asks the main model to rewrite the
   * draft into a final MDX body, weaving in the researched facts, social
   * embeds, and available MDX components. If the response contains <Mermaid>
   * blocks with invalid syntax, the method recurses (at most twice) with the
   * validator's error feedback prepended to the draft.
   *
   * @param task - Draft content plus project context / components / instructions.
   * @param facts - Researched facts (with sources) to weave into the article.
   * @param socialPosts - Social posts to embed via the MDX embed components.
   * @param retryCount - Recursion depth for Mermaid-validation retries (max 2).
   * @returns The cleaned MDX body (no frontmatter, no markdown wrappers).
   */
  private async compileArticle(
    task: OptimizationTask,
    facts: Fact[],
    socialPosts: SocialPost[],
    retryCount = 0,
  ): Promise<string> {
    // Render facts/posts/components as plain-text lists for the prompt.
    const factsText = facts
      .map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
      .join("\n");
    const socialText = socialPosts
      .map(
        (p, i) =>
          `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
      )
      .join("\n");
    const componentsText = (task.availableComponents || [])
      .map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
      .join("\n\n");
    const response = await this.openai.chat.completions.create({
      model: this.model,
      messages: [
        {
          role: "system",
          content: `You are an expert MDX Editor and Digital Architect.
YOUR TASK:
Take the given draft blog post and rewrite/enhance it into a final, error-free MDX file. Maintain the author's original German text, meaning, and tone, but enrich it gracefully.
CONTEXT & RULES:
Project Context / Tone:
${task.projectContext}
Facts to weave in:
${factsText || "None"}
Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
${socialText || "None"}
Available MDX Components you can use contextually:
${componentsText || "None"}
Special Instructions from User:
${task.instructions || "None"}
BLOG POST BEST PRACTICES (MANDATORY):
- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
- Füge ein sauberes '<TableOfContents />' ein.
- Verwende unsere Komponenten stilvoll für Visualisierungen.
- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).
CRITICAL GUIDELINES (NEVER BREAK THESE):
1. ONLY return the content for the BODY of the MDX file.
2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
8. Do NOT hallucinate links or facts. Use only what is provided.`,
        },
        {
          role: "user",
          content: task.content,
        },
      ],
    });
    // Fall back to the unmodified draft if the model returned no content.
    let rawContent = response.choices[0].message.content || task.content;
    rawContent = this.cleanResponse(rawContent);
    // Validation layer: check Mermaid syntax in the response; on failure,
    // retry compilation (at most 2 attempts) with the errors as feedback.
    if (retryCount < 2 && rawContent.includes("<Mermaid>")) {
      console.log("🔍 Validating Mermaid syntax in AI response...");
      const mermaidBlocks = this.extractMermaidBlocks(rawContent);
      let hasError = false;
      let errorFeedback = "";
      for (const block of mermaidBlocks) {
        const validationResult = await this.validateMermaidSyntax(block);
        if (!validationResult.valid) {
          hasError = true;
          errorFeedback += `\nInvalid Mermaid block:\n${block}\nError context: ${validationResult.error}\n\n`;
        }
      }
      if (hasError) {
        console.log(
          `❌ Invalid Mermaid syntax detected. Retrying compilation (Attempt ${retryCount + 1}/2)...`,
        );
        // Recurse with the validator errors prepended so the model can fix
        // them; the original draft is included again as the rewrite basis.
        return this.compileArticle(
          {
            ...task,
            content: `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${task.content}`,
          },
          facts,
          socialPosts,
          retryCount + 1,
        );
      }
    }
    return rawContent;
  }
private extractMermaidBlocks(content: string): string[] {
const blocks: string[] = [];
// Regex to match <Mermaid>...</Mermaid> blocks across multiple lines
const regex = /<Mermaid>([\s\S]*?)<\/Mermaid>/g;
let match;
while ((match = regex.exec(content)) !== null) {
if (match[1]) {
blocks.push(match[1].trim());
}
}
return blocks;
}
private async validateMermaidSyntax(
graph: string,
): Promise<{ valid: boolean; error?: string }> {
// Fast LLM validation to catch common syntax errors like unbalanced quotes or HTML entities
try {
const validationResponse = await this.openai.chat.completions.create({
model: "google/gemini-3-flash-preview", // Switch from gpt-4o-mini to user requested model
messages: [
{
role: "system",
content:
'You are a strict Mermaid.js compiler. Analyze the given Mermaid syntax. If it is 100% valid and will render without exceptions, reply ONLY with "VALID". If it has syntax errors (e.g., HTML inside labels, unescaped quotes, unclosed brackets), reply ONLY with "INVALID" followed by a short explanation of the exact error.',
},
{
role: "user",
content: graph,
},
],
});
const reply =
validationResponse.choices[0].message.content?.trim() || "VALID";
if (reply.startsWith("INVALID")) {
return { valid: false, error: reply };
}
return { valid: true };
} catch (e) {
console.error("Syntax validation LLM call failed, passing through:", e);
return { valid: true }; // Fallback to passing if validator fails
}
}
/**
* Post-processing to ensure the AI didn't include "help" text,
* duplicate frontmatter, or markdown wrappers.
*/
private cleanResponse(content: string): string {
let cleaned = content.trim();
// 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
if (cleaned.startsWith("```")) {
cleaned = cleaned
.replace(/^```[a-zA-Z]*\n?/, "")
.replace(/\n?```\s*$/, "");
}
// 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it)
// Look for the --- delimiters and remove the block if it exists
const fmRegex = /^---\s*\n([\s\S]*?)\n---\s*\n?/;
const match = cleaned.match(fmRegex);
if (match) {
console.log(
"♻️ Stripping redundant frontmatter detected in AI response...",
);
cleaned = cleaned.replace(fmRegex, "").trim();
}
return cleaned;
}
}