feat: content engine
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 2s
Monorepo Pipeline / 🧹 Lint (push) Successful in 1m12s
Monorepo Pipeline / 🧪 Test (push) Successful in 2m59s
Monorepo Pipeline / 🏗️ Build (push) Successful in 6m52s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
🏥 Server Maintenance / 🧹 Prune & Clean (push) Failing after 4s

This commit is contained in:
2026-02-22 02:39:27 +01:00
parent a9adb2eff7
commit 3a1a88db89
11 changed files with 942 additions and 172 deletions

View File

@@ -1,5 +1,5 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { ResearchAgent, type Fact, type SocialPost } from "@mintel/journaling";
import { MemeGenerator, MemeSuggestion } from "@mintel/meme-generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
@@ -237,11 +237,21 @@ REGELN:
console.log(`${factInsertions.length} fact enrichments planned`);
}
// ----- STEP 1.5: Social Media Search -----
console.log("📱 Identifying real social media posts...");
const socialPosts = await this.researchAgent.findSocialPosts(
content.substring(0, 200),
);
// ----- STEP 1.5: Social Media Extraction (no LLM — regex only) -----
console.log("📱 Extracting existing social media embeds...");
const socialPosts = this.researchAgent.extractSocialPosts(content);
// If none exist, fetch real ones via Serper API
if (socialPosts.length === 0) {
console.log(
" → None found. Fetching real social posts via Serper API...",
);
const newPosts = await this.researchAgent.fetchRealSocialPosts(
content.slice(0, 500),
);
socialPosts.push(...newPosts);
}
if (socialPosts.length > 0) {
console.log(
`📝 Planning placement for ${socialPosts.length} social media posts...`,
@@ -593,7 +603,7 @@ RULES:
- youtube -> <YouTubeEmbed videoId="ID" />
- twitter -> <TwitterEmbed tweetId="ID" theme="light" />
- linkedin -> <LinkedInEmbed urn="ID" />
- Add a 1-sentence intro paragraph above the embed to contextualize it.
- Add a 1-sentence intro paragraph above the embed to contextualize it naturally in the flow of the text (e.g. "Wie Experte XY im folgenden Video detailliert erklärt:"). This context is MANDATORY. Do not just drop the Component without text reference.
CONTEXT:
${context.slice(0, 3000)}
@@ -842,6 +852,11 @@ Tone: ${tone}.
Facts: ${factsContext}
${componentsContext}
BLOG POST BEST PRACTICES (MANDATORY):
- DEVIL'S ADVOCATE: Füge zwingend eine kurze kritische Sektion ein (z.B. mit \`<ComparisonRow>\` oder \`<IconList>\`), in der du offen die Nachteile/Kosten/Haken deiner eigenen Lösung ansprichst ("Der Haken an der Sache...").
- FAQ GENERATOR: Am absoluten Ende des Artikels erstellst du zwingend eine Markdown-Liste mit den 3 wichtigsten Fragen (FAQ) und Antworten (jeweils 2 Sätze) für Google Rich Snippets.
- Nutze wo passend die obigen React-Komponenten für ein hochwertiges Layout.
Format as Markdown. Start with # H1.
For places where a diagram would help, insert: <!-- DIAGRAM_PLACEHOLDER: Concept Name -->
Return ONLY raw content.`,
@@ -891,6 +906,7 @@ RULES:
- CRITICAL: Generate ONLY ONE single connected graph. Do NOT generate multiple independent graphs or isolated subgraphs in the same Mermaid block.
- No nested subgraphs. Keep instructions short.
- Use double-quoted labels for nodes: A["Label"]
- VERY CRITICAL: DO NOT use curly braces '{}' or brackets '[]' inside labels unless they are wrapped in double quotes (e.g. A["Text {with braces}"]).
- VERY CRITICAL: DO NOT use any HTML tags (no <br>, no <br/>, no <b>, etc).
- VERY CRITICAL: DO NOT use special characters like '&', '<', '>', or double-quotes inside the label strings. They break the mermaid parser in our environment.
- Return ONLY the raw mermaid code. No markdown blocks, no backticks.

View File

@@ -1,11 +1,13 @@
import OpenAI from "openai";
import { ResearchAgent, Fact, SocialPost } from "@mintel/journaling";
import { ResearchAgent, type Fact, type SocialPost } from "@mintel/journaling";
import { ThumbnailGenerator } from "@mintel/thumbnail-generator";
import { ComponentDefinition } from "./generator";
import * as fs from "node:fs/promises";
import * as path from "node:path";
/**
 * Construction options for AiBlogPostOrchestrator.
 */
export interface OrchestratorConfig {
// API key; the constructor hands it to the ResearchAgent.
// NOTE(review): presumably also used for the OpenAI client — that part of the constructor is not visible here; confirm.
apiKey: string;
// Optional key for the thumbnail generator. A ThumbnailGenerator is only created when this is set.
replicateApiKey?: string;
// Optional chat model identifier.
// NOTE(review): the default applied when omitted is not visible here — confirm in the constructor.
model?: string;
}
@@ -14,6 +16,7 @@ export interface OptimizationTask {
projectContext: string;
availableComponents?: ComponentDefinition[];
instructions?: string;
internalLinks?: { title: string; slug: string }[];
}
export interface OptimizeFileOptions {
@@ -24,6 +27,7 @@ export interface OptimizeFileOptions {
export class AiBlogPostOrchestrator {
private openai: OpenAI;
private researchAgent: ResearchAgent;
private thumbnailGenerator?: ThumbnailGenerator;
private model: string;
constructor(config: OrchestratorConfig) {
@@ -37,6 +41,11 @@ export class AiBlogPostOrchestrator {
},
});
this.researchAgent = new ResearchAgent(config.apiKey);
if (config.replicateApiKey) {
this.thumbnailGenerator = new ThumbnailGenerator({
replicateApiKey: config.replicateApiKey,
});
}
}
/**
@@ -76,9 +85,15 @@ export class AiBlogPostOrchestrator {
const content = await fs.readFile(absPath, "utf8");
const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---/);
const frontmatter = fmMatch ? fmMatch[0] : "";
const body = fmMatch ? content.slice(frontmatter.length).trim() : content;
// Idea 4: We no longer split frontmatter and body. We pass the whole file
// to the LLM so it can optimize the SEO title and description.
// Idea 1: Build Internal Link Graph
const blogDir = path.dirname(absPath);
const internalLinks = await this.buildInternalLinkGraph(
blogDir,
path.basename(absPath),
);
console.log(`📖 Loading context from: ${options.contextDir}`);
const projectContext = await this.loadContext(options.contextDir);
@@ -89,50 +104,199 @@ export class AiBlogPostOrchestrator {
}
const optimizedContent = await this.optimizeDocument({
content: body,
content: content,
projectContext,
availableComponents: options.availableComponents,
internalLinks: internalLinks, // pass to orchestrator
});
const finalOutput = frontmatter
? `${frontmatter}\n\n${optimizedContent}`
: optimizedContent;
// Idea 4b: Extract the potentially updated title to rename the file (SEO Slug)
const newFmMatch = optimizedContent.match(/^---\s*\n([\s\S]*?)\n---/);
let finalPath = absPath;
let finalSlug = path.basename(absPath, ".mdx");
await fs.writeFile(`${absPath}.bak`, content); // Keep simple backup
await fs.writeFile(absPath, finalOutput);
console.log(`✅ Saved optimized file to: ${absPath}`);
if (newFmMatch && newFmMatch[1]) {
const titleMatch = newFmMatch[1].match(/title:\s*["']([^"']+)["']/);
if (titleMatch && titleMatch[1]) {
const newTitle = titleMatch[1];
// Generate SEO Slug
finalSlug = newTitle
.toLowerCase()
.replace(/ä/g, "ae")
.replace(/ö/g, "oe")
.replace(/ü/g, "ue")
.replace(/ß/g, "ss")
.replace(/[^a-z0-9]+/g, "-")
.replace(/^-+|-+$/g, "");
const newAbsPath = path.join(path.dirname(absPath), `${finalSlug}.mdx`);
if (newAbsPath !== absPath) {
console.log(
`🔄 SEO Title changed! Renaming file to: ${finalSlug}.mdx`,
);
// Delete old file if the title changed significantly
try {
await fs.unlink(absPath);
} catch (e) {
/* ignore */
}
finalPath = newAbsPath;
}
}
}
// Idea 5: Automatic Thumbnails
let finalContent = optimizedContent;
// Skip if thumbnail already exists in frontmatter
const hasExistingThumbnail = /thumbnail:\s*["'][^"']+["']/.test(
finalContent,
);
if (this.thumbnailGenerator && !hasExistingThumbnail) {
console.log("🎨 Phase 5: Generating visual thumbnail...");
try {
const visualPrompt = await this.generateVisualPrompt(finalContent);
// We assume public dir is relative to where this runs, usually monorepo root or apps/web
const webPublicDir = path.resolve(process.cwd(), "apps/web/public");
const thumbnailRelPath = `/blog/${finalSlug}.png`;
const thumbnailAbsPath = path.join(
webPublicDir,
"blog",
`${finalSlug}.png`,
);
await this.thumbnailGenerator.generateImage(
visualPrompt,
thumbnailAbsPath,
);
// Update frontmatter with thumbnail (SEO: we also want it as a hero)
if (finalContent.includes("thumbnail:")) {
finalContent = finalContent.replace(
/thumbnail:\s*["'].*?["']/,
`thumbnail: "${thumbnailRelPath}"`,
);
} else {
finalContent = finalContent.replace(
/(title:\s*["'].*?["'])/,
`$1\nthumbnail: "${thumbnailRelPath}"`,
);
}
} catch (e) {
console.warn("⚠️ Thumbnail generation failed, skipping:", e);
}
}
await fs.writeFile(finalPath, finalContent);
console.log(`✅ Saved optimized file to: ${finalPath}`);
}
/**
 * Asks the chat model for a one-sentence, abstract image description of the
 * blog post, intended as the prompt for the thumbnail image generator.
 *
 * @param content Blog post text; only the first 5000 characters are sent to the model.
 * @returns The model's description (trimmed), or a generic fallback prompt when
 *          the model returns no choice, no content, or only whitespace.
 * @throws Whatever the OpenAI client throws when the request itself fails.
 */
private async generateVisualPrompt(content: string): Promise<string> {
  const fallbackPrompt =
    "Technical architectural blueprint of a digital system";

  const response = await this.openai.chat.completions.create({
    model: this.model,
    messages: [
      {
        role: "system",
        content: `You are a Visual Discovery Agent for an architectural design system.
Review the provided blog post and create a 1-sentence abstract visual description for an image generator (like Flux).
THEME: Technical blueprint / structural illustration.
STYLE: Clean lines, geometric shapes, monochrome base with one highlighter accent color (green, pink, or yellow).
NO TEXT. NO PEOPLE. NO REALISTIC PHOTOS.
FOCUS: The core metaphor or technical concept of the article.
Example output: "A complex network of glowing fiber optic nodes forming a recursive pyramid structure, technical blue lineart style."`,
      },
      { role: "user", content: content.slice(0, 5000) },
    ],
    max_tokens: 100,
  });

  // `choices` may be empty and `content` may be null; optional chaining keeps
  // both cases on the fallback path instead of throwing a TypeError.
  const description = response.choices[0]?.message?.content?.trim();

  // `||` (not `??`) on purpose: an empty string must also use the fallback.
  return description || fallbackPrompt;
}
/**
 * Collects the title and URL slug of every other `.mdx` post in a blog
 * directory so they can be offered as internal link targets.
 *
 * @param blogDir Directory that contains the blog's `.mdx` files.
 * @param currentFile File name (not path) of the post being optimized; it is excluded.
 * @returns One `{ title, slug }` entry per post that has a quoted `title:` line,
 *          in directory-listing order. Slugs have the form `/blog/<file name without .mdx>`.
 *          Returns an empty array when the directory cannot be listed; individual
 *          files that cannot be read are skipped with a warning.
 */
private async buildInternalLinkGraph(
  blogDir: string,
  currentFile: string,
): Promise<{ title: string; slug: string }[]> {
  // Anchored to the start of a line so keys such as `subtitle:` are not taken
  // for the title. The backreference requires the closing quote to match the
  // opening one, so a title like "Don't panic" is captured in full.
  const titlePattern = /^[ \t]*title:[ \t]*(["'])(.+?)\1/m;

  try {
    const entries = await fs.readdir(blogDir);
    const candidates = entries.filter(
      (name) => name.endsWith(".mdx") && name !== currentFile,
    );

    // Reads are independent, so run them concurrently; Promise.all keeps the
    // results in the same order as `candidates`.
    const links = await Promise.all(
      candidates.map(async (name) => {
        try {
          const text = await fs.readFile(path.join(blogDir, name), "utf8");
          const title = titlePattern.exec(text)?.[2];
          if (!title) return null;
          return {
            title,
            // basename() strips only the trailing extension, unlike
            // String.replace which would remove the first ".mdx" anywhere.
            slug: `/blog/${path.basename(name, ".mdx")}`,
          };
        } catch (error: unknown) {
          // One unreadable entry (e.g. a directory named *.mdx) must not
          // discard the links gathered from all other posts.
          console.warn(
            `Could not read ${name} for internal link graph`,
            error,
          );
          return null;
        }
      }),
    );

    return links.filter(
      (link): link is { title: string; slug: string } => link !== null,
    );
  } catch (error: unknown) {
    console.warn("Could not build internal link graph", error);
    return [];
  }
}
/**
* Executes the 3-step optimization pipeline:
* 1. Fakten recherchieren
* 2. Social Posts recherchieren
* 2. Bestehende Social Posts extrahieren (kein LLM — nur Regex)
* 3. AI anweisen daraus Artikel zu erstellen
*/
async optimizeDocument(task: OptimizationTask): Promise<string> {
console.log(`🚀 Starting AI Orchestration Pipeline (${this.model})...`);
// 1. Fakten recherchieren
console.log("1⃣ Recherchiere Fakten...");
// 1. Fakten & Konkurrenz recherchieren
console.log("1⃣ Recherchiere Fakten und analysiere Konkurrenz...");
const researchTopics = await this.identifyTopics(task.content);
const facts: Fact[] = [];
for (const topic of researchTopics) {
const topicFacts = await this.researchAgent.researchTopic(topic);
facts.push(...topicFacts);
}
const competitorInsights: string[] = [];
// 2. Social Posts recherchieren
console.log(
"2⃣ Recherchiere Social Media Posts (YouTube, Twitter, LinkedIn)...",
);
// Use the first 2000 chars to find relevant social posts
const socialPosts = await this.researchAgent.findSocialPosts(
task.content.substring(0, 2000),
// Parallelize competitor research and fact research
await Promise.all(
researchTopics.map(async (topic) => {
const [topicFacts, insights] = await Promise.all([
this.researchAgent.researchTopic(topic),
this.researchAgent.researchCompetitors(topic),
]);
facts.push(...topicFacts);
competitorInsights.push(...insights);
}),
);
// 2. Bestehende Social Posts aus dem Content extrahieren (deterministisch, kein LLM)
console.log("2⃣ Extrahiere bestehende Social Media Embeds aus Content...");
const socialPosts = this.researchAgent.extractSocialPosts(task.content);
// Wenn keine vorhanden sind, besorge echte von der Serper API
if (socialPosts.length === 0) {
console.log(
" → Keine bestehenden Posts gefunden. Suche neue über Serper API...",
);
const realPosts = await this.researchAgent.fetchRealSocialPosts(
task.content.slice(0, 500),
);
socialPosts.push(...realPosts);
}
// 3. AI anweisen daraus Artikel zu erstellen
console.log("3⃣ Erstelle optimierten Artikel (Agentic Rewrite)...");
return await this.compileArticle(task, facts, socialPosts);
return await this.compileArticle(
task,
facts,
competitorInsights,
socialPosts,
task.internalLinks || [],
);
}
private async identifyTopics(content: string): Promise<string[]> {
@@ -170,22 +334,55 @@ Return ONLY the JSON.`,
private async compileArticle(
task: OptimizationTask,
facts: Fact[],
competitorInsights: string[],
socialPosts: SocialPost[],
internalLinks: { title: string; slug: string }[],
retryCount = 0,
): Promise<string> {
const factsText = facts
.map((f, i) => `${i + 1}. ${f.statement} [Source: ${f.source}]`)
.join("\n");
const socialText = socialPosts
.map(
(p, i) =>
`Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`,
)
.join("\n");
let socialText = `CRITICAL RULE: NO VERIFIED SOCIAL MEDIA POSTS FOUND. You MUST NOT use <YouTubeEmbed />, <TwitterEmbed />, or <LinkedInEmbed /> under ANY circumstances in this article. DO NOT hallucinate IDs.`;
if (socialPosts.length > 0) {
const allowedTags: string[] = [];
if (socialPosts.some((p) => p.platform === "youtube"))
allowedTags.push('<YouTubeEmbed videoId="..." />');
if (socialPosts.some((p) => p.platform === "twitter"))
allowedTags.push('<TwitterEmbed tweetId="..." />');
if (socialPosts.some((p) => p.platform === "linkedin"))
allowedTags.push('<LinkedInEmbed url="..." />');
socialText = `Social Media Posts to embed (use ONLY these tags, do not use others: ${allowedTags.join(", ")}):\n${socialPosts.map((p) => `Platform: ${p.platform}, ID: ${p.embedId} (${p.description})`).join("\n")}\nCRITICAL: Do not invent any IDs that are not explicitly listed in the list above.`;
}
const componentsText = (task.availableComponents || [])
.map((c) => `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`)
.filter((c) => {
if (
c.name === "YouTubeEmbed" &&
!socialPosts.some((p) => p.platform === "youtube")
)
return false;
if (
c.name === "TwitterEmbed" &&
!socialPosts.some((p) => p.platform === "twitter")
)
return false;
if (
c.name === "LinkedInEmbed" &&
!socialPosts.some((p) => p.platform === "linkedin")
)
return false;
return true;
})
.map((c) => {
// Ensure LinkedInEmbed usage example consistently uses 'url'
if (c.name === "LinkedInEmbed") {
return `<${c.name}>: ${c.description}\n Example: <LinkedInEmbed url="https://www.linkedin.com/posts/..." />`;
}
return `<${c.name}>: ${c.description}\n Example: ${c.usageExample}`;
})
.join("\n\n");
const response = await this.openai.chat.completions.create({
@@ -202,30 +399,47 @@ CONTEXT & RULES:
Project Context / Tone:
${task.projectContext}
Facts to weave in:
${factsText || "None"}
FACTS TO INTEGRATE:
${factsText || "No new facts needed."}
Social Media Posts to embed (use <YouTubeEmbed videoId="..." />, <TwitterEmbed tweetId="..." />, or <LinkedInEmbed url="..." />):
${socialText || "None"}
COMPETITOR BENCHMARK (TOP RANKING ARTICLES):
Here are snippets from the top 5 ranking Google articles for this topic. Read them carefully and ensure our article covers these topics but is fundamentally BETTER, deeper, and more authoritative:
${competitorInsights.length > 0 ? competitorInsights.join("\n") : "No competitor insights found."}
Available MDX Components you can use contextually:
${componentsText || "None"}
AVAILABLE UI COMPONENTS:
${componentsText}
SOCIAL MEDIA POSTS:
${socialText}
INTERNAL LINKING GRAPH:
Hier sind unsere existierenden Blog-Posts (Titel und URL-Slug). Finde 2-3 passende Stellen im Text, um organisch mit regulärem Markdown (\`[passender Text]([slug])\`) auf diese Posts zu verlinken. Nutze KEIN <ExternalLink> für B2B-interne Links.
${internalLinks.length > 0 ? internalLinks.map((l) => `- "${l.title}" -> ${l.slug}`).join("\n") : "Keine internen Links verfügbar."}
Special Instructions from User:
${task.instructions || "None"}
BLOG POST BEST PRACTICES (MANDATORY):
- DEVIL'S ADVOCATE: Füge zwingend eine kurze kritische Sektion ein (z.B. mit \`<ComparisonRow>\` oder \`<IconList>\`), in der du offen die Nachteile/Kosten/Haken deiner eigenen Lösung ansprichst ("Der Haken an der Sache..."). Das baut Vertrauen bei B2B Entscheidenden auf.
- FAQ GENERATOR: Am absoluten Ende des Artikels erstellst du zwingend eine Markdown-Liste mit den 3 wichtigsten Fragen (FAQ) und Antworten (jeweils 2 Sätze) für Google Rich Snippets. Nutze dazu das \`<FAQSection>\` Component oder normales Markdown.
- SUBTLE CTAs: Webe 1-2 subtile CTAs für High-End Website Entwicklung ein (Beispiel: \`<Button href="/contact" variant="outline" size="normal">Performance-Check anfragen</Button>\` oder \`<Button href="/contact">Digitale Architektur anfragen</Button>\`). Platziere diese zwingend organisch nach Abschnitten mit hohem Mehrwert.
- Zitat-Varianten: Wenn du Organisationen oder Studien zitierst, nutze \`<ArticleQuote isCompany={true} ... />\`. Für Personen lass \`isCompany\` weg.
- Füge zwingend ein prägnantes 'TL;DR' ganz am Anfang ein.
- Füge ein sauberes '<TableOfContents />' ein.
- Verwende unsere Komponenten stilvoll für Visualisierungen.
- Agiere als hochprofessioneller Digital Architect und entferne alte MDX-Metadaten im Body.
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab (z.B. als <H2>Fazit: ...</H2> gefolgt von deinen Empfehlungen).
- Fazit: Schließe JEDEN Artikel ZWINGEND mit einem starken, klaren 'Fazit' ab.
STRICT MDX OUTPUT RULES:
1. ONLY use the exact components defined above.
2. For Social Media Embeds, you MUST ONLY use the EXACT IDs provided in the list above. Do NOT invent IDs.
3. If ANY verified social media posts are provided, you MUST integrate at least one naturally with a contextual sentence.
4. Keep the original content blocks and headings as much as possible, just improve flow.
5. FRONTMATTER SEO (Idea 4): Ich übergebe dir die KOMPLETTE Datei inklusive Markdown-Frontmatter (--- ... ---). Du MUSST das Frontmatter ebenfalls zurückgeben! Optimiere darin den \`title\` und die \`description\` maximal für B2B SEO. Lasse die anderen Keys im Frontmatter (date, tags) unangetastet.
CRITICAL GUIDELINES (NEVER BREAK THESE):
1. ONLY return the content for the BODY of the MDX file.
2. DO NOT INCLUDE FRONTMATTER (blocks starting and ending with ---). I ALREADY HAVE THE FRONTMATTER.
3. DO NOT REPEAT METADATA IN THE BODY. Do not output lines like "title: ...", "description: ...", "date: ..." inside the text.
4. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
1. THE OUTPUT MUST START WITH YAML FRONTMATTER AND END WITH THE MDX BODY.
2. DO NOT INCLUDE MARKDOWN WRAPPERS (do not wrap in \`\`\`mdx ... \`\`\`).
5. Be clean. Do NOT clump all components together. Provide 3-4 paragraphs of normal text between visual items.
6. If you insert components, ensure their syntax is 100% valid JSX/MDX.
7. CRITICAL MERMAID RULE: If you use <Mermaid>, the inner content MUST be 100% valid Mermaid.js syntax. NO HTML inside labels. NO quotes inside brackets without valid syntax.
@@ -239,7 +453,7 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
});
let rawContent = response.choices[0].message.content || task.content;
rawContent = this.cleanResponse(rawContent);
rawContent = this.cleanResponse(rawContent, socialPosts);
// Validation Layer: Check Mermaid syntax
if (retryCount < 2 && rawContent.includes("<Mermaid>")) {
@@ -266,7 +480,9 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
content: `The previous attempt failed because you generated invalid Mermaid.js syntax. Please rewrite the MDX and FIX the following Mermaid errors. \n\nErrors:\n${errorFeedback}\n\nOriginal Draft:\n${task.content}`,
},
facts,
competitorInsights,
socialPosts,
internalLinks,
retryCount + 1,
);
}
@@ -320,11 +536,7 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
}
}
/**
* Post-processing to ensure the AI didn't include "help" text,
* duplicate frontmatter, or markdown wrappers.
*/
private cleanResponse(content: string): string {
private cleanResponse(content: string, socialPosts: SocialPost[]): string {
let cleaned = content.trim();
// 1. Strip Markdown Wrappers (e.g. ```mdx ... ```)
@@ -334,16 +546,52 @@ CRITICAL GUIDELINES (NEVER BREAK THESE):
.replace(/\n?```\s*$/, "");
}
// 2. Strip redundant frontmatter (the AI sometimes helpfully repeats it)
// Look for the --- delimiters and remove the block if it exists
const fmRegex = /^---\s*\n([\s\S]*?)\n---\s*\n?/;
const match = cleaned.match(fmRegex);
if (match) {
console.log(
"♻️ Stripping redundant frontmatter detected in AI response...",
);
cleaned = cleaned.replace(fmRegex, "").trim();
}
// 2. We NO LONGER strip redundant frontmatter, because we requested the LLM to output it.
// Ensure the output actually has frontmatter, if not, something went wrong, but we just pass it along.
// 3. Strip any social embeds the AI hallucinated (IDs not in our extracted set)
const knownYtIds = new Set(
socialPosts.filter((p) => p.platform === "youtube").map((p) => p.embedId),
);
const knownTwIds = new Set(
socialPosts.filter((p) => p.platform === "twitter").map((p) => p.embedId),
);
const knownLiIds = new Set(
socialPosts
.filter((p) => p.platform === "linkedin")
.map((p) => p.embedId),
);
cleaned = cleaned.replace(
/<YouTubeEmbed[^>]*videoId="([^"]+)"[^>]*\/>/gi,
(tag, id) => {
if (knownYtIds.has(id)) return tag;
console.log(
`🛑 Stripped hallucinated YouTubeEmbed with videoId="${id}"`,
);
return "";
},
);
cleaned = cleaned.replace(
/<TwitterEmbed[^>]*tweetId="([^"]+)"[^>]*\/>/gi,
(tag, id) => {
if (knownTwIds.has(id)) return tag;
console.log(
`🛑 Stripped hallucinated TwitterEmbed with tweetId="${id}"`,
);
return "";
},
);
cleaned = cleaned.replace(
/<LinkedInEmbed[^>]*(?:url|urn)="([^"]+)"[^>]*\/>/gi,
(tag, id) => {
if (knownLiIds.has(id)) return tag;
console.log(`🛑 Stripped hallucinated LinkedInEmbed with id="${id}"`);
return "";
},
);
return cleaned;
}