feat: content engine
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 2s
Monorepo Pipeline / 🧹 Lint (push) Successful in 1m12s
Monorepo Pipeline / 🧪 Test (push) Successful in 2m59s
Monorepo Pipeline / 🏗️ Build (push) Successful in 6m52s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
🏥 Server Maintenance / 🧹 Prune & Clean (push) Failing after 4s

This commit is contained in:
2026-02-22 02:39:27 +01:00
parent a9adb2eff7
commit 3a1a88db89
11 changed files with 942 additions and 172 deletions

View File

@@ -1,6 +1,7 @@
import OpenAI from "openai";
import { DataCommonsClient } from "./clients/data-commons";
import { TrendsClient } from "./clients/trends";
import { SerperClient, type SerperVideoResult } from "./clients/serper";
export interface Fact {
statement: string;
@@ -20,6 +21,7 @@ export class ResearchAgent {
private openai: OpenAI;
private dcClient: DataCommonsClient;
private trendsClient: TrendsClient;
private serperClient: SerperClient;
constructor(apiKey: string) {
this.openai = new OpenAI({
@@ -31,7 +33,8 @@ export class ResearchAgent {
},
});
this.dcClient = new DataCommonsClient();
this.trendsClient = new TrendsClient();
this.trendsClient = new TrendsClient(apiKey);
this.serperClient = new SerperClient(process.env.SERPER_API_KEY);
}
async researchTopic(topic: string): Promise<Fact[]> {
@@ -107,120 +110,151 @@ Return JSON: { "facts": [ { "statement": "...", "source": "Organization Name Onl
return result.facts || [];
}
async findSocialPosts(
/**
 * Collects the social media embeds that are already written into a piece of
 * MDX content.
 *
 * Parsing is purely regex-based (no LLM call), so the result only ever
 * contains IDs that literally appear in `content`. Platforms are scanned in
 * the order YouTube, Twitter/X, LinkedIn, and an embed ID that has already
 * been collected (for any platform) is not added a second time.
 *
 * @param content - MDX source to scan for self-closing embed components.
 * @returns The embeds found, in scan order; empty when there are none.
 */
extractSocialPosts(content: string): SocialPost[] {
  // One row per supported embed component; capture group 1 is the embed ID.
  const embedSources: ReadonlyArray<{
    platform: SocialPost["platform"];
    description: string;
    pattern: RegExp;
  }> = [
    {
      // <YouTubeEmbed videoId="..." />
      platform: "youtube",
      description: "Existing YouTube embed",
      pattern: /<YouTubeEmbed[^>]*videoId="([^"]+)"[^>]*\/>/gi,
    },
    {
      // <TwitterEmbed tweetId="..." />
      platform: "twitter",
      description: "Existing Twitter/X embed",
      pattern: /<TwitterEmbed[^>]*tweetId="([^"]+)"[^>]*\/>/gi,
    },
    {
      // <LinkedInEmbed url="..." /> or <LinkedInEmbed urn="..." />
      platform: "linkedin",
      description: "Existing LinkedIn embed",
      pattern: /<LinkedInEmbed[^>]*(?:url|urn)="([^"]+)"[^>]*\/>/gi,
    },
  ];

  const collected: SocialPost[] = [];
  // Tracks every ID already collected so repeats are skipped across platforms.
  const seenIds = new Set<string>();

  for (const { platform, description, pattern } of embedSources) {
    for (const [, embedId] of content.matchAll(pattern)) {
      if (seenIds.has(embedId)) continue;
      seenIds.add(embedId);
      collected.push({ platform, embedId, description });
    }
  }

  if (collected.length === 0) {
    console.log(`📱 No existing social media embeds found in content`);
  } else {
    console.log(
      `📱 Extracted ${collected.length} existing social media embed(s) from content`,
    );
  }

  return collected;
}
/**
* Fetches real, verified social media posts using the Serper API (Google Video Search).
* This completely prevents hallucinations as it relies on actual search results.
*/
async fetchRealSocialPosts(
topic: string,
retries = 2,
previousFailures: string[] = [],
retries = 1,
): Promise<SocialPost[]> {
console.log(
`📱 Searching for relevant Social Media Posts: "${topic}"${retries < 2 ? ` (Retry ${2 - retries}/2)` : ""}`,
`🌐 [Serper] Fetching real social media posts for topic: "${topic}"...`,
);
const failureContext =
previousFailures.length > 0
? `\nCRITICAL FAILURE WARNING: The following IDs you generated previously returned 404 Not Found and were Hallucinations: ${previousFailures.join(", ")}. You MUST provide REAL, verifiable IDs. If you cannot 100% guarantee an ID exists, return an empty array instead of guessing.`
: "";
const response = await this.openai.chat.completions.create({
model: "google/gemini-2.5-pro",
// Step 1: Ask the LLM to generate a highly specific YouTube search query
// We want tutorials, explanations, or deep dives.
const queryGen = await this.openai.chat.completions.create({
model: "google/gemini-2.5-flash",
messages: [
{
role: "system",
content: `You are a social media researcher finding high-value, real expert posts and videos to embed in a B2B Tech Blog post about: "${topic}".
Your Goal: Identify 1-3 REAL, highly relevant social media posts (YouTube, Twitter/X, LinkedIn) that provide social proof, expert opinions, or deep dives.${failureContext}
Constraint: You MUST provide the exact mathematical or alphanumeric ID for the embed.
- YouTube: The 11-character video ID (e.g. "dQw4w9WgXcQ")
- Twitter: The numerical tweet ID (e.g. "1753464161943834945")
- LinkedIn: The activity URN (e.g. "urn:li:activity:7153664326573674496" or just the numerical 19-digit ID)
Return JSON exactly as follows:
{
"posts": [
{ "platform": "youtube", "embedId": "dQw4w9WgXcQ", "description": "Google Web Dev explaining Core Web Vitals" }
]
}
Return ONLY the JSON.`,
content: `Generate a YouTube search query to find a high-quality, professional educational video about: "${topic}".
Prefer official tech channels or well-known developers (e.g., Google Chrome Developers, Vercel, Theo - t3.gg, Fireship, etc.).
Return a JSON object with a single string field "query". Example: {"query": "core web vitals explanation google developers"}.
DO NOT USE QUOTES IN THE QUERY ITSELF.`,
},
],
response_format: { type: "json_object" },
});
if (
!response.choices ||
response.choices.length === 0 ||
!response.choices[0].message
) {
console.warn(`⚠️ Social post search failed for concept: "${topic}"`);
try {
let queryStr = "";
const parsed = JSON.parse(
queryGen.choices[0].message.content || '{"query": ""}',
);
queryStr = parsed.query || `${topic} tutorial explanation`;
// Step 2: Search via Serper Video Search
const videos = await this.serperClient.searchVideos(queryStr);
if (!videos || videos.length === 0) {
console.warn(`⚠️ [Serper] No videos found for query: "${queryStr}"`);
if (retries > 0) return this.fetchRealSocialPosts(topic, retries - 1);
return [];
}
// Filter for youtube results
const ytVideos = videos.filter(
(v) => v.link && v.link.includes("youtube.com/watch"),
);
if (ytVideos.length === 0) {
console.warn(`⚠️ [Serper] No YouTube videos in search results.`);
if (retries > 0) return this.fetchRealSocialPosts(topic, retries - 1);
return [];
}
// Pick the best one (usually the first result)
const bestVideo = ytVideos[0];
// Extract the 11-char video ID from the link (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)
const urlObj = new URL(bestVideo.link);
const videoId = urlObj.searchParams.get("v");
if (!videoId) {
console.warn(
`⚠️ [Serper] Could not extract video ID from: ${bestVideo.link}`,
);
return [];
}
console.log(
`✅ [Serper] Found valid YouTube Video: ${videoId} ("${bestVideo.title}")`,
);
return [
{
platform: "youtube",
embedId: videoId,
description: bestVideo.title || "YouTube Video",
},
];
} catch (e) {
console.error("❌ Failed to fetch real social posts:", e);
return [];
}
const result = JSON.parse(response.choices[0].message.content || "{}");
const rawPosts: SocialPost[] = result.posts || [];
// CRITICAL WORKFLOW FIX: Absolutely forbid hallucinations by verifying via oEmbed APIs
const verifiedPosts: SocialPost[] = [];
if (rawPosts.length > 0) {
console.log(
`🛡️ Verifying ${rawPosts.length} generated social ID(s) against network...`,
);
}
const failedIdsForThisRun: string[] = [];
for (const post of rawPosts) {
let isValid = false;
try {
if (post.platform === "youtube") {
const res = await fetch(
`https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v=${post.embedId}`,
);
isValid = res.ok;
} else if (post.platform === "twitter") {
const res = await fetch(
`https://publish.twitter.com/oembed?url=https://twitter.com/x/status/${post.embedId}`,
);
isValid = res.ok;
} else if (post.platform === "linkedin") {
// LinkedIn doesn't have an unauthenticated oEmbed, so we use heuristic URL/URN format validation
if (
post.embedId.includes("urn:li:") ||
post.embedId.includes("linkedin.com") ||
/^\d{19}$/.test(post.embedId)
) {
isValid = true;
}
}
} catch (e) {
isValid = false;
}
if (isValid) {
verifiedPosts.push(post);
console.log(
`✅ Verified real post ID: ${post.embedId} (${post.platform})`,
);
} else {
failedIdsForThisRun.push(post.embedId);
console.warn(
`🛑 Dropped hallucinated or dead post ID: ${post.embedId} (${post.platform})`,
);
}
}
// AGENT SELF-HEALING: If all found posts were hallucinations and we have retries, challenge the LLM to try again
if (verifiedPosts.length === 0 && rawPosts.length > 0 && retries > 0) {
console.warn(
`🔄 Self-Healing triggered: All IDs were hallucinations. Challenging agent to find real IDs...`,
);
return this.findSocialPosts(topic, retries - 1, [
...previousFailures,
...failedIdsForThisRun,
]);
}
return verifiedPosts;
}
private async planResearch(
@@ -273,4 +307,60 @@ CRITICAL: Do NOT provide more than 2 trendsKeywords. Keep it extremely focused.`
return { trendsKeywords: [], dcVariables: [] };
}
}
/**
 * Researches the top-ranking Google results for a topic and condenses them
 * into one line per result, so they can be handed to a later prompt as
 * competitor context.
 *
 * Flow: an LLM call produces a search query, the query is sent to the Serper
 * web search (top 5 results), and each organic result is formatted as
 * `[Rank #n] Title: "..." | Snippet: "..."`.
 *
 * The method is best-effort: failures of the LLM call, of JSON parsing, or of
 * the search are logged and produce an empty array instead of a rejection.
 *
 * @param topic - Topic to research; also used as the search query when the
 *   LLM does not return a usable one.
 * @param retries - How many more attempts to make when the search comes back
 *   empty. Each retry asks the LLM for a fresh query.
 * @returns One formatted insight string per organic result, or `[]`.
 */
async researchCompetitors(topic: string, retries = 1): Promise<string[]> {
  console.log(
    `🔍 [Competitor Research] Fetching top ranking web pages for topic: "${topic.slice(0, 50)}..."`,
  );

  try {
    // Step 1: LLM generates the optimal Google Search query.
    // Kept inside the try block so an API/network failure is handled the
    // same way as every other failure in this method (log + empty result).
    const queryGen = await this.openai.chat.completions.create({
      model: "google/gemini-2.5-flash",
      messages: [
        {
          role: "system",
          content: `Generate a Google Search query that a B2B decision maker would use to research the following topic: "${topic}".
Focus on intent-driven keywords.
Return a JSON object with a single string field "query". Example: {"query": "Next.js performance optimization agency"}.
DO NOT USE QUOTES IN THE QUERY ITSELF.`,
        },
      ],
      response_format: { type: "json_object" },
    });

    // The completion may come back without choices or without content.
    const rawContent =
      queryGen.choices?.[0]?.message?.content || '{"query": ""}';
    const parsed: unknown = JSON.parse(rawContent);

    // Only accept a non-empty string as the query; otherwise search for the
    // topic itself.
    const candidate =
      typeof parsed === "object" && parsed !== null
        ? (parsed as { query?: unknown }).query
        : undefined;
    const queryStr =
      typeof candidate === "string" && candidate.trim() !== ""
        ? candidate.trim()
        : topic;

    // Step 2: Search via Serper Web Search
    const organicResults = await this.serperClient.searchWeb(queryStr, 5);
    if (!organicResults || organicResults.length === 0) {
      console.warn(
        `⚠️ [Competitor Research] No web results found for query: "${queryStr}"`,
      );
      if (retries > 0) return this.researchCompetitors(topic, retries - 1);
      return [];
    }

    // Map to structured insights strings. The search payload is not validated
    // at runtime, so missing fields are rendered as empty text rather than
    // the literal "undefined".
    const insights = organicResults.map(
      (result, i) =>
        `[Rank #${i + 1}] Title: "${result.title ?? ""}" | Snippet: "${result.snippet ?? ""}"`,
    );

    console.log(
      `✅ [Competitor Research] Analyzed top ${insights.length} competitor articles.`,
    );
    return insights;
  } catch (e: unknown) {
    console.error("❌ Failed to fetch competitor research:", e);
    return [];
  }
}
}

View File

@@ -0,0 +1,128 @@
/**
 * A single entry of the `videos` array that `SerperClient.searchVideos`
 * returns. The API payload is cast to this shape without runtime validation.
 */
export interface SerperVideoResult {
// Title of the video hit.
title: string;
// URL of the video hit.
link: string;
// The optional fields below are not read anywhere in this file; their
// meanings are presumed from the names — TODO confirm against the Serper API docs.
snippet?: string;
date?: string;
duration?: string;
channel?: string;
}
/**
 * Shape that the JSON body of a Serper `/videos` response is cast to inside
 * `SerperClient.searchVideos`. The cast is not checked at runtime.
 */
export interface SerperVideoResponse {
// Untyped; presumably an echo of the request parameters — TODO confirm against the Serper API docs.
searchParameters: any;
// Video hits; `searchVideos` returns this array, or `[]` when it is absent.
videos: SerperVideoResult[];
}
/**
 * A single entry of the `organic` array that `SerperClient.searchWeb`
 * returns. The API payload is cast to this shape without runtime validation.
 */
export interface SerperWebResult {
// Title of the result page.
title: string;
// URL of the result page.
link: string;
// Text excerpt shown for the result.
snippet: string;
// The remaining fields are not read anywhere in this file; their meanings
// are presumed from the names — TODO confirm against the Serper API docs.
date?: string;
sitelinks?: any[];
// Presumably the rank of the result within the search page — TODO confirm.
position: number;
}
/**
 * Shape that the JSON body of a Serper `/search` response is cast to inside
 * `SerperClient.searchWeb`. The cast is not checked at runtime.
 */
export interface SerperWebResponse {
// Untyped; presumably an echo of the request parameters — TODO confirm against the Serper API docs.
searchParameters: any;
// Organic web hits; `searchWeb` returns this array, or `[]` when it is absent.
organic: SerperWebResult[];
}
/**
 * Small HTTP client for the Serper search API (`https://google.serper.dev`).
 *
 * Both search methods are best-effort: a missing API key, a non-OK HTTP
 * status, a network error, or an unexpected response body is logged and
 * results in an empty array rather than a rejected promise.
 */
export class SerperClient {
  private readonly apiKey: string;
  /** Value sent as the `gl` request field. */
  private readonly gl: string;
  /** Value sent as the `hl` request field. */
  private readonly hl: string;

  /**
   * @param apiKey - Serper API key. When omitted or empty, the
   *   `SERPER_API_KEY` environment variable is used instead.
   * @param options - Optional overrides for the `gl` / `hl` request fields.
   *   Both default to `"de"` (localized German results).
   */
  constructor(apiKey?: string, options: { gl?: string; hl?: string } = {}) {
    // `||` (not `??`) on purpose: an empty-string key must also fall back
    // to the environment variable.
    const key = apiKey || process.env.SERPER_API_KEY;
    if (!key) {
      console.warn("⚠️ SERPER_API_KEY is not defined. SerperClient will fail.");
    }
    this.apiKey = key || "";
    this.gl = options.gl ?? "de";
    this.hl = options.hl ?? "de";
  }

  /**
   * Performs a video search via Serper (Google Video Search).
   * Useful for finding relevant YouTube videos.
   *
   * @param query - Search query text.
   * @param num - Number of results to request.
   * @returns The `videos` array of the response, or `[]` on any failure.
   */
  async searchVideos(
    query: string,
    num: number = 5,
  ): Promise<SerperVideoResult[]> {
    if (!this.apiKey) {
      console.error("❌ SERPER_API_KEY missing - cannot execute search.");
      return [];
    }

    try {
      console.log(`🔍 [Serper] Searching videos for: "${query}"`);
      const payload = await this.post("videos", query, num);
      return SerperClient.listFrom<SerperVideoResult>(payload, "videos");
    } catch (e: unknown) {
      console.error("❌ [Serper] Request failed", e);
      return [];
    }
  }

  /**
   * Performs a standard web search via Serper.
   * Used for competitor analysis and context gathering.
   *
   * @param query - Search query text.
   * @param num - Number of results to request.
   * @returns The `organic` array of the response, or `[]` on any failure.
   */
  async searchWeb(query: string, num: number = 5): Promise<SerperWebResult[]> {
    if (!this.apiKey) {
      console.error("❌ SERPER_API_KEY missing - cannot execute web search.");
      return [];
    }

    try {
      console.log(`🔍 [Serper] Web Search for Competitor Insights: "${query}"`);
      const payload = await this.post("search", query, num);
      return SerperClient.listFrom<SerperWebResult>(payload, "organic");
    } catch (e: unknown) {
      console.error("❌ [Serper] Web Request failed", e);
      return [];
    }
  }

  /**
   * Sends one POST request to a Serper endpoint and returns the parsed JSON
   * body, or `undefined` (after logging status and body text) when the
   * response status is not OK. Network and JSON errors propagate to the caller.
   */
  private async post(
    endpoint: "videos" | "search",
    query: string,
    num: number,
  ): Promise<unknown> {
    const response = await fetch(`https://google.serper.dev/${endpoint}`, {
      method: "POST",
      headers: {
        "X-API-KEY": this.apiKey,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        q: query,
        num,
        gl: this.gl,
        hl: this.hl,
      }),
    });

    if (!response.ok) {
      console.error(
        `❌ [Serper] API Error: ${response.status} ${response.statusText}`,
      );
      console.error(await response.text());
      return undefined;
    }

    return await response.json();
  }

  /**
   * Reads `payload[key]` and returns it only when it really is an array;
   * any other payload shape yields `[]`.
   */
  private static listFrom<T>(payload: unknown, key: string): T[] {
    if (typeof payload !== "object" || payload === null) {
      return [];
    }
    const value = (payload as Record<string, unknown>)[key];
    // Element shape is trusted from the API; only the container is checked.
    return Array.isArray(value) ? (value as T[]) : [];
  }
}

View File

@@ -1,3 +1,4 @@
export * from "./clients/data-commons";
export * from "./clients/trends";
export * from "./clients/serper";
export * from "./agent";