feat: content engine
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 2s
Monorepo Pipeline / 🧹 Lint (push) Successful in 1m12s
Monorepo Pipeline / 🧪 Test (push) Successful in 2m59s
Monorepo Pipeline / 🏗️ Build (push) Successful in 6m52s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
🏥 Server Maintenance / 🧹 Prune & Clean (push) Failing after 4s
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 2s
Monorepo Pipeline / 🧹 Lint (push) Successful in 1m12s
Monorepo Pipeline / 🧪 Test (push) Successful in 2m59s
Monorepo Pipeline / 🏗️ Build (push) Successful in 6m52s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
🏥 Server Maintenance / 🧹 Prune & Clean (push) Failing after 4s
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import OpenAI from "openai";
|
||||
import { DataCommonsClient } from "./clients/data-commons";
|
||||
import { TrendsClient } from "./clients/trends";
|
||||
import { SerperClient, type SerperVideoResult } from "./clients/serper";
|
||||
|
||||
export interface Fact {
|
||||
statement: string;
|
||||
@@ -20,6 +21,7 @@ export class ResearchAgent {
|
||||
private openai: OpenAI;
|
||||
private dcClient: DataCommonsClient;
|
||||
private trendsClient: TrendsClient;
|
||||
private serperClient: SerperClient;
|
||||
|
||||
constructor(apiKey: string) {
|
||||
this.openai = new OpenAI({
|
||||
@@ -31,7 +33,8 @@ export class ResearchAgent {
|
||||
},
|
||||
});
|
||||
this.dcClient = new DataCommonsClient();
|
||||
this.trendsClient = new TrendsClient();
|
||||
this.trendsClient = new TrendsClient(apiKey);
|
||||
this.serperClient = new SerperClient(process.env.SERPER_API_KEY);
|
||||
}
|
||||
|
||||
async researchTopic(topic: string): Promise<Fact[]> {
|
||||
@@ -107,120 +110,151 @@ Return JSON: { "facts": [ { "statement": "...", "source": "Organization Name Onl
|
||||
return result.facts || [];
|
||||
}
|
||||
|
||||
async findSocialPosts(
|
||||
/**
 * Collects the social media embeds that are already present in MDX content.
 *
 * Purely regex based — no LLM call is made. Recognised tags are the
 * self-closing forms `<YouTubeEmbed videoId="..." />`,
 * `<TwitterEmbed tweetId="..." />` and `<LinkedInEmbed url="..." />` /
 * `<LinkedInEmbed urn="..." />`. Platforms are scanned in that order and an
 * embed ID is reported only the first time it is encountered.
 *
 * @param content - MDX source to scan.
 * @returns One entry per distinct embed ID; an empty array when none are found.
 */
extractSocialPosts(content: string): SocialPost[] {
  // One row per supported embed tag; capture group 1 always holds the embed ID.
  const embedMatchers: Array<{
    platform: SocialPost["platform"];
    pattern: RegExp;
    description: string;
  }> = [
    {
      platform: "youtube",
      pattern: /<YouTubeEmbed[^>]*videoId="([^"]+)"[^>]*\/>/gi,
      description: "Existing YouTube embed",
    },
    {
      platform: "twitter",
      pattern: /<TwitterEmbed[^>]*tweetId="([^"]+)"[^>]*\/>/gi,
      description: "Existing Twitter/X embed",
    },
    {
      platform: "linkedin",
      pattern: /<LinkedInEmbed[^>]*(?:url|urn)="([^"]+)"[^>]*\/>/gi,
      description: "Existing LinkedIn embed",
    },
  ];

  const found: SocialPost[] = [];
  // IDs are de-duplicated across all platforms, not per platform.
  const seenIds = new Set<string>();

  for (const { platform, pattern, description } of embedMatchers) {
    for (const hit of content.matchAll(pattern)) {
      const embedId = hit[1];
      if (seenIds.has(embedId)) {
        continue;
      }
      seenIds.add(embedId);
      found.push({ platform, embedId, description });
    }
  }

  const summary =
    found.length > 0
      ? `📱 Extracted ${found.length} existing social media embed(s) from content`
      : `📱 No existing social media embeds found in content`;
  console.log(summary);

  return found;
}
|
||||
|
||||
/**
|
||||
* Fetches real, verified social media posts using the Serper API (Google Video Search).
|
||||
* This completely prevents hallucinations as it relies on actual search results.
|
||||
*/
|
||||
async fetchRealSocialPosts(
|
||||
topic: string,
|
||||
retries = 2,
|
||||
previousFailures: string[] = [],
|
||||
retries = 1,
|
||||
): Promise<SocialPost[]> {
|
||||
console.log(
|
||||
`📱 Searching for relevant Social Media Posts: "${topic}"${retries < 2 ? ` (Retry ${2 - retries}/2)` : ""}`,
|
||||
`🌐 [Serper] Fetching real social media posts for topic: "${topic}"...`,
|
||||
);
|
||||
|
||||
const failureContext =
|
||||
previousFailures.length > 0
|
||||
? `\nCRITICAL FAILURE WARNING: The following IDs you generated previously returned 404 Not Found and were Hallucinations: ${previousFailures.join(", ")}. You MUST provide REAL, verifiable IDs. If you cannot 100% guarantee an ID exists, return an empty array instead of guessing.`
|
||||
: "";
|
||||
|
||||
const response = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-pro",
|
||||
// Step 1: Ask the LLM to generate a highly specific YouTube search query
|
||||
// We want tutorials, explanations, or deep dives.
|
||||
const queryGen = await this.openai.chat.completions.create({
|
||||
model: "google/gemini-2.5-flash",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: `You are a social media researcher finding high-value, real expert posts and videos to embed in a B2B Tech Blog post about: "${topic}".
|
||||
|
||||
Your Goal: Identify 1-3 REAL, highly relevant social media posts (YouTube, Twitter/X, LinkedIn) that provide social proof, expert opinions, or deep dives.${failureContext}
|
||||
|
||||
Constraint: You MUST provide the exact mathematical or alphanumeric ID for the embed.
|
||||
- YouTube: The 11-character video ID (e.g. "dQw4w9WgXcQ")
|
||||
- Twitter: The numerical tweet ID (e.g. "1753464161943834945")
|
||||
- LinkedIn: The activity URN (e.g. "urn:li:activity:7153664326573674496" or just the numerical 19-digit ID)
|
||||
|
||||
Return JSON exactly as follows:
|
||||
{
|
||||
"posts": [
|
||||
{ "platform": "youtube", "embedId": "dQw4w9WgXcQ", "description": "Google Web Dev explaining Core Web Vitals" }
|
||||
]
|
||||
}
|
||||
Return ONLY the JSON.`,
|
||||
content: `Generate a YouTube search query to find a high-quality, professional educational video about: "${topic}".
|
||||
Prefer official tech channels or well-known developers (e.g., Google Chrome Developers, Vercel, Theo - t3.gg, Fireship, etc.).
|
||||
Return a JSON object with a single string field "query". Example: {"query": "core web vitals explanation google developers"}.
|
||||
DO NOT USE QUOTES IN THE QUERY ITSELF.`,
|
||||
},
|
||||
],
|
||||
response_format: { type: "json_object" },
|
||||
});
|
||||
|
||||
if (
|
||||
!response.choices ||
|
||||
response.choices.length === 0 ||
|
||||
!response.choices[0].message
|
||||
) {
|
||||
console.warn(`⚠️ Social post search failed for concept: "${topic}"`);
|
||||
try {
|
||||
let queryStr = "";
|
||||
const parsed = JSON.parse(
|
||||
queryGen.choices[0].message.content || '{"query": ""}',
|
||||
);
|
||||
queryStr = parsed.query || `${topic} tutorial explanation`;
|
||||
|
||||
// Step 2: Search via Serper Video Search
|
||||
const videos = await this.serperClient.searchVideos(queryStr);
|
||||
|
||||
if (!videos || videos.length === 0) {
|
||||
console.warn(`⚠️ [Serper] No videos found for query: "${queryStr}"`);
|
||||
if (retries > 0) return this.fetchRealSocialPosts(topic, retries - 1);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Filter for youtube results
|
||||
const ytVideos = videos.filter(
|
||||
(v) => v.link && v.link.includes("youtube.com/watch"),
|
||||
);
|
||||
|
||||
if (ytVideos.length === 0) {
|
||||
console.warn(`⚠️ [Serper] No YouTube videos in search results.`);
|
||||
if (retries > 0) return this.fetchRealSocialPosts(topic, retries - 1);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Pick the best one (usually the first result)
|
||||
const bestVideo = ytVideos[0];
|
||||
|
||||
// Extract the 11-char video ID from the link (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)
|
||||
const urlObj = new URL(bestVideo.link);
|
||||
const videoId = urlObj.searchParams.get("v");
|
||||
|
||||
if (!videoId) {
|
||||
console.warn(
|
||||
`⚠️ [Serper] Could not extract video ID from: ${bestVideo.link}`,
|
||||
);
|
||||
return [];
|
||||
}
|
||||
|
||||
console.log(
|
||||
`✅ [Serper] Found valid YouTube Video: ${videoId} ("${bestVideo.title}")`,
|
||||
);
|
||||
|
||||
return [
|
||||
{
|
||||
platform: "youtube",
|
||||
embedId: videoId,
|
||||
description: bestVideo.title || "YouTube Video",
|
||||
},
|
||||
];
|
||||
} catch (e) {
|
||||
console.error("❌ Failed to fetch real social posts:", e);
|
||||
return [];
|
||||
}
|
||||
|
||||
const result = JSON.parse(response.choices[0].message.content || "{}");
|
||||
const rawPosts: SocialPost[] = result.posts || [];
|
||||
|
||||
// CRITICAL WORKFLOW FIX: Absolutely forbid hallucinations by verifying via oEmbed APIs
|
||||
const verifiedPosts: SocialPost[] = [];
|
||||
if (rawPosts.length > 0) {
|
||||
console.log(
|
||||
`🛡️ Verifying ${rawPosts.length} generated social ID(s) against network...`,
|
||||
);
|
||||
}
|
||||
|
||||
const failedIdsForThisRun: string[] = [];
|
||||
|
||||
for (const post of rawPosts) {
|
||||
let isValid = false;
|
||||
try {
|
||||
if (post.platform === "youtube") {
|
||||
const res = await fetch(
|
||||
`https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v=${post.embedId}`,
|
||||
);
|
||||
isValid = res.ok;
|
||||
} else if (post.platform === "twitter") {
|
||||
const res = await fetch(
|
||||
`https://publish.twitter.com/oembed?url=https://twitter.com/x/status/${post.embedId}`,
|
||||
);
|
||||
isValid = res.ok;
|
||||
} else if (post.platform === "linkedin") {
|
||||
// LinkedIn doesn't have an unauthenticated oEmbed, so we use heuristic URL/URN format validation
|
||||
if (
|
||||
post.embedId.includes("urn:li:") ||
|
||||
post.embedId.includes("linkedin.com") ||
|
||||
/^\d{19}$/.test(post.embedId)
|
||||
) {
|
||||
isValid = true;
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
isValid = false;
|
||||
}
|
||||
|
||||
if (isValid) {
|
||||
verifiedPosts.push(post);
|
||||
console.log(
|
||||
`✅ Verified real post ID: ${post.embedId} (${post.platform})`,
|
||||
);
|
||||
} else {
|
||||
failedIdsForThisRun.push(post.embedId);
|
||||
console.warn(
|
||||
`🛑 Dropped hallucinated or dead post ID: ${post.embedId} (${post.platform})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// AGENT SELF-HEALING: If all found posts were hallucinations and we have retries, challenge the LLM to try again
|
||||
if (verifiedPosts.length === 0 && rawPosts.length > 0 && retries > 0) {
|
||||
console.warn(
|
||||
`🔄 Self-Healing triggered: All IDs were hallucinations. Challenging agent to find real IDs...`,
|
||||
);
|
||||
return this.findSocialPosts(topic, retries - 1, [
|
||||
...previousFailures,
|
||||
...failedIdsForThisRun,
|
||||
]);
|
||||
}
|
||||
|
||||
return verifiedPosts;
|
||||
}
|
||||
|
||||
private async planResearch(
|
||||
@@ -273,4 +307,60 @@ CRITICAL: Do NOT provide more than 2 trendsKeywords. Keep it extremely focused.`
|
||||
return { trendsKeywords: [], dcVariables: [] };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Researches the top-ranking Google results for a topic and condenses them
 * into short "rank / title / snippet" strings.
 *
 * Flow:
 *  1. Ask the LLM for a search query (JSON object with a `query` field).
 *  2. Run that query through `serperClient.searchWeb` (top 5 results).
 *  3. Map every organic result to `[Rank #n] Title: "..." | Snippet: "..."`.
 *
 * If the LLM reply is missing, is not valid JSON, or carries no non-blank
 * string `query`, the raw `topic` is used as the search query instead of
 * aborting the research.
 *
 * @param topic - Topic to research; also the fallback search query.
 * @param retries - Remaining attempts when the search returns no results.
 *   Each retry re-runs the whole method, including a fresh LLM call.
 * @returns Insight strings in rank order; an empty array when nothing was
 *   found or the search step failed.
 * @throws Whatever the LLM client rejects with — the query-generation call is
 *   intentionally left outside the `try` so that behaviour is unchanged.
 */
async researchCompetitors(topic: string, retries = 1): Promise<string[]> {
  console.log(
    `🔍 [Competitor Research] Fetching top ranking web pages for topic: "${topic.slice(0, 50)}..."`,
  );

  // Step 1: LLM generates the optimal Google Search query
  const queryGen = await this.openai.chat.completions.create({
    model: "google/gemini-2.5-flash",
    messages: [
      {
        role: "system",
        content: `Generate a Google Search query that a B2B decision maker would use to research the following topic: "${topic}".
Focus on intent-driven keywords.
Return a JSON object with a single string field "query". Example: {"query": "Next.js performance optimization agency"}.
DO NOT USE QUOTES IN THE QUERY ITSELF.`,
      },
    ],
    response_format: { type: "json_object" },
  });

  try {
    // Default to the raw topic; only override it with a usable LLM suggestion.
    let queryStr = topic;
    const rawContent = queryGen.choices?.[0]?.message?.content;
    if (rawContent) {
      try {
        const parsed: unknown = JSON.parse(rawContent);
        const suggested =
          typeof parsed === "object" && parsed !== null
            ? (parsed as Record<string, unknown>).query
            : undefined;
        if (typeof suggested === "string" && suggested.trim() !== "") {
          queryStr = suggested;
        }
      } catch {
        // Malformed JSON must not abort the research; keep the topic fallback.
        console.warn(
          `⚠️ [Competitor Research] Could not parse generated query; falling back to the topic.`,
        );
      }
    }

    // Step 2: Search via Serper Web Search
    const organicResults = await this.serperClient.searchWeb(queryStr, 5);

    if (!organicResults || organicResults.length === 0) {
      console.warn(
        `⚠️ [Competitor Research] No web results found for query: "${queryStr}"`,
      );
      if (retries > 0) return this.researchCompetitors(topic, retries - 1);
      return [];
    }

    // Map to structured insights string
    const insights = organicResults.map(
      (result, i) =>
        `[Rank #${i + 1}] Title: "${result.title}" | Snippet: "${result.snippet}"`,
    );

    console.log(
      `✅ [Competitor Research] Analyzed top ${insights.length} competitor articles.`,
    );
    return insights;
  } catch (e) {
    console.error("❌ Failed to fetch competitor research:", e);
    return [];
  }
}
|
||||
}
|
||||
|
||||
128
packages/journaling/src/clients/serper.ts
Normal file
128
packages/journaling/src/clients/serper.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
 * One entry of the `videos` array in a Serper video-search response.
 *
 * Only `title` and `link` are required; the remaining fields may be absent.
 */
export interface SerperVideoResult {
  /** Title of the result. */
  title: string;
  /** Link of the result, as delivered by the API. */
  link: string;
  /** Optional text snippet. */
  snippet?: string;
  /** Optional date string — format is not specified here. */
  date?: string;
  /** Optional duration string — format is not specified here. */
  duration?: string;
  /** Optional channel name. */
  channel?: string;
}
|
||||
|
||||
/**
 * Body of a Serper video-search response, as consumed by
 * `SerperClient.searchVideos`.
 */
export interface SerperVideoResponse {
  /**
   * Search parameters block of the response. Left loosely typed; the client
   * in this file never reads it.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  searchParameters: any;
  /** The video hits. */
  videos: Array<SerperVideoResult>;
}
|
||||
|
||||
/**
 * One entry of the `organic` array in a Serper web-search response.
 */
export interface SerperWebResult {
  /** Title of the result. */
  title: string;
  /** Link of the result, as delivered by the API. */
  link: string;
  /** Text snippet of the result. */
  snippet: string;
  /** Optional date string — format is not specified here. */
  date?: string;
  /** Optional sitelinks; element shape is intentionally left untyped. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  sitelinks?: Array<any>;
  /** Position of the result (presumably its rank in the result list — confirm against the API). */
  position: number;
}
|
||||
|
||||
/**
 * Body of a Serper web-search response, as consumed by
 * `SerperClient.searchWeb`.
 */
export interface SerperWebResponse {
  /**
   * Search parameters block of the response. Left loosely typed; the client
   * in this file never reads it.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  searchParameters: any;
  /** The organic web hits. */
  organic: Array<SerperWebResult>;
}
|
||||
|
||||
/**
 * Minimal client for the Serper search API (`https://google.serper.dev`).
 *
 * Both public search methods are best-effort: a missing API key, a blank
 * query, a non-2xx response, a network error or a malformed payload is logged
 * and results in an empty array instead of a thrown error.
 */
export class SerperClient {
  private readonly apiKey: string;
  /** Value sent as `gl` with every request. */
  private readonly gl: string;
  /** Value sent as `hl` with every request. */
  private readonly hl: string;

  /**
   * @param apiKey - Serper API key; falls back to `process.env.SERPER_API_KEY`
   *   when omitted or empty. A missing key only logs a warning here — the
   *   search methods then return `[]` without issuing a request.
   * @param locale - Optional `gl` / `hl` overrides. Both default to `"de"`,
   *   which is what this client always sent before the option existed.
   */
  constructor(apiKey?: string, locale: { gl?: string; hl?: string } = {}) {
    // `||` on purpose: an empty string must also fall back to the environment.
    const key = apiKey || process.env.SERPER_API_KEY;
    if (!key) {
      console.warn("⚠️ SERPER_API_KEY is not defined. SerperClient will fail.");
    }
    this.apiKey = key || "";
    this.gl = locale.gl ?? "de";
    this.hl = locale.hl ?? "de";
  }

  /**
   * Performs a video search via Serper (Google Video Search).
   * Great for finding relevant YouTube videos.
   *
   * @param query - Search query; a blank query short-circuits to `[]`.
   * @param num - Number of results to request (default 5).
   * @returns The `videos` array of the response, or `[]` on any failure.
   */
  async searchVideos(
    query: string,
    num: number = 5,
  ): Promise<SerperVideoResult[]> {
    if (!this.apiKey) {
      console.error("❌ SERPER_API_KEY missing - cannot execute search.");
      return [];
    }
    if (query.trim() === "") {
      console.warn("⚠️ [Serper] Empty query - skipping video search.");
      return [];
    }

    console.log(`🔍 [Serper] Searching videos for: "${query}"`);
    const payload = await this.post(
      "videos",
      query,
      num,
      "❌ [Serper] Request failed",
    );
    return SerperClient.listFrom<SerperVideoResult>(payload, "videos");
  }

  /**
   * Performs a standard web search via Serper.
   * Crucial for B2B competitor analysis and context gathering.
   *
   * @param query - Search query; a blank query short-circuits to `[]`.
   * @param num - Number of results to request (default 5).
   * @returns The `organic` array of the response, or `[]` on any failure.
   */
  async searchWeb(query: string, num: number = 5): Promise<SerperWebResult[]> {
    if (!this.apiKey) {
      console.error("❌ SERPER_API_KEY missing - cannot execute web search.");
      return [];
    }
    if (query.trim() === "") {
      console.warn("⚠️ [Serper] Empty query - skipping web search.");
      return [];
    }

    console.log(`🔍 [Serper] Web Search for Competitor Insights: "${query}"`);
    const payload = await this.post(
      "search",
      query,
      num,
      "❌ [Serper] Web Request failed",
    );
    return SerperClient.listFrom<SerperWebResult>(payload, "organic");
  }

  /**
   * Sends one POST request to a Serper endpoint and returns the parsed JSON
   * body, or `undefined` when the request fails or answers with a non-2xx
   * status. Never throws; failures are logged.
   */
  private async post(
    endpoint: "videos" | "search",
    query: string,
    num: number,
    failureMessage: string,
  ): Promise<unknown> {
    try {
      const response = await fetch(`https://google.serper.dev/${endpoint}`, {
        method: "POST",
        headers: {
          "X-API-KEY": this.apiKey,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          q: query,
          num: num,
          gl: this.gl,
          hl: this.hl,
        }),
      });

      if (!response.ok) {
        console.error(
          `❌ [Serper] API Error: ${response.status} ${response.statusText}`,
        );
        // The error body usually explains the rejection, so log it too.
        console.error(await response.text());
        return undefined;
      }

      return await response.json();
    } catch (e) {
      console.error(failureMessage, e);
      return undefined;
    }
  }

  /**
   * Reads `payload[key]` and returns it only if it really is an array;
   * anything else (missing payload, missing field, wrong type) yields `[]`.
   */
  private static listFrom<T>(payload: unknown, key: string): T[] {
    if (typeof payload !== "object" || payload === null) {
      return [];
    }
    const value = (payload as Record<string, unknown>)[key];
    return Array.isArray(value) ? (value as T[]) : [];
  }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
export * from "./clients/data-commons";
|
||||
export * from "./clients/trends";
|
||||
export * from "./clients/serper";
|
||||
export * from "./agent";
|
||||
|
||||
Reference in New Issue
Block a user