Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 1s
Monorepo Pipeline / 🧹 Lint (push) Failing after 35s
Monorepo Pipeline / 🧪 Test (push) Failing after 35s
Monorepo Pipeline / 🏗️ Build (push) Failing after 12s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Image Processor (push) Has been skipped
Monorepo Pipeline / 🐳 Build Directus (Base) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
426 lines
14 KiB
TypeScript
426 lines
14 KiB
TypeScript
import OpenAI from "openai";
|
|
import { DataCommonsClient } from "./clients/data-commons";
|
|
import { TrendsClient } from "./clients/trends";
|
|
import { SerperClient } from "./clients/serper";
|
|
|
|
export interface Fact {
  // Human-readable, self-contained factual claim (ideally a statistic).
  statement: string;
  // Name of the publishing organization (never "General Knowledge";
  // see the synthesis prompt in ResearchAgent.researchTopic).
  source: string;
  // Optional link to the source. LLM-synthesized facts deliberately omit
  // this to avoid hallucinated URLs; only verified pipelines should set it.
  url?: string;
  // Self-assessed reliability of the claim.
  confidence: "high" | "medium" | "low";
  // Raw supporting data points (e.g. Google Trends samples).
  // NOTE(review): untyped `any` — consider `unknown` once consumers are known.
  data?: any;
}
|
|
|
|
export interface SocialPost {
  // Hosting platform of the embedded post.
  platform: "youtube" | "twitter" | "linkedin";
  // Platform-specific identifier consumed by the embed component:
  // YouTube video id, tweet id, or the LinkedIn url/urn attribute value.
  embedId: string;
  // Short human-readable label (e.g. the video title or a generic placeholder).
  description: string;
}
|
|
|
|
export class ResearchAgent {
|
|
private openai: OpenAI;
|
|
private dcClient: DataCommonsClient;
|
|
private trendsClient: TrendsClient;
|
|
private serperClient: SerperClient;
|
|
|
|
constructor(apiKey: string) {
|
|
this.openai = new OpenAI({
|
|
apiKey,
|
|
baseURL: "https://openrouter.ai/api/v1",
|
|
defaultHeaders: {
|
|
"HTTP-Referer": "https://mintel.me",
|
|
"X-Title": "Mintel Journaling Agent",
|
|
},
|
|
});
|
|
this.dcClient = new DataCommonsClient();
|
|
this.trendsClient = new TrendsClient(apiKey);
|
|
this.serperClient = new SerperClient(process.env.SERPER_API_KEY);
|
|
}
|
|
|
|
async researchTopic(topic: string): Promise<Fact[]> {
|
|
console.log(`🔎 Researching: ${topic}`);
|
|
|
|
// 1. Plan Research
|
|
const plan = await this.planResearch(topic);
|
|
console.log(`📋 Research Plan:`, plan);
|
|
|
|
const facts: Fact[] = [];
|
|
|
|
// 2. Execute Plan
|
|
// Google Trends
|
|
for (const kw of plan.trendsKeywords) {
|
|
try {
|
|
const data = await this.trendsClient.getInterestOverTime(kw);
|
|
if (data.length > 0) {
|
|
// Analyze trend
|
|
const latest = data[data.length - 1];
|
|
facts.push({
|
|
statement: `Interest in "${kw}" is currently at ${latest.value}% of peak popularity.`,
|
|
source: "Google Trends",
|
|
confidence: "high",
|
|
data: data.slice(-5), // Last 5 points
|
|
});
|
|
}
|
|
} catch (e) {
|
|
console.error(`Error fetching trends for ${kw}`, e);
|
|
}
|
|
}
|
|
|
|
// Data Commons
|
|
// We need DCIDs. LLM should have provided them or we need a search.
|
|
// For this POC, let's assume the LLM provides plausible DCIDs or we skip deep DC integration for now
|
|
// and rely on the LLM's own knowledge + the verified trends.
|
|
// However, if the plan has dcVariables, let's try.
|
|
|
|
// 3. Synthesize & Verify
|
|
// Ask LLM to verify its own knowledge against the data we found (if any) or just use its training data
|
|
// but formatted as "facts".
|
|
|
|
const synthesis = await this.openai.chat.completions.create({
|
|
model: "google/gemini-3-flash-preview",
|
|
messages: [
|
|
{
|
|
role: "system",
|
|
content: `You are a professional digital researcher and fact-checker.
|
|
Topic: "${topic}"
|
|
|
|
Your Goal: Provide 5-7 concrete, verifiable, statistical facts.
|
|
Constraint 1: Cite real sources (e.g. "Google Developers", "HTTP Archive", "Deloitte", "Nielsen Norman Group").
|
|
Constraint 2: DO NOT cite "General Knowledge".
|
|
Constraint 3: CRITICAL MANDATE - NEVER generate or guess URLs. You must hallucinate NO links. Use ONLY the Organization's Name as the "source" field.
|
|
|
|
Return JSON: { "facts": [ { "statement": "...", "source": "Organization Name Only", "confidence": "high" } ] }`,
|
|
},
|
|
{ role: "user", content: "Extract facts." },
|
|
],
|
|
response_format: { type: "json_object" },
|
|
});
|
|
|
|
if (
|
|
!synthesis.choices ||
|
|
synthesis.choices.length === 0 ||
|
|
!synthesis.choices[0].message
|
|
) {
|
|
console.warn(`⚠️ Research synthesis failed for concept: "${topic}"`);
|
|
return [];
|
|
}
|
|
|
|
const result = JSON.parse(synthesis.choices[0].message.content || "{}");
|
|
return result.facts || [];
|
|
}
|
|
|
|
/**
|
|
* Extracts existing social media embeds from MDX content via regex.
|
|
* No LLM involved — purely deterministic parsing.
|
|
* Only returns posts that are already present in the article.
|
|
*/
|
|
extractSocialPosts(content: string): SocialPost[] {
|
|
const posts: SocialPost[] = [];
|
|
|
|
// YouTube: <YouTubeEmbed videoId="..." />
|
|
const ytMatches = [
|
|
...content.matchAll(/<YouTubeEmbed[^>]*videoId="([^"]+)"[^>]*\/>/gi),
|
|
];
|
|
for (const match of ytMatches) {
|
|
if (!posts.some((p) => p.embedId === match[1])) {
|
|
posts.push({
|
|
platform: "youtube",
|
|
embedId: match[1],
|
|
description: "Existing YouTube embed",
|
|
});
|
|
}
|
|
}
|
|
|
|
// Twitter/X: <TwitterEmbed tweetId="..." />
|
|
const twMatches = [
|
|
...content.matchAll(/<TwitterEmbed[^>]*tweetId="([^"]+)"[^>]*\/>/gi),
|
|
];
|
|
for (const match of twMatches) {
|
|
if (!posts.some((p) => p.embedId === match[1])) {
|
|
posts.push({
|
|
platform: "twitter",
|
|
embedId: match[1],
|
|
description: "Existing Twitter/X embed",
|
|
});
|
|
}
|
|
}
|
|
|
|
// LinkedIn: <LinkedInEmbed url="..." /> or <LinkedInEmbed urn="..." />
|
|
const liMatches = [
|
|
...content.matchAll(/<LinkedInEmbed[^>]*(?:url|urn)="([^"]+)"[^>]*\/>/gi),
|
|
];
|
|
for (const match of liMatches) {
|
|
if (!posts.some((p) => p.embedId === match[1])) {
|
|
posts.push({
|
|
platform: "linkedin",
|
|
embedId: match[1],
|
|
description: "Existing LinkedIn embed",
|
|
});
|
|
}
|
|
}
|
|
|
|
if (posts.length > 0) {
|
|
console.log(
|
|
`📱 Extracted ${posts.length} existing social media embed(s) from content`,
|
|
);
|
|
} else {
|
|
console.log(`📱 No existing social media embeds found in content`);
|
|
}
|
|
|
|
return posts;
|
|
}
|
|
|
|
/**
|
|
* Fetches real, verified social media posts using the Serper API (Google Video Search).
|
|
* This completely prevents hallucinations as it relies on actual search results.
|
|
*/
|
|
async fetchRealSocialPosts(
|
|
topic: string,
|
|
customSources?: string[],
|
|
retries = 1,
|
|
): Promise<SocialPost[]> {
|
|
console.log(
|
|
`🌐 [Serper] Fetching real social media posts for topic: "${topic}"...`,
|
|
);
|
|
|
|
// Step 1: Ask the LLM to generate a highly specific YouTube search query
|
|
// We want tutorials, explanations, or deep dives.
|
|
const queryGen = await this.openai.chat.completions.create({
|
|
model: "google/gemini-3-flash-preview",
|
|
messages: [
|
|
{
|
|
role: "system",
|
|
content: `You generate ultra-short, highly relevant YouTube search queries based on a given text context.
|
|
|
|
RULES:
|
|
1. Extract only the 2-4 most important technical or business keywords from the provided text.
|
|
2. Ignore all markdown syntax, frontmatter (---), titles, and descriptions.
|
|
3. Keep the query generic enough to find popular educational tech videos, BUT ensure it specifically targets the core technical subject. Append "tutorial" or "b2b explanation" if necessary to find high-quality content.
|
|
4. DO NOT append specific channel names (e.g., "Fireship", "Vercel") to the query.
|
|
5. DO NOT USE QUOTES IN THE QUERY.
|
|
|
|
Return a JSON object with a single string field "query". Example: {"query": "core web vitals performance tutorial"}`,
|
|
},
|
|
{
|
|
role: "user",
|
|
content: `CONTEXT: ${topic}`,
|
|
},
|
|
],
|
|
response_format: { type: "json_object" },
|
|
});
|
|
|
|
try {
|
|
let queryStr = "";
|
|
const parsed = JSON.parse(
|
|
queryGen.choices[0].message.content || '{"query": ""}',
|
|
);
|
|
queryStr = parsed.query || `${topic} tutorial explanation`;
|
|
|
|
// Step 2: Search via Serper Video Search
|
|
const videos = await this.serperClient.searchVideos(queryStr);
|
|
|
|
if (!videos || videos.length === 0) {
|
|
console.warn(`⚠️ [Serper] No videos found for query: "${queryStr}"`);
|
|
if (retries > 0) return this.fetchRealSocialPosts(topic, customSources, retries - 1);
|
|
return [];
|
|
}
|
|
|
|
// Filter for youtube results
|
|
const ytVideos = videos
|
|
.filter(
|
|
(v) =>
|
|
v.link &&
|
|
v.link.includes("youtube.com/watch") &&
|
|
v.title &&
|
|
v.channel,
|
|
)
|
|
.slice(0, 5); // Take top 5 for evaluation
|
|
|
|
if (ytVideos.length === 0) {
|
|
console.warn(`⚠️ [Serper] No YouTube videos in search results.`);
|
|
if (retries > 0) return this.fetchRealSocialPosts(topic, customSources, retries - 1);
|
|
return [];
|
|
}
|
|
|
|
// Step 3: Ask the LLM to evaluate the relevance of the found videos
|
|
|
|
const sourceExamples = customSources && customSources.length > 0
|
|
? `Specifically prioritize content from: ${customSources.join(", ")}.`
|
|
: `(e.g., Google Developers, Vercel, Theo - t3.gg, Fireship, Syntax, ByteByteGo, IBM Technology, McKinsey, Gartner, Deloitte).`;
|
|
|
|
const evalPrompt = `You are a strict technical evaluator. You must select the MOST RELEVANT educational tech video from the list below based on this core article context: "${topic.slice(0, 800)}..."
|
|
|
|
Videos:
|
|
${ytVideos.map((v, i) => `[ID: ${i}] Title: "${v.title}" | Channel: "${v.channel}" | Snippet: "${v.snippet || "none"}"`).join("\n")}
|
|
|
|
RULES:
|
|
1. The video MUST be highly relevant to the EXACT technical topic of the context.
|
|
2. The channel SHOULD be a high-quality tech, development, or professional B2B channel ${sourceExamples} AVOID gaming, generic vlogs, clickbait, off-topic podcasts, or unrelated topics.
|
|
3. If none of the videos are strictly relevant to the core technical or business subject (e.g. they are just casually mentioning the word), YOU MUST RETURN -1. Be extremely critical. Do not just pick the "best of the worst".
|
|
4. If one is highly relevant, return its ID number.
|
|
|
|
Return ONLY a JSON object: {"bestVideoId": number}`;
|
|
|
|
const evalResponse = await this.openai.chat.completions.create({
|
|
model: "google/gemini-3-flash-preview",
|
|
messages: [{ role: "system", content: evalPrompt }],
|
|
response_format: { type: "json_object" },
|
|
});
|
|
|
|
let bestIdx = -1;
|
|
try {
|
|
const evalParsed = JSON.parse(
|
|
evalResponse.choices[0].message.content || '{"bestVideoId": -1}',
|
|
);
|
|
bestIdx = evalParsed.bestVideoId;
|
|
} catch {
|
|
console.warn("Failed to parse video evaluation response");
|
|
}
|
|
|
|
if (bestIdx < 0 || bestIdx >= ytVideos.length) {
|
|
console.warn(`⚠️ [Serper] LLM rejected all videos as irrelevant.`);
|
|
if (retries > 0) return this.fetchRealSocialPosts(topic, customSources, retries - 1);
|
|
return [];
|
|
}
|
|
|
|
const bestVideo = ytVideos[bestIdx];
|
|
console.log(
|
|
`✅ [Serper] AI selected video: ${bestVideo.title} (Channel: ${bestVideo.channel})`,
|
|
);
|
|
|
|
// Extract the 11-char video ID from the link (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)
|
|
const urlObj = new URL(bestVideo.link);
|
|
const videoId = urlObj.searchParams.get("v");
|
|
|
|
if (!videoId) {
|
|
console.warn(
|
|
`⚠️ [Serper] Could not extract video ID from: ${bestVideo.link}`,
|
|
);
|
|
return [];
|
|
}
|
|
|
|
console.log(
|
|
`✅ [Serper] Found valid YouTube Video: ${videoId} ("${bestVideo.title}")`,
|
|
);
|
|
|
|
return [
|
|
{
|
|
platform: "youtube",
|
|
embedId: videoId,
|
|
description: bestVideo.title || "YouTube Video",
|
|
},
|
|
];
|
|
} catch (e) {
|
|
console.error("❌ Failed to fetch real social posts:", e);
|
|
return [];
|
|
}
|
|
}
|
|
|
|
private async planResearch(
|
|
topic: string,
|
|
): Promise<{ trendsKeywords: string[]; dcVariables: string[] }> {
|
|
const response = await this.openai.chat.completions.create({
|
|
model: "google/gemini-3-flash-preview",
|
|
messages: [
|
|
{
|
|
role: "system",
|
|
content: `Plan research for: "${topic}".
|
|
Return JSON:
|
|
{
|
|
"trendsKeywords": ["list", "of", "max", "2", "keywords"],
|
|
"dcVariables": ["StatisticalVariables", "if", "known", "otherwise", "empty"]
|
|
}
|
|
CRITICAL: Do NOT provide more than 2 trendsKeywords. Keep it extremely focused.`,
|
|
},
|
|
],
|
|
response_format: { type: "json_object" },
|
|
});
|
|
|
|
if (
|
|
!response.choices ||
|
|
response.choices.length === 0 ||
|
|
!response.choices[0].message
|
|
) {
|
|
console.warn(`⚠️ Research planning failed for concept: "${topic}"`);
|
|
return { trendsKeywords: [], dcVariables: [] };
|
|
}
|
|
|
|
try {
|
|
let parsed = JSON.parse(
|
|
response.choices[0].message.content ||
|
|
'{"trendsKeywords": [], "dcVariables": []}',
|
|
);
|
|
if (Array.isArray(parsed)) {
|
|
parsed = parsed[0] || { trendsKeywords: [], dcVariables: [] };
|
|
}
|
|
return {
|
|
trendsKeywords: Array.isArray(parsed.trendsKeywords)
|
|
? parsed.trendsKeywords
|
|
: [],
|
|
dcVariables: Array.isArray(parsed.dcVariables)
|
|
? parsed.dcVariables
|
|
: [],
|
|
};
|
|
} catch (e) {
|
|
console.error("Failed to parse research plan JSON", e);
|
|
return { trendsKeywords: [], dcVariables: [] };
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Researches the top-ranking competitors on Google for a given topic.
|
|
* Extracts their titles and snippets to guide the LLM to write better content.
|
|
*/
|
|
async researchCompetitors(topic: string, retries = 1): Promise<string[]> {
|
|
console.log(
|
|
`🔍 [Competitor Research] Fetching top ranking web pages for topic: "${topic.slice(0, 50)}..."`,
|
|
);
|
|
|
|
// Step 1: LLM generates the optimal Google Search query
|
|
const queryGen = await this.openai.chat.completions.create({
|
|
model: "google/gemini-3-flash-preview",
|
|
messages: [
|
|
{
|
|
role: "system",
|
|
content: `Generate a Google Search query that a B2B decision maker would use to research the following topic: "${topic}".
|
|
Focus on intent-driven keywords.
|
|
Return a JSON object with a single string field "query". Example: {"query": "Next.js performance optimization agency"}.
|
|
DO NOT USE QUOTES IN THE QUERY ITSELF.`,
|
|
},
|
|
],
|
|
response_format: { type: "json_object" },
|
|
});
|
|
|
|
try {
|
|
const parsed = JSON.parse(
|
|
queryGen.choices[0].message.content || '{"query": ""}',
|
|
);
|
|
const queryStr = parsed.query || topic;
|
|
|
|
// Step 2: Search via Serper Web Search
|
|
const organicResults = await this.serperClient.searchWeb(queryStr, 5);
|
|
|
|
if (!organicResults || organicResults.length === 0) {
|
|
console.warn(
|
|
`⚠️ [Competitor Research] No web results found for query: "${queryStr}"`,
|
|
);
|
|
if (retries > 0) return this.researchCompetitors(topic, retries - 1);
|
|
return [];
|
|
}
|
|
|
|
// Map to structured insights string
|
|
const insights = organicResults.map((result, i) => {
|
|
return `[Rank #${i + 1}] Title: "${result.title}" | Snippet: "${result.snippet}"`;
|
|
});
|
|
|
|
console.log(
|
|
`✅ [Competitor Research] Analyzed top ${insights.length} competitor articles.`,
|
|
);
|
|
return insights;
|
|
} catch (e) {
|
|
console.error("❌ Failed to fetch competitor research:", e);
|
|
return [];
|
|
}
|
|
}
|
|
}
|