Files
at-mintel/packages/seo-engine/src/engine.ts
Marc Mintel ded9da7d32
Some checks failed
Monorepo Pipeline / ⚡ Prioritize Release (push) Successful in 2s
Monorepo Pipeline / 🧪 Test (push) Failing after 51s
Monorepo Pipeline / 🧹 Lint (push) Failing after 2m25s
Monorepo Pipeline / 🏗️ Build (push) Successful in 2m28s
Monorepo Pipeline / 🚀 Release (push) Has been skipped
Monorepo Pipeline / 🐳 Build Gatekeeper (Product) (push) Has been skipped
Monorepo Pipeline / 🐳 Build Build-Base (push) Has been skipped
Monorepo Pipeline / 🐳 Build Production Runtime (push) Has been skipped
feat(seo-engine): implement competitor scraper, MDX draft editor, and strategy report generator
2026-03-02 10:16:11 +01:00

238 lines
8.6 KiB
TypeScript

import { llmJsonRequest } from "./llm-client.js";
import { fetchSerperData } from "./agents/serper-agent.js";
import { fetchAutocompleteSuggestions } from "./agents/serper-autocomplete.js";
import {
fetchCompetitorRankings,
type CompetitorRanking,
} from "./agents/serper-competitors.js";
import {
scrapeCompetitorUrl,
analyzeCompetitorContent,
type ReverseEngineeredBriefing,
} from "./agents/scraper.js";
import { analyzeContentGaps, type ContentGap } from "./steps/content-gap.js";
import { SEO_SYSTEM_PROMPT } from "./prompts.js";
import type {
ProjectContext,
SeoConfig,
SeoEngineOutput,
TopicCluster,
} from "./types.js";
// Model identifier used for the topic-clustering LLM request when `config.model` is not set.
const DEFAULT_MODEL = "google/gemini-2.5-pro";
/**
 * Runs the SEO research pipeline for a project.
 *
 * Pipeline:
 *  1. Derives seed queries from the context (company name, industry, up to two
 *     competitors, seed keywords) and fetches Google search data plus
 *     autocomplete suggestions for every seed in parallel.
 *  2. Sends the collected raw terms to the LLM for evaluation and clustering,
 *     then attaches a coarse volume estimate to secondary keywords that match
 *     a seed query (using the seed's estimated total results as a proxy).
 *  3/4. When competitors are configured, fetches competitor rankings for up to
 *     five distinct primary keywords and reverse engineers one ranking page
 *     per keyword into a briefing.
 *  5. When existing pages are provided, runs the content gap analysis.
 *  Finally, if `config.maxKeywords` is set, caps the total number of secondary
 *  keywords across all clusters (clusters are consumed in order).
 *
 * @param context Project description used to derive seeds and build the LLM prompt.
 * @param config API keys, optional model override and optional keyword cap.
 * @returns Topic clusters, competitor rankings and briefings, content gaps,
 *          autocomplete suggestions and the terms the LLM discarded.
 * @throws Error when the Serper or OpenRouter API key is missing, or when the
 *         context yields no seed query. Failures of the underlying fetch/LLM
 *         calls are not caught here and propagate to the caller.
 */
export async function runSeoEngine(
  context: ProjectContext,
  config: SeoConfig,
): Promise<SeoEngineOutput> {
  // Capture the keys in `const` locals so the narrowing below still holds
  // inside the async callbacks (no non-null assertions needed).
  const { serperApiKey, openRouterApiKey } = config;
  if (!serperApiKey) throw new Error("Missing Serper API Key in SeoConfig.");
  if (!openRouterApiKey)
    throw new Error("Missing OpenRouter API Key in SeoConfig.");

  const locale = context.locale ?? { gl: "de", hl: "de" };

  // Normalized form used to match LLM-returned terms against seed queries.
  const normalizeTerm = (term: string): string => term.trim().toLowerCase();

  // Derive seed queries from context. Blank entries are dropped and duplicates
  // removed so the same query is never sent to Serper twice.
  const seedCandidates: Array<string | null | undefined> = [
    context.companyName,
    context.industry,
    ...(context.competitors ?? []).slice(0, 2),
    ...(context.seedKeywords ?? []),
  ];
  const seedQueries = Array.from(
    new Set(
      seedCandidates
        .filter((s): s is string => typeof s === "string")
        .map((s) => s.trim())
        .filter((s) => s.length > 0),
    ),
  );

  if (seedQueries.length === 0) {
    throw new Error(
      "ProjectContext must provide at least an industry, company name, or seedKeywords.",
    );
  }

  console.log(
    `[SEO Engine] Sourcing raw data for ${seedQueries.length} seeds: ${seedQueries.join(", ")}`,
  );

  // ──────────────────────────────────────────────
  // Step 1: Google Search Data + Autocomplete (parallel per seed)
  // ──────────────────────────────────────────────
  const rawSearchData = new Set<string>();
  const allAutocompleteSuggestions = new Set<string>();
  const volumeMap = new Map<string, number>(); // normalized seed → totalResults

  await Promise.all(
    seedQueries.map(async (query) => {
      const [searchResult, autocompleteResult] = await Promise.all([
        fetchSerperData(query, serperApiKey, locale),
        fetchAutocompleteSuggestions(query, serperApiKey, locale),
      ]);

      for (const related of searchResult.relatedSearches) {
        rawSearchData.add(related);
      }
      for (const question of searchResult.peopleAlsoAsk) {
        rawSearchData.add(question);
      }
      for (const snippet of searchResult.organicSnippets) {
        rawSearchData.add(snippet);
      }
      for (const suggestion of autocompleteResult.suggestions) {
        rawSearchData.add(suggestion);
        allAutocompleteSuggestions.add(suggestion);
      }

      if (searchResult.estimatedTotalResults > 0) {
        volumeMap.set(normalizeTerm(query), searchResult.estimatedTotalResults);
      }
    }),
  );

  const rawTerms = Array.from(rawSearchData);
  console.log(
    `[SEO Engine] Sourced ${rawTerms.length} raw terms (incl. ${allAutocompleteSuggestions.size} autocomplete). Evaluating with LLM...`,
  );

  // ──────────────────────────────────────────────
  // Step 2: LLM Evaluation + Topic Clustering
  // ──────────────────────────────────────────────
  const userPrompt = `
PROJECT CONTEXT:
CompanyName: ${context.companyName || "N/A"}
Industry / Main Focus: ${context.industry || "N/A"}
Briefing Summary: ${context.briefing || "N/A"}
Target Audience: ${context.targetAudience || "N/A"}
Known Competitors: ${context.competitors?.join(", ") || "N/A"}
EXTRA STRICT GUIDELINES:
${context.customGuidelines || "None. Apply standard Mintel strict adherence."}
RAW SEARCH TERMS SOURCED FROM GOOGLE (incl. autocomplete, PAA, related, snippets):
${rawTerms.map((t, i) => `${i + 1}. ${t}`).join("\n")}
EVALUATE AND CLUSTER STRICTLY ACCORDING TO SYSTEM INSTRUCTIONS.
`;

  const { data: clusterData } = await llmJsonRequest<{
    topicClusters: TopicCluster[];
    discardedTerms: string[];
  }>({
    model: config.model || DEFAULT_MODEL,
    apiKey: openRouterApiKey,
    systemPrompt: SEO_SYSTEM_PROMPT,
    userPrompt,
  });

  // The LLM response is untrusted JSON: fall back to empty collections when a
  // field is missing or has the wrong shape instead of crashing further down.
  const topicClusters = Array.isArray(clusterData?.topicClusters)
    ? clusterData.topicClusters
    : [];
  const discardedTerms = Array.isArray(clusterData?.discardedTerms)
    ? clusterData.discardedTerms
    : [];

  // Attach volume estimates based on totalResults proxy
  for (const cluster of topicClusters) {
    if (!Array.isArray(cluster.secondaryKeywords)) {
      cluster.secondaryKeywords = [];
    }
    for (const kw of cluster.secondaryKeywords) {
      if (typeof kw.term !== "string") continue;
      // Case/whitespace-insensitive match: the LLM may re-case a seed term.
      const vol = volumeMap.get(normalizeTerm(kw.term));
      if (vol === undefined) continue;
      kw.estimatedVolume =
        vol > 1_000_000 ? "high" : vol > 100_000 ? "medium" : "low";
    }
  }

  console.log(
    `[SEO Engine] LLM clustered ${topicClusters.reduce((a, c) => a + c.secondaryKeywords.length + 1, 0)} keywords into ${topicClusters.length} clusters. Discarded ${discardedTerms.length}.`,
  );

  // ──────────────────────────────────────────────
  // Step 3 & 4: Competitor SERP Analysis & Content Scraping
  // ──────────────────────────────────────────────
  let competitorRankings: CompetitorRanking[] = [];
  const competitorBriefings: Record<string, ReverseEngineeredBriefing> = {};
  const competitors = context.competitors;

  if (competitors && competitors.length > 0) {
    // Distinct, non-blank primary keywords only: duplicates would repeat the
    // same SERP lookup, duplicate ranking rows and re-run the same scrape.
    const primaryKeywords = Array.from(
      new Set(
        topicClusters
          .map((c) => c.primaryKeyword)
          .filter((kw) => typeof kw === "string" && kw.trim().length > 0),
      ),
    ).slice(0, 5);

    console.log(
      `[SEO Engine] Checking competitor rankings for: ${primaryKeywords.join(", ")}`,
    );

    const rankingResults = await Promise.all(
      primaryKeywords.map((kw) =>
        fetchCompetitorRankings(kw, competitors, serperApiKey, locale),
      ),
    );
    competitorRankings = rankingResults.flat();

    console.log(
      `[SEO Engine] Found ${competitorRankings.length} competitor rankings.`,
    );

    // Pick top ranking competitor for each primary keyword to reverse engineer
    // NOTE(review): this takes the FIRST ranking returned for the keyword; it
    // is only the best-positioned one if fetchCompetitorRankings returns its
    // results sorted by position — confirm against that agent.
    console.log(`[SEO Engine] Reverse engineering top competitor content...`);

    await Promise.all(
      primaryKeywords.map(async (kw): Promise<void> => {
        const topRanking = competitorRankings.find((r) => r.keyword === kw);
        if (!topRanking) return;

        const scraped = await scrapeCompetitorUrl(topRanking.link);
        if (!scraped) return;

        const briefing = await analyzeCompetitorContent(scraped, kw, {
          openRouterApiKey,
          model: config.model,
        });
        if (briefing) {
          competitorBriefings[kw] = briefing;
        }
      }),
    );

    console.log(
      `[SEO Engine] Generated ${Object.keys(competitorBriefings).length} competitor briefings.`,
    );
  }

  // ──────────────────────────────────────────────
  // Step 5: Content Gap Analysis
  // ──────────────────────────────────────────────
  let contentGaps: ContentGap[] = [];
  if (context.existingPages && context.existingPages.length > 0) {
    console.log(
      `[SEO Engine] Analyzing content gaps against ${context.existingPages.length} existing pages...`,
    );
    contentGaps = await analyzeContentGaps(
      topicClusters,
      context.existingPages,
      {
        openRouterApiKey,
        model: config.model,
      },
    );
    console.log(`[SEO Engine] Found ${contentGaps.length} content gaps.`);
  }

  // ──────────────────────────────────────────────
  // Optional Keyword Cap
  // ──────────────────────────────────────────────
  // The cap counts secondary keywords only and is shared across clusters, so
  // earlier clusters keep their keywords first. A cap of 0/undefined disables it.
  const maxKeywords = config.maxKeywords;
  if (maxKeywords) {
    let kept = 0;
    for (const cluster of topicClusters) {
      cluster.secondaryKeywords = cluster.secondaryKeywords.filter(() => {
        if (kept < maxKeywords) {
          kept++;
          return true;
        }
        return false;
      });
    }
  }

  console.log(`[SEO Engine] ✅ Complete.`);

  return {
    topicClusters,
    competitorRankings,
    competitorBriefings,
    contentGaps,
    autocompleteSuggestions: Array.from(allAutocompleteSuggestions),
    discardedTerms,
  };
}