From ded9da7d32c7b3dba379e0c21d2fcb702ca57298 Mon Sep 17 00:00:00 2001 From: Marc Mintel Date: Mon, 2 Mar 2026 10:16:11 +0100 Subject: [PATCH] feat(seo-engine): implement competitor scraper, MDX draft editor, and strategy report generator --- packages/seo-engine/.gitignore | 4 + packages/seo-engine/README.md | 123 +++++++++ packages/seo-engine/package.json | 37 +++ packages/seo-engine/src/agents/scraper.ts | 132 ++++++++++ .../seo-engine/src/agents/serper-agent.ts | 64 +++++ .../src/agents/serper-autocomplete.ts | 43 ++++ .../src/agents/serper-competitors.ts | 75 ++++++ packages/seo-engine/src/editor.ts | 148 +++++++++++ packages/seo-engine/src/engine.ts | 237 ++++++++++++++++++ packages/seo-engine/src/index.ts | 12 + packages/seo-engine/src/llm-client.ts | 153 +++++++++++ packages/seo-engine/src/prompts.ts | 35 +++ packages/seo-engine/src/report.ts | 237 ++++++++++++++++++ packages/seo-engine/src/steps/content-gap.ts | 84 +++++++ packages/seo-engine/src/test-run.ts | 53 ++++ packages/seo-engine/src/test-serper.ts | 38 +++ packages/seo-engine/src/types.ts | 59 +++++ packages/seo-engine/tsconfig.json | 19 ++ packages/seo-engine/tsup.config.ts | 9 + pnpm-lock.yaml | 34 +++ 20 files changed, 1596 insertions(+) create mode 100644 packages/seo-engine/.gitignore create mode 100644 packages/seo-engine/README.md create mode 100644 packages/seo-engine/package.json create mode 100644 packages/seo-engine/src/agents/scraper.ts create mode 100644 packages/seo-engine/src/agents/serper-agent.ts create mode 100644 packages/seo-engine/src/agents/serper-autocomplete.ts create mode 100644 packages/seo-engine/src/agents/serper-competitors.ts create mode 100644 packages/seo-engine/src/editor.ts create mode 100644 packages/seo-engine/src/engine.ts create mode 100644 packages/seo-engine/src/index.ts create mode 100644 packages/seo-engine/src/llm-client.ts create mode 100644 packages/seo-engine/src/prompts.ts create mode 100644 packages/seo-engine/src/report.ts create mode 100644 packages/seo-engine/src/steps/content-gap.ts create mode 100644 packages/seo-engine/src/test-run.ts create mode 100644 packages/seo-engine/src/test-serper.ts create mode 100644 packages/seo-engine/src/types.ts create mode 100644 packages/seo-engine/tsconfig.json create mode 100644 packages/seo-engine/tsup.config.ts diff --git a/packages/seo-engine/.gitignore b/packages/seo-engine/.gitignore new file mode 100644 index 0000000..2348075 --- /dev/null +++ b/packages/seo-engine/.gitignore @@ -0,0 +1,4 @@ +node_modules +dist +.seo-output +.env diff --git a/packages/seo-engine/README.md b/packages/seo-engine/README.md new file mode 100644 index 0000000..b422275 --- /dev/null +++ b/packages/seo-engine/README.md @@ -0,0 +1,123 @@ +# @mintel/seo-engine + +AI-powered SEO keyword discovery, topic clustering, competitor analysis, and content gap identification — grounded in real search data, zero hallucinations. + +## Architecture + +``` +ProjectContext + SeoConfig + │ + ▼ +┌──────────────────────────────────────────┐ +│ SEO Engine Orchestrator │ +│ │ +│ 1. Seed Query Expansion │ +│ (company + industry + seedKeywords) │ +│ │ +│ 2. Data Collection (parallel) │ +│ ├── Serper Search Agent │ +│ │ (related searches, PAA, │ +│ │ organic snippets, volume proxy) │ +│ ├── Serper Autocomplete Agent │ +│ │ (long-tail suggestions) │ +│ └── Serper Competitor Agent │ +│ (top-10 SERP positions) │ +│ │ +│ 3. LLM Evaluation (Gemini/Claude) │ +│ → Strict context filtering │ +│ → Topic Clustering + Intent Mapping │ +│ │ +│ 4. 
Content Gap Analysis (LLM) │ +│ → Compare clusters vs existing pages │ +│ → Identify missing content │ +└──────────────────────────────────────────┘ + │ + ▼ + SeoEngineOutput + (clusters, gaps, competitors, discarded) +``` + +## Quick Start + +```typescript +import { runSeoEngine } from "@mintel/seo-engine"; + +const result = await runSeoEngine( + { + companyName: "KLZ Cables", + industry: "Mittelspannungskabel, Kabeltiefbau", + briefing: "B2B provider of specialized medium-voltage cables.", + targetAudience: "Bauleiter, Netzbetreiber", + competitors: ["nkt.de", "faberkabel.de"], + seedKeywords: ["NA2XS2Y", "VPE-isoliert"], + existingPages: [ + { url: "/produkte", title: "Produkte" }, + { url: "/kontakt", title: "Kontakt" }, + ], + locale: { gl: "de", hl: "de" }, + }, + { + serperApiKey: process.env.SERPER_API_KEY!, + openRouterApiKey: process.env.OPENROUTER_API_KEY!, + }, +); +``` + +## Configuration + +### `ProjectContext` + +| Field | Type | Description | +| ------------------ | ----------------------------- | ------------------------------------------- | +| `companyName` | `string?` | Client company name | +| `industry` | `string?` | Industry / main focus keywords | +| `briefing` | `string?` | Project briefing text | +| `targetAudience` | `string?` | Who the content targets | +| `competitors` | `string[]?` | Competitor domains to analyze | +| `seedKeywords` | `string[]?` | Explicit seed keywords beyond auto-derived | +| `existingPages` | `{ url, title }[]?` | Current site pages for content gap analysis | +| `customGuidelines` | `string?` | Extra strict filtering rules for the LLM | +| `locale` | `{ gl: string, hl: string }?` | Google locale (default: `de`) | + +### `SeoConfig` + +| Field | Type | Description | +| ------------------ | --------- | -------------------------------------------- | +| `serperApiKey` | `string` | **Required.** Serper API key | +| `openRouterApiKey` | `string` | **Required.** OpenRouter API key | +| `model` | `string?` | LLM model (default: `google/gemini-2.5-pro`) | +| `maxKeywords` | `number?` | Cap total keywords returned | + +## Output + +```typescript +interface SeoEngineOutput { + topicClusters: TopicCluster[]; // Grouped keywords with intent + scores + competitorRankings: CompetitorRanking[]; // Who ranks for your terms + contentGaps: ContentGap[]; // Missing pages you should create + discardedTerms: string[]; // Terms filtered out (with reasons) +} +``` + +## Agents + +| Agent | Source | Data | +| --------------------- | ---------------------- | ----------------------------------- | +| `serper-agent` | Serper `/search` | Related searches, PAA, snippets | +| `serper-autocomplete` | Serper `/autocomplete` | Google Autocomplete long-tail terms | +| `serper-competitors` | Serper `/search` | Competitor SERP positions | + +## API Keys + +- **Serper** — [serper.dev](https://serper.dev) (pay-per-search, ~$0.001/query) +- **OpenRouter** — [openrouter.ai](https://openrouter.ai) (pay-per-token) + +No monthly subscriptions. Pure pay-on-demand. 
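+## Reports & Drafts
+
+The engine output feeds straight into the bundled report and draft generators. A minimal sketch, continuing the Quick Start above (`result` is the returned `SeoEngineOutput`; the paths and author name are just example values):
+
+```typescript
+import { generateSeoReport, createGapDrafts } from "@mintel/seo-engine";
+
+// Write the human-readable Markdown strategy report
+const reportPath = await generateSeoReport(result, {
+  projectName: "KLZ Cables",
+  outputDir: ".seo-output",
+});
+
+// Create one MDX draft per high/medium-priority content gap,
+// enriched with the reverse-engineered competitor briefings
+const draftFiles = await createGapDrafts(
+  result.contentGaps,
+  new Map(Object.entries(result.competitorBriefings)),
+  { outputDir: ".seo-output/drafts", authorName: "KLZ Content Team" },
+);
+```
+
+Both helpers resolve `outputDir` relative to `process.cwd()` and create the directory if it does not exist.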
+
+## Development
+
+```bash
+pnpm install              # from monorepo root
+pnpm --filter @mintel/seo-engine build
+npx tsx src/test-run.ts    # smoke test (needs API keys in .env)
+```
diff --git a/packages/seo-engine/package.json b/packages/seo-engine/package.json
new file mode 100644
index 0000000..110e1cd
--- /dev/null
+++ b/packages/seo-engine/package.json
@@ -0,0 +1,37 @@
+{
+  "name": "@mintel/seo-engine",
+  "version": "1.0.0",
+  "private": true,
+  "description": "AI-powered SEO keyword and topic cluster evaluation engine",
+  "type": "module",
+  "main": "./dist/index.js",
+  "module": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "tsup",
+    "dev": "tsup --watch",
+    "test": "vitest",
+    "clean": "rm -rf dist",
+    "lint": "eslint src --ext .ts"
+  },
+  "dependencies": {
+    "axios": "^1.7.9",
+    "cheerio": "1.0.0-rc.12",
+    "dotenv": "^16.4.7"
+  },
+  "devDependencies": {
+    "@mintel/eslint-config": "workspace:*",
+    "@mintel/tsconfig": "workspace:*",
+    "@types/node": "^20.17.17",
+    "tsup": "^8.3.6",
+    "tsx": "^4.19.2",
+    "typescript": "^5.7.3",
+    "vitest": "^3.0.5"
+  }
+}
diff --git a/packages/seo-engine/src/agents/scraper.ts b/packages/seo-engine/src/agents/scraper.ts
new file mode 100644
index 0000000..f12c7ef
--- /dev/null
+++ b/packages/seo-engine/src/agents/scraper.ts
@@ -0,0 +1,132 @@
+import axios from "axios";
+import * as cheerio from "cheerio";
+import { llmJsonRequest } from "../llm-client.js";
+
+export interface ScrapedContext {
+  url: string;
+  wordCount: number;
+  text: string;
+  headings: { level: number; text: string }[];
+}
+
+export interface ReverseEngineeredBriefing {
+  recommendedWordCount: number;
+  coreTopicsToCover: string[];
+  suggestedHeadings: string[];
+  entitiesToInclude: string[];
+  contentFormat: string; // e.g. "Lange Liste mit Fakten", "Kaufberater", "Lexikon-Eintrag"
+}
+
+/**
+ * Fetches the HTML of a URL and extracts the main readable text and headings.
+ */
+export async function scrapeCompetitorUrl(
+  url: string,
+): Promise<ScrapedContext | null> {
+  try {
+    console.log(`[Scraper] Fetching source: ${url}`);
+    const response = await axios.get(url, {
+      headers: {
+        "User-Agent":
+          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+      },
+      timeout: 10000,
+    });
+
+    const $ = cheerio.load(response.data);
+
+    // Remove junk elements before extracting text
+    $(
+      "script, style, nav, footer, header, aside, .cookie, .banner, iframe",
+    ).remove();
+
+    const headings: { level: number; text: string }[] = [];
+    // Explicit tag list — jQuery's ":header" extension is not reliably supported by cheerio's selector engine
+    $("h1, h2, h3, h4, h5, h6").each((_, el) => {
+      const level = parseInt(el.tagName.replace(/h/i, ""), 10);
+      const text = $(el).text().trim().replace(/\s+/g, " ");
+      if (text) headings.push({ level, text });
+    });
+
+    // Extract body text, removing excessive whitespace
+    const text = $("body").text().replace(/\s+/g, " ").trim();
+    const wordCount = text.split(" ").length;
+
+    return {
+      url,
+      text: text.slice(0, 15000), // Cap length to prevent blowing up the LLM token limit
+      wordCount,
+      headings,
+    };
+  } catch (err) {
+    console.error(
+      `[Scraper] Failed to scrape ${url}: ${(err as Error).message}`,
+    );
+    return null;
+  }
+}
+
+const BRIEFING_SYSTEM_PROMPT = `
+You are a Senior Technical SEO Strategist.
+I will give you the scraped text and headings of a competitor's article that currently ranks #1 on Google for our target keyword.
+
+### OBJECTIVE:
+Reverse engineer the content. Tell me EXACTLY what topics, entities, and headings we must include
+in our own article to beat this competitor.
+Do not just copy their headings. Distill the *core intent* and *required knowledge depth*.
+
+### RULES:
+- If the text is very short (e.g., an e-commerce category page), mention that the format is "Category Page" and recommend a word count +50% higher than theirs.
+- Extract hyper-specific entities (e.g. DIN norms, specific materials, specific processes) that prove topic authority.
+- LANGUAGE: Match the language of the provided text.
+
+### OUTPUT FORMAT:
+{
+  "recommendedWordCount": number,
+  "coreTopicsToCover": ["string"],
+  "suggestedHeadings": ["string"],
+  "entitiesToInclude": ["string"],
+  "contentFormat": "string"
+}
+`;
+
+/**
+ * Analyzes the scraped context using an LLM to generate a blueprint to beat the competitor.
+ */
+export async function analyzeCompetitorContent(
+  context: ScrapedContext,
+  targetKeyword: string,
+  config: { openRouterApiKey: string; model?: string },
+): Promise<ReverseEngineeredBriefing | null> {
+  const userPrompt = `
+TARGET KEYWORD TO BEAT: "${targetKeyword}"
+COMPETITOR URL: ${context.url}
+COMPETITOR WORD COUNT: ${context.wordCount}
+
+COMPETITOR HEADINGS:
+${context.headings.map((h) => `H${h.level}: ${h.text}`).join("\n")}
+
+COMPETITOR TEXT (Truncated):
+${context.text}
+`;
+
+  try {
+    const { data } = await llmJsonRequest<ReverseEngineeredBriefing>({
+      model: config.model || "google/gemini-2.5-pro",
+      apiKey: config.openRouterApiKey,
+      systemPrompt: BRIEFING_SYSTEM_PROMPT,
+      userPrompt,
+    });
+
+    // Ensure numbers are numbers
+    data.recommendedWordCount =
+      Number(data.recommendedWordCount) || context.wordCount + 300;
+
+    return data;
+  } catch (err) {
+    console.error(
+      `[Scraper] NLP Analysis failed for ${context.url}:`,
+      (err as Error).message,
+    );
+    return null;
+  }
+}
diff --git a/packages/seo-engine/src/agents/serper-agent.ts b/packages/seo-engine/src/agents/serper-agent.ts
new file mode 100644
index 0000000..7357557
--- /dev/null
+++ b/packages/seo-engine/src/agents/serper-agent.ts
@@ -0,0 +1,64 @@
+import axios from "axios";
+
+export interface SerperResult {
+  relatedSearches: string[];
+  peopleAlsoAsk: string[];
+  organicSnippets: string[];
+  estimatedTotalResults: number;
+}
+
+/**
+ * Fetch Google search data via Serper's /search endpoint.
+ * Extracts related searches, People Also Ask, organic snippets,
+ * and totalResults as a search volume proxy.
+ */
+export async function fetchSerperData(
+  query: string,
+  apiKey: string,
+  locale: { gl: string; hl: string } = { gl: "de", hl: "de" },
+): Promise<SerperResult> {
+  try {
+    const response = await axios.post(
+      "https://google.serper.dev/search",
+      {
+        q: query,
+        gl: locale.gl,
+        hl: locale.hl,
+      },
+      {
+        headers: {
+          "X-API-KEY": apiKey,
+          "Content-Type": "application/json",
+        },
+      },
+    );
+
+    const data = response.data;
+
+    const relatedSearches =
+      data.relatedSearches?.map((r: any) => r.query) || [];
+    const peopleAlsoAsk = data.peopleAlsoAsk?.map((p: any) => p.question) || [];
+    const organicSnippets = data.organic?.map((o: any) => o.snippet) || [];
+    const estimatedTotalResults = data.searchInformation?.totalResults
+      ? parseInt(data.searchInformation.totalResults, 10)
+      : 0;
+
+    return {
+      relatedSearches,
+      peopleAlsoAsk,
+      organicSnippets,
+      estimatedTotalResults,
+    };
+  } catch (error) {
+    console.error(
+      `Serper API error for query "${query}":`,
+      (error as Error).message,
+    );
+    return {
+      relatedSearches: [],
+      peopleAlsoAsk: [],
+      organicSnippets: [],
+      estimatedTotalResults: 0,
+    };
+  }
+}
diff --git a/packages/seo-engine/src/agents/serper-autocomplete.ts b/packages/seo-engine/src/agents/serper-autocomplete.ts
new file mode 100644
index 0000000..fa10742
--- /dev/null
+++ b/packages/seo-engine/src/agents/serper-autocomplete.ts
@@ -0,0 +1,43 @@
+import axios from "axios";
+
+export interface AutocompleteResult {
+  suggestions: string[];
+}
+
+/**
+ * Fetch Google Autocomplete suggestions via Serper's /autocomplete endpoint.
+ * These represent real user typing behavior — extremely high-signal for long-tail keywords.
+ */
+export async function fetchAutocompleteSuggestions(
+  query: string,
+  apiKey: string,
+  locale: { gl: string; hl: string } = { gl: "de", hl: "de" },
+): Promise<AutocompleteResult> {
+  try {
+    const response = await axios.post(
+      "https://google.serper.dev/autocomplete",
+      {
+        q: query,
+        gl: locale.gl,
+        hl: locale.hl,
+      },
+      {
+        headers: {
+          "X-API-KEY": apiKey,
+          "Content-Type": "application/json",
+        },
+      },
+    );
+
+    const suggestions =
+      response.data.suggestions?.map((s: any) => s.value || s) || [];
+
+    return { suggestions };
+  } catch (error) {
+    console.error(
+      `Serper Autocomplete error for query "${query}":`,
+      (error as Error).message,
+    );
+    return { suggestions: [] };
+  }
+}
diff --git a/packages/seo-engine/src/agents/serper-competitors.ts b/packages/seo-engine/src/agents/serper-competitors.ts
new file mode 100644
index 0000000..1c6b6e4
--- /dev/null
+++ b/packages/seo-engine/src/agents/serper-competitors.ts
@@ -0,0 +1,75 @@
+import axios from "axios";
+
+export interface CompetitorRanking {
+  keyword: string;
+  domain: string;
+  position: number;
+  title: string;
+  snippet: string;
+  link: string;
+}
+
+/**
+ * For a given keyword, check which competitor domains appear in the top organic results.
+ * Filters results to only include domains in the `competitorDomains` list.
+ */
+export async function fetchCompetitorRankings(
+  keyword: string,
+  competitorDomains: string[],
+  apiKey: string,
+  locale: { gl: string; hl: string } = { gl: "de", hl: "de" },
+): Promise<CompetitorRanking[]> {
+  if (competitorDomains.length === 0) return [];
+
+  try {
+    const response = await axios.post(
+      "https://google.serper.dev/search",
+      {
+        q: keyword,
+        gl: locale.gl,
+        hl: locale.hl,
+        num: 20,
+      },
+      {
+        headers: {
+          "X-API-KEY": apiKey,
+          "Content-Type": "application/json",
+        },
+      },
+    );
+
+    const organic: any[] = response.data.organic || [];
+
+    // Normalize competitor domains for matching
+    const normalizedCompetitors = competitorDomains.map((d) =>
+      d
+        .replace(/^(https?:\/\/)?(www\.)?/, "")
+        .replace(/\/$/, "")
+        .toLowerCase(),
+    );
+
+    return organic
+      .filter((result: any) => {
+        const resultDomain = new URL(result.link).hostname
+          .replace(/^www\./, "")
+          .toLowerCase();
+        return normalizedCompetitors.some(
+          (cd) => resultDomain === cd || resultDomain.endsWith(`.${cd}`),
+        );
+      })
+      .map((result: any) => ({
+        keyword,
+        domain: new URL(result.link).hostname.replace(/^www\./, ""),
+        position: result.position,
+        title: result.title || "",
+        snippet: result.snippet || "",
+        link: result.link,
+      }));
+  } catch (error) {
+    console.error(
+      `Serper Competitor check error for keyword "${keyword}":`,
+      (error as Error).message,
+    );
+    return [];
+  }
+}
diff --git a/packages/seo-engine/src/editor.ts b/packages/seo-engine/src/editor.ts
new file mode 100644
index 0000000..cb17344
--- /dev/null
+++ b/packages/seo-engine/src/editor.ts
@@ -0,0 +1,148 @@
+import * as fs from "node:fs/promises";
+import * as path from "node:path";
+import type { ContentGap } from "./types.js";
+import type { ReverseEngineeredBriefing } from "./agents/scraper.js";
+
+export interface FileEditorConfig {
+  outputDir: string;
+  authorName?: string;
+}
+
+/**
+ * Generates an SEO-friendly URL slug from a title.
+ */
+function createSlug(title: string): string {
+  return title
+    .toLowerCase()
+    .replace(/ä/g, "ae")
+    .replace(/ö/g, "oe")
+    .replace(/ü/g, "ue")
+    .replace(/ß/g, "ss")
+    .replace(/[^a-z0-9]+/g, "-")
+    .replace(/^-+|-+$/g, "");
+}
+
+/**
+ * Automatically creates local .mdx draft files for identified high-priority content gaps.
+ * Each file is self-explanatory: it tells the writer exactly WHY this page needs to exist,
+ * WHAT to write, and HOW to structure the content — all based on real competitor data.
+ */
+export async function createGapDrafts(
+  gaps: ContentGap[],
+  briefings: Map<string, ReverseEngineeredBriefing>,
+  config: FileEditorConfig,
+): Promise<string[]> {
+  const createdFiles: string[] = [];
+
+  try {
+    await fs.mkdir(path.resolve(process.cwd(), config.outputDir), {
+      recursive: true,
+    });
+  } catch (e) {
+    console.error(
+      `[File Editor] Could not create directory ${config.outputDir}:`,
+      e,
+    );
+    return [];
+  }
+
+  const dateStr = new Date().toISOString().split("T")[0];
+
+  for (const gap of gaps) {
+    if (gap.priority === "low") continue;
+
+    const slug = createSlug(gap.recommendedTitle);
+    const filePath = path.join(
+      path.resolve(process.cwd(), config.outputDir),
+      `${slug}.mdx`,
+    );
+    const briefing = briefings.get(gap.targetKeyword);
+
+    const priorityEmoji = gap.priority === "high" ?
"🔴" : "🟡"; + + let body = ""; + + // ── Intro: Explain WHY this file exists ── + body += `{/* ═══════════════════════════════════════════════════════════════════\n`; + body += ` 📋 SEO CONTENT BRIEFING — Auto-generated by @mintel/seo-engine\n`; + body += ` ═══════════════════════════════════════════════════════════════════\n\n`; + body += ` Dieses Dokument wurde automatisch erstellt.\n`; + body += ` Es basiert auf einer Analyse der aktuellen Google-Suchergebnisse\n`; + body += ` und der Webseiten deiner Konkurrenz.\n\n`; + body += ` ▸ Du kannst dieses File direkt als MDX-Seite verwenden.\n`; + body += ` ▸ Ersetze den Briefing-Block unten durch deinen eigenen Text.\n`; + body += ` ▸ Setze isDraft auf false, wenn der Text fertig ist.\n`; + body += ` ═══════════════════════════════════════════════════════════════════ */}\n\n`; + + // ── Section 1: Warum diese Seite? ── + body += `## ${priorityEmoji} Warum diese Seite erstellt werden sollte\n\n`; + body += `**Priorität:** ${gap.priority === "high" ? "Hoch — Direkt umsatzrelevant" : "Mittel — Stärkt die thematische Autorität"}\n\n`; + body += `${gap.rationale}\n\n`; + body += `| Feld | Wert |\n`; + body += `|------|------|\n`; + body += `| **Focus Keyword** | \`${gap.targetKeyword}\` |\n`; + body += `| **Topic Cluster** | ${gap.relatedCluster} |\n`; + body += `| **Priorität** | ${gap.priority} |\n\n`; + + // ── Section 2: Competitor Briefing ── + if (briefing) { + body += `## 🔍 Konkurrenz-Analyse (Reverse Engineered)\n\n`; + body += `> Die folgenden Daten stammen aus einer automatischen Analyse der Webseite,\n`; + body += `> die aktuell auf **Platz 1 bei Google** für das Keyword \`${gap.targetKeyword}\` rankt.\n`; + body += `> Nutze diese Informationen, um **besseren Content** zu schreiben.\n\n`; + + body += `### Content-Format des Konkurrenten\n\n`; + body += `**${briefing.contentFormat}** — Empfohlene Mindestlänge: **~${briefing.recommendedWordCount} Wörter**\n\n`; + + body += `### Diese Themen MUSS dein Artikel abdecken\n\n`; + body += `Die folgenden Punkte werden vom aktuellen Platz-1-Ranker behandelt. Wenn dein Artikel diese Themen nicht abdeckt, wird es schwer, ihn zu überholen:\n\n`; + briefing.coreTopicsToCover.forEach( + (t, i) => (body += `${i + 1}. ${t}\n`), + ); + + body += `\n### Fachbegriffe & Entitäten die im Text vorkommen müssen\n\n`; + body += `Diese Begriffe signalisieren Google, dass dein Text fachlich tiefgreifend ist. Versuche, möglichst viele davon natürlich in deinen Text einzubauen:\n\n`; + briefing.entitiesToInclude.forEach((e) => (body += `- \`${e}\`\n`)); + + body += `\n### Empfohlene Gliederung\n\n`; + body += `Orientiere dich an dieser Struktur (du kannst sie anpassen):\n\n`; + briefing.suggestedHeadings.forEach( + (h, i) => (body += `${i + 1}. **${h}**\n`), + ); + } else { + body += `## 🔍 Konkurrenz-Analyse\n\n`; + body += `> Für dieses Keyword konnte kein Konkurrent gescraped werden.\n`; + body += `> Schreibe den Artikel trotzdem — du hast weniger Wettbewerb!\n`; + } + + body += `\n---\n\n`; + body += `## ✍️ Dein Content (hier schreiben)\n\n`; + body += `{/* Lösche alles oberhalb dieser Zeile, wenn dein Text fertig ist. */}\n\n`; + body += `Hier beginnt dein eigentlicher Artikel...\n`; + + const file = `--- +title: "${gap.recommendedTitle}" +description: "TODO: Meta-Description mit dem Keyword '${gap.targetKeyword}' schreiben." 
+date: "${dateStr}" +author: "${config.authorName || "Mintel SEO Engine"}" +tags: ["${gap.relatedCluster}"] +isDraft: true +focus_keyword: "${gap.targetKeyword}" +--- + +${body}`; + + try { + await fs.writeFile(filePath, file, "utf8"); + console.log(`[File Editor] Created draft: ${filePath}`); + createdFiles.push(filePath); + } catch (err) { + console.error( + `[File Editor] Failed to write ${filePath}:`, + (err as Error).message, + ); + } + } + + return createdFiles; +} diff --git a/packages/seo-engine/src/engine.ts b/packages/seo-engine/src/engine.ts new file mode 100644 index 0000000..8f4e5e1 --- /dev/null +++ b/packages/seo-engine/src/engine.ts @@ -0,0 +1,237 @@ +import { llmJsonRequest } from "./llm-client.js"; +import { fetchSerperData } from "./agents/serper-agent.js"; +import { fetchAutocompleteSuggestions } from "./agents/serper-autocomplete.js"; +import { + fetchCompetitorRankings, + type CompetitorRanking, +} from "./agents/serper-competitors.js"; +import { + scrapeCompetitorUrl, + analyzeCompetitorContent, + type ReverseEngineeredBriefing, +} from "./agents/scraper.js"; +import { analyzeContentGaps, type ContentGap } from "./steps/content-gap.js"; +import { SEO_SYSTEM_PROMPT } from "./prompts.js"; +import type { + ProjectContext, + SeoConfig, + SeoEngineOutput, + TopicCluster, +} from "./types.js"; + +const DEFAULT_MODEL = "google/gemini-2.5-pro"; + +export async function runSeoEngine( + context: ProjectContext, + config: SeoConfig, +): Promise { + if (!config.serperApiKey) + throw new Error("Missing Serper API Key in SeoConfig."); + if (!config.openRouterApiKey) + throw new Error("Missing OpenRouter API Key in SeoConfig."); + + const locale = context.locale || { gl: "de", hl: "de" }; + const seedQueries: string[] = []; + + // Derive seed queries from context + if (context.companyName) seedQueries.push(context.companyName); + if (context.industry) seedQueries.push(context.industry); + if (context.competitors && context.competitors.length > 0) { + seedQueries.push(...context.competitors.slice(0, 2)); + } + if (context.seedKeywords && context.seedKeywords.length > 0) { + seedQueries.push(...context.seedKeywords); + } + + if (seedQueries.length === 0) { + throw new Error( + "ProjectContext must provide at least an industry, company name, or seedKeywords.", + ); + } + + console.log( + `[SEO Engine] Sourcing raw data for ${seedQueries.length} seeds: ${seedQueries.join(", ")}`, + ); + + // ────────────────────────────────────────────── + // Step 1: Google Search Data + Autocomplete (parallel per seed) + // ────────────────────────────────────────────── + const rawSearchData = new Set(); + const allAutocompleteSuggestions = new Set(); + const volumeMap = new Map(); // keyword → totalResults + + const searchPromises = seedQueries.map(async (query) => { + const [searchResult, autocompleteResult] = await Promise.all([ + fetchSerperData(query, config.serperApiKey!, locale), + fetchAutocompleteSuggestions(query, config.serperApiKey!, locale), + ]); + + searchResult.relatedSearches.forEach((r) => rawSearchData.add(r)); + searchResult.peopleAlsoAsk.forEach((p) => rawSearchData.add(p)); + searchResult.organicSnippets.forEach((o) => rawSearchData.add(o)); + autocompleteResult.suggestions.forEach((s) => { + rawSearchData.add(s); + allAutocompleteSuggestions.add(s); + }); + + if (searchResult.estimatedTotalResults > 0) { + volumeMap.set(query, searchResult.estimatedTotalResults); + } + }); + + await Promise.all(searchPromises); + const rawTerms = Array.from(rawSearchData); + + console.log( + 
`[SEO Engine] Sourced ${rawTerms.length} raw terms (incl. ${allAutocompleteSuggestions.size} autocomplete). Evaluating with LLM...`, + ); + + // ────────────────────────────────────────────── + // Step 2: LLM Evaluation + Topic Clustering + // ────────────────────────────────────────────── + const userPrompt = ` +PROJECT CONTEXT: +CompanyName: ${context.companyName || "N/A"} +Industry / Main Focus: ${context.industry || "N/A"} +Briefing Summary: ${context.briefing || "N/A"} +Target Audience: ${context.targetAudience || "N/A"} +Known Competitors: ${context.competitors?.join(", ") || "N/A"} + +EXTRA STRICT GUIDELINES: +${context.customGuidelines || "None. Apply standard Mintel strict adherence."} + +RAW SEARCH TERMS SOURCED FROM GOOGLE (incl. autocomplete, PAA, related, snippets): +${rawTerms.map((t, i) => `${i + 1}. ${t}`).join("\n")} + +EVALUATE AND CLUSTER STRICTLY ACCORDING TO SYSTEM INSTRUCTIONS. +`; + + const { data: clusterData } = await llmJsonRequest<{ + topicClusters: TopicCluster[]; + discardedTerms: string[]; + }>({ + model: config.model || DEFAULT_MODEL, + apiKey: config.openRouterApiKey, + systemPrompt: SEO_SYSTEM_PROMPT, + userPrompt, + }); + + const topicClusters = clusterData.topicClusters || []; + const discardedTerms = clusterData.discardedTerms || []; + + // Attach volume estimates based on totalResults proxy + for (const cluster of topicClusters) { + for (const kw of cluster.secondaryKeywords) { + const vol = volumeMap.get(kw.term); + if (vol !== undefined) { + kw.estimatedVolume = + vol > 1_000_000 ? "high" : vol > 100_000 ? "medium" : "low"; + } + } + } + + console.log( + `[SEO Engine] LLM clustered ${topicClusters.reduce((a, c) => a + c.secondaryKeywords.length + 1, 0)} keywords into ${topicClusters.length} clusters. Discarded ${discardedTerms.length}.`, + ); + + // ────────────────────────────────────────────── + // Step 3 & 4: Competitor SERP Analysis & Content Scraping + // ────────────────────────────────────────────── + let competitorRankings: CompetitorRanking[] = []; + const competitorBriefings: Record = {}; + + if (context.competitors && context.competitors.length > 0) { + const primaryKeywords = topicClusters + .map((c) => c.primaryKeyword) + .slice(0, 5); + console.log( + `[SEO Engine] Checking competitor rankings for: ${primaryKeywords.join(", ")}`, + ); + + const competitorPromises = primaryKeywords.map((kw) => + fetchCompetitorRankings( + kw, + context.competitors!, + config.serperApiKey!, + locale, + ), + ); + const results = await Promise.all(competitorPromises); + competitorRankings = results.flat(); + + console.log( + `[SEO Engine] Found ${competitorRankings.length} competitor rankings.`, + ); + + // Pick top ranking competitor for each primary keyword to reverse engineer + console.log(`[SEO Engine] Reverse engineering top competitor content...`); + const scrapePromises = primaryKeywords.map(async (kw) => { + const topRanking = competitorRankings.find((r) => r.keyword === kw); + if (!topRanking) return null; + + const scraped = await scrapeCompetitorUrl(topRanking.link); + if (!scraped) return null; + + const briefing = await analyzeCompetitorContent(scraped, kw, { + openRouterApiKey: config.openRouterApiKey!, + model: config.model, + }); + + if (briefing) { + competitorBriefings[kw] = briefing; + } + }); + + await Promise.all(scrapePromises); + console.log( + `[SEO Engine] Generated ${Object.keys(competitorBriefings).length} competitor briefings.`, + ); + } + + // ────────────────────────────────────────────── + // Step 5: Content Gap Analysis + // 
────────────────────────────────────────────── + let contentGaps: ContentGap[] = []; + + if (context.existingPages && context.existingPages.length > 0) { + console.log( + `[SEO Engine] Analyzing content gaps against ${context.existingPages.length} existing pages...`, + ); + contentGaps = await analyzeContentGaps( + topicClusters, + context.existingPages, + { + openRouterApiKey: config.openRouterApiKey, + model: config.model, + }, + ); + console.log(`[SEO Engine] Found ${contentGaps.length} content gaps.`); + } + + // ────────────────────────────────────────────── + // Optional Keyword Cap + // ────────────────────────────────────────────── + if (config.maxKeywords) { + let count = 0; + for (const cluster of topicClusters) { + cluster.secondaryKeywords = cluster.secondaryKeywords.filter(() => { + if (count < config.maxKeywords!) { + count++; + return true; + } + return false; + }); + } + } + + console.log(`[SEO Engine] ✅ Complete.`); + + return { + topicClusters, + competitorRankings, + competitorBriefings, + contentGaps, + autocompleteSuggestions: Array.from(allAutocompleteSuggestions), + discardedTerms, + }; +} diff --git a/packages/seo-engine/src/index.ts b/packages/seo-engine/src/index.ts new file mode 100644 index 0000000..9f2c0eb --- /dev/null +++ b/packages/seo-engine/src/index.ts @@ -0,0 +1,12 @@ +export * from "./types.js"; +export * from "./engine.js"; +export * from "./editor.js"; +export { generateSeoReport } from "./report.js"; +export { fetchSerperData } from "./agents/serper-agent.js"; +export { fetchAutocompleteSuggestions } from "./agents/serper-autocomplete.js"; +export { fetchCompetitorRankings } from "./agents/serper-competitors.js"; +export { + scrapeCompetitorUrl, + analyzeCompetitorContent, +} from "./agents/scraper.js"; +export { analyzeContentGaps } from "./steps/content-gap.js"; diff --git a/packages/seo-engine/src/llm-client.ts b/packages/seo-engine/src/llm-client.ts new file mode 100644 index 0000000..75641e5 --- /dev/null +++ b/packages/seo-engine/src/llm-client.ts @@ -0,0 +1,153 @@ +// ============================================================================ +// LLM Client — Unified interface with model routing via OpenRouter +// ============================================================================ + +import axios from "axios"; + +export interface LLMRequestOptions { + model: string; + systemPrompt: string; + userPrompt: string; + jsonMode?: boolean; + apiKey: string; +} + +export interface LLMResponse { + content: string; + usage: { + promptTokens: number; + completionTokens: number; + cost: number; + }; +} + +/** + * Clean raw LLM output to parseable JSON. + * Handles markdown fences, control chars, trailing commas. + */ +export function cleanJson(str: string): string { + let cleaned = str.replace(/```json\n?|```/g, "").trim(); + // eslint-disable-next-line no-control-regex + cleaned = cleaned.replace(/[\x00-\x1f\x7f-\x9f]/gi, " "); + + cleaned = cleaned.replace(/,\s*([\]}])/g, "$1"); + return cleaned; +} + +/** + * Send a request to an LLM via OpenRouter. + */ +export async function llmRequest( + options: LLMRequestOptions, +): Promise { + const { model, systemPrompt, userPrompt, jsonMode = true, apiKey } = options; + + const resp = await axios + .post( + "https://openrouter.ai/api/v1/chat/completions", + { + model, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + ...(jsonMode ? 
{ response_format: { type: "json_object" } } : {}), + }, + { + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + timeout: 120000, + }, + ) + .catch((err) => { + if (err.response) { + console.error( + "OpenRouter API Error:", + JSON.stringify(err.response.data, null, 2), + ); + } + throw err; + }); + + const content = resp.data.choices?.[0]?.message?.content; + if (!content) { + throw new Error(`LLM returned no content. Model: ${model}`); + } + + let cost = 0; + const usage = resp.data.usage || {}; + if (usage.cost !== undefined) { + cost = usage.cost; + } else { + // Fallback estimation + cost = + (usage.prompt_tokens || 0) * (0.1 / 1_000_000) + + (usage.completion_tokens || 0) * (0.4 / 1_000_000); + } + + return { + content, + usage: { + promptTokens: usage.prompt_tokens || 0, + completionTokens: usage.completion_tokens || 0, + cost, + }, + }; +} + +/** + * Send a request and parse the response as JSON. + */ +export async function llmJsonRequest( + options: LLMRequestOptions, +): Promise<{ data: T; usage: LLMResponse["usage"] }> { + let response; + try { + response = await llmRequest({ ...options, jsonMode: true }); + } catch (err) { + console.warn( + "Retrying LLM request without forced JSON mode...", + (err as Error).message, + ); + response = await llmRequest({ ...options, jsonMode: false }); + } + + const cleaned = cleanJson(response.content); + + let parsed: T; + try { + parsed = JSON.parse(cleaned); + } catch (e) { + throw new Error( + `Failed to parse LLM JSON response: ${(e as Error).message}\nRaw: ${cleaned.substring(0, 500)}`, + ); + } + + // Unwrap common LLM artifacts: {"0": {...}}, {"state": {...}}, etc. + const unwrapped = unwrapResponse(parsed); + + return { data: unwrapped as T, usage: response.usage }; +} + +/** + * Recursively unwrap common LLM wrapping patterns. + */ +function unwrapResponse(obj: any): any { + if (!obj || typeof obj !== "object" || Array.isArray(obj)) return obj; + const keys = Object.keys(obj); + if (keys.length === 1) { + const key = keys[0]; + if ( + key === "0" || + key === "state" || + key === "facts" || + key === "result" || + key === "data" + ) { + return unwrapResponse(obj[key]); + } + } + return obj; +} diff --git a/packages/seo-engine/src/prompts.ts b/packages/seo-engine/src/prompts.ts new file mode 100644 index 0000000..0f11410 --- /dev/null +++ b/packages/seo-engine/src/prompts.ts @@ -0,0 +1,35 @@ +export const SEO_SYSTEM_PROMPT = ` +You are a high-end Digital Architect and Expert SEO Analyst for the Mintel ecosystem. +Your exact job is to process RAW SEARCH DATA from Google (via Serper API) and evaluate it against our STRICT PROJECT CONTEXT. + +### OBJECTIVE: +Given a project briefing, industry, and raw search queries (related searches, user questions), you must evaluate each term. +Filter out ANY hallucinations, generic irrelevant fluff, or terms that do not strictly match the client's high-end context. +Then, group the surviving relevant terms into logical "Topic Clusters" with search intents. + +### RULES: +- NO Hallucinations. Do not invent keywords that were not provided in the raw data or strongly implied by the context. +- ABOSLUTE STRICTNESS: If a raw search term is irrelevant to the provided industry/briefing, DISCARD IT. Add it to the "discardedTerms" list. +- HIGH-END QUALITY: The Mintel standard requires precision. Exclude generic garbage like "was ist ein unternehmen" if the client does B2B HDD-Bohrverfahren. 
+ +### OUTPUT FORMAT: +You MUST respond with valid JSON matching this schema: +{ + "topicClusters": [ + { + "clusterName": "string", + "primaryKeyword": "string", + "secondaryKeywords": [ + { + "term": "string", + "intent": "informational" | "navigational" | "commercial" | "transactional", + "relevanceScore": number, // 1-10 + "rationale": "string" // Short explanation why this fits the context + } + ], + "userIntent": "string" // Broad intent for the cluster + } + ], + "discardedTerms": ["string"] // Words you threw out and why +} +`; diff --git a/packages/seo-engine/src/report.ts b/packages/seo-engine/src/report.ts new file mode 100644 index 0000000..6fdfa87 --- /dev/null +++ b/packages/seo-engine/src/report.ts @@ -0,0 +1,237 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; +import type { + SeoEngineOutput, + TopicCluster, + ContentGap, + CompetitorRanking, +} from "./types.js"; +import type { ReverseEngineeredBriefing } from "./agents/scraper.js"; + +export interface ReportConfig { + projectName: string; + outputDir: string; + filename?: string; +} + +/** + * Generates a comprehensive, human-readable SEO Strategy Report in Markdown. + * This is the "big picture" document that summarizes everything the SEO Engine found + * and gives the team a clear action plan. + */ +export async function generateSeoReport( + output: SeoEngineOutput, + config: ReportConfig, +): Promise { + const dateStr = new Date().toLocaleDateString("de-DE", { + year: "numeric", + month: "long", + day: "numeric", + }); + + const allKeywords = output.topicClusters.flatMap((c) => [ + c.primaryKeyword, + ...c.secondaryKeywords.map((k) => k.term), + ]); + + let md = ""; + + // ══════════════════════════════════════════════ + // Header + // ══════════════════════════════════════════════ + md += `# 📊 SEO Strategie-Report: ${config.projectName}\n\n`; + md += `> Erstellt am **${dateStr}** von der **@mintel/seo-engine**\n\n`; + + md += `## Zusammenfassung auf einen Blick\n\n`; + md += `| Metrik | Wert |\n`; + md += `|--------|------|\n`; + md += `| Keywords gefunden | **${allKeywords.length}** |\n`; + md += `| Topic Clusters | **${output.topicClusters.length}** |\n`; + md += `| Konkurrenz-Rankings analysiert | **${output.competitorRankings.length}** |\n`; + md += `| Konkurrenz-Briefings erstellt | **${Object.keys(output.competitorBriefings).length}** |\n`; + md += `| Content Gaps identifiziert | **${output.contentGaps.length}** |\n`; + md += `| Autocomplete-Vorschläge | **${output.autocompleteSuggestions.length}** |\n`; + md += `| Verworfene Begriffe | **${output.discardedTerms.length}** |\n\n`; + + // ══════════════════════════════════════════════ + // Section 1: Keywords zum Tracken + // ══════════════════════════════════════════════ + md += `---\n\n`; + md += `## 🎯 Keywords zum Tracken\n\n`; + md += `Diese Keywords sind relevant für das Projekt und sollten in einem Ranking-Tracker (z.B. Serpbear) beobachtet werden:\n\n`; + md += `| # | Keyword | Intent | Relevanz | Cluster |\n`; + md += `|---|---------|--------|----------|--------|\n`; + + let kwIndex = 1; + for (const cluster of output.topicClusters) { + md += `| ${kwIndex++} | **${cluster.primaryKeyword}** | — | 🏆 Primary | ${cluster.clusterName} |\n`; + for (const kw of cluster.secondaryKeywords) { + const intentEmoji = + kw.intent === "transactional" + ? "💰" + : kw.intent === "commercial" + ? "🛒" + : kw.intent === "navigational" + ? 
"🧭" + : "📖"; + md += `| ${kwIndex++} | ${kw.term} | ${intentEmoji} ${kw.intent} | ${kw.relevanceScore}/10 | ${cluster.clusterName} |\n`; + } + } + + // ══════════════════════════════════════════════ + // Section 2: Topic Clusters + // ══════════════════════════════════════════════ + md += `\n---\n\n`; + md += `## 🗂️ Topic Clusters\n\n`; + md += `Die SEO Engine hat die Keywords automatisch in thematische Cluster gruppiert. Jeder Cluster sollte idealerweise durch eine **Pillar Page** und mehrere **Sub-Pages** abgedeckt werden.\n\n`; + + for (const cluster of output.topicClusters) { + md += `### ${cluster.clusterName}\n\n`; + md += `- **Pillar Keyword:** \`${cluster.primaryKeyword}\`\n`; + md += `- **User Intent:** ${cluster.userIntent}\n`; + md += `- **Sub-Keywords:** ${cluster.secondaryKeywords.map((k) => `\`${k.term}\``).join(", ")}\n\n`; + } + + // ══════════════════════════════════════════════ + // Section 3: Konkurrenz-Landscape + // ══════════════════════════════════════════════ + if (output.competitorRankings.length > 0) { + md += `---\n\n`; + md += `## 🏁 Konkurrenz-Landscape\n\n`; + md += `Für die wichtigsten Keywords wurde geprüft, welche Konkurrenten aktuell bei Google ranken:\n\n`; + md += `| Keyword | Konkurrent | Position | Titel |\n`; + md += `|---------|-----------|----------|-------|\n`; + + for (const r of output.competitorRankings) { + md += `| ${r.keyword} | **${r.domain}** | #${r.position} | ${r.title.slice(0, 60)}${r.title.length > 60 ? "…" : ""} |\n`; + } + md += `\n`; + } + + // ══════════════════════════════════════════════ + // Section 4: Competitor Briefings + // ══════════════════════════════════════════════ + if (Object.keys(output.competitorBriefings).length > 0) { + md += `---\n\n`; + md += `## 🔬 Konkurrenz-Briefings (Reverse Engineered)\n\n`; + md += `Für die folgenden Keywords wurde der aktuelle **Platz-1-Ranker** automatisch gescraped und analysiert. Diese Briefings zeigen exakt, was ein Artikel abdecken muss, um die Konkurrenz zu schlagen:\n\n`; + + for (const [keyword, briefing] of Object.entries( + output.competitorBriefings, + )) { + const b = briefing as ReverseEngineeredBriefing; + md += `### Keyword: \`${keyword}\`\n\n`; + md += `- **Format:** ${b.contentFormat}\n`; + md += `- **Ziel-Wortanzahl:** ~${b.recommendedWordCount}\n`; + md += `- **Kernthemen:** ${b.coreTopicsToCover.join("; ")}\n`; + md += `- **Wichtige Entitäten:** ${b.entitiesToInclude.map((e) => `\`${e}\``).join(", ")}\n\n`; + } + } + + // ══════════════════════════════════════════════ + // Section 5: Content Gaps — Action Plan + // ══════════════════════════════════════════════ + if (output.contentGaps.length > 0) { + md += `---\n\n`; + md += `## 🚧 Content Gaps — Fehlende Seiten\n\n`; + md += `Die folgenden Seiten existieren auf der Website noch **nicht**, werden aber von der Zielgruppe aktiv gesucht. 
Sie sind nach Priorität sortiert:\n\n`; + + const highGaps = output.contentGaps.filter((g) => g.priority === "high"); + const medGaps = output.contentGaps.filter((g) => g.priority === "medium"); + const lowGaps = output.contentGaps.filter((g) => g.priority === "low"); + + if (highGaps.length > 0) { + md += `### 🔴 Hohe Priorität (direkt umsatzrelevant)\n\n`; + for (const g of highGaps) { + md += `- **${g.recommendedTitle}**\n`; + md += ` - Keyword: \`${g.targetKeyword}\` · Cluster: ${g.relatedCluster}\n`; + md += ` - ${g.rationale}\n\n`; + } + } + + if (medGaps.length > 0) { + md += `### 🟡 Mittlere Priorität (stärkt Autorität)\n\n`; + for (const g of medGaps) { + md += `- **${g.recommendedTitle}**\n`; + md += ` - Keyword: \`${g.targetKeyword}\` · Cluster: ${g.relatedCluster}\n`; + md += ` - ${g.rationale}\n\n`; + } + } + + if (lowGaps.length > 0) { + md += `### 🟢 Niedrige Priorität (Top-of-Funnel)\n\n`; + for (const g of lowGaps) { + md += `- **${g.recommendedTitle}**\n`; + md += ` - Keyword: \`${g.targetKeyword}\` · Cluster: ${g.relatedCluster}\n`; + md += ` - ${g.rationale}\n\n`; + } + } + } + + // ══════════════════════════════════════════════ + // Section 6: Autocomplete Insights + // ══════════════════════════════════════════════ + if (output.autocompleteSuggestions.length > 0) { + md += `---\n\n`; + md += `## 💡 Google Autocomplete — Long-Tail Insights\n\n`; + md += `Diese Begriffe stammen direkt aus der Google-Suchleiste und spiegeln echtes Nutzerverhalten wider. Sie eignen sich besonders für **FAQ-Sektionen**, **H2/H3-Überschriften** und **Long-Tail Content**:\n\n`; + + for (const s of output.autocompleteSuggestions) { + md += `- ${s}\n`; + } + md += `\n`; + } + + // ══════════════════════════════════════════════ + // Section 7: Verworfene Begriffe + // ══════════════════════════════════════════════ + if (output.discardedTerms.length > 0) { + md += `---\n\n`; + md += `## 🗑️ Verworfene Begriffe\n\n`; + md += `Die folgenden Begriffe wurden von der KI als **nicht relevant** eingestuft:\n\n`; + md += `
<details>\n<summary>Alle ${output.discardedTerms.length} verworfenen Begriffe anzeigen</summary>\n\n`;
+    for (const t of output.discardedTerms) {
+      md += `- ${t}\n`;
+    }
+    md += `\n</details>
\n\n`; + } + + // ══════════════════════════════════════════════ + // Section 8: Copy-Paste Snippets + // ══════════════════════════════════════════════ + md += `---\n\n`; + md += `## 📋 Copy-Paste Snippets\n\n`; + md += `Diese Listen sind optimiert für das schnelle Kopieren in SEO-Tools oder Tabellen.\n\n`; + + md += `### Rank-Tracker (z.B. Serpbear) — Ein Keyword pro Zeile\n`; + md += `\`\`\`text\n`; + md += allKeywords.join("\n"); + md += `\n\`\`\`\n\n`; + + md += `### Excel / Google Sheets — Kommagetrennt\n`; + md += `\`\`\`text\n`; + md += allKeywords.join(", "); + md += `\n\`\`\`\n\n`; + + md += `### Pillar-Keywords (Nur Primary Keywords)\n`; + md += `\`\`\`text\n`; + md += output.topicClusters.map((c) => c.primaryKeyword).join("\n"); + md += `\n\`\`\`\n\n`; + + // ══════════════════════════════════════════════ + // Footer + // ══════════════════════════════════════════════ + md += `---\n\n`; + md += `*Dieser Report wurde automatisch von der @mintel/seo-engine generiert. Alle Daten basieren auf echten Google-Suchergebnissen (via Serper API) und wurden durch ein LLM ausgewertet.*\n`; + + // Write to disk + const outDir = path.resolve(process.cwd(), config.outputDir); + await fs.mkdir(outDir, { recursive: true }); + const filename = + config.filename || + `seo-report-${config.projectName.toLowerCase().replace(/\s+/g, "-")}.md`; + const filePath = path.join(outDir, filename); + await fs.writeFile(filePath, md, "utf8"); + console.log(`[Report] Written SEO Strategy Report to: ${filePath}`); + return filePath; +} diff --git a/packages/seo-engine/src/steps/content-gap.ts b/packages/seo-engine/src/steps/content-gap.ts new file mode 100644 index 0000000..ae3e8f2 --- /dev/null +++ b/packages/seo-engine/src/steps/content-gap.ts @@ -0,0 +1,84 @@ +import { llmJsonRequest } from "../llm-client.js"; +import type { TopicCluster } from "../types.js"; + +export interface ExistingPage { + url: string; + title: string; +} + +export interface ContentGap { + recommendedTitle: string; + targetKeyword: string; + relatedCluster: string; + priority: "high" | "medium" | "low"; + rationale: string; +} + +const CONTENT_GAP_SYSTEM_PROMPT = ` +You are a senior SEO Content Strategist. Your job is to compare a set of TOPIC CLUSTERS +(keywords the company should rank for) against the EXISTING PAGES on their website. + +### OBJECTIVE: +Identify content gaps — topics/keywords that have NO corresponding page yet. +For each gap, recommend a page title, the primary target keyword, which cluster it belongs to, +and a priority (high/medium/low) based on commercial intent and relevance. + +### RULES: +- Only recommend gaps for topics that are genuinely MISSING from the existing pages. +- Do NOT recommend pages that already exist (even if the title is slightly different — use semantic matching). +- Priority "high" = commercial/transactional intent, directly drives revenue. +- Priority "medium" = informational with strong industry relevance. +- Priority "low" = broad, top-of-funnel topics. +- LANGUAGE: Match the language of the project context (if German context, recommend German titles). 
+
+### OUTPUT FORMAT:
+{
+  "contentGaps": [
+    {
+      "recommendedTitle": "string",
+      "targetKeyword": "string",
+      "relatedCluster": "string",
+      "priority": "high" | "medium" | "low",
+      "rationale": "string"
+    }
+  ]
+}
+`;
+
+export async function analyzeContentGaps(
+  topicClusters: TopicCluster[],
+  existingPages: ExistingPage[],
+  config: { openRouterApiKey: string; model?: string },
+): Promise<ContentGap[]> {
+  if (topicClusters.length === 0) return [];
+  if (existingPages.length === 0) {
+    console.log(
+      "[Content Gap] No existing pages provided, skipping gap analysis.",
+    );
+    return [];
+  }
+
+  const userPrompt = `
+TOPIC CLUSTERS (what the company SHOULD rank for):
+${JSON.stringify(topicClusters, null, 2)}
+
+EXISTING PAGES ON THE WEBSITE:
+${existingPages.map((p, i) => `${i + 1}. "${p.title}" — ${p.url}`).join("\n")}
+
+Identify ALL content gaps. Be thorough but precise.
+`;
+
+  try {
+    const { data } = await llmJsonRequest<{ contentGaps: ContentGap[] }>({
+      model: config.model || "google/gemini-2.5-pro",
+      apiKey: config.openRouterApiKey,
+      systemPrompt: CONTENT_GAP_SYSTEM_PROMPT,
+      userPrompt,
+    });
+
+    return data.contentGaps || [];
+  } catch (err) {
+    console.error("[Content Gap] Analysis failed:", (err as Error).message);
+    return [];
+  }
+}
diff --git a/packages/seo-engine/src/test-run.ts b/packages/seo-engine/src/test-run.ts
new file mode 100644
index 0000000..5ebd6ed
--- /dev/null
+++ b/packages/seo-engine/src/test-run.ts
@@ -0,0 +1,53 @@
+import * as dotenv from "dotenv";
+import { runSeoEngine, createGapDrafts, generateSeoReport } from "./index.js";
+
+dotenv.config({ path: "../../.env" });
+dotenv.config({ path: "../../apps/web/.env" });
+
+async function testSeoEngine() {
+  console.log("Starting SEO Engine test run...\n");
+  const result = await runSeoEngine(
+    {
+      companyName: "KLZ Cables",
+      industry: "Mittelspannungskabel, Kabeltiefbau, Spezialkabel",
+      briefing:
+        "KLZ Cables is a B2B provider of specialized medium-voltage cables.
We do NOT do low voltage or generic home cables.", + targetAudience: "B2B Einkäufer, Bauleiter, Netzbetreiber", + competitors: ["nkt.de", "faberkabel.de"], + seedKeywords: ["NA2XS2Y", "VPE-isoliert"], + existingPages: [ + { url: "/produkte", title: "Produkte" }, + { url: "/kontakt", title: "Kontakt" }, + { url: "/ueber-uns", title: "Über uns" }, + ], + locale: { gl: "de", hl: "de" }, + }, + { + serperApiKey: process.env.SERPER_API_KEY || "", + openRouterApiKey: process.env.OPENROUTER_API_KEY || "", + model: "google/gemini-2.5-pro", + maxKeywords: 20, + }, + ); + + // Generate the SEO Strategy Report + console.log("\n=== GENERATING SEO STRATEGY REPORT ==="); + const reportPath = await generateSeoReport(result, { + projectName: "KLZ Cables", + outputDir: ".seo-output", + }); + console.log(`Report saved to: ${reportPath}`); + + // Generate MDX drafts + console.log("\n=== GENERATING MDX DRAFTS ==="); + const generatedFiles = await createGapDrafts( + result.contentGaps, + new Map(Object.entries(result.competitorBriefings)), + { outputDir: ".seo-output/drafts", authorName: "KLZ Content Team" }, + ); + console.log( + `Generated ${generatedFiles.length} MDX files in .seo-output/drafts/`, + ); +} + +testSeoEngine().catch(console.error); diff --git a/packages/seo-engine/src/test-serper.ts b/packages/seo-engine/src/test-serper.ts new file mode 100644 index 0000000..e37da31 --- /dev/null +++ b/packages/seo-engine/src/test-serper.ts @@ -0,0 +1,38 @@ +import * as dotenv from "dotenv"; +import axios from "axios"; + +dotenv.config({ path: "../../.env" }); +dotenv.config({ path: "../../apps/web/.env" }); + +async function testSerper() { + const query = "Mittelspannungskabel"; + const apiKey = process.env.SERPER_API_KEY || ""; + + if (!apiKey) { + console.error("Missing SERPER_API_KEY"); + return; + } + + try { + const response = await axios.post( + "https://google.serper.dev/search", + { + q: query, + gl: "de", + hl: "de", + }, + { + headers: { + "X-API-KEY": apiKey, + "Content-Type": "application/json", + }, + }, + ); + + console.log(JSON.stringify(response.data, null, 2)); + } catch (error) { + console.error("Error:", error); + } +} + +testSerper(); diff --git a/packages/seo-engine/src/types.ts b/packages/seo-engine/src/types.ts new file mode 100644 index 0000000..05ba9f5 --- /dev/null +++ b/packages/seo-engine/src/types.ts @@ -0,0 +1,59 @@ +export interface ProjectContext { + companyName?: string; + industry?: string; + briefing?: string; + targetAudience?: string; + competitors?: string[]; + seedKeywords?: string[]; + existingPages?: { url: string; title: string }[]; + customGuidelines?: string; + locale?: { gl: string; hl: string }; +} + +export interface SeoConfig { + serperApiKey?: string; + openRouterApiKey?: string; + model?: string; + maxKeywords?: number; +} + +export interface KeywordResult { + term: string; + intent: "informational" | "navigational" | "commercial" | "transactional"; + relevanceScore: number; // 1-10 + rationale: string; + estimatedVolume?: "high" | "medium" | "low"; +} + +export interface TopicCluster { + clusterName: string; + primaryKeyword: string; + secondaryKeywords: KeywordResult[]; + userIntent: string; +} + +export interface CompetitorRanking { + keyword: string; + domain: string; + position: number; + title: string; + snippet: string; + link: string; +} + +export interface ContentGap { + recommendedTitle: string; + targetKeyword: string; + relatedCluster: string; + priority: "high" | "medium" | "low"; + rationale: string; +} + +export interface SeoEngineOutput { + 
+  topicClusters: TopicCluster[];
+  competitorRankings: CompetitorRanking[];
+  contentGaps: ContentGap[];
+  autocompleteSuggestions: string[];
+  discardedTerms: string[];
+  competitorBriefings: Record<string, any>; // Map targetKeyword to ReverseEngineeredBriefing
+}
diff --git a/packages/seo-engine/tsconfig.json b/packages/seo-engine/tsconfig.json
new file mode 100644
index 0000000..d277213
--- /dev/null
+++ b/packages/seo-engine/tsconfig.json
@@ -0,0 +1,19 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "target": "ES2022",
+    "lib": ["ES2022", "DOM"],
+    "outDir": "dist",
+    "rootDir": "src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "**/*.test.ts"]
+}
diff --git a/packages/seo-engine/tsup.config.ts b/packages/seo-engine/tsup.config.ts
new file mode 100644
index 0000000..30fd6b6
--- /dev/null
+++ b/packages/seo-engine/tsup.config.ts
@@ -0,0 +1,9 @@
+import { defineConfig } from "tsup";
+
+export default defineConfig({
+  entry: ["src/index.ts"],
+  format: ["esm"],
+  dts: true,
+  clean: true,
+  target: "es2022",
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 4f083c2..36691b2 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -692,6 +692,40 @@ importers:
         specifier: ^5.6.3
         version: 5.9.3
 
+  packages/seo-engine:
+    dependencies:
+      axios:
+        specifier: ^1.7.9
+        version: 1.13.5
+      cheerio:
+        specifier: 1.0.0-rc.12
+        version: 1.0.0-rc.12
+      dotenv:
+        specifier: ^16.4.7
+        version: 16.6.1
+    devDependencies:
+      '@mintel/eslint-config':
+        specifier: workspace:*
+        version: link:../eslint-config
+      '@mintel/tsconfig':
+        specifier: workspace:*
+        version: link:../tsconfig
+      '@types/node':
+        specifier: ^20.17.17
+        version: 20.19.33
+      tsup:
+        specifier: ^8.3.6
+        version: 8.5.1(@swc/core@1.15.11(@swc/helpers@0.5.18))(jiti@2.6.1)(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2)
+      tsx:
+        specifier: ^4.19.2
+        version: 4.21.0
+      typescript:
+        specifier: ^5.7.3
+        version: 5.9.3
+      vitest:
+        specifier: ^3.0.5
+        version: 3.2.4(@types/debug@4.1.12)(@types/node@20.19.33)(@vitest/ui@4.0.18(vitest@4.0.18))(happy-dom@20.5.3)(jsdom@27.4.0(canvas@3.2.1))(lightningcss@1.30.2)(sass@1.97.3)(terser@5.46.0)
+
   packages/thumbnail-generator:
     dependencies:
       replicate: