From 02bffbc67f691d87452386491194f5e2bbf6a7d9 Mon Sep 17 00:00:00 2001
From: Marc Mintel <marc@mintel.me>
Date: Sun, 22 Feb 2026 17:43:37 +0100
Subject: [PATCH] feat(journaling): implement secondary LLM validation for
 YouTube video selection

---
 packages/journaling/src/agent.ts | 58 ++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/packages/journaling/src/agent.ts b/packages/journaling/src/agent.ts
index 074727d..40782b7 100644
--- a/packages/journaling/src/agent.ts
+++ b/packages/journaling/src/agent.ts
@@ -204,7 +204,7 @@ Return a JSON object with a single string field "query". Example: {"query": "cor
         {
           role: "user",
           content: `CONTEXT: ${topic}`,
-        }
+        },
       ],
       response_format: { type: "json_object" },
     });
@@ -226,9 +226,15 @@ Return a JSON object with a single string field "query". Example: {"query": "cor
       }
 
       // Filter for youtube results
-      const ytVideos = videos.filter(
-        (v) => v.link && v.link.includes("youtube.com/watch"),
-      );
+      const ytVideos = videos
+        .filter(
+          (v) =>
+            v.link &&
+            v.link.includes("youtube.com/watch") &&
+            v.title &&
+            v.channel,
+        )
+        .slice(0, 5); // Take top 5 for evaluation
 
       if (ytVideos.length === 0) {
         console.warn(`⚠️ [Serper] No YouTube videos in search results.`);
@@ -236,8 +242,46 @@ Return a JSON object with a single string field "query". Example: {"query": "cor
         return [];
       }
 
-      // Pick the best one (usually the first result)
-      const bestVideo = ytVideos[0];
+      // Step 3: Ask the LLM to evaluate the relevance of the found videos
+      const evalPrompt = `You are a strict technical evaluator. You must select the MOST RELEVANT educational tech video from the list below based on this context: "${topic.slice(0, 500)}..."
+
+Videos:
+${ytVideos.map((v, i) => `[ID: ${i}] Title: "${v.title}" | Channel: "${v.channel}" | Snippet: "${v.snippet || "none"}"`).join("\n")}
+
+RULES:
+1. The video MUST be highly relevant to the context.
+2. The channel SHOULD be a tech, development, or professional business channel (avoid gaming, vlogs, unrelated topics).
+3. If none are truly relevant, return -1.
+4. If one is highly relevant, return its ID number.
+
+Return ONLY a JSON object: {"bestVideoId": number}`;
+
+      const evalResponse = await this.openai.chat.completions.create({
+        model: "google/gemini-2.5-flash",
+        messages: [{ role: "system", content: evalPrompt }],
+        response_format: { type: "json_object" },
+      });
+
+      let bestIdx = -1; // -1 means "no acceptable video"; kept when the reply cannot be parsed
+      try {
+        const evalParsed = JSON.parse(
+          evalResponse.choices[0].message.content || '{"bestVideoId": -1}',
+        );
+        bestIdx = evalParsed.bestVideoId;
+      } catch (e) {
+        console.warn("Failed to parse video evaluation response");
+      }
+      // A missing/non-integer bestVideoId passes both range checks (comparisons with undefined/NaN are false), so test it explicitly.
+      if (!Number.isInteger(bestIdx) || bestIdx < 0 || bestIdx >= ytVideos.length) {
+        console.warn(`⚠️ [Serper] LLM rejected all videos as irrelevant.`);
+        if (retries > 0) return this.fetchRealSocialPosts(topic, retries - 1);
+        return [];
+      }
+
+      const bestVideo = ytVideos[bestIdx];
+      console.log(
+        `✅ [Serper] AI selected video: ${bestVideo.title} (Channel: ${bestVideo.channel})`,
+      );
 
       // Extract the 11-char video ID from the link (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)
       const urlObj = new URL(bestVideo.link);
@@ -299,7 +343,7 @@ CRITICAL: Do NOT provide more than 2 trendsKeywords. Keep it extremely focused.`
     try {
       let parsed = JSON.parse(
         response.choices[0].message.content ||
-        '{"trendsKeywords": [], "dcVariables": []}',
+          '{"trendsKeywords": [], "dcVariables": []}',
       );
       if (Array.isArray(parsed)) {
         parsed = parsed[0] || { trendsKeywords: [], dcVariables: [] };