From 02bffbc67f691d87452386491194f5e2bbf6a7d9 Mon Sep 17 00:00:00 2001 From: Marc Mintel Date: Sun, 22 Feb 2026 17:43:37 +0100 Subject: [PATCH] feat(journaling): implement secondary LLM validation for YouTube video selection --- packages/journaling/src/agent.ts | 58 ++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/packages/journaling/src/agent.ts b/packages/journaling/src/agent.ts index 074727d..40782b7 100644 --- a/packages/journaling/src/agent.ts +++ b/packages/journaling/src/agent.ts @@ -204,7 +204,7 @@ Return a JSON object with a single string field "query". Example: {"query": "cor { role: "user", content: `CONTEXT: ${topic}`, - } + }, ], response_format: { type: "json_object" }, }); @@ -226,9 +226,15 @@ Return a JSON object with a single string field "query". Example: {"query": "cor } // Filter for youtube results - const ytVideos = videos.filter( - (v) => v.link && v.link.includes("youtube.com/watch"), - ); + const ytVideos = videos + .filter( + (v) => + v.link && + v.link.includes("youtube.com/watch") && + v.title && + v.channel, + ) + .slice(0, 5); // Take top 5 for evaluation if (ytVideos.length === 0) { console.warn(`⚠️ [Serper] No YouTube videos in search results.`); @@ -236,8 +242,46 @@ Return a JSON object with a single string field "query". Example: {"query": "cor return []; } - // Pick the best one (usually the first result) - const bestVideo = ytVideos[0]; + // Step 3: Ask the LLM to evaluate the relevance of the found videos + const evalPrompt = `You are a strict technical evaluator. You must select the MOST RELEVANT educational tech video from the list below based on this context: "${topic.slice(0, 500)}..." + +Videos: +${ytVideos.map((v, i) => `[ID: ${i}] Title: "${v.title}" | Channel: "${v.channel}" | Snippet: "${v.snippet || "none"}"`).join("\n")} + +RULES: +1. The video MUST be highly relevant to the context. +2. The channel SHOULD be a tech, development, or professional business channel (avoid gaming, vlogs, unrelated topics). +3. If none are truly relevant, return -1. +4. If one is highly relevant, return its ID number. + +Return ONLY a JSON object: {"bestVideoId": number}`; + + const evalResponse = await this.openai.chat.completions.create({ + model: "google/gemini-2.5-flash", + messages: [{ role: "system", content: evalPrompt }], + response_format: { type: "json_object" }, + }); + + let bestIdx = -1; + try { + const evalParsed = JSON.parse( + evalResponse.choices[0].message.content || '{"bestVideoId": -1}', + ); + bestIdx = evalParsed.bestVideoId; + } catch (e) { + console.warn("Failed to parse video evaluation response"); + } + + if (bestIdx < 0 || bestIdx >= ytVideos.length) { + console.warn(`⚠️ [Serper] LLM rejected all videos as irrelevant.`); + if (retries > 0) return this.fetchRealSocialPosts(topic, retries - 1); + return []; + } + + const bestVideo = ytVideos[bestIdx]; + console.log( + `✅ [Serper] AI selected video: ${bestVideo.title} (Channel: ${bestVideo.channel})`, + ); // Extract the 11-char video ID from the link (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ) const urlObj = new URL(bestVideo.link); @@ -299,7 +343,7 @@ CRITICAL: Do NOT provide more than 2 trendsKeywords. Keep it extremely focused.` try { let parsed = JSON.parse( response.choices[0].message.content || - '{"trendsKeywords": [], "dcVariables": []}', + '{"trendsKeywords": [], "dcVariables": []}', ); if (Array.isArray(parsed)) { parsed = parsed[0] || { trendsKeywords: [], dcVariables: [] };