feat(ai-search): optimize dev server, add qdrant boot sync, fix orb overflow

2026-03-06 22:35:48 +01:00
parent 81ce3a4588
commit 4dcdb717f0
16 changed files with 1981 additions and 380 deletions
--- a/app/api/ai-search/route.ts
+++ b/app/api/ai-search/route.ts
@@ -3,16 +3,33 @@ import { searchProducts } from '../../../src/lib/qdrant';
 import redis from '../../../src/lib/redis';
 import { z } from 'zod';
 import * as Sentry from '@sentry/nextjs';
+export const dynamic = 'force-dynamic';
+export const maxDuration = 60; // Max allowed duration (Vercel)
+
 // Config and constants
-const RATE_LIMIT_POINTS = 5; // 5 requests
-const RATE_LIMIT_DURATION = 60 * 1; // per 1 minute
+const RATE_LIMIT_POINTS = 20; // 20 requests per minute
+const RATE_LIMIT_DURATION = 60; // 1 minute window
+const DAILY_BUDGET_LIMIT = 200; // max 200 requests per IP per day
+const DAILY_BUDGET_DURATION = 60 * 60 * 24; // 24h
+const MAX_CONVERSATION_MESSAGES = 20; // max messages in context
+const MAX_RESPONSE_TOKENS = 300; // cap AI response length — keeps it chat-like

 // Removed requestSchema as it's replaced by direct parsing

 export async function POST(req: NextRequest) {
  // Changed req type to NextRequest
  try {
-    const { messages, visitorId, honeypot } = await req.json();
+    let body: any;
+    try {
+      body = await req.json();
+    } catch {
+      return NextResponse.json({ error: 'Invalid request body' }, { status: 400 });
+    }
+    const { messages, honeypot } = body;
+
+    // Get client IP for rate limiting
+    const forwarded = req.headers.get('x-forwarded-for');
+    const clientIp = forwarded?.split(',')[0]?.trim() || req.headers.get('x-real-ip') || 'unknown';

    // 1. Basic Validation
    if (!messages || !Array.isArray(messages) || messages.length === 0) {
@@ -38,35 +55,50 @@ export async function POST(req: NextRequest) {
      });
    }

-    // 3. Rate Limiting via Redis
+    // 3. Rate Limiting via Redis (IP-based)
    try {
-      if (visitorId) {
-        const requestCount = await redis.incr(`ai_search_rate_limit:${visitorId}`);
-        if (requestCount === 1) {
-          await redis.expire(`ai_search_rate_limit:${visitorId}`, RATE_LIMIT_DURATION); // Use constant
-        }
+      // Per-minute burst limit
+      const minuteKey = `ai_rate:${clientIp}:min`;
+      const minuteCount = await redis.incr(minuteKey);
+      if (minuteCount === 1) await redis.expire(minuteKey, RATE_LIMIT_DURATION);

-        if (requestCount > RATE_LIMIT_POINTS) {
-          // Use constant
-          return NextResponse.json(
-            {
-              error: 'Rate limit exceeded. Please try again later.',
-            },
-            { status: 429 },
-          );
-        }
+      if (minuteCount > RATE_LIMIT_POINTS) {
+        return NextResponse.json(
+          { error: 'Zu viele Anfragen. Bitte warte einen Moment.' },
+          { status: 429 },
+        );
+      }
+
+      // Daily budget limit
+      const dayKey = `ai_rate:${clientIp}:day`;
+      const dayCount = await redis.incr(dayKey);
+      if (dayCount === 1) await redis.expire(dayKey, DAILY_BUDGET_DURATION);
+
+      if (dayCount > DAILY_BUDGET_LIMIT) {
+        return NextResponse.json(
+          { error: 'Tägliches Limit erreicht. Bitte versuche es morgen erneut.' },
+          { status: 429 },
+        );
      }
    } catch (redisError) {
-      // Renamed variable for clarity
-      console.error('Redis Rate Limiting Error:', redisError); // Changed to error for consistency
+      console.error('Redis Rate Limiting Error:', redisError);
      Sentry.captureException(redisError, { tags: { context: 'ai-search-rate-limit' } });
      // Fail open if Redis is down
    }

+    // 4. Cap conversation length to limit token usage
+    const cappedMessages = messages.slice(-MAX_CONVERSATION_MESSAGES);
+
    // 4. Fetch Context from Qdrant based on the latest message
    let contextStr = '';
    let foundProducts: any[] = [];

+    // Team context — hardcoded from translation data (no Payload collection for team)
+    const teamContextStr = `
+Das ECHTE KLZ Team:
+- Michael Bodemer (Geschäftsführer) — Der Macher, packt an wenn es kompliziert wird, kennt Kabelnetze in- und auswendig
+- Klaus Mintel (Geschäftsführer) — Der Fels in der Brandung, jahrzehntelange Erfahrung, stabiles Netzwerk`;
+
    try {
      const searchResults = await searchProducts(latestMessage, 5);

@@ -85,50 +117,69 @@ export async function POST(req: NextRequest) {

        foundProducts = searchResults
          .filter((p) => (p.payload?.type === 'product' || !p.payload?.type) && p.payload?.data)
-          .map((p: any) => p.payload?.data);
+          .map((p: any) => ({
+            id: p.id as string,
+            title: p.payload?.data?.title as string,
+            sku: p.payload?.data?.sku as string,
+            slug: p.payload?.data?.slug as string,
+          }));
      }
-    } catch (e) {
-      console.error('Qdrant Search Error:', e);
-      Sentry.captureException(e, { tags: { context: 'ai-search-qdrant' } });
+    } catch (searchError) {
+      console.error('Qdrant Search Error:', searchError);
+      Sentry.captureException(searchError, { tags: { context: 'ai-search-qdrant' } });
      // We can still proceed without context if Qdrant fails
    }

    // 5. Generate AI Response via OpenRouter (Mistral for DSGVO)
-    const systemPrompt = `Du bist ein professioneller und extrem kompetenter Sales-Engineer / Consultant der Firma "KLZ Cables".
-Deine Aufgabe ist es, Kunden und Interessenten bei der Auswahl von Mittelspannungskabeln, Starkstromkabeln und Infrastrukturausrüstung beratend zur Seite zu stehen.
+    const systemPrompt = `Du bist "Ohm" — der digitale KI-Berater von KLZ Cables. Dein Name ist eine Anspielung auf die Einheit des elektrischen Widerstands.

-WICHTIGE REGELN:
-1. ANTWORTE IMMER IN DER SPRACHE DES BENUTZERS. Wenn der Benutzer Deutsch spricht, antworte auf Deutsch.
-2. Wenn der Kunde vage ist (z.B. "Ich will einen Windpark bauen"), würge ihn NICHT ab. Stelle stattdessen gezielte, professionelle Rückfragen als Berater (z.B. "Für einen Windpark benötigen wir einige Rahmendaten: Reden wir über die Parkverkabelung (Mittelspannung, z.B. 20kV oder 33kV) oder die Netzanbindung? Welche Querschnitte oder Ströme erwarten Sie?").
-3. Nutze das bereitgestellte KABELWISSEN und KATALOG-Gedächtnis unten, um deine Antworten zu fundieren.
-4. Bleibe stets professionell, lösungsorientiert und leicht technisch (Industrial Aesthetic). Du kannst humorvoll sein, wenn der Nutzer offensichtlich Quatsch fragt, aber lenke es immer elegant zurück zu Kabeln oder Energieinfrastruktur.
-5. Antworte in reinem Text (kein Markdown für die Antwort, es sei denn es sind einfache Absätze oder Listen).
-6. Wenn genügend Informationen vorhanden sind, präsentiere passende Kabel aus dem Katalog.
-7. Oute dich als Berater von KLZ Cables.
+STIL & PERSÖNLICHKEIT:
+- Antworte KURZ, KNAPP und PROFESSIONELL (maximal 2-3 Sätze).
+- Schreibe wie in einem lockeren, aber kompetenten B2B-Chat (Du-Form ist okay, aber fachlich top).
+- Kein Markdown, nur Fließtext.
+- NIEMALS Platzhalter wie [Ihr Name], [Name], [Firma] verwenden.

-VERFÜGBARER KONTEXT:
-${contextStr ? contextStr : 'Keine spezifischen Katalogdaten für diese Anfrage gefunden.'}
+DEINE HAUPTAUFGABE: BERATEN, NICHT AUSFRAGEN!
+- Wenn der Kunde ein Projekt nennt (z.B. "Windpark 30kV"), dann lies im KONTEXT nach, welche Kabel passen, und EMPFIEHL SIE DIREKT! (z.B. "Für 30kV Windparks nehmen wir meistens NA2XS(F)2Y.").
+- Stelle NIEMALS mehr als EINE Rückfrage pro Nachricht.
+- FRAGE NICHT nach abstrakten Dingen wie "Welchen Kabeltyp brauchst du?" -> DAS IST DEIN JOB, IHM DAS ZU SAGEN!
+- FRAGE NICHT nach Längen oder genauen Trassen, es sei denn, der Kunde hat schon ganz klar gesagt, was er kaufen will.
+- Biete aktiv Hilfe an: "Ich kann dir die passenden Querschnitte raussuchen, wenn du willst."
+
+VORGEHEN:
+1. Prüfe den KONTEXT auf passende Kabel für das Kundenprojekt.
+2. Nenne direkt 1-2 passende Produktserien aus dem Kontext, die für diesen Fall Sinn machen.
+3. Biete eine konkrete Hilfestellung an (z.B. Leitungsberechnung, Verfügbarkeitsprüfung) ODER stelle EINE einzige fachliche Rückfrage, um das Kabel weiter einzugrenzen (z.B. Alu oder Kupfer?).
+4. Wenn das Projekt klar ist und die Kabeltypen besprochen sind, frag nach, ob ein Kollege (z.B. Micha) ein konkretes Angebot machen soll.
+
+GRENZEN:
+- PRIVAT-ANFRAGEN: B2B only. Private Hausinstallationen lehnen wir freundlich ab.
+- Keine Preise oder genauen Lieferzeiten versprechen. Immer auf die menschlichen Kollegen verweisen für finale Angebote.
+
+KONTEXT KABEL & TEAM:
+${contextStr || 'Kein Katalogkontext verfügbar.'}
+${teamContextStr}
 `;

-    const openRouterKey = process.env.OPENROUTER_API_KEY;
-    if (!openRouterKey) {
-      throw new Error('OPENROUTER_API_KEY is not set');
+    const mistralKey = process.env.MISTRAL_API_KEY;
+    if (!mistralKey) {
+      throw new Error('MISTRAL_API_KEY is not set');
    }

-    const fetchRes = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+    // DSGVO: Mistral AI API direkt (EU/Frankreich) statt OpenRouter (US)
+    const fetchRes = await fetch('https://api.mistral.ai/v1/chat/completions', {
      method: 'POST',
      headers: {
-        Authorization: `Bearer ${openRouterKey}`,
+        Authorization: `Bearer ${mistralKey}`,
        'Content-Type': 'application/json',
-        'HTTP-Referer': process.env.NEXT_PUBLIC_BASE_URL || 'https://klz-cables.com',
-        'X-Title': 'KLZ Cables Search AI',
      },
      body: JSON.stringify({
-        model: 'mistralai/mistral-large-2407',
+        model: 'ministral-8b-latest',
        temperature: 0.3,
+        max_tokens: MAX_RESPONSE_TOKENS,
        messages: [
          { role: 'system', content: systemPrompt },
-          ...messages.map((m: any) => ({
+          ...cappedMessages.map((m: any) => ({
            role: m.role,
            content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
          })),
@@ -138,7 +189,19 @@ ${contextStr ? contextStr : 'Keine spezifischen Katalogdaten für diese Anfrage

    if (!fetchRes.ok) {
      const errBody = await fetchRes.text();
-      throw new Error(`OpenRouter API Error: ${errBody}`);
+      console.error('Mistral API Error:', errBody);
+      Sentry.captureException(new Error(`Mistral ${fetchRes.status}: ${errBody}`), {
+        tags: { context: 'ai-search-mistral' },
+      });
+
+      // Return user-friendly error based on status
+      const userMsg =
+        fetchRes.status === 429
+          ? 'Der KI-Service ist gerade überlastet. Bitte versuche es in ein paar Sekunden erneut.'
+          : fetchRes.status >= 500
+            ? 'Der KI-Service ist vorübergehend nicht erreichbar. Bitte versuche es gleich nochmal.'
+            : 'Es gab ein Problem mit der KI-Anfrage. Bitte versuche es erneut.';
+      return NextResponse.json({ error: userMsg }, { status: 502 });
    }

    const data = await fetchRes.json();
@@ -152,6 +215,9 @@ ${contextStr ? contextStr : 'Keine spezifischen Katalogdaten für diese Anfrage
  } catch (error) {
    console.error('AI Search API Error:', error);
    Sentry.captureException(error, { tags: { context: 'ai-search-api' } });
-    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+    return NextResponse.json(
+      { error: 'Ein interner Fehler ist aufgetreten. Bitte versuche es erneut.' },
+      { status: 500 },
+    );
  }
 }
--- a/app/api/sync-qdrant/route.ts
+++ b/app/api/sync-qdrant/route.ts
@@ -0,0 +1,126 @@
+import { NextResponse } from 'next/server';
+import { getPayload } from 'payload';
+import configPromise from '../../../payload.config';
+import { upsertProductVector } from '../../../src/lib/qdrant';
+
+export const dynamic = 'force-dynamic'; // Next.js route segment option — presumably opts this handler out of static caching; confirm against the Next.js docs
+export const maxDuration = 120; // same option the ai-search route sets to 60 with a "(Vercel)" note; presumably seconds — confirm
+
+/**
+ * GET handler, called internally by the warmup script on every dev boot: syncs published posts, pages, and products from Payload CMS into Qdrant.
+ * A failure on one document is recorded in `errors` and the run continues; anything thrown outside those per-document try blocks returns a 500 with the error message.
+ * NOT for form entries, media, or users. NOTE(review): no auth check is visible here and each query stops at `limit: 1000` (no pagination) — confirm both are intended.
+ */
+export async function GET() {
+  const results = { products: 0, posts: 0, pages: 0, errors: [] as string[] }; // per-collection success counters plus collected per-document error strings
+
+  try {
+    const payload = await getPayload({ config: configPromise });
+
+    // ── Products: published only, at most 1000 docs, depth 0; stored as type 'product' ──
+    const { docs: products } = await payload.find({
+      collection: 'products',
+      limit: 1000,
+      depth: 0,
+      where: { _status: { equals: 'published' } },
+    });
+
+    for (const product of products) { // sequential: each upsert is awaited before the next one starts
+      try {
+        const contentText = `${product.title} - SKU: ${product.sku}\n${product.description || ''}`; // text passed to upsertProductVector (presumably what gets embedded — confirm in src/lib/qdrant)
+        await upsertProductVector(String(product.id), contentText, {
+          type: 'product',
+          data: {
+            title: product.title,
+            sku: product.sku,
+            slug: product.slug,
+            description: product.description,
+          },
+        });
+        results.products++; // counted only after the upsert resolved
+      } catch (e: any) {
+        results.errors.push(`product:${product.sku}: ${e.message}`); // record the failure and continue with the next product
+      }
+    }
+
+    // ── Posts: published only, at most 1000 docs; stored as type 'knowledge' under a "post_"-prefixed id ──
+    const { docs: posts } = await payload.find({
+      collection: 'posts',
+      limit: 1000,
+      depth: 0,
+      where: { _status: { equals: 'published' } },
+    });
+
+    for (const post of posts) {
+      try {
+        const contentText = [
+          `Blog-Artikel: ${post.title}`,
+          post.excerpt ? `Zusammenfassung: ${post.excerpt}` : '',
+          post.category ? `Kategorie: ${post.category}` : '',
+        ]
+          .filter(Boolean) // drop the empty strings left by a missing excerpt/category
+          .join('\n');
+
+        await upsertProductVector(`post_${post.id}`, contentText, { // NOTE(review): prefixed string id — verify upsertProductVector maps it to a point id format Qdrant accepts
+          type: 'knowledge',
+          content: contentText,
+          data: {
+            title: post.title,
+            slug: post.slug,
+          },
+        });
+        results.posts++;
+      } catch (e: any) {
+        results.errors.push(`post:${post.slug}: ${e.message}`); // record the failure and continue with the next post
+      }
+    }
+
+    // ── Pages: published only, at most 1000 docs; stored as type 'knowledge' under a "page_"-prefixed id ──
+    const { docs: pages } = await payload.find({
+      collection: 'pages',
+      limit: 1000,
+      depth: 0,
+      where: { _status: { equals: 'published' } },
+    });
+
+    for (const page of pages) {
+      try {
+        const contentText = [
+          `Seite: ${page.title}`,
+          page.excerpt ? `Beschreibung: ${page.excerpt}` : '',
+        ]
+          .filter(Boolean) // drop the empty string left by a missing excerpt
+          .join('\n');
+
+        await upsertProductVector(`page_${page.id}`, contentText, { // NOTE(review): same prefixed-id question as for posts
+          type: 'knowledge',
+          content: contentText,
+          data: {
+            title: page.title,
+            slug: page.slug,
+          },
+        });
+        results.pages++;
+      } catch (e: any) {
+        results.errors.push(`page:${page.slug}: ${e.message}`); // record the failure and continue with the next page
+      }
+    }
+
+    console.log(
+      `[Qdrant Sync] ✅ ${results.products} products, ${results.posts} posts, ${results.pages} pages synced`,
+    );
+
+    return NextResponse.json({ // no explicit status is passed, and success is true even when some documents failed
+      success: true,
+      synced: {
+        products: results.products,
+        posts: results.posts,
+        pages: results.pages,
+      },
+      errors: results.errors.length > 0 ? results.errors : undefined, // undefined when nothing failed (presumably dropped from the JSON body on serialization)
+    });
+  } catch (error: any) { // reached when getPayload, a payload.find call, or anything else outside the inner try blocks throws
+    console.error('[Qdrant Sync] ❌ Fatal error:', error);
+    return NextResponse.json({ error: error.message }, { status: 500 }); // NOTE(review): returns the raw error message to the caller — confirm that is acceptable
+  }
+}