feat: automated Qdrant sync with Mistral embeddings + Kabelhandbuch ingestion

- Switch embedding API from OpenRouter to Mistral mistral-embed (1024-dim, EU/DSGVO)
- Add afterChange/afterDelete hooks to Posts.ts and Pages.ts for live sync
- Integrate kabelhandbuch.txt parsing into /api/sync-qdrant boot route
- Add .gitignore entries for kabelhandbuch.txt
2026-03-07 15:39:10 +01:00
parent 3acf0c3740
commit 8e99c9d121
6 changed files with 178 additions and 27 deletions
--- a/src/lib/qdrant.ts
+++ b/src/lib/qdrant.ts
@@ -16,7 +16,7 @@ export const qdrant = new QdrantClient({
 });

 export const COLLECTION_NAME = 'klz_products';
-export const VECTOR_SIZE = 1536; // OpenAI text-embedding-3-small
+export const VECTOR_SIZE = 1024; // Mistral mistral-embed

 // Cache TTLs
 const EMBEDDING_CACHE_TTL = 60 * 60 * 24; // 24h — embeddings are deterministic
@@ -50,26 +50,15 @@ export async function ensureCollection() {
 }

 /**
- * Simple hash for cache keys
+ * Hash text for cache key
 */
 function hashKey(text: string): string {
-  let hash = 0;
-  for (let i = 0; i < text.length; i++) {
-    const chr = text.charCodeAt(i);
-    hash = (hash << 5) - hash + chr;
-    hash |= 0;
-  }
-  return hash.toString(36);
+  const { createHash } = require('crypto');
+  return createHash('sha256').update(text).digest('hex').slice(0, 32);
 }

 /**
- * Generate an embedding for a given text using OpenRouter (OpenAI embedding proxy).
- * Results are cached in Redis for 24h since embeddings are deterministic.
- *
- * NOTE: We keep OpenRouter for embeddings because the Qdrant collection uses 1536-dim
- * vectors (OpenAI text-embedding-3-small). Switching to Mistral embed (1024-dim) would
- * require re-indexing the entire product catalog.
- * User-facing chat uses Mistral AI directly for DSGVO compliance.
+ * Generate embedding using Mistral API (EU/DSGVO-compliant)
 */
 export async function generateEmbedding(text: string): Promise<number[]> {
  const cacheKey = `emb:${hashKey(text.toLowerCase().trim())}`;
@@ -84,22 +73,20 @@ export async function generateEmbedding(text: string): Promise<number[]> {
    // Redis down — proceed without cache
  }

-  const openRouterKey = process.env.OPENROUTER_API_KEY;
-  if (!openRouterKey) {
-    throw new Error('OPENROUTER_API_KEY is not set');
+  const mistralKey = process.env.MISTRAL_API_KEY;
+  if (!mistralKey) {
+    throw new Error('MISTRAL_API_KEY is not set');
  }

-  const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
+  const response = await fetch('https://api.mistral.ai/v1/embeddings', {
    method: 'POST',
    headers: {
-      Authorization: `Bearer ${openRouterKey}`,
+      Authorization: `Bearer ${mistralKey}`,
      'Content-Type': 'application/json',
-      'HTTP-Referer': process.env.NEXT_PUBLIC_BASE_URL || 'https://klz-cables.com',
-      'X-Title': 'KLZ Cables Search AI',
    },
    body: JSON.stringify({
-      model: 'openai/text-embedding-3-small',
-      input: text,
+      model: 'mistral-embed',
+      input: [text],
    }),
  });