feat: automated Qdrant sync with Mistral embeddings + Kabelhandbuch ingestion
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 6s
Build & Deploy / 🧪 QA (push) Failing after 55s
Build & Deploy / 🏗️ Build (push) Has been skipped
Build & Deploy / 🚀 Deploy (push) Has been skipped
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s

- Switch embedding API from OpenRouter to Mistral mistral-embed (1024-dim, EU/DSGVO)
- Add afterChange/afterDelete hooks to Posts.ts and Pages.ts for live sync
- Integrate kabelhandbuch.txt parsing into /api/sync-qdrant boot route
- Add .gitignore entries for kabelhandbuch.txt
This commit is contained in:
2026-03-07 15:39:10 +01:00
parent 3acf0c3740
commit 8e99c9d121
6 changed files with 178 additions and 27 deletions

View File

@@ -16,7 +16,7 @@ export const qdrant = new QdrantClient({
});
// Name of the Qdrant collection used by this module.
export const COLLECTION_NAME = 'klz_products';
// Dimensionality of the vectors produced by Mistral `mistral-embed`.
// Declared exactly once: a second `export const VECTOR_SIZE` is a redeclaration error.
export const VECTOR_SIZE = 1024;
// Cache TTLs
const EMBEDDING_CACHE_TTL = 60 * 60 * 24; // 24h in seconds — embeddings are deterministic
@@ -50,26 +50,15 @@ export async function ensureCollection() {
}
/**
 * Hash text into a fixed-length cache key.
 *
 * Uses SHA-256 truncated to 128 bits (32 hex characters). A 32-bit rolling
 * hash is not sufficient here: short strings such as "Aa" and "BB" collide,
 * and a collision would make two different texts share one cache entry.
 *
 * @param text - The text to hash (callers normalize it before hashing).
 * @returns The first 32 hexadecimal characters of the SHA-256 digest of `text`.
 */
function hashKey(text: string): string {
  // Loaded locally so the block does not depend on a top-of-file import;
  // Node caches modules, so repeated calls do not reload `crypto`.
  const { createHash } = require('crypto');
  const digest: string = createHash('sha256').update(text).digest('hex');
  return digest.slice(0, 32);
}
/**
 * Generate an embedding for a given text using the Mistral API
 * (`mistral-embed`, EU/DSGVO-compliant).
 * Results are cached in Redis for 24h since embeddings are deterministic.
 *
 * NOTE: `mistral-embed` produces 1024-dim vectors, so the Qdrant collection must
 * use 1024-dim vectors as well. Content that was indexed with the previous
 * 1536-dim OpenAI text-embedding-3-small model (via OpenRouter) has to be
 * re-indexed.
 */
export async function generateEmbedding(text: string): Promise<number[]> {
const cacheKey = `emb:${hashKey(text.toLowerCase().trim())}`;
@@ -84,22 +73,20 @@ export async function generateEmbedding(text: string): Promise<number[]> {
// Redis down — proceed without cache
}
const openRouterKey = process.env.OPENROUTER_API_KEY;
if (!openRouterKey) {
throw new Error('OPENROUTER_API_KEY is not set');
const mistralKey = process.env.MISTRAL_API_KEY;
if (!mistralKey) {
throw new Error('MISTRAL_API_KEY is not set');
}
const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
const response = await fetch('https://api.mistral.ai/v1/embeddings', {
method: 'POST',
headers: {
Authorization: `Bearer ${openRouterKey}`,
Authorization: `Bearer ${mistralKey}`,
'Content-Type': 'application/json',
'HTTP-Referer': process.env.NEXT_PUBLIC_BASE_URL || 'https://klz-cables.com',
'X-Title': 'KLZ Cables Search AI',
},
body: JSON.stringify({
model: 'openai/text-embedding-3-small',
input: text,
model: 'mistral-embed',
input: [text],
}),
});