feat: automated Qdrant sync with Mistral embeddings + Kabelhandbuch ingestion
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 6s
Build & Deploy / 🧪 QA (push) Failing after 55s
Build & Deploy / 🏗️ Build (push) Has been skipped
Build & Deploy / 🚀 Deploy (push) Has been skipped
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 6s
Build & Deploy / 🧪 QA (push) Failing after 55s
Build & Deploy / 🏗️ Build (push) Has been skipped
Build & Deploy / 🚀 Deploy (push) Has been skipped
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s
- Switch embedding API from OpenRouter to Mistral mistral-embed (1024-dim, EU/DSGVO)
- Add afterChange/afterDelete hooks to Posts.ts and Pages.ts for live sync
- Integrate kabelhandbuch.txt parsing into /api/sync-qdrant boot route
- Add .gitignore entries for kabelhandbuch.txt
This commit is contained in:
@@ -16,7 +16,7 @@ export const qdrant = new QdrantClient({
|
||||
});
|
||||
|
||||
export const COLLECTION_NAME = 'klz_products';
|
||||
export const VECTOR_SIZE = 1536; // OpenAI text-embedding-3-small
|
||||
export const VECTOR_SIZE = 1024; // Mistral mistral-embed
|
||||
|
||||
// Cache TTLs
|
||||
const EMBEDDING_CACHE_TTL = 60 * 60 * 24; // 24h — embeddings are deterministic
|
||||
@@ -50,26 +50,15 @@ export async function ensureCollection() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple hash for cache keys
|
||||
* Hash text for cache key
|
||||
*/
|
||||
function hashKey(text: string): string {
|
||||
let hash = 0;
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
const chr = text.charCodeAt(i);
|
||||
hash = (hash << 5) - hash + chr;
|
||||
hash |= 0;
|
||||
}
|
||||
return hash.toString(36);
|
||||
const { createHash } = require('crypto');
|
||||
return createHash('sha256').update(text).digest('hex').slice(0, 32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an embedding for a given text using OpenRouter (OpenAI embedding proxy).
|
||||
* Results are cached in Redis for 24h since embeddings are deterministic.
|
||||
*
|
||||
* NOTE: We keep OpenRouter for embeddings because the Qdrant collection uses 1536-dim
|
||||
* vectors (OpenAI text-embedding-3-small). Switching to Mistral embed (1024-dim) would
|
||||
* require re-indexing the entire product catalog.
|
||||
* User-facing chat uses Mistral AI directly for DSGVO compliance.
|
||||
* Generate embedding using Mistral API (EU/DSGVO-compliant)
|
||||
*/
|
||||
export async function generateEmbedding(text: string): Promise<number[]> {
|
||||
const cacheKey = `emb:${hashKey(text.toLowerCase().trim())}`;
|
||||
@@ -84,22 +73,20 @@ export async function generateEmbedding(text: string): Promise<number[]> {
|
||||
// Redis down — proceed without cache
|
||||
}
|
||||
|
||||
const openRouterKey = process.env.OPENROUTER_API_KEY;
|
||||
if (!openRouterKey) {
|
||||
throw new Error('OPENROUTER_API_KEY is not set');
|
||||
const mistralKey = process.env.MISTRAL_API_KEY;
|
||||
if (!mistralKey) {
|
||||
throw new Error('MISTRAL_API_KEY is not set');
|
||||
}
|
||||
|
||||
const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
|
||||
const response = await fetch('https://api.mistral.ai/v1/embeddings', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${openRouterKey}`,
|
||||
Authorization: `Bearer ${mistralKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
'HTTP-Referer': process.env.NEXT_PUBLIC_BASE_URL || 'https://klz-cables.com',
|
||||
'X-Title': 'KLZ Cables Search AI',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'openai/text-embedding-3-small',
|
||||
input: text,
|
||||
model: 'mistral-embed',
|
||||
input: [text],
|
||||
}),
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user