feat(ai-search): add interactive WebGL Orb, Markdown support, and Sentry tracking
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 11s
Build & Deploy / 🧪 QA (push) Successful in 1m18s
Build & Deploy / 🚀 Deploy (push) Has been cancelled
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been cancelled
Build & Deploy / 🔔 Notify (push) Has been cancelled
Build & Deploy / 🏗️ Build (push) Has been cancelled
CI - Lint, Typecheck & Test / quality-assurance (pull_request) Failing after 3m55s
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 11s
Build & Deploy / 🧪 QA (push) Successful in 1m18s
Build & Deploy / 🚀 Deploy (push) Has been cancelled
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been cancelled
Build & Deploy / 🔔 Notify (push) Has been cancelled
Build & Deploy / 🏗️ Build (push) Has been cancelled
CI - Lint, Typecheck & Test / quality-assurance (pull_request) Failing after 3m55s
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
import { QdrantClient } from '@qdrant/js-client-rest';
|
||||
|
||||
const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333';
|
||||
const isDockerContainer =
|
||||
process.env.IS_DOCKER === 'true' || process.env.HOSTNAME?.includes('klz-app');
|
||||
const qdrantUrl =
|
||||
process.env.QDRANT_URL ||
|
||||
(isDockerContainer ? 'http://klz-qdrant:6333' : 'http://localhost:6333');
|
||||
const qdrantApiKey = process.env.QDRANT_API_KEY || '';
|
||||
|
||||
export const qdrant = new QdrantClient({
|
||||
url: qdrantUrl,
|
||||
apiKey: qdrantApiKey || undefined,
|
||||
url: qdrantUrl,
|
||||
apiKey: qdrantApiKey || undefined,
|
||||
});
|
||||
|
||||
export const COLLECTION_NAME = 'klz_products';
|
||||
@@ -15,110 +19,116 @@ export const VECTOR_SIZE = 1536; // OpenAI text-embedding-3-small
|
||||
* Ensure the collection exists in Qdrant.
|
||||
*/
|
||||
export async function ensureCollection() {
|
||||
try {
|
||||
const collections = await qdrant.getCollections();
|
||||
const exists = collections.collections.some(c => c.name === COLLECTION_NAME);
|
||||
if (!exists) {
|
||||
await qdrant.createCollection(COLLECTION_NAME, {
|
||||
vectors: {
|
||||
size: VECTOR_SIZE,
|
||||
distance: 'Cosine',
|
||||
},
|
||||
});
|
||||
console.log(`Successfully created Qdrant collection: ${COLLECTION_NAME}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error ensuring Qdrant collection:', error);
|
||||
try {
|
||||
const collections = await qdrant.getCollections();
|
||||
const exists = collections.collections.some((c) => c.name === COLLECTION_NAME);
|
||||
if (!exists) {
|
||||
await qdrant.createCollection(COLLECTION_NAME, {
|
||||
vectors: {
|
||||
size: VECTOR_SIZE,
|
||||
distance: 'Cosine',
|
||||
},
|
||||
});
|
||||
console.log(`Successfully created Qdrant collection: ${COLLECTION_NAME}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error ensuring Qdrant collection:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an embedding for a given text using OpenRouter (OpenAI embedding proxy)
|
||||
*/
|
||||
export async function generateEmbedding(text: string): Promise<number[]> {
|
||||
const openRouterKey = process.env.OPENROUTER_API_KEY;
|
||||
if (!openRouterKey) {
|
||||
throw new Error('OPENROUTER_API_KEY is not set');
|
||||
}
|
||||
const openRouterKey = process.env.OPENROUTER_API_KEY;
|
||||
if (!openRouterKey) {
|
||||
throw new Error('OPENROUTER_API_KEY is not set');
|
||||
}
|
||||
|
||||
const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${openRouterKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
'HTTP-Referer': process.env.NEXT_PUBLIC_BASE_URL || 'https://klz-cables.com',
|
||||
'X-Title': 'KLZ Cables Search AI',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'openai/text-embedding-3-small',
|
||||
input: text,
|
||||
}),
|
||||
});
|
||||
const response = await fetch('https://openrouter.ai/api/v1/embeddings', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${openRouterKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
'HTTP-Referer': process.env.NEXT_PUBLIC_BASE_URL || 'https://klz-cables.com',
|
||||
'X-Title': 'KLZ Cables Search AI',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'openai/text-embedding-3-small',
|
||||
input: text,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorBody = await response.text();
|
||||
throw new Error(`Failed to generate embedding: ${response.status} ${response.statusText} ${errorBody}`);
|
||||
}
|
||||
if (!response.ok) {
|
||||
const errorBody = await response.text();
|
||||
throw new Error(
|
||||
`Failed to generate embedding: ${response.status} ${response.statusText} ${errorBody}`,
|
||||
);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return data.data[0].embedding;
|
||||
const data = await response.json();
|
||||
return data.data[0].embedding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Upsert a product into Qdrant
|
||||
*/
|
||||
export async function upsertProductVector(id: string | number, text: string, payload: Record<string, any>) {
|
||||
try {
|
||||
await ensureCollection();
|
||||
const vector = await generateEmbedding(text);
|
||||
export async function upsertProductVector(
|
||||
id: string | number,
|
||||
text: string,
|
||||
payload: Record<string, any>,
|
||||
) {
|
||||
try {
|
||||
await ensureCollection();
|
||||
const vector = await generateEmbedding(text);
|
||||
|
||||
await qdrant.upsert(COLLECTION_NAME, {
|
||||
wait: true,
|
||||
points: [
|
||||
{
|
||||
id: id,
|
||||
vector,
|
||||
payload,
|
||||
}
|
||||
]
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error writing to Qdrant:', error);
|
||||
}
|
||||
await qdrant.upsert(COLLECTION_NAME, {
|
||||
wait: true,
|
||||
points: [
|
||||
{
|
||||
id: id,
|
||||
vector,
|
||||
payload,
|
||||
},
|
||||
],
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error writing to Qdrant:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a product from Qdrant
|
||||
*/
|
||||
export async function deleteProductVector(id: string | number) {
|
||||
try {
|
||||
await ensureCollection();
|
||||
await qdrant.delete(COLLECTION_NAME, {
|
||||
wait: true,
|
||||
points: [id] as [string | number],
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error deleting from Qdrant:', error);
|
||||
}
|
||||
try {
|
||||
await ensureCollection();
|
||||
await qdrant.delete(COLLECTION_NAME, {
|
||||
wait: true,
|
||||
points: [id] as [string | number],
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error deleting from Qdrant:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Search products in Qdrant
|
||||
*/
|
||||
export async function searchProducts(query: string, limit = 5) {
|
||||
try {
|
||||
await ensureCollection();
|
||||
const vector = await generateEmbedding(query);
|
||||
try {
|
||||
await ensureCollection();
|
||||
const vector = await generateEmbedding(query);
|
||||
|
||||
const results = await qdrant.search(COLLECTION_NAME, {
|
||||
vector,
|
||||
limit,
|
||||
with_payload: true,
|
||||
});
|
||||
const results = await qdrant.search(COLLECTION_NAME, {
|
||||
vector,
|
||||
limit,
|
||||
with_payload: true,
|
||||
});
|
||||
|
||||
return results;
|
||||
} catch (error) {
|
||||
console.error('Error searching in Qdrant:', error);
|
||||
return [];
|
||||
}
|
||||
return results;
|
||||
} catch (error) {
|
||||
console.error('Error searching in Qdrant:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
import Redis from 'ioredis';
|
||||
|
||||
const redisUrl = process.env.REDIS_URL || 'redis://klz-redis:6379';
|
||||
const isDockerContainer =
|
||||
process.env.IS_DOCKER === 'true' || process.env.HOSTNAME?.includes('klz-app');
|
||||
const redisUrl =
|
||||
process.env.REDIS_URL ||
|
||||
(isDockerContainer ? 'redis://klz-redis:6379' : 'redis://localhost:6379');
|
||||
|
||||
// Only create a single instance in Node.js
|
||||
const globalForRedis = global as unknown as { redis: Redis };
|
||||
|
||||
export const redis = globalForRedis.redis || new Redis(redisUrl, {
|
||||
export const redis =
|
||||
globalForRedis.redis ||
|
||||
new Redis(redisUrl, {
|
||||
maxRetriesPerRequest: 3,
|
||||
});
|
||||
});
|
||||
|
||||
if (process.env.NODE_ENV !== 'production') {
|
||||
globalForRedis.redis = redis;
|
||||
globalForRedis.redis = redis;
|
||||
}
|
||||
|
||||
export default redis;
|
||||
|
||||
63
src/scripts/ingest-pdf.ts
Normal file
63
src/scripts/ingest-pdf.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
|
||||
// Default QDRANT_URL to localhost for local script execution outside Docker (an existing value is kept)
|
||||
process.env.QDRANT_URL = process.env.QDRANT_URL || 'http://localhost:6333';
|
||||
|
||||
import { upsertProductVector } from '../lib/qdrant';
|
||||
|
||||
// Ingests the extracted Kabelhandbuch text into Qdrant as distinct knowledge topics.
|
||||
async function ingestPDF(txtPath: string) {
|
||||
if (!fs.existsSync(txtPath)) {
|
||||
console.error(`File not found: ${txtPath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
try {
|
||||
const text = fs.readFileSync(txtPath, 'utf8');
|
||||
|
||||
// Simple paragraph chunking
|
||||
// We split on paragraph breaks (blank lines) and drop chunks of 50 characters or fewer.
|
||||
const chunks = text
|
||||
.split(/\n\s*\n/)
|
||||
.map((c) => c.trim())
|
||||
.filter((c) => c.length > 50);
|
||||
|
||||
console.log(`Extracted ${text.length} characters from PDF.`);
|
||||
console.log(`Generated ${chunks.length} chunks for vector ingestion.\n`);
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
// Chunk sizes should be limited so OpenRouter embedding models don't time out/fail,
|
||||
// e.g. by joining multiple short paragraphs or cutting overly long ones (not implemented yet).
|
||||
// For the baseline, we index every chunk individually, mapped as 'knowledge' with a unique ID.
|
||||
|
||||
const chunkText = chunks[i];
|
||||
|
||||
// Generate a synthetic ID that won't collide with Payload Product IDs
|
||||
// Qdrant strictly requires UUID or unsigned int.
|
||||
const syntheticId = crypto.randomUUID();
|
||||
|
||||
const payloadData = {
|
||||
type: 'knowledge', // Custom flag to differentiate from 'product'
|
||||
title: `Kabelhandbuch Wissen - Bereich ${i + 1}`,
|
||||
content: chunkText,
|
||||
source: 'Kabelhandbuch KLZ.pdf',
|
||||
};
|
||||
|
||||
// Use the existing upsert function since it just embeds the text and stores the payload
|
||||
await upsertProductVector(syntheticId, chunkText, payloadData);
|
||||
console.log(`✅ Upserted chunk ${i + 1}/${chunks.length}`);
|
||||
}
|
||||
|
||||
console.log('🎉 PDF Ingestion Complete!');
|
||||
process.exit(0);
|
||||
} catch (err) {
|
||||
console.error('Failed to parse PDF:', err);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Run the ingestion
|
||||
const targetTxt = '/Users/marcmintel/Downloads/kabelhandbuch.txt';
|
||||
ingestPDF(targetTxt);
|
||||
Reference in New Issue
Block a user