diff --git a/package.json b/package.json index f4683932..6cdda423 100644 --- a/package.json +++ b/package.json @@ -106,7 +106,7 @@ }, "scripts": { "dev": "bash -c '[ -f .env ] || (cp .env.example .env && sed -i.bak \"s/TRAEFIK_HOST=klz-cables.com/TRAEFIK_HOST=klz.localhost/\" .env && rm -f .env.bak && echo \"✅ Created .env from .env.example\"); trap \"COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml down\" EXIT INT TERM; docker network create infra 2>/dev/null || true && COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml down && COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml up klz-app klz-db klz-proxy klz-qdrant klz-redis --remove-orphans'", - "dev:local": "bash -c 'trap \"COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml down\" EXIT INT TERM; COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml up -d klz-db klz-proxy klz-qdrant klz-redis && POSTGRES_URI=\"\" NODE_ENV=development next dev --webpack --port 3100 --hostname 0.0.0.0'", + "dev:local": "bash -c 'trap \"COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml down\" EXIT INT TERM; COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml up -d klz-db klz-proxy klz-qdrant klz-redis && POSTGRES_URI=\"\" NODE_ENV=development next dev --webpack --port 3100 --hostname 0.0.0.0'", "dev:infra": "COMPOSE_PROJECT_NAME=klz-2026 docker-compose -f docker-compose.dev.yml up -d klz-db klz-proxy", "build": "next build", "start": "next start", diff --git a/src/lib/qdrant.ts b/src/lib/qdrant.ts index 31477344..c477e920 100644 --- a/src/lib/qdrant.ts +++ b/src/lib/qdrant.ts @@ -152,31 +152,6 @@ export async function deleteProductVector(id: string | number) { } } -/** - * Delete knowledge chunks by their source Media ID - */ -export async function deleteKnowledgeByMediaId(mediaId: string | number) { - try { - await ensureCollection(); - await qdrant.delete(COLLECTION_NAME, { - wait: true, - filter: 
{ - must: [ - { - key: 'mediaId', - match: { - value: mediaId, - }, - }, - ], - }, - }); - console.log(`Successfully deleted Qdrant chunks for Media ID: ${mediaId}`); - } catch (error) { - console.error('Error deleting knowledge by Media ID from Qdrant:', error); - } -} - /** * Search products in Qdrant. * Results are cached in Redis for 30 minutes keyed by query text. diff --git a/src/payload/collections/Media.ts b/src/payload/collections/Media.ts index 9ba3865b..dd4a6244 100644 --- a/src/payload/collections/Media.ts +++ b/src/payload/collections/Media.ts @@ -45,81 +45,4 @@ export const Media: CollectionConfig = { type: 'text', }, ], - hooks: { - afterChange: [ - async ({ doc, req }) => { - // Only process PDF files - if (doc.mimeType === 'application/pdf') { - try { - const fs = require('fs'); - const path = require('path'); - const crypto = require('crypto'); - const pdfParse = require('pdf-parse'); - const { upsertProductVector, deleteKnowledgeByMediaId } = require('../../lib/qdrant'); - - const filePath = path.join(process.cwd(), 'public/media', doc.filename); - - if (fs.existsSync(filePath)) { - req.payload.logger.info(`Extracting text from PDF: ${doc.filename}`); - - const dataBuffer = fs.readFileSync(filePath); - const data = await pdfParse(dataBuffer); - - // Clear any previously indexed chunks for this file just in case it's an update - await deleteKnowledgeByMediaId(doc.id); - - // Chunk the text like we did in the ingest script - const chunks = data.text - .split(/\n\s*\n/) - .map((c: string) => c.trim()) - .filter((c: string) => c.length > 50); - - let successCount = 0; - for (let i = 0; i < chunks.length; i++) { - // Generate a deterministic UUID based on doc ID and chunk index - const hash = crypto.createHash('md5').update(`${doc.id}-${i}`).digest('hex'); - // Qdrant strictly requires UUID: 8-4-4-4-12 - const uuid = [ - hash.substring(0, 8), - hash.substring(8, 12), - hash.substring(12, 16), - hash.substring(16, 20), - hash.substring(20, 32), - 
].join('-'); - - await upsertProductVector(uuid, chunks[i], { - type: 'knowledge', - title: `${doc.filename} - Teil ${i + 1}`, - content: chunks[i], - source: doc.filename, - mediaId: doc.id, - }); - successCount++; - } - req.payload.logger.info( - `Successfully ingested ${successCount} chunks from ${doc.filename} into Qdrant`, - ); - } - } catch (e: any) { - req.payload.logger.error(`Error parsing PDF ${doc.filename}: ${e.message}`); - } - } - }, - ], - afterDelete: [ - async ({ id, doc, req }) => { - if (doc.mimeType === 'application/pdf') { - try { - const { deleteKnowledgeByMediaId } = require('../../lib/qdrant'); - await deleteKnowledgeByMediaId(id); - req.payload.logger.info(`Removed Qdrant chunks for deleted PDF: ${doc.filename}`); - } catch (e: any) { - req.payload.logger.error( - `Error removing Qdrant chunks for ${doc.filename}: ${e.message}`, - ); - } - } - }, - ], - }, }; diff --git a/src/scripts/error.png b/src/scripts/error.png new file mode 100644 index 00000000..f9674c9b Binary files /dev/null and b/src/scripts/error.png differ diff --git a/src/scripts/upload-pdfs.ts b/src/scripts/upload-pdfs.ts deleted file mode 100644 index 44e69211..00000000 --- a/src/scripts/upload-pdfs.ts +++ /dev/null @@ -1,58 +0,0 @@ -import fs from 'fs'; -import path from 'path'; -import 'dotenv/config'; -import { getPayload } from 'payload'; -import configPromise from '@payload-config'; - -async function uploadPDFs() { - const payload = await getPayload({ config: configPromise }); - - const downloadDir = '/Users/marcmintel/Downloads'; - const files = fs.readdirSync(downloadDir).filter((f) => f.endsWith('.pdf')); - - console.log(`Found ${files.length} PDFs in Downloads folder.`); - - for (const file of files) { - const filePath = path.join(downloadDir, file); - try { - const stats = fs.statSync(filePath); - - // Check if it already exists - const existing = await payload.find({ - collection: 'media', - where: { - filename: { - equals: file, - }, - }, - }); - - if 
(existing.docs.length > 0) { - console.log(`Skipping ${file} - already exists in CMS`); - continue; - } - - console.log(`Uploading ${file}...`); - await payload.create({ - collection: 'media', - data: { - alt: file, - }, - file: { - data: fs.readFileSync(filePath), - mimetype: 'application/pdf', - name: file, - size: stats.size, - }, - }); - console.log(`✅ Uploaded ${file}`); - } catch (err) { - console.error(`❌ Failed to upload ${file}:`, err); - } - } - - console.log('Done uploading PDFs to Payload CMS. Payload hooks have synced them to Qdrant.'); - process.exit(0); -} - -uploadPDFs();