import type { CollectionConfig } from 'payload';

/** Directory (relative to `process.cwd()`) where uploads are stored and read back from. */
const MEDIA_STATIC_DIR = 'public/media';

/** MIME type of the uploads whose text is indexed into Qdrant. */
const PDF_MIME_TYPE = 'application/pdf';

/** A text chunk must be strictly longer than this many characters to be indexed. */
const MIN_CHUNK_LENGTH = 50;

/** Returns a loggable message for any thrown value, not only `Error` instances. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/** True when `e` carries the Node.js `ENOENT` ("no such file or directory") error code. */
function isMissingFileError(e: unknown): boolean {
  return typeof e === 'object' && e !== null && (e as { code?: unknown }).code === 'ENOENT';
}

/**
 * Reads a file without blocking the event loop.
 *
 * @returns the file contents, or `undefined` when the file does not exist.
 * @throws any read error other than `ENOENT` (permissions, path is a directory, ...).
 */
async function readFileIfPresent(filePath: string): Promise<Uint8Array | undefined> {
  // Loaded lazily, as the hooks have always done, so `fs` is only resolved when a PDF is processed.
  const fs = require('fs');
  try {
    return await fs.promises.readFile(filePath);
  } catch (e: unknown) {
    if (isMissingFileError(e)) {
      return undefined;
    }
    throw e;
  }
}

/**
 * Splits extracted text on blank lines, trims every piece and keeps only the
 * pieces longer than `MIN_CHUNK_LENGTH` characters.
 */
function splitIntoChunks(text: string): string[] {
  return text
    .split(/\n\s*\n/)
    .map((chunk) => chunk.trim())
    .filter((chunk) => chunk.length > MIN_CHUNK_LENGTH);
}

/** Formats 32 hex characters in the 8-4-4-4-12 UUID layout. */
function hexToUuid(hex: string): string {
  return [
    hex.substring(0, 8),
    hex.substring(8, 12),
    hex.substring(12, 16),
    hex.substring(16, 20),
    hex.substring(20, 32),
  ].join('-');
}

/**
 * Builds a deterministic UUID-shaped id from the media document id and the
 * chunk index, so the same chunk of the same document always gets the same id.
 */
function chunkPointId(mediaId: string | number, index: number): string {
  const crypto = require('crypto');
  // MD5 is used only to derive a stable 32-hex-character id, not for security.
  const hash: string = crypto.createHash('md5').update(`${mediaId}-${index}`).digest('hex');
  return hexToUuid(hash);
}

/**
 * Publicly readable upload collection for media files.
 *
 * Besides the upload/image-size configuration, two hooks keep a Qdrant
 * knowledge index in step with uploaded PDFs:
 * - `afterChange` extracts the PDF text, drops the chunks previously stored
 *   for the document and upserts one entry per text chunk;
 * - `afterDelete` removes the chunks stored for a deleted PDF.
 *
 * Both hooks are best-effort: failures are logged and never rethrown, so they
 * do not fail the surrounding Payload operation.
 */
export const Media: CollectionConfig = {
  slug: 'media',
  access: {
    read: () => true,
  },
  admin: {
    useAsTitle: 'filename',
    defaultColumns: ['filename', 'alt', 'updatedAt'],
  },
  upload: {
    staticDir: MEDIA_STATIC_DIR,
    adminThumbnail: 'thumbnail',
    imageSizes: [
      {
        name: 'thumbnail',
        width: 600,
        // height: undefined allows wide 5:1 aspect ratios to be preserved without cropping
        height: undefined,
        position: 'centre',
      },
      {
        name: 'card',
        width: 768,
        height: undefined,
        position: 'centre',
      },
      {
        name: 'tablet',
        width: 1024,
        height: undefined,
        position: 'centre',
      },
    ],
  },
  fields: [
    {
      name: 'alt',
      type: 'text',
      required: true,
    },
    {
      name: 'caption',
      type: 'text',
    },
  ],
  hooks: {
    afterChange: [
      async ({ doc, req }) => {
        // Only process PDF files
        if (doc.mimeType !== PDF_MIME_TYPE) {
          return;
        }

        try {
          // Dependencies are loaded lazily so they are only resolved when a PDF is processed.
          const path = require('path');
          const pdfParse = require('pdf-parse');
          const { upsertProductVector, deleteKnowledgeByMediaId } = require('../../lib/qdrant');

          const filePath = path.join(process.cwd(), MEDIA_STATIC_DIR, doc.filename);
          const dataBuffer = await readFileIfPresent(filePath);
          if (dataBuffer === undefined) {
            // Previously this case was skipped silently; make the skipped indexing visible.
            req.payload.logger.warn(
              `PDF file not found on disk, skipping Qdrant ingestion: ${doc.filename}`,
            );
            return;
          }

          req.payload.logger.info(`Extracting text from PDF: ${doc.filename}`);
          const data = await pdfParse(dataBuffer);

          // Clear any previously indexed chunks for this file in case this is an update
          await deleteKnowledgeByMediaId(doc.id);

          const chunks = splitIntoChunks(data.text);

          // Chunks are upserted one at a time; the first failure aborts the remaining ones.
          let successCount = 0;
          for (const [index, chunk] of chunks.entries()) {
            await upsertProductVector(chunkPointId(doc.id, index), chunk, {
              type: 'knowledge',
              title: `${doc.filename} - Teil ${index + 1}`,
              content: chunk,
              source: doc.filename,
              mediaId: doc.id,
            });
            successCount++;
          }

          req.payload.logger.info(
            `Successfully ingested ${successCount} chunks from ${doc.filename} into Qdrant`,
          );
        } catch (e: unknown) {
          req.payload.logger.error(`Error parsing PDF ${doc.filename}: ${errorMessage(e)}`);
        }
      },
    ],
    afterDelete: [
      async ({ id, doc, req }) => {
        if (doc.mimeType !== PDF_MIME_TYPE) {
          return;
        }

        try {
          const { deleteKnowledgeByMediaId } = require('../../lib/qdrant');
          await deleteKnowledgeByMediaId(id);
          req.payload.logger.info(`Removed Qdrant chunks for deleted PDF: ${doc.filename}`);
        } catch (e: unknown) {
          req.payload.logger.error(
            `Error removing Qdrant chunks for ${doc.filename}: ${errorMessage(e)}`,
          );
        }
      },
    ],
  },
};