Files
klz-cables.com/src/payload/collections/Media.ts
Marc Mintel 1dc52da677
All checks were successful
Build & Deploy / 🔍 Prepare (push) Successful in 7s
Build & Deploy / 🧪 QA (push) Successful in 1m1s
Build & Deploy / 🏗️ Build (push) Successful in 4m22s
Build & Deploy / 🚀 Deploy (push) Successful in 1m41s
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s
feat: Automate Qdrant PDF ingestion via Media hooks
2026-03-08 01:08:55 +01:00

126 lines
3.8 KiB
TypeScript

import type { CollectionConfig } from 'payload';
/**
 * Media upload collection.
 *
 * Files are stored under `public/media`; images get three resized variants
 * (thumbnail/card/tablet). PDF uploads are additionally parsed to text,
 * chunked, and ingested into Qdrant via the `afterChange` hook, and the
 * matching vectors are removed again in `afterDelete`, keeping the vector
 * store in sync with the media library.
 */
export const Media: CollectionConfig = {
  slug: 'media',
  access: {
    // Media is served publicly on the site, so reads are unrestricted.
    read: () => true,
  },
  admin: {
    useAsTitle: 'filename',
    defaultColumns: ['filename', 'alt', 'updatedAt'],
  },
  upload: {
    staticDir: 'public/media',
    adminThumbnail: 'thumbnail',
    imageSizes: [
      {
        name: 'thumbnail',
        width: 600,
        // height: undefined allows wide 5:1 aspect ratios to be preserved without cropping
        height: undefined,
        position: 'centre',
      },
      {
        name: 'card',
        width: 768,
        height: undefined,
        position: 'centre',
      },
      {
        name: 'tablet',
        width: 1024,
        height: undefined,
        position: 'centre',
      },
    ],
  },
  fields: [
    {
      name: 'alt',
      type: 'text',
      required: true,
    },
    {
      name: 'caption',
      type: 'text',
    },
  ],
  hooks: {
    afterChange: [
      /**
       * Ingest uploaded PDFs into Qdrant: extract text, split into
       * paragraph chunks, and upsert each chunk with a deterministic ID so
       * re-uploads overwrite rather than duplicate.
       */
      async ({ doc, req }) => {
        // Only process PDF files.
        if (doc.mimeType !== 'application/pdf') return;
        try {
          // Dependencies are required lazily so non-PDF uploads never pay
          // for loading pdf-parse or the Qdrant client.
          const fs = require('fs');
          const path = require('path');
          const crypto = require('crypto');
          const pdfParse = require('pdf-parse');
          const { upsertProductVector, deleteKnowledgeByMediaId } = require('../../lib/qdrant');

          const filePath = path.join(process.cwd(), 'public/media', doc.filename);
          if (!fs.existsSync(filePath)) return;

          req.payload.logger.info(`Extracting text from PDF: ${doc.filename}`);
          // Async read: avoid blocking the event loop inside a server hook.
          const dataBuffer = await fs.promises.readFile(filePath);
          const data = await pdfParse(dataBuffer);

          // Clear any previously indexed chunks for this file just in case it's an update.
          await deleteKnowledgeByMediaId(doc.id);

          // Chunk on blank lines (paragraphs), dropping fragments too short
          // to be meaningful knowledge (<= 50 chars).
          const chunks = data.text
            .split(/\n\s*\n/)
            .map((c: string) => c.trim())
            .filter((c: string) => c.length > 50);

          let successCount = 0;
          // Sequential on purpose: avoids hammering the embedding/Qdrant backend.
          for (let i = 0; i < chunks.length; i++) {
            // Deterministic ID from doc ID + chunk index, formatted as a
            // UUID because Qdrant strictly requires the 8-4-4-4-12 shape.
            const hash = crypto.createHash('md5').update(`${doc.id}-${i}`).digest('hex');
            const uuid = [
              hash.substring(0, 8),
              hash.substring(8, 12),
              hash.substring(12, 16),
              hash.substring(16, 20),
              hash.substring(20, 32),
            ].join('-');
            await upsertProductVector(uuid, chunks[i], {
              type: 'knowledge',
              title: `${doc.filename} - Teil ${i + 1}`,
              content: chunks[i],
              source: doc.filename,
              mediaId: doc.id,
            });
            successCount++;
          }
          req.payload.logger.info(
            `Successfully ingested ${successCount} chunks from ${doc.filename} into Qdrant`,
          );
        } catch (e: unknown) {
          // Narrow before reading .message; non-Error throws are stringified.
          const message = e instanceof Error ? e.message : String(e);
          req.payload.logger.error(`Error parsing PDF ${doc.filename}: ${message}`);
        }
      },
    ],
    afterDelete: [
      /** Remove all Qdrant chunks belonging to a deleted PDF. */
      async ({ id, doc, req }) => {
        if (doc.mimeType !== 'application/pdf') return;
        try {
          const { deleteKnowledgeByMediaId } = require('../../lib/qdrant');
          await deleteKnowledgeByMediaId(id);
          req.payload.logger.info(`Removed Qdrant chunks for deleted PDF: ${doc.filename}`);
        } catch (e: unknown) {
          const message = e instanceof Error ? e.message : String(e);
          req.payload.logger.error(
            `Error removing Qdrant chunks for ${doc.filename}: ${message}`,
          );
        }
      },
    ],
  },
};