Files
mintel.me/apps/web/scripts/index-posts.ts
Marc Mintel 85d2d2c069
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 7s
Build & Deploy / 🏗️ Build (push) Failing after 18m2s
Build & Deploy / 🚀 Deploy (push) Has been skipped
Build & Deploy / 🧪 QA (push) Has been skipped
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 3s
feat(ai): Implement AI agent contact form and fix local Qdrant network configs
2026-03-06 11:56:12 +01:00

128 lines
3.7 KiB
TypeScript

/**
* Index all published blog posts into Qdrant for AI search.
*
* Usage: pnpm --filter @mintel/web run index:posts
*/
import { getPayload } from 'payload';
import configPromise from '../payload.config';
import { upsertPostVector } from '../src/lib/qdrant';
/**
 * Recursively flatten a Lexical rich-text value into plain text.
 *
 * Accepts any piece of the serialized editor state: the wrapper object
 * (`{ root }`), a single node, an array of nodes, or a bare string.
 * Block-level nodes (paragraph, heading, listitem, quote) are terminated with
 * a newline, and explicit `linebreak` nodes are rendered as a newline so the
 * words on either side of a soft break are not fused together.
 *
 * @param node - Serialized Lexical state, node, node array, or string.
 * @returns The concatenated text; an empty string for anything unrecognized.
 */
function extractPlainText(node: any): string {
  if (!node) return '';

  // Bare strings are already plain text.
  if (typeof node === 'string') return node;

  // Arrays of nodes are flattened element by element.
  if (Array.isArray(node)) {
    return node.map(extractPlainText).join('');
  }

  // Any other primitive (number, boolean, ...) carries no text.
  if (typeof node !== 'object') return '';

  // Text nodes: only accept real, non-empty strings so the declared return
  // type holds even for malformed data.
  if (typeof node.text === 'string' && node.text) return node.text;

  // Soft line breaks have neither `text` nor `children`; without this branch
  // they would vanish and merge the neighbouring words.
  if (node.type === 'linebreak') return '\n';

  // Element nodes: recurse into children, guarding against a non-array value.
  if (Array.isArray(node.children)) {
    const childText = node.children.map(extractPlainText).join('');
    // Add line breaks after block-level elements.
    if (['paragraph', 'heading', 'listitem', 'quote'].includes(node.type)) {
      return childText + '\n';
    }
    return childText;
  }

  // Top-level editor state wraps the tree in `root`.
  if (node.root) {
    return extractPlainText(node.root);
  }

  return '';
}
/**
 * Describe the database target for log output without leaking credentials.
 *
 * Only scheme, host (with port) and path are kept; username, password and
 * query parameters are dropped because connection strings usually embed
 * secrets there.
 *
 * @param uri - The raw connection string, if configured.
 * @returns A log-safe description, `'default'` when unset, or `'[redacted]'`
 *          when the value cannot be parsed as a URL.
 */
function describeDatabaseTarget(uri: string | undefined): string {
  if (!uri) return 'default';
  try {
    const { protocol, host, pathname } = new URL(uri);
    return `${protocol}//${host}${pathname}`;
  } catch {
    // Never echo an unparseable value: it may still contain a password.
    return '[redacted]';
  }
}

/**
 * Decide whether an error thrown while initializing Payload means the
 * database is simply not reachable yet (and the attempt is worth retrying).
 */
function isDatabaseNotReadyError(e: unknown): boolean {
  if (typeof e !== 'object' || e === null) return false;
  const { code, message } = e as { code?: unknown; message?: unknown };
  if (code === 'ECONNREFUSED' || code === 'ENOTFOUND') return true;
  return (
    typeof message === 'string' &&
    (message.includes('ECONNREFUSED') || message.includes('cannot connect to Postgres'))
  );
}

/**
 * Initialize Payload, retrying while the database is still starting up.
 *
 * Makes up to five attempts, three seconds apart. Errors that do not look
 * like "database not ready" are rethrown immediately.
 *
 * @throws Error when every attempt failed because the database was unreachable.
 */
async function connectToPayload() {
  const maxAttempts = 5;
  const retryDelayMs = 3000;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      console.log(
        `Connecting to database (URI: ${describeDatabaseTarget(process.env.DATABASE_URI)})...`,
      );
      return await getPayload({ config: configPromise });
    } catch (e: unknown) {
      if (!isDatabaseNotReadyError(e)) throw e;

      const retriesLeft = maxAttempts - attempt;
      // No point in sleeping after the final attempt; fail right away.
      if (retriesLeft === 0) break;

      console.log(`Database not ready, retrying in 3s... (${retriesLeft} retries left)`);
      await new Promise((res) => setTimeout(res, retryDelayMs));
    }
  }

  throw new Error('Failed to connect to database after multiple retries.');
}

/**
 * Index every published blog post into Qdrant for AI search.
 *
 * Connects to Payload (with retries), loads the published posts, builds a
 * searchable text per post (title, description, tags and the first 2000
 * characters of the plain-text content) and upserts it via
 * `upsertPostVector`. Exits the process with code 0 on success; failures
 * propagate to the caller as a rejected promise.
 */
async function run(): Promise<void> {
  console.log('🔍 Starting blog post indexing for AI search...');

  const payload = await connectToPayload();

  // Fetch all published posts
  const result = await payload.find({
    collection: 'posts',
    limit: 1000,
    where: {
      _status: { equals: 'published' },
    },
  });

  console.log(`Found ${result.docs.length} published posts to index.`);

  // Make truncation by the query limit visible instead of silently skipping posts.
  if (typeof result.totalDocs === 'number' && result.totalDocs > result.docs.length) {
    console.warn(
      `⚠️ Only ${result.docs.length} of ${result.totalDocs} published posts were fetched; the remaining posts will not be indexed.`,
    );
  }

  let indexed = 0;
  for (const post of result.docs) {
    const plainContent = extractPlainText(post.content);

    // Build searchable text: title + description + tags + content
    const rawTags: unknown = post.tags;
    const tags = Array.isArray(rawTags)
      ? rawTags
          .map((t) => t?.tag)
          .filter(Boolean)
          .join(', ')
      : '';

    const searchableText = [
      `Titel: ${post.title}`,
      // Skip the line entirely when there is no description, so the literal
      // text "undefined"/"null" never ends up in the embedding.
      post.description ? `Beschreibung: ${post.description}` : '',
      tags ? `Tags: ${tags}` : '',
      `Inhalt: ${plainContent.substring(0, 2000)}`, // Limit content to avoid token overflow
    ]
      .filter(Boolean)
      .join('\n\n');

    // Upsert into Qdrant
    await upsertPostVector(post.id, searchableText, {
      content: searchableText,
      data: {
        id: post.id,
        title: post.title,
        slug: post.slug,
        description: post.description,
        tags,
      },
    });

    indexed++;
    console.log(`  ✅ [${indexed}/${result.docs.length}] ${post.title}`);

    // Small delay to avoid rate limiting on the embedding API
    await new Promise((res) => setTimeout(res, 200));
  }

  console.log(`\n🎉 Successfully indexed ${indexed} posts into Qdrant.`);
  process.exit(0);
}
// Script entry point: start the indexing run and, if it rejects, report the
// failure and terminate with a non-zero exit code.
run().then(undefined, (failure) => {
  console.error('Indexing failed:', failure);
  process.exit(1);
});