feat(qdrant-sync): add gzip compression before transfer to reduce upload size
Some checks failed
Build & Deploy / 🔍 Prepare (push) Successful in 5s
Build & Deploy / 🧪 QA (push) Successful in 1m8s
Build & Deploy / 🏗️ Build (push) Successful in 5m32s
Build & Deploy / 🚀 Deploy (push) Failing after 9s
Build & Deploy / 🧪 Post-Deploy Verification (push) Has been skipped
Build & Deploy / 🔔 Notify (push) Successful in 2s

This commit is contained in:
2026-03-18 10:32:54 +01:00
parent b84e0d782d
commit fb98c8237e

View File

@@ -69,43 +69,52 @@ echo "⬇️ 2/5 Downloading snapshot..."
# Fetch the snapshot file from the local Qdrant server into the work directory.
# -s suppresses curl's progress output, -o writes the response body to the given
# path, and --max-time caps the whole transfer at $TIMEOUT seconds.
# NOTE(review): $TIMEOUT is unquoted and defined outside this view — confirm it
# always expands to a single numeric word.
# NOTE(review): curl runs without --fail, so an HTTP error body would be saved
# under the snapshot name and still be reported as downloaded — confirm.
curl --max-time $TIMEOUT -s -o "$WORK_DIR/$SNAPSHOT_NAME" "$LOCAL_QDRANT_URL/collections/$COLLECTION/snapshots/$SNAPSHOT_NAME"
echo " ✅ Downloaded to $WORK_DIR/$SNAPSHOT_NAME"
# 3. Transfer Snapshot
echo "📤 3/5 Uploading snapshot to Alpha ($SSH_HOST)..."
# 3. Compress and Transfer Snapshot
echo "📦 3/6 Compressing snapshot to save bandwidth..."
# gzip -c writes the compressed stream to stdout, so the uncompressed snapshot
# stays in $WORK_DIR next to the new .gz copy until the work directory is removed.
gzip -c "$WORK_DIR/$SNAPSHOT_NAME" > "$WORK_DIR/$SNAPSHOT_NAME.gz"
echo " ✅ Compressed $SNAPSHOT_NAME.gz"
echo "📤 4/6 Uploading compressed snapshot to Alpha ($SSH_HOST)..."
# Keep-alive and connect-timeout options shared by the ssh and rsync calls below.
SSH_OPTS="-o ServerAliveInterval=60 -o ServerAliveCountMax=10 -o ConnectTimeout=30"
# $SSH_OPTS is left unquoted here so the string word-splits into separate -o
# options. The remote command string is double-quoted, so $TGT_PATH is expanded
# locally before the command is sent; it creates the remote staging directory.
ssh $SSH_OPTS "$SSH_HOST" "mkdir -p $TGT_PATH/qdrant_tmp"
# --partial keeps a partially transferred file, --progress prints transfer
# progress, --timeout=600 is rsync's I/O timeout in seconds, and -e makes rsync
# use ssh with the same options as above.
# NOTE(review): the two path lines after the line continuation look like the
# pre-change and post-change versions of the same argument line (plain snapshot
# vs. .gz); only the first is joined to rsync by the backslash — confirm which
# one belongs in the real script.
rsync --partial --progress --timeout=600 -e "ssh $SSH_OPTS" \
"$WORK_DIR/$SNAPSHOT_NAME" "$SSH_HOST:$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME"
"$WORK_DIR/$SNAPSHOT_NAME.gz" "$SSH_HOST:$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME.gz"
echo " ✅ Upload complete."
# 4. Restore Snapshot on Remote Server
echo "🔄 4/5 Restoring snapshot on target container ($QDRANT_CONTAINER)..."
echo "🔄 5/6 Restoring snapshot on target container ($QDRANT_CONTAINER)..."
# Qdrant restore process, in the order the heredoc below runs it:
# - Extract the uploaded .gz in the remote staging directory (gunzip -f)
# - Copy the extracted snapshot into the container under /qdrant/ (docker cp)
# - Delete the existing collection (so it is clean)
# - Re-create the collection empty with vectors of size 384 and Cosine distance
# - Recover the collection from file:///qdrant/<snapshot name>
# - Remove the snapshot from the container and delete the staging directory
#
# The heredoc delimiter is unquoted, so every $VARIABLE in the body
# ($TGT_PATH, $SNAPSHOT_NAME, $QDRANT_CONTAINER, $COLLECTION) is expanded by the
# local shell before the text is sent to the remote host. "set -e" at the top
# of the body applies to the remote shell that executes it.
#
# English rendering of the German note inside the body: "we use the standard
# vector config from Kabelfachmann (Cosine, 384 dims for all-MiniLM-L6-v2)".
#
# NOTE(review): the curl calls use -s without --fail and discard their output,
# so an HTTP error response from Qdrant would not trip "set -e" — confirm that
# a failed delete/create/recover is detected some other way.
# NOTE(review): paired "Step X" comments inside the body appear to be the old
# and new labels of the same diff lines; the later label of each pair matches
# the extract-first ordering.
ssh $SSH_OPTS "$SSH_HOST" << EOF
set -e
# Step A: Copy file into the container
# Step A: Extract the compressed file
echo " [Remote] Extracting snapshot..."
gunzip -f "$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME.gz"
# Step B: Copy file into the container
docker cp "$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME" $QDRANT_CONTAINER:/qdrant/$SNAPSHOT_NAME
# Step B: Delete existing collection
# Step C: Delete existing collection
curl -s -X DELETE "http://127.0.0.1:6333/collections/$COLLECTION" > /dev/null
# Step C: Re-create empty collection (required before recovery)
# Step D: Re-create empty collection (required before recovery)
# wir nutzen die standard vector config vom Kabelfachmann (Cosine, 384 dim für all-MiniLM-L6-v2)
curl -s -X PUT "http://127.0.0.1:6333/collections/$COLLECTION" \
-H 'Content-Type: application/json' \
-d '{ "vectors": { "size": 384, "distance": "Cosine" } }' > /dev/null
# Step D: Recover
# Step E: Recover
echo " [Remote] Triggering recover API..."
curl -s -X PUT "http://127.0.0.1:6333/collections/$COLLECTION/snapshots/recover" \
-H 'Content-Type: application/json' \
-d '{ "location": "file:///qdrant/'$SNAPSHOT_NAME'" }' > /dev/null
# Step E: Cleanup
# Step F: Cleanup
docker exec $QDRANT_CONTAINER rm /qdrant/$SNAPSHOT_NAME
rm -rf "$TGT_PATH/qdrant_tmp"
EOF
@@ -113,7 +122,7 @@ EOF
echo " ✅ Restore complete."
# 5. Local Cleanup
# NOTE(review): the step number in the comment above matches the older "5/5"
# message; the newer progress message counts this as step 6 of 6 — confirm and
# renumber.
echo "🧹 5/5 Cleaning up..."
echo "🧹 6/6 Cleaning up..."
# Remove the local work directory, which holds the downloaded snapshot and its
# compressed .gz copy.
# NOTE(review): $WORK_DIR is defined outside this view — confirm it can never be
# empty before this recursive delete.
rm -rf "$WORK_DIR"
# Delete snapshot from local Qdrant server to save space
# The response is discarded and curl runs without --fail, so this call does not
# report an HTTP-level failure of the delete.
curl -s -X DELETE "$LOCAL_QDRANT_URL/collections/$COLLECTION/snapshots/$SNAPSHOT_NAME" > /dev/null