From fb98c8237e056dacd0ec0549f90416527ab1a715 Mon Sep 17 00:00:00 2001 From: Marc Mintel Date: Wed, 18 Mar 2026 10:32:54 +0100 Subject: [PATCH] feat(qdrant-sync): add gzip compression before transfer to reduce upload size --- scripts/qdrant-sync.sh | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/scripts/qdrant-sync.sh b/scripts/qdrant-sync.sh index 99fee160..21f37840 100755 --- a/scripts/qdrant-sync.sh +++ b/scripts/qdrant-sync.sh @@ -69,43 +69,52 @@ echo "⬇️ 2/5 Downloading snapshot..." curl --max-time $TIMEOUT -s -o "$WORK_DIR/$SNAPSHOT_NAME" "$LOCAL_QDRANT_URL/collections/$COLLECTION/snapshots/$SNAPSHOT_NAME" echo " βœ… Downloaded to $WORK_DIR/$SNAPSHOT_NAME" -# 3. Transfer Snapshot -echo "πŸ“€ 3/5 Uploading snapshot to Alpha ($SSH_HOST)..." +# 3. Compress and Transfer Snapshot +echo "πŸ“¦ 3/6 Compressing snapshot to save bandwidth..." +gzip -c "$WORK_DIR/$SNAPSHOT_NAME" > "$WORK_DIR/$SNAPSHOT_NAME.gz" +echo " βœ… Compressed $SNAPSHOT_NAME.gz" + +echo "πŸ“€ 4/6 Uploading compressed snapshot to Alpha ($SSH_HOST)..." SSH_OPTS="-o ServerAliveInterval=60 -o ServerAliveCountMax=10 -o ConnectTimeout=30" ssh $SSH_OPTS "$SSH_HOST" "mkdir -p $TGT_PATH/qdrant_tmp" rsync --partial --progress --timeout=600 -e "ssh $SSH_OPTS" \ - "$WORK_DIR/$SNAPSHOT_NAME" "$SSH_HOST:$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME" + "$WORK_DIR/$SNAPSHOT_NAME.gz" "$SSH_HOST:$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME.gz" echo " βœ… Upload complete." # 4. Restore Snapshot on Remote Server -echo "πŸ”„ 4/5 Restoring snapshot on target container ($QDRANT_CONTAINER)..." +echo "πŸ”„ 5/6 Restoring snapshot on target container ($QDRANT_CONTAINER)..." # Qdrant restore process: +# - Extract snapshot on server # - Recreate collection (so it is clean) # - Download snapshot to container # - Recover from snapshot file ssh $SSH_OPTS "$SSH_HOST" << EOF set -e - # Step A: Copy file into the container + # Step A: Extract the compressed file + echo " [Remote] Extracting snapshot..." + gunzip -f "$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME.gz" + + # Step B: Copy file into the container docker cp "$TGT_PATH/qdrant_tmp/$SNAPSHOT_NAME" $QDRANT_CONTAINER:/qdrant/$SNAPSHOT_NAME - # Step B: Delete existing collection + # Step C: Delete existing collection curl -s -X DELETE "http://127.0.0.1:6333/collections/$COLLECTION" > /dev/null - # Step C: Re-create empty collection (required before recovery) + # Step D: Re-create empty collection (required before recovery) # wir nutzen die standard vector config vom Kabelfachmann (Cosine, 384 dim fΓΌr all-MiniLM-L6-v2) curl -s -X PUT "http://127.0.0.1:6333/collections/$COLLECTION" \ -H 'Content-Type: application/json' \ -d '{ "vectors": { "size": 384, "distance": "Cosine" } }' > /dev/null - # Step D: Recover + # Step E: Recover echo " [Remote] Triggering recover API..." curl -s -X PUT "http://127.0.0.1:6333/collections/$COLLECTION/snapshots/recover" \ -H 'Content-Type: application/json' \ -d '{ "location": "file:///qdrant/'$SNAPSHOT_NAME'" }' > /dev/null - # Step E: Cleanup + # Step F: Cleanup docker exec $QDRANT_CONTAINER rm /qdrant/$SNAPSHOT_NAME rm -rf "$TGT_PATH/qdrant_tmp" EOF @@ -113,7 +122,7 @@ EOF echo " βœ… Restore complete." # 5. Local Cleanup -echo "🧹 5/5 Cleaning up..." +echo "🧹 6/6 Cleaning up..." rm -rf "$WORK_DIR" # Delete snapshot from local Qdrant server to save space curl -s -X DELETE "$LOCAL_QDRANT_URL/collections/$COLLECTION/snapshots/$SNAPSHOT_NAME" > /dev/null