
Commit a0d672c

tellet-q, pre-commit-ci[bot], and KShivendu authored
Add benchmark on parallel upload and search (#215)
* Add parallel upload&search workflow
* Introduce new step: search without upsert
* Update tools/upload_parallel_results_postgres.sh
  Co-authored-by: Kumar Shivendu <[email protected]>
* Update tools/run_ci.sh
  Co-authored-by: Kumar Shivendu <[email protected]>
* Fix indent
* Explicit mode check
* Provide CONTAINER_MEM_LIMIT explicitly
* Store parallel results in separate folder

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Kumar Shivendu <[email protected]>
1 parent ea53db4 commit a0d672c

9 files changed: +275 −11 lines changed

.github/workflows/continuous-benchmark.yaml

+87
@@ -283,6 +283,93 @@ jobs:
                 }
               ]
             }
+      env:
+        SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
+        SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
+
+  runParallelBenchmark:
+    runs-on: ubuntu-latest
+    needs: [ runLoadTimeBenchmark, runTenantsBenchmark ]
+    if: ${{ always() }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: webfactory/[email protected]
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+      - name: Benches
+        id: benches
+        run: |
+          export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
+          export POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
+          export POSTGRES_HOST=${{ secrets.POSTGRES_HOST }}
+          bash -x tools/setup_ci.sh
+
+          set +e
+
+          # Benchmark parallel search&upload
+
+          export ENGINE_NAME="qdrant-continuous-benchmark"
+          export DATASETS="laion-small-clip"
+          export BENCHMARK_STRATEGY="parallel"
+          export POSTGRES_TABLE="benchmark_parallel_search_upload"
+
+          # Benchmark the dev branch:
+          export QDRANT_VERSION=ghcr/dev
+          timeout 30m bash -x tools/run_ci.sh
+
+          # Benchmark the master branch:
+          export QDRANT_VERSION=docker/master
+          timeout 30m bash -x tools/run_ci.sh
+
+          set -e
+      - name: Fail job if any of the benches failed
+        if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
+        run: exit 1
+      - name: Send Notification
+        if: failure() || cancelled()
+        uses: slackapi/[email protected]
+        with:
+          payload: |
+            {
+              "text": "CI benchmarks (runTenantsBenchmark) run status: ${{ job.status }}",
+              "blocks": [
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "CI benchmarks (runTenantsBenchmark) failed because of *${{ steps.benches.outputs.failed }}*."
+                  }
+                },
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "Qdrant version: *${{ steps.benches.outputs.qdrant_version }}*."
+                  }
+                },
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "Engine: *${{ steps.benches.outputs.engine_name }}*."
+                  }
+                },
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "Dataset: *${{ steps.benches.outputs.dataset }}*."
+                  }
+                },
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "View the results <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|here>"
+                  }
+                }
+              ]
+            }
       env:
         SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
         SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
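The new runParallelBenchmark job follows the same pattern as the existing jobs: the two tools/run_ci.sh runs are wrapped in `set +e` / `set -e` so a failing benchmark does not abort the step, and the follow-up steps rely on `steps.benches.outputs.failed` to fail the job and fill in the Slack message. This commit does not show where that output is written; the sketch below is only a hypothetical illustration of how such an output is typically produced (the helper name and wiring are assumptions, the real logic lives in tools/run_ci.sh / tools/setup_ci.sh):

```bash
# Hypothetical sketch of how steps.benches.outputs.failed could be populated
# inside the Benches step; not part of this commit.
report_failure() {
  # Writing key=value to $GITHUB_OUTPUT exposes it as steps.<step-id>.outputs.<key>
  echo "failed=$1" >> "$GITHUB_OUTPUT"
}

timeout 30m bash -x tools/run_ci.sh
rc=$?
if [[ $rc -eq 124 ]]; then
  report_failure "timeout"   # GNU timeout exits with 124 when the limit is hit
elif [[ $rc -ne 0 ]]; then
  report_failure "error"
fi
```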

engine/base_client/client.py

+5 −3

@@ -1,7 +1,7 @@
 import json
 import os
 from datetime import datetime
-from typing import List
+from typing import List, Optional
 
 from benchmark import ROOT_DIR
 from benchmark.dataset import Dataset
@@ -84,6 +84,7 @@ def run_experiment(
         skip_upload: bool = False,
         skip_search: bool = False,
         skip_if_exists: bool = True,
+        skip_configure: Optional[bool] = False,
     ):
         execution_params = self.configurator.execution_params(
             distance=dataset.config.distance, vector_size=dataset.config.vector_size
@@ -101,8 +102,9 @@ def run_experiment(
             return
 
         if not skip_upload:
-            print("Experiment stage: Configure")
-            self.configurator.configure(dataset)
+            if not skip_configure:
+                print("Experiment stage: Configure")
+                self.configurator.configure(dataset)
 
         print("Experiment stage: Upload")
         upload_stats = self.uploader.upload(
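The new `skip_configure` flag exists because the parallel benchmark runs two clients against one server (see tools/run_experiment.sh below): the collection is created and configured once by an earlier upload pass, so the concurrent upload client must append to it rather than recreate it, while the concurrent search client never reaches the configure stage anyway because it is guarded by `if not skip_upload`. A minimal sketch of the two concurrent invocations, with the flag combinations taken from this commit (engine, dataset, and host values are placeholders):

```bash
# Uploader: writes points into the already-configured collection
python run.py --engines "qdrant-continuous-benchmark" --datasets "laion-small-clip" \
    --host "$SERVER_IP" --no-skip-if-exists --skip-search --skip-configure &

# Searcher: runs queries only; upload (and therefore configure) is skipped
python run.py --engines "qdrant-continuous-benchmark" --datasets "laion-small-clip" \
    --host "$SERVER_IP" --no-skip-if-exists --skip-upload &

wait
```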

run.py

+7 −2

@@ -1,6 +1,6 @@
 import fnmatch
 import traceback
-from typing import List
+from typing import List, Optional
 
 import stopit
 import typer
@@ -23,6 +23,7 @@ def run(
     skip_if_exists: bool = False,
     exit_on_error: bool = True,
     timeout: float = 86400.0,
+    skip_configure: Optional[bool] = False,
 ):
     """
     Example:
@@ -57,7 +58,11 @@ def run(
 
     with stopit.ThreadingTimeout(timeout) as tt:
         client.run_experiment(
-            dataset, skip_upload, skip_search, skip_if_exists
+            dataset,
+            skip_upload,
+            skip_search,
+            skip_if_exists,
+            skip_configure,
         )
         client.delete_client()
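run.py is a Typer CLI, so the new `skip_configure: Optional[bool] = False` parameter should surface as a `--skip-configure / --no-skip-configure` flag pair while defaulting to the old behaviour. Note that `run_experiment` is called with positional arguments, so their order has to stay in sync with the signature in engine/base_client/client.py. A hedged usage example (engine and dataset names are placeholders):

```bash
# Default behaviour (configure, upload, search) is unchanged:
python run.py --engines "qdrant-continuous-benchmark" --datasets "laion-small-clip"

# Reuse an existing collection instead of re-creating it:
python run.py --engines "qdrant-continuous-benchmark" --datasets "laion-small-clip" --skip-configure
```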

tools/run_ci.sh

+11 −1

@@ -39,10 +39,20 @@ else
     # any other strategies are considered to have search & upload results
     export SEARCH_RESULTS_FILE=$(ls -t results/*-search-*.json | head -n 1)
     export UPLOAD_RESULTS_FILE=$(ls -t results/*-upload-*.json | head -n 1)
+
+    if [[ "$BENCHMARK_STRATEGY" == "parallel" ]]; then
+        export PARALLEL_UPLOAD_RESULTS_FILE=$(ls -t results/parallel/*-upload-*.json | head -n 1)
+        export PARALLEL_SEARCH_RESULTS_FILE=$(ls -t results/parallel/*-search-*.json | head -n 1)
+    fi
 fi
 
 export VM_RSS_MEMORY_USAGE_FILE=$(ls -t results/vm-rss-memory-usage-*.txt | head -n 1)
 export RSS_ANON_MEMORY_USAGE_FILE=$(ls -t results/rss-anon-memory-usage-*.txt | head -n 1)
 export ROOT_API_RESPONSE_FILE=$(ls -t results/root-api-*.json | head -n 1)
 
-bash -x "${SCRIPT_PATH}/upload_results_postgres.sh"
+if [[ "$BENCHMARK_STRATEGY" == "parallel" ]]; then
+    bash -x "${SCRIPT_PATH}/upload_parallel_results_postgres.sh"
+else
+    bash -x "${SCRIPT_PATH}/upload_results_postgres.sh"
+fi
+
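As elsewhere in this script, `ls -t ... | head -n 1` exports the newest matching result file. If a parallel run produced nothing under results/parallel/, those variables end up empty and the upload script would receive blank paths; a small guard along these lines (illustrative only, not part of the commit) would make that case fail loudly:

```bash
# Illustrative guard, not in the commit: stop if the expected parallel results are missing
if [[ "$BENCHMARK_STRATEGY" == "parallel" ]]; then
    if [[ -z "${PARALLEL_UPLOAD_RESULTS_FILE:-}" || -z "${PARALLEL_SEARCH_RESULTS_FILE:-}" ]]; then
        echo "No parallel upload/search results found under results/parallel/" >&2
        exit 1
    fi
fi
```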

tools/run_client_script.sh

+15 −1

@@ -3,7 +3,7 @@
 PS4='ts=$(date "+%Y-%m-%dT%H:%M:%SZ") level=DEBUG line=$LINENO file=$BASH_SOURCE '
 set -euo pipefail
 
-# Possible values are: full|upload|search
+# Possible values are: full|upload|search|parallel|snapshot
 EXPERIMENT_MODE=${1:-"full"}
 
 CLOUD_NAME=${CLOUD_NAME:-"hetzner"}
@@ -52,6 +52,7 @@ fi
 
 echo "Gather experiment results..."
 result_files_arr=()
+result_parallel_files_arr=()
 
 if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "upload" ]]; then
     UPLOAD_RESULT_FILE=$(ssh "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "ls -t results/*-upload-*.json | head -n 1")
@@ -63,9 +64,22 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "search" ]]; t
     result_files_arr+=("$SEARCH_RESULT_FILE")
 fi
 
+if [[ "$EXPERIMENT_MODE" == "parallel" ]]; then
+    UPLOAD_RESULT_FILE=$(ssh "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "ls -t results/parallel/*-upload-*.json | head -n 1")
+    result_parallel_files_arr+=("$UPLOAD_RESULT_FILE")
+
+    SEARCH_RESULT_FILE=$(ssh "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "ls -t results/parallel/*-search-*.json | head -n 1")
+    result_parallel_files_arr+=("$SEARCH_RESULT_FILE")
+fi
+
 mkdir -p results
+mkdir -p results/parallel
 
 for RESULT_FILE in "${result_files_arr[@]}"; do
     # -p preseves modification time, access time, and modes (but not change time)
     scp -p "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/${RESULT_FILE}" "./results"
 done
+
+for RESULT_FILE in "${result_parallel_files_arr[@]}"; do
+    scp -p "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/${RESULT_FILE}" "./results/parallel"
+done
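The parallel result files are copied into a separate ./results/parallel directory so that the existing `results/*-search-*.json` / `results/*-upload-*.json` globs in tools/run_ci.sh keep matching only the sequential results. One hedged portability note: this script runs under `set -euo pipefail`, and on Bash older than 4.4 expanding an empty array such as `result_parallel_files_arr` trips the `nounset` check; if the client VM ever uses such a shell, a guarded expansion like the sketch below keeps the new loop a no-op when no parallel results were gathered:

```bash
# Portability sketch (not part of the commit): safe expansion of a possibly-empty
# array under `set -u` on bash < 4.4
for RESULT_FILE in ${result_parallel_files_arr[@]+"${result_parallel_files_arr[@]}"}; do
    scp -p "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/${RESULT_FILE}" "./results/parallel"
done
```
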
tools/run_experiment.sh

+32 −1

@@ -29,7 +29,7 @@ if [[ -z "$PRIVATE_IP_OF_THE_SERVER" ]]; then
 fi
 
 if [[ -z "$EXPERIMENT_MODE" ]]; then
-    echo "EXPERIMENT_MODE is not set, possible values are: full | upload | search | snapshot"
+    echo "EXPERIMENT_MODE is not set, possible values are: full | upload | search | snapshot | parallel"
     exit 1
 fi
 
@@ -75,6 +75,37 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "search" ]]; t
 fi
 
 
+if [[ "$EXPERIMENT_MODE" == "parallel" ]]; then
+    echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"
+
+    docker pull qdrant/vector-db-benchmark:latest
+
+    echo "Starting ci-benchmark-upload container"
+    docker run \
+        --rm \
+        --name ci-benchmark-upload \
+        -v "$HOME/results/parallel:/code/results" \
+        qdrant/vector-db-benchmark:latest \
+        python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-search --skip-configure &
+    UPLOAD_PID=$!
+
+    echo "Starting ci-benchmark-search container"
+    docker run \
+        --rm \
+        --name ci-benchmark-search \
+        -v "$HOME/results/parallel:/code/results" \
+        qdrant/vector-db-benchmark:latest \
+        python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload &
+    SEARCH_PID=$!
+
+    echo "Waiting for both containers to finish"
+    wait $UPLOAD_PID
+    wait $SEARCH_PID
+
+    echo "EXPERIMENT_MODE=$EXPERIMENT_MODE DONE"
+fi
+
+
 if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
     echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"
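Backgrounding the two `docker run` commands and `wait`ing on their PIDs is what makes the upload and the search phase overlap; both containers mount the same $HOME/results/parallel directory, which is where run_client_script.sh later picks the results up. `wait <pid>` returns that process's exit status, so if this script runs under `set -e`, a failing upload container ends the experiment before the search container is even reaped. A purely illustrative variant (not part of the commit) that collects both statuses before deciding:

```bash
# Illustrative only: reap both containers, then decide
upload_rc=0
search_rc=0
wait "$UPLOAD_PID" || upload_rc=$?
wait "$SEARCH_PID" || search_rc=$?

if [[ $upload_rc -ne 0 || $search_rc -ne 0 ]]; then
    echo "Parallel experiment failed: upload_rc=$upload_rc search_rc=$search_rc" >&2
    exit 1
fi
```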

tools/run_remote_benchmark.sh

+22 −2

@@ -58,7 +58,7 @@ case "$BENCHMARK_STRATEGY" in
 
         SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-with-volume"}
 
-        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME"
+        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb"
 
         bash -x "${SCRIPT_PATH}/run_client_script.sh" "upload"
 
@@ -74,7 +74,7 @@ case "$BENCHMARK_STRATEGY" in
 
         SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-snapshot"}
 
-        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME"
+        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb"
 
         bash -x "${SCRIPT_PATH}/run_client_script.sh" "snapshot"
 
@@ -85,6 +85,26 @@ case "$BENCHMARK_STRATEGY" in
         bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
         ;;
 
+    "parallel")
+        echo "Parallel benchmark, run upload&search at the same time"
+
+        SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-with-volume"}
+
+        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb"
+
+        bash -x "${SCRIPT_PATH}/run_client_script.sh" "upload"
+
+        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb" "continue"
+
+        bash -x "${SCRIPT_PATH}/run_client_script.sh" "search"
+
+        bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb" "continue"
+
+        bash -x "${SCRIPT_PATH}/run_client_script.sh" "parallel"
+
+        bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
+        ;;
+
     *)
         echo "Invalid BENCHMARK_STRATEGY value: $BENCHMARK_STRATEGY"
         exit 1
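The new `parallel` case reuses the volume-backed server: a plain upload pass populates the collection, the server is restarted in `continue` mode (which also drops the page cache), a search-only pass runs against the existing data, the server is restarted once more, and only then does the combined upload+search pass run before stats are collected; every server start now passes the `25Gb` memory limit explicitly. In CI this sequence is driven through tools/run_ci.sh with the environment exported by the workflow; reproducing it by hand would look roughly like this (Hetzner and Postgres secrets omitted and assumed to be exported already):

```bash
# Roughly how the CI job invokes this strategy (secrets assumed to be set)
export ENGINE_NAME="qdrant-continuous-benchmark"
export DATASETS="laion-small-clip"
export BENCHMARK_STRATEGY="parallel"
export POSTGRES_TABLE="benchmark_parallel_search_upload"
export QDRANT_VERSION=ghcr/dev      # or docker/master

timeout 30m bash -x tools/run_ci.sh
```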

tools/run_server_container_with_volume.sh

+4 −1

@@ -38,10 +38,13 @@ if [[ ${QDRANT_VERSION} == docker/* ]] || [[ ${QDRANT_VERSION} == ghcr/* ]]; the
 if [[ "$EXECUTION_MODE" == "init" ]]; then
     echo "Initialize qdrant from scratch, with qdrant_storage volume"
     DOCKER_COMPOSE="export QDRANT_VERSION=${QDRANT_VERSION}; export CONTAINER_REGISTRY=${CONTAINER_REGISTRY}; export CONTAINER_MEM_LIMIT=${CONTAINER_MEM_LIMIT}; docker compose down; pkill qdrant; docker rm -f qdrant-continuous || true; docker rmi -f ${CONTAINER_REGISTRY}/qdrant/qdrant:${QDRANT_VERSION} || true; docker volume rm -f qdrant_storage || true; docker compose up -d; docker container ls -a"
-else
+elif [[ "$EXECUTION_MODE" == "continue" ]]; then
     # suggest that volume qdrant_storage exist and start qdrant
     echo "Reload qdrant with existing data"
     DOCKER_COMPOSE="export QDRANT_VERSION=${QDRANT_VERSION}; export CONTAINER_REGISTRY=${CONTAINER_REGISTRY}; export CONTAINER_MEM_LIMIT=${CONTAINER_MEM_LIMIT}; docker compose down; pkill qdrant; docker rm -f qdrant-continuous || true; docker rmi -f ${CONTAINER_REGISTRY}/qdrant/qdrant:${QDRANT_VERSION} || true ; sudo bash -c 'sync; echo 1 > /proc/sys/vm/drop_caches'; docker compose up -d; docker container ls -a"
+else
+    echo "Error: unknown execution mode ${EXECUTION_MODE}. Execution mode should be 'init' or 'continue'"
+    exit 1
 fi
 
 ssh -t -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "cd ./projects/vector-db-benchmark/engine/servers/${CONTAINER_NAME} ; $DOCKER_COMPOSE"
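With the explicit mode check, callers must now pass either `init` or `continue`; previously any other value silently fell through to the `continue` branch. This diff does not show how the positional arguments are parsed, but based on how run_remote_benchmark.sh now calls the script, the top of the file presumably maps them roughly as follows (a sketch only, variable defaults assumed):

```bash
# Assumed argument handling near the top of the script (not shown in this diff)
CONTAINER_NAME=${1:-"qdrant-continuous-benchmarks-with-volume"}
CONTAINER_MEM_LIMIT=${2:-"25Gb"}   # now always passed explicitly by the callers
EXECUTION_MODE=${3:-"init"}        # "init" recreates the volume, "continue" reuses it
```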
