Skip to content

Commit f53a3de

Browse files
author
RomaLzhih
committed
feat(ae): add download real world and clean option
1 parent f16892b commit f53a3de

File tree

4 files changed

+110
-10
lines changed

4 files changed

+110
-10
lines changed

docker-run.sh

Lines changed: 101 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,13 @@ Commands:
3939
full Run full artifact evaluation (NODE_SIZE=1000000000)
4040
shell Start a bash shell in the container
4141
stop Stop the running container
42-
clean Remove container and image
42+
clean Remove container, image, and generated files
4343
help Show this help message
4444
4545
Options:
46-
--data-path PATH Set the HOST path where data will be stored (required for data generation)
46+
--data-path PATH Set the HOST path where data will be stored (required for data generation/cleanup)
47+
--download-real-world-data Download real-world data on host (default: true)
48+
--no-download-real-world-data Skip downloading real-world data
4749
--node-size SIZE Set the node size (default: 1000000000)
4850
--cpus NUM Limit CPU cores (default: all available)
4951
--memory SIZE Memory limit (e.g., 16g, default: no limit)
@@ -95,12 +97,33 @@ create_host_dirs() {
9597
print_info "Created host directories: script_ae/data, script_ae/logs, script_ae/plots"
9698
}
9799

100+
# Function to download real-world data on host
101+
# Function to download real-world data on host
#
# Arguments:
#   $1 - host directory that receives the data; it must already exist
#        (may be relative to the current directory)
# Returns:
#   0 when the download script succeeds; non-zero when the data path is not
#   an existing directory, cannot be resolved, script_ae/ cannot be entered,
#   or the download script itself fails.
download_data_host() {
    local data_path=$1
    local abs_data_path

    print_info "Downloading real-world data on HOST..."

    # An unresolved path must never reach the download script: an empty
    # argument would make it fall back to its built-in default location.
    if [ ! -d "$data_path" ]; then
        print_error "Data path is not an existing directory: $data_path"
        return 1
    fi

    # Resolve absolute path for data_path, because the download script is
    # started from inside script_ae/ and a relative path would point elsewhere.
    if command -v realpath &> /dev/null; then
        abs_data_path=$(realpath "$data_path")
    else
        abs_data_path=$(cd "$data_path" && pwd)
    fi

    if [ -z "$abs_data_path" ]; then
        print_error "Could not resolve data path: $data_path"
        return 1
    fi

    print_info "Running download script..."
    (
        # Subshell keeps the caller's working directory untouched; abort if
        # script_ae/ is missing instead of running a script from the wrong place.
        cd script_ae || exit 1
        ./ae_download_real_word_data.sh "$abs_data_path"
    )
}
119+
98120
# Function to run container
99121
run_container() {
100122
local node_size=$1
101123
local cpus=$2
102124
local memory=$3
103125
local data_path=$4
126+
local should_download=$5
104127

105128
if [ -z "$data_path" ]; then
106129
print_error "Data path is required. Use --data-path to specify where data will be stored on the host."
@@ -112,6 +135,13 @@ run_container() {
112135
mkdir -p "$data_path"
113136

114137
create_host_dirs
138+
139+
if [ "$should_download" = "true" ]; then
140+
download_data_host "$data_path"
141+
else
142+
print_warn "Skipping real-world data download."
143+
print_warn "Missing data may cause issues in subsequent scripts!"
144+
fi
115145

116146
local container_data_path="/data"
117147

@@ -155,6 +185,7 @@ run_full_eval() {
155185
local memory=$2
156186
local data_path=$3
157187
local node_size=$4
188+
local should_download=$5
158189

159190
if [ -z "$data_path" ]; then
160191
print_error "Data path is required. Use --data-path to specify where data will be stored on the host."
@@ -169,6 +200,13 @@ run_full_eval() {
169200
mkdir -p "$data_path"
170201

171202
create_host_dirs
203+
204+
if [ "$should_download" = "true" ]; then
205+
download_data_host "$data_path"
206+
else
207+
print_warn "Skipping real-world data download."
208+
print_warn "Missing data may cause issues in subsequent scripts!"
209+
fi
172210

173211
local container_data_path="/data"
174212

@@ -226,13 +264,62 @@ stop_container() {
226264

227265
# Function to clean up
228266
cleanup() {
267+
local data_path=$1
229268
print_info "Cleaning up Docker resources..."
230269

231270
# Stop container if running
232271
if docker ps | grep -q "$CONTAINER_NAME"; then
233272
docker stop "$CONTAINER_NAME"
234273
fi
235274

275+
# Use Docker to remove files (to handle root permissions)
276+
print_info "Removing generated files using Docker..."
277+
278+
local mounts="-v $(pwd)/script_ae:/workspace/script_ae"
279+
local cmd="rm -rf /workspace/script_ae/data /workspace/script_ae/logs /workspace/script_ae/plots"
280+
281+
if [ -n "$data_path" ]; then
282+
# Only mount if data_path exists to avoid docker creating it as root
283+
if [ -d "$data_path" ]; then
284+
# Use realpath for the mount
285+
if command -v realpath &> /dev/null; then
286+
local abs_data_path=$(realpath "$data_path")
287+
else
288+
local abs_data_path=$(cd "$data_path" && pwd)
289+
fi
290+
mounts="$mounts -v $abs_data_path:/workspace/host_data"
291+
cmd="$cmd && rm -rf /workspace/host_data/geometry"
292+
print_info "Also removing real-world data in: $data_path/geometry"
293+
fi
294+
fi
295+
296+
# Determine which image to use for cleanup
297+
local clean_image=""
298+
if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
299+
clean_image="$IMAGE_NAME"
300+
elif docker image inspect "alpine:latest" >/dev/null 2>&1; then
301+
clean_image="alpine:latest"
302+
elif docker image inspect "ubuntu:latest" >/dev/null 2>&1; then
303+
clean_image="ubuntu:latest"
304+
fi
305+
306+
if [ -n "$clean_image" ]; then
307+
print_info "Using image '$clean_image' for cleanup..."
308+
# Use bash if available, otherwise sh (for alpine)
309+
local shell_cmd="/bin/bash"
310+
if [[ "$clean_image" == *"alpine"* ]]; then
311+
shell_cmd="/bin/sh"
312+
fi
313+
314+
docker run --rm $mounts "$clean_image" "$shell_cmd" -c "$cmd"
315+
else
316+
print_warn "No suitable Docker image found for cleanup. Attempting local removal (may fail if files are root-owned)..."
317+
rm -rf script_ae/data script_ae/logs script_ae/plots
318+
if [ -n "$data_path" ] && [ -d "$data_path/geometry" ]; then
319+
rm -rf "$data_path/geometry"
320+
fi
321+
fi
322+
236323
# Remove stopped containers
237324
docker container prune -f
238325

@@ -258,13 +345,22 @@ CPUS=""
258345
MEMORY=""
259346
CUSTOM_NODE_SIZE=""
260347
CUSTOM_DATA_PATH=""
348+
DOWNLOAD_DATA="true"
261349

262350
while [[ $# -gt 0 ]]; do
263351
case $1 in
264352
--data-path)
265353
CUSTOM_DATA_PATH="$2"
266354
shift 2
267355
;;
356+
--download-real-world-data)
357+
DOWNLOAD_DATA="true"
358+
shift
359+
;;
360+
--no-download-real-world-data)
361+
DOWNLOAD_DATA="false"
362+
shift
363+
;;
268364
--node-size)
269365
CUSTOM_NODE_SIZE="$2"
270366
shift 2
@@ -295,11 +391,11 @@ pull)
295391
;;
296392
run)
297393
NODE_SIZE_TO_USE="${CUSTOM_NODE_SIZE:-$NODE_SIZE}"
298-
run_container "$NODE_SIZE_TO_USE" "$CPUS" "$MEMORY" "$CUSTOM_DATA_PATH"
394+
run_container "$NODE_SIZE_TO_USE" "$CPUS" "$MEMORY" "$CUSTOM_DATA_PATH" "$DOWNLOAD_DATA"
299395
;;
300396
full)
301397
NODE_SIZE_TO_USE="${CUSTOM_NODE_SIZE:-$NODE_SIZE}"
302-
run_full_eval "$CPUS" "$MEMORY" "$CUSTOM_DATA_PATH" "$NODE_SIZE_TO_USE"
398+
run_full_eval "$CPUS" "$MEMORY" "$CUSTOM_DATA_PATH" "$NODE_SIZE_TO_USE" "$DOWNLOAD_DATA"
303399
;;
304400
shell)
305401
open_shell
@@ -308,7 +404,7 @@ stop)
308404
stop_container
309405
;;
310406
clean)
311-
cleanup
407+
cleanup "$CUSTOM_DATA_PATH"
312408
;;
313409
help | --help | -h)
314410
usage

script_ae/ae_data_generate.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,3 @@ DIMENSION=3
1818
./../build/data_generator -p ${DATA_PREFIX} -n ${NODE_SIZE} -d ${DIMENSION} -file_num 2 -varden 1 -axis_max 1000000
1919
./../build/data_washer -coord_type 0 -d ${DIMENSION} -usage 2 -p "${DATA_PREFIX}/uniform/${NODE_SIZE}_${DIMENSION}/1.in" -output_suffix "_sort_by_0.in"
2020
./../build/data_washer -coord_type 0 -d ${DIMENSION} -usage 2 -p "${DATA_PREFIX}/uniform/${NODE_SIZE}_${DIMENSION}/2.in" -output_suffix "_sort_by_0.in"
21-
22-
mkdir -p "${DATA_PREFIX}/geometry"
23-
wget -O "${DATA_PREFIX}/geometry/Cosmo50_round_no_dup.in" "https://www.dropbox.com/scl/fi/noh6nw2xl1ymtqtqrvgsu/Cosmo50_round_no_dup.in?rlkey=vggpfsy5v2iles0agaz2fa153&st=n3v6b4c7&dl=1"
24-
wget -O "${DATA_PREFIX}/geometry/osm_round_no_dup.in" "https://www.dropbox.com/scl/fi/0op87nm597d94392is9kw/osm_round_no_dup.in?rlkey=55fei6om4vs32h1pyir2digdh&st=2i147czc&dl=1"

script_ae/ae_download_real_word_data.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
#
# Download the real-world datasets into "<DATA_PREFIX>/geometry".
#
# Usage: ./<this script> [DATA_PREFIX]
#   DATA_PREFIX  directory that receives the geometry/ sub-directory
#                (default: /data/zmen002/kdtree)
#
# Exits non-zero as soon as the target directory cannot be created or a
# download fails, so callers never continue with truncated input files.
set -o errexit -o nounset -o pipefail
set -o xtrace

DATA_PREFIX="${1:-/data/zmen002/kdtree}"
GEOMETRY_DIR="${DATA_PREFIX}/geometry"

# fetch NAME URL
# Download URL to "${GEOMETRY_DIR}/NAME". The data is first written to
# "NAME.part" and renamed only after wget succeeds, so a failed or interrupted
# transfer never leaves a partial file under the final name.
fetch() {
    local name=$1
    local url=$2
    local dest="${GEOMETRY_DIR}/${name}"
    local partial="${dest}.part"

    if ! wget -O "${partial}" "${url}"; then
        rm -f -- "${partial}"
        echo "Failed to download ${name}" >&2
        return 1
    fi
    mv -f -- "${partial}" "${dest}"
}

mkdir -p "${GEOMETRY_DIR}"
fetch "Cosmo50_round_no_dup.in" "https://www.dropbox.com/scl/fi/noh6nw2xl1ymtqtqrvgsu/Cosmo50_round_no_dup.in?rlkey=vggpfsy5v2iles0agaz2fa153&st=n3v6b4c7&dl=1"
fetch "osm_round_no_dup.in" "https://www.dropbox.com/scl/fi/0op87nm597d94392is9kw/osm_round_no_dup.in?rlkey=55fei6om4vs32h1pyir2digdh&st=2i147czc&dl=1"

script_ae/run.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,5 @@ rm -rf "${DATA_PREFIX}/ss_varden_bigint"
6565
rm -rf "${DATA_PREFIX}/uniform_bigint"
6666
rm -rf "${DATA_PREFIX}/ss_varden"
6767
rm -rf "${DATA_PREFIX}/uniform"
68-
rm -rf "${DATA_PREFIX}/geometry"
6968

7069
printf "\n \n =================DONE! HAVE A GOOD DAY!================= \n \n "

0 commit comments

Comments
 (0)