
Commit

Cleanup MR so it's a little more professional
tim-win committed Sep 1, 2024
1 parent 4041949 commit bf71d2b
Showing 12 changed files with 117 additions and 57 deletions.
76 changes: 43 additions & 33 deletions Dockerfile
@@ -1,53 +1,63 @@
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS DEPENDENCIES
# Base image with CUDA support
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS base

ARG MODEL="yolo_world_l_dual_vlpan_l2norm_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py"
ARG WEIGHT="yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth"

ENV FORCE_CUDA="1"
ENV MMCV_WITH_OPS=1
# Set environment variables
ENV FORCE_CUDA="1" \
MMCV_WITH_OPS=1 \
DEBIAN_FRONTEND=noninteractive

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
python3-pip \
python3-pip \
libgl1-mesa-glx \
libsm6 \
libxext6 \
libxrender-dev \
libglib2.0-0 \
git \
python3-dev \
python3-wheel \
curl

# Uncomment the following if you want to download a specific set of weights
# RUN mkdir weights
# RUN curl -o weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT

RUN pip3 install --upgrade pip \
&& pip3 install wheel \
&& pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 \
&& pip3 install \
libsm6 \
libxext6 \
libxrender-dev \
libglib2.0-0 \
git \
python3-dev \
python3-wheel \
curl \
&& rm -rf /var/lib/apt/lists/*

# Install Python dependencies
FROM base AS python_deps

RUN pip3 install --upgrade pip wheel \
&& pip3 install --no-cache-dir torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 \
&& pip3 install --no-cache-dir \
gradio==4.16.0 \
opencv-python==4.9.0.80 \
supervision \
mmengine==0.10.4 \
setuptools \
openmim \
onnx \
onnxsim \
&& mim install mmcv==2.1.0 \
&& mim install mmdet==3.3.0 \
&& pip install git+https://github.com/onuralpszr/mmyolo.git
&& pip3 install --no-cache-dir git+https://github.com/onuralpszr/mmyolo.git

# Clone and install YOLO-World
FROM python_deps AS yolo_world

FROM DEPENDENCIES as INSTALLING_YOLO
RUN git clone --recursive https://github.com/tim-win/YOLO-World /yolo/
#COPY . /yolo
RUN git clone --recursive https://github.com/AILab-CVC/YOLO-World /yolo/
WORKDIR /yolo

RUN pip3 install -e .[demo]

RUN pip3 install onnx onnxsim
# Final stage
FROM yolo_world AS final

ARG MODEL="yolo_world_l_dual_vlpan_l2norm_2e-3_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py"
ARG WEIGHT="yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_train_pretrained-0e566235.pth"

FROM INSTALLING_YOLO as OK_THIS_PART_IS_TRICKY_DONT_HATE
# Create weights directory and set permissions
RUN mkdir /weights/ \
&& chmod a+rwx /yolo/configs/*/*

RUN mkdir /weights/
RUN chmod a+rwx /yolo/configs/*/*
# Optionally download weights (commented out by default)
# RUN curl -o /weights/$WEIGHT -L https://huggingface.co/wondervictor/YOLO-World/resolve/main/$WEIGHT

CMD [ "bash" ]
# Set the default command
CMD ["bash"]
1 change: 1 addition & 0 deletions README.md
@@ -37,6 +37,7 @@ We recommend that everyone **use English to communicate on issues**, as this hel
For business licensing and other related inquiries, don't hesitate to contact `[email protected]`.

## 🔥 Updates
`[2024-8-31]`: Segmentation demo added to the demo/ folder. Try it out in docker with `./build_and_run.sh seg-l`!
`[2024-7-8]`: YOLO-World now has been integrated into [ComfyUI](https://github.com/StevenGrove/ComfyUI-YOLOWorld)! Come and try adding YOLO-World to your workflow now! You can access it at [StevenGrove/ComfyUI-YOLOWorld](https://github.com/StevenGrove/ComfyUI-YOLOWorld)!
`[2024-5-18]:` YOLO-World models have been [integrated with the FiftyOne computer vision toolkit](https://docs.voxel51.com/integrations/ultralytics.html#open-vocabulary-detection) for streamlined open-vocabulary inference across image and video datasets.
`[2024-5-16]:` Hey guys! Long time no see! This update contains (1) [fine-tuning guide](https://github.com/AILab-CVC/YOLO-World?#highlights--introduction) and (2) [TFLite Export](./docs/tflite_deploy.md) with INT8 Quantization.
70 changes: 57 additions & 13 deletions build_and_run.sh
@@ -1,8 +1,33 @@
#!/usr/bin/env bash

# Exit immediately if a command exits with a non-zero status.
set -e

MODEL_DIR="../models/models-yoloworld"
# Set MODEL_DIR if not already set in the environment
: "${MODEL_DIR:="../models/models-yoloworld"}"

# DocString for the script
: '
This script builds and runs a Docker container for YOLO-World demos.
It supports various pre-trained models and configurations for object detection and segmentation.
Usage:
./build_and_run.sh <model-key>
Environment Variables:
MODEL_DIR: Path to the directory containing model weights (default: "../models/models-yoloworld")
Arguments:
<model-key>: Key for the desired model configuration (see available keys below)
Available model keys:
seg-l, seg-l-seghead, seg-m, seg-m-seghead,
pretrain-l-clip-800ft, pretrain-l-clip, pretrain-l-1280ft, pretrain-l,
pretrain-m-1280ft, pretrain-m, pretrain-s-1280ft, pretrain-s,
pretrain-x-cc3mlite, pretrain-x-1280ft
'

# Define associative array for model configurations
declare -A models
models["seg-l"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_allmodules_finetune_lvis-8c58c916.pth"
models["seg-l-seghead"]="yolo_world_v2_seg_l_vlpan_bn_2e-4_80e_8gpus_seghead_finetune_lvis.py yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_seghead_finetune_lvis-5a642d30.pth"
@@ -19,33 +44,52 @@ models["pretrain-s"]="yolo_world_v2_s_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_
models["pretrain-x-cc3mlite"]="yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_cc3mlite_train_lvis_minival.py yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain-8698fbfa.pth"
models["pretrain-x-1280ft"]="yolo_world_v2_x_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain_1280ft-14996a36.pth"

if [ $# -eq 0 ]; then
# Function to display usage information
show_usage() {
echo "Usage: $0 <model-key>"
echo "Available model keys:"
for key in "${!models[@]}"; do
echo " $key"
done
echo "Usage: $0 <model-key>"
}

# Check if a model key is provided
if [ $# -eq 0 ]; then
show_usage
exit 1
fi

model_key=$1

# Validate the model key
if [ -z "${models[$model_key]}" ]; then
echo "Invalid model key. Available keys are:"
for key in "${!models[@]}"; do
echo " $key"
done
echo "Invalid model key."
show_usage
exit 1
fi

read MODEL WEIGHT <<< "${models[$model_key]}"
# Extract model and weight information
read -r MODEL WEIGHT <<< "${models[$model_key]}"

# Set configuration directory and demo file based on model type
config_dir="configs/pretrain"
demo_file=demo/gradio_demo.py
demo_file="demo/gradio_demo.py"
if [[ $model_key == seg-* ]]; then
export config_dir="configs/segmentation"
export demo_file="demo/segmentation_demo.py"
config_dir="configs/segmentation"
demo_file="demo/segmentation_demo.py"
fi

# docker build -f ./Dockerfile --build-arg="MODEL=$MODEL" --build-arg="WEIGHT=$WEIGHT" -t "yolo-demo:latest" . && \
docker run -it -v "$(readlink -f $MODEL_DIR):/weights/" --runtime nvidia -p 8080:8080 "yolo-demo:latest" python3 $demo_file "$config_dir/$MODEL" "/weights/$WEIGHT"
# Build Docker image and run container
echo "Building Docker image..."
docker build -f ./Dockerfile --no-cache \
--build-arg="MODEL=$MODEL" \
--build-arg="WEIGHT=$WEIGHT" \
-t "yolo-demo:latest" .

echo "Running Docker container..."
docker run -it \
-v "$(readlink -f "$MODEL_DIR"):/weights/" \
--runtime nvidia \
-p 8080:8080 \
"yolo-demo:latest" \
python3 "$demo_file" "$config_dir/$MODEL" "/weights/$WEIGHT"

@@ -18,7 +18,6 @@
weight_decay = 0.05
train_batch_size_per_gpu = 16
load_from = 'pretrained_models/yolo_world_m_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_train-c6237d5b.pth'
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
persistent_workers = False


@@ -16,7 +16,6 @@
weight_decay = 0.025
train_batch_size_per_gpu = 4
load_from = "pretrained_models/yolo_world_v2_l_obj365v1_goldg_pretrain-a82b1fe3.pth"
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
img_scale = (1280, 1280)


@@ -15,7 +15,6 @@
base_lr = 2e-3
weight_decay = 0.05 / 2
train_batch_size_per_gpu = 16
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
# model settings
model = dict(

@@ -15,7 +15,6 @@
base_lr = 2e-3
weight_decay = 0.05 / 2
train_batch_size_per_gpu = 16
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
# model settings
model = dict(

@@ -15,7 +15,6 @@
base_lr = 2e-3
weight_decay = 0.05 / 2
train_batch_size_per_gpu = 16
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
# model settings
model = dict(

@@ -15,7 +15,6 @@
base_lr = 2e-3
weight_decay = 0.05 / 2
train_batch_size_per_gpu = 16
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
img_scale = (1280, 1280)


@@ -15,7 +15,6 @@
base_lr = 2e-3
weight_decay = 0.05 / 2
train_batch_size_per_gpu = 16
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
# model settings
model = dict(

@@ -16,7 +16,6 @@
weight_decay = 0.05
train_batch_size_per_gpu = 8
load_from = 'pretrained_models/yolo_world_l_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_cc3mlite_train-ca93cd1f.pth'
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
persistent_workers = False

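The config diffs above all replace a commented-out local checkpoint path with the Hugging Face Hub identifier `openai/clip-vit-base-patch32`. As a rough sketch of what loading such an identifier looks like (this assumes the `transformers` package; YOLO-World's actual text-encoder wiring may differ):

```python
from transformers import CLIPTokenizer, CLIPTextModelWithProjection

text_model_name = 'openai/clip-vit-base-patch32'

# Downloaded from the Hub on first use, then served from the local cache
tokenizer = CLIPTokenizer.from_pretrained(text_model_name)
text_model = CLIPTextModelWithProjection.from_pretrained(text_model_name)

tokens = tokenizer(["a photo of a dog"], return_tensors="pt", padding=True)
text_embeds = text_model(**tokens).text_embeds  # projected text features, shape (1, 512)
```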
19 changes: 16 additions & 3 deletions demo/README.md
@@ -19,11 +19,24 @@ pip install gradio==4.16.0
python demo/demo.py path/to/config path/to/weights
```

Additionaly, you can use a Dockerfile to build an image with gradio. As a prerequisite, make sure you have respective drivers installed alongside [nvidia-container-runtime](https://stackoverflow.com/questions/59691207/docker-build-with-nvidia-runtime). Replace MODEL_NAME and WEIGHT_NAME with the respective values or ommit this and use default values from the [Dockerfile](Dockerfile#3)
Additionally, you can use our Docker build system for an easier setup:

```bash
docker build --build-arg="MODEL=MODEL_NAME" --build-arg="WEIGHT=WEIGHT_NAME" -t yolo_demo .
docker run --runtime nvidia -p 8080:8080
./build_and_run.sh <model-key>
```

Available model keys include:
- seg-l, seg-l-seghead, seg-m, seg-m-seghead
- pretrain-l-clip-800ft, pretrain-l-clip, pretrain-l-1280ft, pretrain-l
- pretrain-m-1280ft, pretrain-m, pretrain-s-1280ft, pretrain-s
- pretrain-x-cc3mlite, pretrain-x-1280ft

This script will build the Docker image and run the container with the specified model configuration. The Gradio interface will be accessible at `http://localhost:8080`.

You can also customize the model weights directory by setting the `MODEL_DIR` environment variable:

```bash
MODEL_DIR=/path/to/your/weights ./build_and_run.sh <model-key>
```

#### Image Demo