100 changes: 65 additions & 35 deletions Dockerfile
@@ -1,47 +1,77 @@
# FROM nvidia/cuda:12.1.0-base-ubuntu22.04
FROM axolotlai/axolotl-cloud:main-latest AS builder

# Set environment variables to reduce size
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# RUN ldconfig /usr/local/cuda-12.1/compat/


# # # Set CUDA environment variables
# ENV PATH="/usr/local/cuda/bin:${PATH}"
# ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"

# RUN apt-get update -y \
# && apt-get install -y python3-pip git



# # The base image comes with many system dependencies pre-installed to help you get started quickly.
# # Please refer to the base image's Dockerfile for more information before adding additional dependencies.
# # IMPORTANT: The base image overrides the default huggingface cache location.
# # Python dependencies
FROM axolotlai/axolotl-cloud:main-latest

# Install dependencies
COPY builder/requirements.txt /requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade -r /requirements.txt

# RUN pip install --upgrade torch --index-url https://download.pytorch.org/whl/cu121

# RUN git clone https://github.com/runpod-workers/axolotl.git && \
# cd axolotl && \
# pip install packaging ninja && \
# pip install --no-build-isolation -e '.[flash-attn,deepspeed]'
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade -r /requirements.txt

# Create a new stage with minimal files
FROM axolotlai/axolotl-cloud:main-latest

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Copy installed packages from builder stage - selectively to save space
COPY --from=builder /usr/local/lib/python3.11/site-packages/runpod /usr/local/lib/python3.11/site-packages/runpod
COPY --from=builder /usr/local/lib/python3.11/site-packages/huggingface_hub /usr/local/lib/python3.11/site-packages/huggingface_hub
COPY --from=builder /usr/local/lib/python3.11/site-packages/typing_extensions* /usr/local/lib/python3.11/site-packages/
COPY --from=builder /usr/local/lib/python3.11/site-packages/pydantic* /usr/local/lib/python3.11/site-packages/
COPY --from=builder /usr/local/lib/python3.11/site-packages/hf_transfer /usr/local/lib/python3.11/site-packages/hf_transfer
COPY --from=builder /usr/local/lib/python3.11/site-packages/numpy /usr/local/lib/python3.11/site-packages/numpy
COPY --from=builder /usr/local/bin/runpod* /usr/local/bin/

# Clean up to save space
RUN apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    # Remove unnecessary CUDA files to save space
    rm -rf /usr/local/cuda/samples /usr/local/cuda/doc /usr/local/cuda/extras && \
    # Remove unnecessary Python cache files
    find /usr/local -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
    find /root -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true

# Clean up NVIDIA CUDA libraries in both potential locations to save space
RUN rm -rf /usr/local/cuda/lib64/libcudnn* \
        /usr/local/cuda/lib64/libnvjpeg* \
        /usr/local/cuda/lib64/libnvjitlink* \
        /usr/local/cuda/lib64/libcufft* \
        /usr/local/cuda/lib64/libcurand* \
        /usr/local/cuda/lib64/libnvgraph* \
        /usr/local/cuda/lib64/libnpp* \
        /usr/local/cuda/lib64/libnvrtc* \
        /usr/local/cuda/lib64/libnvToolsExt* \
        /usr/local/cuda/lib64/libcupti* 2>/dev/null || true && \
    # Keep only essential CUDA libraries
    mkdir -p /tmp/essential_libs && \
    # Save essential libraries before cleanup
    cp -a /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cublas/lib/libcublas.so* /tmp/essential_libs/ 2>/dev/null || true && \
    cp -a /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cusolver/lib/libcusolver.so* /tmp/essential_libs/ 2>/dev/null || true && \
    # Clean up NVIDIA libraries in miniconda environment
    rm -rf /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/*/lib/* 2>/dev/null || true && \
    # Restore essential libraries
    mkdir -p /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cublas/lib/ 2>/dev/null || true && \
    mkdir -p /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cusolver/lib/ 2>/dev/null || true && \
    cp -a /tmp/essential_libs/libcublas.so* /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cublas/lib/ 2>/dev/null || true && \
    cp -a /tmp/essential_libs/libcusolver.so* /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cusolver/lib/ 2>/dev/null || true && \
    rm -rf /tmp/essential_libs

# Environment settings
ARG BASE_VOLUME="/runpod-volume"
ENV BASE_VOLUME=$BASE_VOLUME
ENV HF_DATASETS_CACHE="${BASE_VOLUME}/huggingface-cache/datasets"
ENV HUGGINGFACE_HUB_CACHE="${BASE_VOLUME}/huggingface-cache/hub"
ENV TRANSFORMERS_CACHE="${BASE_VOLUME}/huggingface-cache/hub"

ENV BASE_VOLUME=$BASE_VOLUME \
    HF_DATASETS_CACHE="${BASE_VOLUME}/huggingface-cache/datasets" \
    HUGGINGFACE_HUB_CACHE="${BASE_VOLUME}/huggingface-cache/hub" \
    TRANSFORMERS_CACHE="${BASE_VOLUME}/huggingface-cache/hub"

# Add src files (Worker Template)
COPY src /src

CMD ["python3", "/src/handler.py"]
# Copy test_input.json if it exists (for RunPod SDK automated testing)
COPY test_input.json /

CMD ["python3", "/src/handler.py"]
4 changes: 2 additions & 2 deletions builder/requirements.txt
@@ -4,11 +4,11 @@
# You can also install packages from a git repository, e.g.:
# git+https://github.com/runpod/runpod-python.git
# To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/
runpod~=1.7.0
runpod~=1.7.13
huggingface_hub
typing-extensions
pydantic
pydantic-settings
hf-transfer
setuptools
numpy==2.0.0
numpy==2.0.0
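The runpod pin moves from ~=1.7.0 to ~=1.7.13. With PEP 440 compatible-release syntax, ~=1.7.13 resolves to >=1.7.13,<1.8.0, so the worker picks up newer 1.7.x patch releases of the SDK while still ruling out a 1.8 minor-version jump.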
61 changes: 61 additions & 0 deletions test_input.json
@@ -0,0 +1,61 @@
{
  "input": {
    "user_id": "user",
    "model_id": "llama-test",
    "run_id": "",
    "credentials": {
      "wandb_api_key": "",
      "hf_token": ""
    },
    "args": {
      "base_model": "NousResearch/Meta-Llama-3-8B",
      "model_type": "LlamaForCausalLM",
      "tokenizer_type": "AutoTokenizer",
      "load_in_8bit": true,
      "load_in_4bit": false,
      "strict": false,
      "datasets": [
        {
          "path": "mhenrichsen/alpaca_2k_test",
          "type": "alpaca"
        }
      ],
      "val_set_size": 0.05,
      "output_dir": "./outputs/lora-out",
      "sequence_len": 4096,
      "sample_packing": true,
      "eval_sample_packing": false,
      "pad_to_sequence_len": true,
      "adapter": "lora",
      "lora_r": 32,
      "lora_alpha": 16,
      "lora_dropout": 0.05,
      "lora_target_linear": true,
      "lora_modules_to_save": [
        "embed_tokens",
        "lm_head"
      ],
      "gradient_accumulation_steps": 4,
      "micro_batch_size": 2,
      "num_epochs": 4,
      "optimizer": "adamw_bnb_8bit",
      "lr_scheduler": "cosine",
      "learning_rate": 0.0002,
      "train_on_inputs": false,
      "group_by_length": false,
      "bf16": "auto",
      "tf32": false,
      "gradient_checkpointing": true,
      "logging_steps": 1,
      "flash_attention": true,
      "warmup_steps": 10,
      "evals_per_epoch": 4,
      "eval_max_new_tokens": 128,
      "saves_per_epoch": 1,
      "weight_decay": 0.0,
      "special_tokens": {
        "pad_token": "<|end_of_text|>"
      }
    }
  }
}
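test_input.json mirrors the payload the handler receives: the top-level "input" object arrives as job["input"], with the axolotl fine-tune config under "args". When the handler is started outside a deployed endpoint (for example python3 /src/handler.py during image checks), the runpod SDK can pick up a test_input.json file and run it as a single local test job, which appears to be why the Dockerfile now copies the file into the image root.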