100 changes: 65 additions & 35 deletions Dockerfile
@@ -1,47 +1,77 @@
# FROM nvidia/cuda:12.1.0-base-ubuntu22.04
FROM axolotlai/axolotl-cloud:main-latest AS builder

# Set environment variables to reduce size
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# RUN ldconfig /usr/local/cuda-12.1/compat/


# # # Set CUDA environment variables
# ENV PATH="/usr/local/cuda/bin:${PATH}"
# ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"

# RUN apt-get update -y \
# && apt-get install -y python3-pip git



# # The base image comes with many system dependencies pre-installed to help you get started quickly.
# # Please refer to the base image's Dockerfile for more information before adding additional dependencies.
# # IMPORTANT: The base image overrides the default huggingface cache location.
# # Python dependencies
FROM axolotlai/axolotl-cloud:main-latest

# Install dependencies
COPY builder/requirements.txt /requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade -r /requirements.txt

# RUN pip install --upgrade torch --index-url https://download.pytorch.org/whl/cu121

# RUN git clone https://github.com/runpod-workers/axolotl.git && \
# cd axolotl && \
# pip install packaging ninja && \
# pip install --no-build-isolation -e '.[flash-attn,deepspeed]'
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade -r /requirements.txt

# Create a new stage with minimal files
FROM axolotlai/axolotl-cloud:main-latest

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Copy installed packages from builder stage - selectively to save space
COPY --from=builder /usr/local/lib/python3.11/site-packages/runpod /usr/local/lib/python3.11/site-packages/runpod
COPY --from=builder /usr/local/lib/python3.11/site-packages/huggingface_hub /usr/local/lib/python3.11/site-packages/huggingface_hub
COPY --from=builder /usr/local/lib/python3.11/site-packages/typing_extensions* /usr/local/lib/python3.11/site-packages/
COPY --from=builder /usr/local/lib/python3.11/site-packages/pydantic* /usr/local/lib/python3.11/site-packages/
COPY --from=builder /usr/local/lib/python3.11/site-packages/hf_transfer /usr/local/lib/python3.11/site-packages/hf_transfer
COPY --from=builder /usr/local/lib/python3.11/site-packages/numpy /usr/local/lib/python3.11/site-packages/numpy
COPY --from=builder /usr/local/bin/runpod* /usr/local/bin/

# Clean up to save space
RUN apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    # Remove unnecessary CUDA files to save space
    rm -rf /usr/local/cuda/samples /usr/local/cuda/doc /usr/local/cuda/extras && \
    # Remove unnecessary Python cache files
    find /usr/local -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
    find /root -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true

# Clean up NVIDIA CUDA libraries in both potential locations to save space
RUN rm -rf /usr/local/cuda/lib64/libcudnn* \
        /usr/local/cuda/lib64/libnvjpeg* \
        /usr/local/cuda/lib64/libnvjitlink* \
        /usr/local/cuda/lib64/libcufft* \
        /usr/local/cuda/lib64/libcurand* \
        /usr/local/cuda/lib64/libnvgraph* \
        /usr/local/cuda/lib64/libnpp* \
        /usr/local/cuda/lib64/libnvrtc* \
        /usr/local/cuda/lib64/libnvToolsExt* \
        /usr/local/cuda/lib64/libcupti* 2>/dev/null || true && \
    # Keep only essential CUDA libraries
    mkdir -p /tmp/essential_libs && \
    # Save essential libraries before cleanup
    cp -a /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cublas/lib/libcublas.so* /tmp/essential_libs/ 2>/dev/null || true && \
    cp -a /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cusolver/lib/libcusolver.so* /tmp/essential_libs/ 2>/dev/null || true && \
    # Clean up NVIDIA libraries in miniconda environment
    rm -rf /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/*/lib/* 2>/dev/null || true && \
    # Restore essential libraries
    mkdir -p /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cublas/lib/ 2>/dev/null || true && \
    mkdir -p /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cusolver/lib/ 2>/dev/null || true && \
    cp -a /tmp/essential_libs/libcublas.so* /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cublas/lib/ 2>/dev/null || true && \
    cp -a /tmp/essential_libs/libcusolver.so* /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/nvidia/cusolver/lib/ 2>/dev/null || true && \
    rm -rf /tmp/essential_libs

# Environment settings
ARG BASE_VOLUME="/runpod-volume"
ENV BASE_VOLUME=$BASE_VOLUME
ENV HF_DATASETS_CACHE="${BASE_VOLUME}/huggingface-cache/datasets"
ENV HUGGINGFACE_HUB_CACHE="${BASE_VOLUME}/huggingface-cache/hub"
ENV TRANSFORMERS_CACHE="${BASE_VOLUME}/huggingface-cache/hub"

ENV BASE_VOLUME=$BASE_VOLUME \
    HF_DATASETS_CACHE="${BASE_VOLUME}/huggingface-cache/datasets" \
    HUGGINGFACE_HUB_CACHE="${BASE_VOLUME}/huggingface-cache/hub" \
    TRANSFORMERS_CACHE="${BASE_VOLUME}/huggingface-cache/hub"

# Add src files (Worker Template)
COPY src /src

CMD ["python3", "/src/handler.py"]
# Copy test_input.json if it exists (for RunPod SDK automated testing)
COPY test_input.json /

CMD ["python3", "/src/handler.py"]
4 changes: 2 additions & 2 deletions builder/requirements.txt
@@ -4,11 +4,11 @@
# You can also install packages from a git repository, e.g.:
# git+https://github.com/runpod/runpod-python.git
# To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/
runpod~=1.7.0
runpod~=1.7.13
huggingface_hub
typing-extensions
pydantic
pydantic-settings
hf-transfer
setuptools
numpy==2.0.0
numpy==2.0.0
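The runpod pin moves from ~=1.7.0 to ~=1.7.13. With PEP 440 compatible-release syntax, ~=1.7.13 resolves to >=1.7.13,<1.8.0, so the worker picks up newer 1.7.x patch releases of the SDK while still ruling out a 1.8 minor-version jump.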
61 changes: 61 additions & 0 deletions test_input.json
@@ -0,0 +1,61 @@
{
  "input": {
    "user_id": "user",
    "model_id": "llama-test",
    "run_id": "",
    "credentials": {
      "wandb_api_key": "",
      "hf_token": ""
    },
    "args": {
      "base_model": "NousResearch/Meta-Llama-3-8B",
      "model_type": "LlamaForCausalLM",
      "tokenizer_type": "AutoTokenizer",
      "load_in_8bit": true,
      "load_in_4bit": false,
      "strict": false,
      "datasets": [
        {
          "path": "mhenrichsen/alpaca_2k_test",
          "type": "alpaca"
        }
      ],
      "val_set_size": 0.05,
      "output_dir": "./outputs/lora-out",
      "sequence_len": 4096,
      "sample_packing": true,
      "eval_sample_packing": false,
      "pad_to_sequence_len": true,
      "adapter": "lora",
      "lora_r": 32,
      "lora_alpha": 16,
      "lora_dropout": 0.05,
      "lora_target_linear": true,
      "lora_modules_to_save": [
        "embed_tokens",
        "lm_head"
      ],
      "gradient_accumulation_steps": 4,
      "micro_batch_size": 2,
      "num_epochs": 4,
      "optimizer": "adamw_bnb_8bit",
      "lr_scheduler": "cosine",
      "learning_rate": 0.0002,
      "train_on_inputs": false,
      "group_by_length": false,
      "bf16": "auto",
      "tf32": false,
      "gradient_checkpointing": true,
      "logging_steps": 1,
      "flash_attention": true,
      "warmup_steps": 10,
      "evals_per_epoch": 4,
      "eval_max_new_tokens": 128,
      "saves_per_epoch": 1,
      "weight_decay": 0.0,
      "special_tokens": {
        "pad_token": "<|end_of_text|>"
      }
    }
  }
}
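test_input.json mirrors the payload the handler receives: the top-level "input" object arrives as job["input"], with the axolotl fine-tune config under "args". When the handler is started outside a deployed endpoint (for example python3 /src/handler.py during image checks), the runpod SDK can pick up a test_input.json file and run it as a single local test job, which appears to be why the Dockerfile now copies the file into the image root.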