runpod · runpod-ebooks · Nov 7, 2025 · Nov 7, 2025 · Nov 11, 2025 · Nov 11, 2025
diff --git a/official-templates/vllmray/Dockerfile b/official-templates/vllmray/Dockerfile
@@ -0,0 +1,12 @@
+# Image that layers PyTorch, vLLM and Ray on top of ${BASE_IMAGE} and ships pre_start.sh.
+# Runtime environment variables read by the container:
+#   HF_MODEL (required) - the model to download and serve.
+#   HF_TOKEN (optional) - Hugging Face credential for accessing gated models.
+# The default BASE_IMAGE is a deliberate placeholder; the real value is supplied as a build argument.
+ARG BASE_IMAGE=non-existing
+FROM ${BASE_IMAGE}
+
+# WHEEL_SRC: CUDA suffix of the PyTorch wheel index (e.g. 128 -> .../whl/cu128).
+# TORCH: the pip requirement specifiers for the torch packages to install.
+ARG WHEEL_SRC
+ARG TORCH
+
+# ${TORCH} is left unquoted on purpose so that several space-separated specifiers expand into separate arguments.
+RUN python -m pip install --resume-retries 3 --no-cache-dir --upgrade ${TORCH} --index-url https://download.pytorch.org/whl/cu${WHEEL_SRC}
+# pre_start.sh invokes `vllm serve`, so the package to install is vllm.
+# NOTE(review): vllm is unpinned and may replace the torch build installed above - confirm the resulting versions.
+RUN python -m pip install --no-cache-dir vllm
+RUN python -m pip install --no-cache-dir "ray[default]"
+
+COPY pre_start.sh /pre_start.sh
diff --git a/official-templates/vllmray/README.md b/official-templates/vllmray/README.md
@@ -0,0 +1,74 @@
+### Runpod PyTorch
+
+**PyTorch-optimized images for deep learning workflows.**
+
+Built on our base images, these containers provide pre-configured PyTorch and CUDA combinations for immediate deep learning development. Skip the compatibility guesswork and setup time: just run, and start training.
+
+### What's included
+- **Version matched**: PyTorch and CUDA combinations tested for optimal compatibility.
+- **Zero setup**: PyTorch ready to import immediately, no additional installs required.
+- **GPU accelerated**: Full CUDA support enabled for immediate deep learning acceleration.
+- **Production ready**: Built on our stable base images with complete development toolchain.
+
+### Available configurations
+- **PyTorch**: 2.4.0, 2.4.1, 2.5.0, 2.5.1, 2.6.0, 2.7.1, and 2.8.0
+- **CUDA**: 12.4.1, 12.8.1, 12.9.0, and 13.0.0 (not available on Runpod)
+- **Ubuntu**: 22.04 (Jammy) and 24.04 (Noble); 20.04 (Focal) is available only for one legacy CUDA 12.4.1 image
+
+Focus on your models, not your environment setup.
+
+Please also see [../base/README.md](../base/README.md)
+
+<div class="base-images">
+
+## Available PyTorch Images
+
+### CUDA 12.8.1:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1281-torch280-ubuntu2404`
+
+### CUDA 12.9.0:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1290-torch280-ubuntu2404`
+
+### CUDA 13.0.0:
+- Torch 2.6.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch260-ubuntu2404`
+- Torch 2.7.1:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch271-ubuntu2404`
+- Torch 2.8.0:
+  - Ubuntu 22.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2204`
+  - Ubuntu 24.04: `runpod/pytorch:1.0.2-cu1300-torch280-ubuntu2404`
+
+<details>
+  <summary> CUDA 12.4.1 (Legacy): </summary>
+
+  ### CUDA 12.4.1:
+  - Torch 2.4.0:
+    - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch240-ubuntu2204`
+  - Torch 2.4.1:
+    - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch241-ubuntu2204`
+  - Torch 2.5.0:
+    - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch250-ubuntu2204`
+  - Torch 2.5.1:
+    - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch251-ubuntu2204`
+  - Torch 2.6.0:
+    - Ubuntu 20.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2004`
+    - Ubuntu 22.04: `runpod/pytorch:0.7.0-cu1241-torch260-ubuntu2204`
+</details>
+</div>
diff --git a/official-templates/vllmray/docker-bake.hcl b/official-templates/vllmray/docker-bake.hcl
@@ -0,0 +1,89 @@
+# https://pytorch.org/get-started/locally/
+
+# Per-PyTorch-release metadata: the torchvision version installed alongside each torch version.
+variable "TORCH_META" {
+  default = {
+    "2.8.0" = { torchvision = "0.23.0" }
+    "2.7.1" = { torchvision = "0.22.1" }
+    "2.6.0" = { torchvision = "0.21.0" }
+  }
+}
+
+# Supported (CUDA version, Torch version) pairs and the wheel index each one installs from.
+# whl_src is the CUDA suffix of the PyTorch wheel index; it is the most compatible wheel for the pair,
+# which at times means a wheel built for a different CUDA version than the image itself.
+variable "CUDA_TORCH_COMBINATIONS" {
+  default = [
+    # CUDA 12.8.1 images
+    { torch = "2.6.0", cuda_version = "12.8.1", whl_src = "126" },
+    { torch = "2.7.1", cuda_version = "12.8.1", whl_src = "128" },
+    { torch = "2.8.0", cuda_version = "12.8.1", whl_src = "128" },
+
+    # CUDA 12.9.0 images
+    { torch = "2.6.0", cuda_version = "12.9.0", whl_src = "126" },
+    { torch = "2.7.1", cuda_version = "12.9.0", whl_src = "128" },
+    { torch = "2.8.0", cuda_version = "12.9.0", whl_src = "129" },
+
+    # CUDA 13.0.0 images
+    { torch = "2.6.0", cuda_version = "13.0.0", whl_src = "126" },
+    { torch = "2.7.1", cuda_version = "13.0.0", whl_src = "128" },
+    { torch = "2.8.0", cuda_version = "13.0.0", whl_src = "129" }
+  ]
+}
+
+# Flat list of build records: one per CUDA/Torch pairing and per Ubuntu release that the
+# matching CUDA entry lists in its `ubuntu` attribute.
+# CUDA_VERSIONS and UBUNTU_VERSIONS are not declared in this file; they must be provided elsewhere.
+variable "COMPATIBLE_BUILDS" {
+  default = flatten([
+    for pairing in CUDA_TORCH_COMBINATIONS : [
+      for cuda_entry in CUDA_VERSIONS : [
+        for distro in UBUNTU_VERSIONS : {
+          ubuntu_version = distro.version
+          ubuntu_name    = distro.name
+          cuda_version   = cuda_entry.version
+          # Dotless forms ("12.8.1" -> "1281") used in target names and image tags.
+          cuda_code      = replace(cuda_entry.version, ".", "")
+          wheel_src      = pairing.whl_src
+          torch          = pairing.torch
+          torch_code     = replace(pairing.torch, ".", "")
+          torch_vision   = TORCH_META[pairing.torch].torchvision
+        } if cuda_entry.version == pairing.cuda_version && contains(cuda_entry.ubuntu, distro.version)
+      ]
+    ]
+  ])
+}
+
+# Single-target group for quick local iteration.
+group "dev" {
+  targets = [
+    "pytorch-ubuntu2404-cu1281-torch280",
+  ]
+}
+
+# Every generated matrix target; names follow the same pattern as the `name` of "pytorch-matrix".
+group "default" {
+  targets = [for build in COMPATIBLE_BUILDS : "pytorch-${build.ubuntu_name}-cu${build.cuda_code}-torch${build.torch_code}"]
+}
+
+# Settings shared by every image built from this file.
+target "pytorch-base" {
+  # Build from this template's own directory so that its Dockerfile and pre_start.sh are the ones used.
+  context    = "official-templates/vllmray"
+  dockerfile = "Dockerfile"
+  platforms  = ["linux/amd64"]
+}
+
+# One target per record in COMPATIBLE_BUILDS.
+# RELEASE_VERSION and RELEASE_SUFFIX are not declared in this file; they must be provided elsewhere.
+target "pytorch-matrix" {
+  matrix = {
+    build = COMPATIBLE_BUILDS
+  }
+
+  name = "pytorch-${build.ubuntu_name}-cu${build.cuda_code}-torch${build.torch_code}"
+
+  inherits = ["pytorch-base"]
+
+  # Build arguments consumed by the Dockerfile (BASE_IMAGE, WHEEL_SRC, TORCH).
+  args = {
+    BASE_IMAGE = "runpod/base:${RELEASE_VERSION}${RELEASE_SUFFIX}-cuda${build.cuda_code}-${build.ubuntu_name}"
+    WHEEL_SRC = build.wheel_src
+    TORCH = "torch==${build.torch} torchvision==${build.torch_vision} torchaudio==${build.torch}"
+  }
+
+  # Tagged under a dedicated repository so these images do not overwrite the runpod/pytorch tags.
+  # NOTE(review): repository name "runpod/vllmray" is taken from the template directory - confirm before publishing.
+  tags = [
+    "runpod/vllmray:${RELEASE_VERSION}${RELEASE_SUFFIX}-cu${build.cuda_code}-torch${build.torch_code}-${build.ubuntu_name}",
+  ]
+}
diff --git a/official-templates/vllmray/pre_start.sh b/official-templates/vllmray/pre_start.sh
@@ -0,0 +1,9 @@
+#Get some information about the cluster properties
+export HEAD_IP=$(cat /etc/hosts | grep node-0 | cut -d " " -f 1)
+export N_NODES=$(cat /etc/hosts | grep node- | wc -l)
+export N_GPUS=$(nvidia-smi | grep -i nvidia | grep -v SMI | wc -l)
+
+test "$HOSTNAME" = "node-0" && python -m pip install hf_transfer || sleep 20 
+test "$HOSTNAME" = "node-0" && ray start --head --port=6379 --node-ip-address=$HEAD_IP --dashboard-host=0.0.0.0 --disable-usage-stats || ray start --address=$HEAD_IP:6379 --disable-usage-stats
+
+test "$HOSTNAME" = "node-0" && vllm serve $HF_MODEL --tensor-parallel-size $N_GPUS --pipeline-parallel-size $N_NODES