From e8eb235d608887f1090b1c515e97e3f3e9be239a Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Sun, 9 Nov 2025 22:28:13 -0700 Subject: [PATCH 1/2] chore: Rename 'docker' directory to 'containers' * This generalizes the directory to make room for Apptainer recipes in the same directory structure. --- {docker => containers}/README.md | 0 {docker => containers}/hello_gpu/README.md | 0 .../hello_gpu/expected_output/docker_stderror | 0 .../hello_gpu/expected_output/hello_gpu.err.txt | 0 .../hello_gpu/expected_output/hello_gpu.log.txt | 0 .../hello_gpu/expected_output/hello_gpu.out.txt | 0 {docker => containers}/hello_gpu/hello_gpu.sh | 0 {docker => containers}/hello_gpu/hello_gpu.sub | 0 {docker => containers}/pytorch_ngc/README.md | 0 {docker => containers}/pytorch_ngc/pytorch_cnn.sh | 0 {docker => containers}/pytorch_ngc/pytorch_cnn.sub | 0 {docker => containers}/pytorch_python/README.md | 0 .../pytorch_python/expected_output/docker_stderror | 0 .../pytorch_python/expected_output/mnist_cnn.pt | Bin .../expected_output/pytorch_cnn.err.txt | 0 .../expected_output/pytorch_cnn.log.txt | 0 .../expected_output/pytorch_cnn.out.txt | 0 .../pytorch_python/pytorch_cnn.sh | 0 .../pytorch_python/pytorch_cnn.sub | 0 {docker => containers}/tensorflow_python/README.md | 0 .../expected_output/docker_stderror | 0 .../expected_output/tensorflow_gpu.err.txt | 0 .../expected_output/tensorflow_gpu.log.txt | 0 .../expected_output/tensorflow_gpu.out.txt | 0 .../tensorflow_python/test_tensorflow.py | 0 .../tensorflow_python/test_tensorflow.sh | 0 .../tensorflow_python/test_tensorflow.sub | 0 shared/README.md | 2 +- 28 files changed, 1 insertion(+), 1 deletion(-) rename {docker => containers}/README.md (100%) rename {docker => containers}/hello_gpu/README.md (100%) rename {docker => containers}/hello_gpu/expected_output/docker_stderror (100%) rename {docker => containers}/hello_gpu/expected_output/hello_gpu.err.txt (100%) rename {docker => 
containers}/hello_gpu/expected_output/hello_gpu.log.txt (100%) rename {docker => containers}/hello_gpu/expected_output/hello_gpu.out.txt (100%) rename {docker => containers}/hello_gpu/hello_gpu.sh (100%) rename {docker => containers}/hello_gpu/hello_gpu.sub (100%) rename {docker => containers}/pytorch_ngc/README.md (100%) rename {docker => containers}/pytorch_ngc/pytorch_cnn.sh (100%) rename {docker => containers}/pytorch_ngc/pytorch_cnn.sub (100%) rename {docker => containers}/pytorch_python/README.md (100%) rename {docker => containers}/pytorch_python/expected_output/docker_stderror (100%) rename {docker => containers}/pytorch_python/expected_output/mnist_cnn.pt (100%) rename {docker => containers}/pytorch_python/expected_output/pytorch_cnn.err.txt (100%) rename {docker => containers}/pytorch_python/expected_output/pytorch_cnn.log.txt (100%) rename {docker => containers}/pytorch_python/expected_output/pytorch_cnn.out.txt (100%) rename {docker => containers}/pytorch_python/pytorch_cnn.sh (100%) rename {docker => containers}/pytorch_python/pytorch_cnn.sub (100%) rename {docker => containers}/tensorflow_python/README.md (100%) rename {docker => containers}/tensorflow_python/expected_output/docker_stderror (100%) rename {docker => containers}/tensorflow_python/expected_output/tensorflow_gpu.err.txt (100%) rename {docker => containers}/tensorflow_python/expected_output/tensorflow_gpu.log.txt (100%) rename {docker => containers}/tensorflow_python/expected_output/tensorflow_gpu.out.txt (100%) rename {docker => containers}/tensorflow_python/test_tensorflow.py (100%) rename {docker => containers}/tensorflow_python/test_tensorflow.sh (100%) rename {docker => containers}/tensorflow_python/test_tensorflow.sub (100%) diff --git a/docker/README.md b/containers/README.md similarity index 100% rename from docker/README.md rename to containers/README.md diff --git a/docker/hello_gpu/README.md b/containers/hello_gpu/README.md similarity index 100% rename from 
docker/hello_gpu/README.md rename to containers/hello_gpu/README.md diff --git a/docker/hello_gpu/expected_output/docker_stderror b/containers/hello_gpu/expected_output/docker_stderror similarity index 100% rename from docker/hello_gpu/expected_output/docker_stderror rename to containers/hello_gpu/expected_output/docker_stderror diff --git a/docker/hello_gpu/expected_output/hello_gpu.err.txt b/containers/hello_gpu/expected_output/hello_gpu.err.txt similarity index 100% rename from docker/hello_gpu/expected_output/hello_gpu.err.txt rename to containers/hello_gpu/expected_output/hello_gpu.err.txt diff --git a/docker/hello_gpu/expected_output/hello_gpu.log.txt b/containers/hello_gpu/expected_output/hello_gpu.log.txt similarity index 100% rename from docker/hello_gpu/expected_output/hello_gpu.log.txt rename to containers/hello_gpu/expected_output/hello_gpu.log.txt diff --git a/docker/hello_gpu/expected_output/hello_gpu.out.txt b/containers/hello_gpu/expected_output/hello_gpu.out.txt similarity index 100% rename from docker/hello_gpu/expected_output/hello_gpu.out.txt rename to containers/hello_gpu/expected_output/hello_gpu.out.txt diff --git a/docker/hello_gpu/hello_gpu.sh b/containers/hello_gpu/hello_gpu.sh similarity index 100% rename from docker/hello_gpu/hello_gpu.sh rename to containers/hello_gpu/hello_gpu.sh diff --git a/docker/hello_gpu/hello_gpu.sub b/containers/hello_gpu/hello_gpu.sub similarity index 100% rename from docker/hello_gpu/hello_gpu.sub rename to containers/hello_gpu/hello_gpu.sub diff --git a/docker/pytorch_ngc/README.md b/containers/pytorch_ngc/README.md similarity index 100% rename from docker/pytorch_ngc/README.md rename to containers/pytorch_ngc/README.md diff --git a/docker/pytorch_ngc/pytorch_cnn.sh b/containers/pytorch_ngc/pytorch_cnn.sh similarity index 100% rename from docker/pytorch_ngc/pytorch_cnn.sh rename to containers/pytorch_ngc/pytorch_cnn.sh diff --git a/docker/pytorch_ngc/pytorch_cnn.sub b/containers/pytorch_ngc/pytorch_cnn.sub 
similarity index 100% rename from docker/pytorch_ngc/pytorch_cnn.sub rename to containers/pytorch_ngc/pytorch_cnn.sub diff --git a/docker/pytorch_python/README.md b/containers/pytorch_python/README.md similarity index 100% rename from docker/pytorch_python/README.md rename to containers/pytorch_python/README.md diff --git a/docker/pytorch_python/expected_output/docker_stderror b/containers/pytorch_python/expected_output/docker_stderror similarity index 100% rename from docker/pytorch_python/expected_output/docker_stderror rename to containers/pytorch_python/expected_output/docker_stderror diff --git a/docker/pytorch_python/expected_output/mnist_cnn.pt b/containers/pytorch_python/expected_output/mnist_cnn.pt similarity index 100% rename from docker/pytorch_python/expected_output/mnist_cnn.pt rename to containers/pytorch_python/expected_output/mnist_cnn.pt diff --git a/docker/pytorch_python/expected_output/pytorch_cnn.err.txt b/containers/pytorch_python/expected_output/pytorch_cnn.err.txt similarity index 100% rename from docker/pytorch_python/expected_output/pytorch_cnn.err.txt rename to containers/pytorch_python/expected_output/pytorch_cnn.err.txt diff --git a/docker/pytorch_python/expected_output/pytorch_cnn.log.txt b/containers/pytorch_python/expected_output/pytorch_cnn.log.txt similarity index 100% rename from docker/pytorch_python/expected_output/pytorch_cnn.log.txt rename to containers/pytorch_python/expected_output/pytorch_cnn.log.txt diff --git a/docker/pytorch_python/expected_output/pytorch_cnn.out.txt b/containers/pytorch_python/expected_output/pytorch_cnn.out.txt similarity index 100% rename from docker/pytorch_python/expected_output/pytorch_cnn.out.txt rename to containers/pytorch_python/expected_output/pytorch_cnn.out.txt diff --git a/docker/pytorch_python/pytorch_cnn.sh b/containers/pytorch_python/pytorch_cnn.sh similarity index 100% rename from docker/pytorch_python/pytorch_cnn.sh rename to containers/pytorch_python/pytorch_cnn.sh diff --git 
a/docker/pytorch_python/pytorch_cnn.sub b/containers/pytorch_python/pytorch_cnn.sub similarity index 100% rename from docker/pytorch_python/pytorch_cnn.sub rename to containers/pytorch_python/pytorch_cnn.sub diff --git a/docker/tensorflow_python/README.md b/containers/tensorflow_python/README.md similarity index 100% rename from docker/tensorflow_python/README.md rename to containers/tensorflow_python/README.md diff --git a/docker/tensorflow_python/expected_output/docker_stderror b/containers/tensorflow_python/expected_output/docker_stderror similarity index 100% rename from docker/tensorflow_python/expected_output/docker_stderror rename to containers/tensorflow_python/expected_output/docker_stderror diff --git a/docker/tensorflow_python/expected_output/tensorflow_gpu.err.txt b/containers/tensorflow_python/expected_output/tensorflow_gpu.err.txt similarity index 100% rename from docker/tensorflow_python/expected_output/tensorflow_gpu.err.txt rename to containers/tensorflow_python/expected_output/tensorflow_gpu.err.txt diff --git a/docker/tensorflow_python/expected_output/tensorflow_gpu.log.txt b/containers/tensorflow_python/expected_output/tensorflow_gpu.log.txt similarity index 100% rename from docker/tensorflow_python/expected_output/tensorflow_gpu.log.txt rename to containers/tensorflow_python/expected_output/tensorflow_gpu.log.txt diff --git a/docker/tensorflow_python/expected_output/tensorflow_gpu.out.txt b/containers/tensorflow_python/expected_output/tensorflow_gpu.out.txt similarity index 100% rename from docker/tensorflow_python/expected_output/tensorflow_gpu.out.txt rename to containers/tensorflow_python/expected_output/tensorflow_gpu.out.txt diff --git a/docker/tensorflow_python/test_tensorflow.py b/containers/tensorflow_python/test_tensorflow.py similarity index 100% rename from docker/tensorflow_python/test_tensorflow.py rename to containers/tensorflow_python/test_tensorflow.py diff --git a/docker/tensorflow_python/test_tensorflow.sh 
b/containers/tensorflow_python/test_tensorflow.sh similarity index 100% rename from docker/tensorflow_python/test_tensorflow.sh rename to containers/tensorflow_python/test_tensorflow.sh diff --git a/docker/tensorflow_python/test_tensorflow.sub b/containers/tensorflow_python/test_tensorflow.sub similarity index 100% rename from docker/tensorflow_python/test_tensorflow.sub rename to containers/tensorflow_python/test_tensorflow.sub diff --git a/shared/README.md b/shared/README.md index 3124ab8..bc70229 100644 --- a/shared/README.md +++ b/shared/README.md @@ -3,5 +3,5 @@ Nothing here is intended to be run on its own. To find runnable examples, navigate to one of the following subdirectories: [`conda`](../conda), -[`docker`](../docker), +[`containers`](../containers), or [`test`](../test) From be45542f05962e35a48122f32d616c53651990ef Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Sun, 9 Nov 2025 22:38:48 -0700 Subject: [PATCH 2/2] chore: Clarify subsection is for Docker * Set containers/README.md title to be about Linux containers in general. * Add section title on Docker. * Clarify language on 'containers' vs. 'container images'. --- containers/README.md | 74 ++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/containers/README.md b/containers/README.md index b014c8d..d771004 100644 --- a/containers/README.md +++ b/containers/README.md @@ -1,57 +1,64 @@ -### Using GPUs on CHTC via Docker +# Using GPUs on CHTC via Linux containers + +## Docker Docker is software that helps bundle software programs, libraries and -dependencies in a package called a **container**. Once built, these containers -can be run on different machines that have the Docker Engine. Programs with -complex dependencies are often packaged with Docker and made available for -download on [DockerHub](https://hub.docker.com). +dependencies in a package called a **container**. 
+Once built into container images, these containers can be run on different +machines that have the Docker Engine. +Programs with complex dependencies are often packaged with Docker and made +available for download on [DockerHub](https://hub.docker.com). The Docker Engine needs special configuration to give the software inside a container access to a GPU. CHTC does this behind the scenes with `nvidia-docker`. Any Docker container that wants to use `nvidia-docker` must contain the Nvidia CUDA toolkit inside it. Here we have working examples and -also some pointers on how to find containers or build your own containers that -can access the GPU. - +also some pointers on how to find Docker container images or build your own +Docker container images that can access the GPU. -### Examples +### Examples -1. **Hello\_GPU** +1. **Hello\_GPU** This is a simple example to see if we can access the GPU from inside a Docker container on CHTC. It uses the -[nvidia/cuda](https://hub.docker.com/r/nvidia/cuda) Docker image which is a -tiny container that only contains the Nvidia CUDA toolkit. - [Click here to access this example](./hello_gpu/). +[nvidia/cuda](https://hub.docker.com/r/nvidia/cuda) Docker container image which +is a tiny image that only contains the Nvidia CUDA toolkit. + [Click here to access this example](./hello_gpu/). -2. **Matrix Multiplication with TensorFlow (Python)** +2. **Matrix Multiplication with TensorFlow (Python)** This example uses a [TensorFlow](https://www.tensorflow.org) [Docker container](https://hub.docker.com/r/tensorflow/tensorflow/) to benchmark matrix -multiplication on a GPU vs the same matrix multiplication on a CPU. - [Click here to access this example](./tensorflow_python/). +multiplication on a GPU vs the same matrix multiplication on a CPU. + [Click here to access this example](./tensorflow_python/). -3. **Convolutional Neural Network with PyTorch (Python)** +3. 
**Convolutional Neural Network with PyTorch (Python)** This example shows how to send training and test data to the compute node along with the script. After processing the trained network is returned to the -submit node. - [Click here to access this example](./pytorch_python/). - -### Finding containers -1. Pick a container that is built on a more modern version of CUDA Toolkit. Although the toolkits are backwards compatible, the more modern the toolkit, the less likely you are to run into problems. +submit node. + [Click here to access this example](./pytorch_python/). + +### Finding container images +1. Pick a Docker container image that is built on a more modern version of +CUDA Toolkit. +Although the toolkits are backwards compatible, the more modern the toolkit, +the less likely you are to run into problems. 2. [Nvidia Catalog](https://ngc.nvidia.com/catalog/landing) has a good - selection of containers that use the GPU for machine learning, inference, + selection of container images that use the GPU for machine learning, inference, visualization etc. They need to be uploaded to your own account on Dockerhub before being used. This can be done with the Docker application or with the -Docker Automated Builder (see below). +Docker Automated Builder (see below). 3. [Rocker](https://hub.docker.com/u/rocker) is a great place to find GPU enabled machine learning software for the [R Project for Statistical Computing](https://www.r-project.org) -### Building containers -Building your own containers to access a GPU requires a bit of work and will -not be described fully here. It is best to start with a basic container that -can access the GPU and then build upon that container. The PyTorch Docker -container is built on top of Nvidia Cuda and is a [good example to follow](https://github.com/pytorch/pytorch/blob/main/.devcontainer/Dockerfile). 
+### Building container images +Building your own container images to access a GPU requires a bit of work and +will not be described fully here. +It is best to start with a basic Docker container image that can access the GPU +and then build upon that image. +The PyTorch Docker container image is built on top of Nvidia CUDA and is a +[good example to follow](https://github.com/pytorch/pytorch/blob/main/.devcontainer/Dockerfile). ```Dockerfile FROM nvidia/cuda:10.1-base-ubuntu18.04 @@ -66,7 +73,8 @@ FROM nvcr.io/nvidia/pytorch:19.07-py3 RUN conda install package_1 package_2 package_etc ``` -Once you have a working `Dockerfile`, you need to build a Docker container with -the Docker app and then upload it to Dockerhub so that CHTC can access your -container. Alternatively, you can have the DockerHub Cloud service directly -build it for you on DockerHub. +Once you have a working `Dockerfile`, you need to build a Docker container image +with the Docker app and then upload it to DockerHub so that CHTC can access your +container image. +Alternatively, you can have the DockerHub Cloud service directly build it for +you on DockerHub.