Skip to content

Commit 7b2d5dd

Browse files
authored
Use CircleCI docker+nvidia capable images (#1885)
Use the CircleCI docker+nvidia capable ubuntu-16.04 image. Remove the nvidia driver and docker installation steps and rely on the docker runtime provided by CircleCI; install only `expect-dev` and `moreutils`, which provide the `unbuffer` and `ts` tools respectively. This is a preparatory change for the Ubuntu-20.04 update.
1 parent aab1dae commit 7b2d5dd

File tree

1 file changed

+6
-37
lines changed

1 file changed

+6
-37
lines changed

Diff for: .circleci/config.yml

+6-37
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ setup_linux_system_environment: &setup_linux_system_environment
6161
6262
pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
6363
machine:
64-
image: ubuntu-1604:201903-01
64+
image: ubuntu-1604-cuda-10.2:202012-01
6565
steps:
6666
- checkout
6767
- run:
@@ -72,45 +72,14 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
7272
command: |
7373
set -e
7474
75-
# Set up NVIDIA docker repo
76-
curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
77-
echo "deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
78-
echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
79-
echo "deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
80-
8175
sudo apt-get -y update
82-
sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
83-
# WARNING: Docker version is hardcoded here; you must update the
84-
# version number below for docker-ce and nvidia-docker2 to get newer
85-
# versions of Docker. We hardcode these numbers because we kept
86-
# getting broken CI when Docker would update their docker version,
87-
# and nvidia-docker2 would be out of date for a day until they
88-
# released a newer version of their package.
89-
#
90-
# How to figure out what the correct versions of these packages are?
91-
# My preferred method is to start a Docker instance of the correct
92-
# Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask
93-
# apt what the packages you need are. Note that the CircleCI image
94-
# comes with Docker.
95-
sudo apt-get -y install \
96-
linux-headers-$(uname -r) \
97-
linux-image-generic \
98-
moreutils \
99-
docker-ce=5:18.09.4~3-0~ubuntu-xenial \
100-
nvidia-container-runtime=2.0.0+docker18.09.4-1 \
101-
nvidia-docker2=2.0.3+docker18.09.4-1 \
102-
expect-dev
103-
104-
sudo pkill -SIGHUP dockerd
76+
sudo apt-get -y install expect-dev moreutils
10577
10678
sudo pip -q install awscli==1.16.35
10779
108-
if [ -n "${CUDA_VERSION}" ]; then
109-
DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
110-
wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
111-
sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
112-
nvidia-smi
113-
fi
80+
if [ -n "${CUDA_VERSION}" ]; then
81+
nvidia-smi
82+
fi
11483
11584
# This IAM user only allows read-write access to ECR
11685
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_ONLY}
@@ -138,7 +107,7 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
138107
echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
139108
docker pull ${DOCKER_IMAGE} >/dev/null
140109
if [ -n "${CUDA_VERSION}" ]; then
141-
export id=$(docker run --runtime=nvidia -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
110+
export id=$(docker run --gpus all -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
142111
else
143112
export id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
144113
fi

0 commit comments

Comments
 (0)