@@ -61,7 +61,7 @@ setup_linux_system_environment: &setup_linux_system_environment
 
 pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
   machine:
-    image: ubuntu-1604:201903-01
+    image: ubuntu-1604-cuda-10.2:202012-01
   steps:
     - checkout
     - run:
@@ -72,45 +72,14 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
       command: |
         set -e
 
-        # Set up NVIDIA docker repo
-        curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
-        echo "deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-        echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-        echo "deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-
         sudo apt-get -y update
-        sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
-        # WARNING: Docker version is hardcoded here; you must update the
-        # version number below for docker-ce and nvidia-docker2 to get newer
-        # versions of Docker. We hardcode these numbers because we kept
-        # getting broken CI when Docker would update their docker version,
-        # and nvidia-docker2 would be out of date for a day until they
-        # released a newer version of their package.
-        #
-        # How to figure out what the correct versions of these packages are?
-        # My preferred method is to start a Docker instance of the correct
-        # Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask
-        # apt what the packages you need are. Note that the CircleCI image
-        # comes with Docker.
-        sudo apt-get -y install \
-            linux-headers-$(uname -r) \
-            linux-image-generic \
-            moreutils \
-            docker-ce=5:18.09.4~3-0~ubuntu-xenial \
-            nvidia-container-runtime=2.0.0+docker18.09.4-1 \
-            nvidia-docker2=2.0.3+docker18.09.4-1 \
-            expect-dev
-
-        sudo pkill -SIGHUP dockerd
+        sudo apt-get -y install expect-dev moreutils
 
         sudo pip -q install awscli==1.16.35
 
-        if [ -n "${CUDA_VERSION}" ]; then
-          DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
-          wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
-          sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
-          nvidia-smi
-        fi
+        if [ -n "${CUDA_VERSION}" ]; then
+          nvidia-smi
+        fi
 
         # This IAM user only allows read-write access to ECR
         export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_ONLY}
@@ -138,7 +107,7 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
         echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
         docker pull ${DOCKER_IMAGE} >/dev/null
         if [ -n "${CUDA_VERSION}" ]; then
-          export id=$(docker run --runtime=nvidia -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
+          export id=$(docker run --gpus all -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
         else
           export id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
         fi
0 commit comments