Skip to content

Commit

Permalink
Use runfile to install NVIDIA driver and update driver versions (#79)
Browse files Browse the repository at this point in the history
  • Loading branch information
jjacobelli authored Aug 23, 2024
1 parent 5b7f58a commit b95b570
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 21 deletions.
6 changes: 4 additions & 2 deletions ci/compute-image-name.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ if [ -z "${DRIVER_VERSION}" ]; then
VARIANT=cpu
fi

DRIVER_VERSION_SHORT="${DRIVER_VERSION%%.*}"

IMAGE_NAME=$(
jq -nr \
--arg OS "${OS}" \
--arg VARIANT "${VARIANT}" \
--arg DRIVER_VERSION "${DRIVER_VERSION}" \
--arg DRIVER_VERSION_SHORT "${DRIVER_VERSION_SHORT}" \
--arg ARCH "${ARCH}" \
--arg RUNNER_VERSION "${RUNNER_VERSION}" \
'[
$OS,
$VARIANT,
$DRIVER_VERSION,
$DRIVER_VERSION_SHORT,
$ARCH,
$RUNNER_VERSION
] | map(select(length > 0)) | join("-")'
Expand Down
17 changes: 8 additions & 9 deletions linux/installers/nvidia-driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@ if [ "${NV_VARIANT}" != "gpu" ]; then
exit 0
fi

KEYRING=cuda-keyring_1.1-1_all.deb
ARCH=x86_64

if [ "${NV_ARCH}" == "arm64" ]; then
ARCH=sbsa
ARCH=aarch64
fi

wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${ARCH}/${KEYRING}"
sudo dpkg --install "${KEYRING}"
sudo apt-get update

sudo apt-get -y install "nvidia-driver-${NV_DRIVER_VERSION}"
TMP_DIR=$(mktemp -d)
RUNFILE_NAME="NVIDIA-Linux-${ARCH}-${NV_DRIVER_VERSION}.run"
RUNFILE_URL="https://download.nvidia.com/XFree86/Linux-${ARCH}/${NV_DRIVER_VERSION}/${RUNFILE_NAME}"
RUNFILE_PATH="${TMP_DIR}/${RUNFILE_NAME}"

sudo dpkg --purge "$(dpkg -f "${KEYRING}" Package)"
wget --no-verbose -O "${RUNFILE_PATH}" "${RUNFILE_URL}"
sudo sh "${RUNFILE_PATH}" --no-questions --ui=none
rm -rf "${TMP_DIR}"
14 changes: 6 additions & 8 deletions matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ OS:
DRIVER_VERSION:
# Keep this blank entry: an empty driver version corresponds to CPU machines.
- ""
- "470"
- "550"
- "560"
- "535.183.01"
- "550.107.02"
- "560.35.03"

RUNNER_VERSION:
# renovate: repo=actions/runner
Expand All @@ -29,18 +29,16 @@ ENV:
- qemu

exclude:
- ARCH: arm64
DRIVER_VERSION: "470"
# only use amd64 for windows
- OS: windows
ARCH: arm64
# only make CPU images for windows
- OS: windows
DRIVER_VERSION: "470"
DRIVER_VERSION: "535.183.01"
- OS: windows
DRIVER_VERSION: "550"
DRIVER_VERSION: "550.107.02"
- OS: windows
DRIVER_VERSION: "560"
DRIVER_VERSION: "560.35.03"
# only make AMI images for windows
- OS: windows
ENV: qemu
4 changes: 2 additions & 2 deletions variables.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ variable "driver_version" {
description = "The NVIDIA driver version to install on the EC2 instance. If empty, no driver will be installed."

validation {
condition = can(regex("(^\\d{3}$|^$)", var.driver_version))
error_message = "The driver_version value must be an empty string or 3 digits."
condition = can(regex("(^\\d{3}\\.\\d{2,3}\\.\\d{2,3}$|^$)", var.driver_version))
error_message = "The driver_version value must be an empty string or 3 groups of digits splitted by dots."
}
}

Expand Down

0 comments on commit b95b570

Please sign in to comment.