Skip to content

Commit 43940c1

Browse files
committed
AZP/RELEASE: rm cuda-compat & separate gdrcopy
1 parent f9e2f91 commit 43940c1

File tree

6 files changed

+34
-30
lines changed

6 files changed

+34
-30
lines changed

buildlib/az-distro-release.yml

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ jobs:
2222
centos8_cuda11:
2323
build_container: centos8_cuda11
2424
artifact_name: $(POSTFIX)-centos8-mofed5-cuda11.tar.bz2
25-
ubuntu16_cuda11:
26-
build_container: ubuntu16_cuda11
27-
artifact_name: $(POSTFIX)-ubuntu16.04-mofed5-cuda11.tar.bz2
2825
ubuntu18_cuda11:
2926
build_container: ubuntu18_cuda11
3027
artifact_name: $(POSTFIX)-ubuntu18.04-mofed5-cuda11.tar.bz2
@@ -62,6 +59,8 @@ jobs:
6259
6360
- bash: |
6461
set -eEx
62+
63+
# Build
6564
./autogen.sh
6665
./contrib/configure-release --with-cuda --with-java=no
6766
make dist
@@ -71,11 +70,20 @@ jobs:
7170
echo 10 > debian/compat # https://www.debian.org/doc/manuals/maint-guide/dother.en.html#compat
7271
dpkg-buildpackage -us -uc -Pcuda
7372
cd .. # Move back to the working directory
74-
find . -name '*.deb'
75-
VER="${POSTFIX#ucx-}" # Remove 'ucx' prefix from the POSTFIX string
76-
# Rename DEB files
73+
74+
# Rename DEB files
75+
VER="${POSTFIX#ucx-}" # Remove 'ucx-' prefix from the POSTFIX string
7776
find . -name "ucx*.deb" -exec bash -c 'mv "$1" "${1%%_*}-'"${VER}"'.deb"' _ {} \;
77+
find . -name '*.deb' # Show new names
78+
79+
# Remove superfluous dependency
80+
dpkg-deb -R "ucx-cuda-${VER}.deb" tmp # Extract
81+
sed -i 's/libnvidia-compute-[0-9]* | libnvidia-ml1, //g' tmp/DEBIAN/control
82+
dpkg-deb -b tmp "ucx-cuda-${VER}.deb" # Rebuild
83+
dpkg-deb -I "ucx-cuda-${VER}.deb"
7884
dpkg-deb -I "ucx-${VER}.deb"
85+
86+
# Package
7987
tar -cjf "${AZ_ARTIFACT_NAME}" *.deb # Package all DEBs
8088
tar -tjf "${AZ_ARTIFACT_NAME}"
8189
displayName: Build DEB package

buildlib/azure-pipelines-release.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@ resources:
1919
options: $(DOCKER_OPT_VOLUMES)
2020
- container: centos8_cuda11
2121
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5-cuda11:2
22-
- container: ubuntu16_cuda11
23-
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu16.04-mofed5-cuda11:3
2422
- container: ubuntu18_cuda11
2523
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5-cuda11:3
2624
- container: ubuntu20_cuda11
2725
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5-cuda11:3
2826
- container: ubuntu22_cuda11
29-
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda11:3
27+
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu22.04-mofed5-cuda11:3
3028

3129
stages:
3230
- stage: Prepare

buildlib/dockers/docker-compose.yml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
version: "3.4"
22

3+
# Find driver version based on CUDA version, OS and CPU arch (515 in this case):
4+
# https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=runfile_local
5+
36
services:
47
centos7-mofed5-cuda11:
58
image: centos7-mofed5-cuda11:2
@@ -34,16 +37,6 @@ services:
3437
MOFED_OS: rhel8.2
3538
CUDA_VERSION: 11.4.0
3639
OS_VERSION: 8
37-
ubuntu16.04-mofed5-cuda11:
38-
image: ubuntu16.04-mofed5-cuda11:3
39-
build:
40-
context: .
41-
network: host
42-
dockerfile: ubuntu-release.Dockerfile
43-
args:
44-
MOFED_VERSION: 5.0-1.0.0.0
45-
UBUNTU_VERSION: 16.04
46-
CUDA_VERSION: 11.2.0
4740
ubuntu18.04-mofed5-cuda11:
4841
image: ubuntu18.04-mofed5-cuda11:3
4942
build:
@@ -54,6 +47,7 @@ services:
5447
MOFED_VERSION: 5.0-1.0.0.0
5548
UBUNTU_VERSION: 18.04
5649
CUDA_VERSION: 11.4.0
50+
NV_DRIVER_VERSION: 470
5751
ubuntu20.04-mofed5-cuda11:
5852
image: ubuntu20.04-mofed5-cuda11:3
5953
build:
@@ -64,6 +58,7 @@ services:
6458
MOFED_VERSION: 5.0-1.0.0.0
6559
UBUNTU_VERSION: 20.04
6660
CUDA_VERSION: 11.4.0
61+
NV_DRIVER_VERSION: 470
6762
ubuntu22.04-mofed5-cuda11:
6863
image: ubuntu22.04-mofed5-cuda11:3
6964
build:
@@ -74,3 +69,4 @@ services:
7469
MOFED_VERSION: 5.4-3.6.8.1
7570
UBUNTU_VERSION: 22.04
7671
CUDA_VERSION: 11.7.0
72+
NV_DRIVER_VERSION: 515

buildlib/dockers/push-release-images.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
#!/bin/bash -eE
1+
#!/bin/bash -eEx
22

33
# shellcheck disable=SC2086
44
basedir=$(cd "$(dirname $0)" && pwd)
55

66
registry=harbor.mellanox.com/ucx
77

8-
images=$(awk '/image:/ {print $2}' "${basedir}/docker-compose.yml")
8+
images=$(awk '!/#/ && /image:/ {print $2}' "${basedir}/docker-compose.yml")
99
for img in $images; do
1010
target_name="${registry}/${img}"
1111
docker tag ${img} ${target_name}

buildlib/dockers/ubuntu-release.Dockerfile

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
ARG CUDA_VERSION=10.1
2-
ARG UBUNTU_VERSION=16.04
1+
ARG CUDA_VERSION
2+
ARG UBUNTU_VERSION
33
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
44

5+
ARG NV_DRIVER_VERSION
56
RUN apt-get update && \
67
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
78
apt-get install -y \
9+
apt-file \
810
automake \
911
default-jdk \
1012
dh-make \
@@ -14,13 +16,16 @@ RUN apt-get update && \
1416
libcap2 \
1517
libnuma-dev \
1618
libtool \
19+
# Provide the dependencies required by libnvidia-compute* instead of cuda-compat*
20+
libnvidia-compute-${NV_DRIVER_VERSION} \
1721
make \
1822
maven \
1923
udev \
2024
wget \
2125
environment-modules \
2226
pkg-config \
23-
&& apt-get remove -y openjdk-11-* || apt-get autoremove -y \
27+
sudo \
28+
&& apt-get remove -y openjdk-11-* cuda-compat* || apt-get autoremove -y \
2429
&& apt-get clean && rm -rf /var/lib/apt/lists/*
2530

2631
# MOFED
@@ -43,8 +48,5 @@ RUN ${MOFED_DIR}/mlnxofedinstall --all -q \
4348
rm -rf ${MOFED_DIR} && rm -rf *.tgz
4449

4550
ENV CPATH /usr/local/cuda/include:${CPATH}
46-
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
47-
ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
48-
ENV PATH /usr/local/cuda/compat:${PATH}
49-
50-
RUN ml_stub=$(find /usr -name libnvidia-ml.so) && ln -s $ml_stub /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
51+
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:${LD_LIBRARY_PATH}
52+
ENV LIBRARY_PATH /usr/local/cuda/lib64:${LIBRARY_PATH}

debian/control.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Package: ucx-gdrcopy
4141
Section: libs
4242
Depends: ${misc:Depends}, ${shlibs:Depends}
4343
Architecture: any
44-
Build-Profiles: <cuda>
44+
Build-Profiles: <gdrcopy>
4545
Description: Unified Communication X - gdrcopy support
4646
UCX is a communication library implementing high-performance messaging.
4747
.

0 commit comments

Comments
 (0)