diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index b48a222fd94..28500d0d38b 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -54,7 +54,7 @@ jobs: - name: Checkout Oneflow-Inc/oneflow if: ${{ github.event.inputs.oneflow-ref == '' }} uses: actions/checkout@v2 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build manylinux id: build-cuda with: diff --git a/.github/workflows/on_merge.yml b/.github/workflows/on_merge.yml index 92129768056..27fd400e7b9 100644 --- a/.github/workflows/on_merge.yml +++ b/.github/workflows/on_merge.yml @@ -15,6 +15,6 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@whl-skip-nccl name: Update benchmark history timeout-minutes: 10 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a765a1f17a7..0e8b89883aa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -70,7 +70,7 @@ jobs: ref: ${{ inputs.branch }} repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl name: Find build cache id: find-cache timeout-minutes: 5 @@ -149,7 +149,7 @@ jobs: if: ${{ inputs.is_priv }} run: | env - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }} with: @@ -174,7 +174,7 @@ jobs: 3.10 3.9 3.8 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build ${{ matrix.entry }} if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }} with: @@ -199,7 +199,7 @@ jobs: 3.10 3.9 3.8 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cpu' }} with: diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml index b8745712455..88b94579969 100644 --- a/.github/workflows/simple.yml +++ b/.github/workflows/simple.yml @@ -244,7 +244,7 @@ jobs: repository: Oneflow-Inc/conda-env ref: 30a7f00eb48ee9009d85a848e720823e5054c66b path: conda-env - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build with gcc7 if: ${{ matrix.build-type == 'gcc7'}} with: @@ -253,7 +253,7 @@ jobs: oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml conda-env-name: oneflow-dev-gcc7-v2 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build with clang10 if: ${{ matrix.build-type == 'clang10'}} with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c1a1f52da1..c8f9a5edda7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -176,7 +176,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl name: find cache id: find-cache timeout-minutes: 5 @@ -223,7 +223,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -237,7 +237,7 @@ jobs: run: | echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" exit 1 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build manylinux ${{ matrix.entry }} id: build-cpu if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }} @@ -259,7 +259,7 @@ jobs: python-versions: | 3.7 3.8 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build manylinux ${{ matrix.entry }} id: build-cpu-sanitizers if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }} @@ -280,7 +280,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.8 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build manylinux ${{ matrix.entry }} id: build-cuda if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }} @@ -300,7 +300,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.7 - - uses: Oneflow-Inc/get-oneflow@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl name: Build ${{ matrix.entry }} if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }} with: @@ -339,7 +339,7 @@ jobs: }) - name: Upload packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -350,7 +350,7 @@ jobs: dst-dir: cpack - name: Upload whl if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -375,7 +375,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl name: find cache id: find-cache timeout-minutes: 5 @@ -406,7 +406,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl name: find cache id: find-cache timeout-minutes: 5 @@ -488,7 +488,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -504,7 +504,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl id: download-digest timeout-minutes: 10 with: @@ -514,7 +514,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Get primary node if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/master-address@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/master-address@whl-skip-nccl id: get-primary-node with: rank: ${{ matrix.rank }} @@ -718,7 +718,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -734,7 +734,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl id: download-digest timeout-minutes: 10 with: @@ -744,7 +744,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download ASAN and UBSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl id: asan-ubsan-download-digest timeout-minutes: 10 with: @@ -754,7 +754,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download TSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl id: tsan-download-digest timeout-minutes: 10 with: @@ -1080,7 +1080,7 @@ jobs: - name: Benchmark Test timeout-minutes: 100 if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }} - uses: Oneflow-Inc/get-oneflow/pytest-benchmark@support-py311-py312 + uses: Oneflow-Inc/get-oneflow/pytest-benchmark@whl-skip-nccl with: collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark container-name: ${{ env.TEST_CONTAINER_NAME }} @@ -1141,7 +1141,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} fetch-depth: 0 - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312 + - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl name: Save cache if successful id: save-cache timeout-minutes: 5 diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake index 70de6bc7882..5e462c32450 100644 --- a/cmake/oneflow.cmake +++ b/cmake/oneflow.cmake @@ -400,6 +400,10 @@ if(BUILD_PYTHON) pybind11_add_module(oneflow_internal ${PYBIND11_SRCS} ${of_pybind_obj_cc} ${PYBIND_REGISTRY_CC}) set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cublas/lib") set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cudnn/lib") + set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/nccl/lib") + set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH + "\$ORIGIN/../nvidia/cusparse/lib") + set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cufft/lib") set_compile_options_to_oneflow_target(oneflow_internal) set_property(TARGET oneflow_internal PROPERTY CXX_VISIBILITY_PRESET "default") add_dependencies(oneflow_internal of_functional_obj of_functional_tensor_obj of_op_schema) diff --git a/python/setup.py b/python/setup.py index 8682252b7d0..93b600e50dd 100644 --- a/python/setup.py +++ b/python/setup.py @@ -65,10 +65,16 @@ def get_version(): if "cu11" in ONEFLOW_VERSION and "cu112" not in ONEFLOW_VERSION: REQUIRED_PACKAGES.append("nvidia-cudnn-cu11") REQUIRED_PACKAGES.append("nvidia-cublas-cu11") + REQUIRED_PACKAGES.append("nvidia-nccl-cu11") + REQUIRED_PACKAGES.append("nvidia-cusparse-cu11") + REQUIRED_PACKAGES.append("nvidia-cufft-cu11") if "cu12" in ONEFLOW_VERSION: REQUIRED_PACKAGES.append("nvidia-cudnn-cu12") REQUIRED_PACKAGES.append("nvidia-cublas-cu12") + REQUIRED_PACKAGES.append("nvidia-nccl-cu12") + REQUIRED_PACKAGES.append("nvidia-cusparse-cu12") + REQUIRED_PACKAGES.append("nvidia-cufft-cu12") # if python version < 3.7.x, than need pip install dataclasses if sys.version_info.minor < 7: