Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wheel skip nccl #10452

Merged
merged 4 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/canary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
- name: Checkout Oneflow-Inc/oneflow
if: ${{ github.event.inputs.oneflow-ref == '' }}
uses: actions/checkout@v2
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux
id: build-cuda
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/on_merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ jobs:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
steps:
- uses: Oneflow-Inc/get-oneflow/update-benchmark-history@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/update-benchmark-history@whl-skip-nccl
name: Update benchmark history
timeout-minutes: 10
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
ref: ${{ inputs.branch }}
repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow
token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl
name: Find build cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
if: ${{ inputs.is_priv }}
run: |
env
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }}
with:
Expand All @@ -174,7 +174,7 @@ jobs:
3.10
3.9
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }}
with:
Expand All @@ -199,7 +199,7 @@ jobs:
3.10
3.9
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cpu' }}
with:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/simple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ jobs:
repository: Oneflow-Inc/conda-env
ref: 30a7f00eb48ee9009d85a848e720823e5054c66b
path: conda-env
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build with gcc7
if: ${{ matrix.build-type == 'gcc7'}}
with:
Expand All @@ -253,7 +253,7 @@ jobs:
oneflow-build-env: conda
conda-env-file: conda-env/dev/gcc7/environment-v2.yml
conda-env-name: oneflow-dev-gcc7-v2
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build with clang10
if: ${{ matrix.build-type == 'clang10'}}
with:
Expand Down
38 changes: 19 additions & 19 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -223,7 +223,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -237,7 +237,7 @@ jobs:
run: |
echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
exit 1
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux ${{ matrix.entry }}
id: build-cpu
if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }}
Expand All @@ -259,7 +259,7 @@ jobs:
python-versions: |
3.7
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux ${{ matrix.entry }}
id: build-cpu-sanitizers
if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }}
Expand All @@ -280,7 +280,7 @@ jobs:
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux ${{ matrix.entry }}
id: build-cuda
if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }}
Expand All @@ -300,7 +300,7 @@ jobs:
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.7
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }}
with:
Expand Down Expand Up @@ -339,7 +339,7 @@ jobs:
})
- name: Upload packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }}
uses: Oneflow-Inc/get-oneflow/digest/upload@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
Expand All @@ -350,7 +350,7 @@ jobs:
dst-dir: cpack
- name: Upload whl
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }}
uses: Oneflow-Inc/get-oneflow/digest/upload@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
Expand All @@ -375,7 +375,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -406,7 +406,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -488,7 +488,7 @@ jobs:
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -504,7 +504,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: download-digest
timeout-minutes: 10
with:
Expand All @@ -514,7 +514,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Get primary node
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/master-address@support-py311-py312
uses: Oneflow-Inc/get-oneflow/master-address@whl-skip-nccl
id: get-primary-node
with:
rank: ${{ matrix.rank }}
Expand Down Expand Up @@ -718,7 +718,7 @@ jobs:
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -734,7 +734,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: download-digest
timeout-minutes: 10
with:
Expand All @@ -744,7 +744,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Download ASAN and UBSAN wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: asan-ubsan-download-digest
timeout-minutes: 10
with:
Expand All @@ -754,7 +754,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Download TSAN wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: tsan-download-digest
timeout-minutes: 10
with:
Expand Down Expand Up @@ -1080,7 +1080,7 @@ jobs:
- name: Benchmark Test
timeout-minutes: 100
if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }}
uses: Oneflow-Inc/get-oneflow/pytest-benchmark@support-py311-py312
uses: Oneflow-Inc/get-oneflow/pytest-benchmark@whl-skip-nccl
with:
collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark
container-name: ${{ env.TEST_CONTAINER_NAME }}
Expand Down Expand Up @@ -1141,7 +1141,7 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
fetch-depth: 0
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand Down
4 changes: 4 additions & 0 deletions cmake/oneflow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ if(BUILD_PYTHON)
pybind11_add_module(oneflow_internal ${PYBIND11_SRCS} ${of_pybind_obj_cc} ${PYBIND_REGISTRY_CC})
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cublas/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cudnn/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/nccl/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH
"\$ORIGIN/../nvidia/cusparse/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cufft/lib")
set_compile_options_to_oneflow_target(oneflow_internal)
set_property(TARGET oneflow_internal PROPERTY CXX_VISIBILITY_PRESET "default")
add_dependencies(oneflow_internal of_functional_obj of_functional_tensor_obj of_op_schema)
Expand Down
6 changes: 6 additions & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,16 @@ def get_version():
if "cu11" in ONEFLOW_VERSION and "cu112" not in ONEFLOW_VERSION:
REQUIRED_PACKAGES.append("nvidia-cudnn-cu11")
REQUIRED_PACKAGES.append("nvidia-cublas-cu11")
REQUIRED_PACKAGES.append("nvidia-nccl-cu11")
REQUIRED_PACKAGES.append("nvidia-cusparse-cu11")
REQUIRED_PACKAGES.append("nvidia-cufft-cu11")

if "cu12" in ONEFLOW_VERSION:
REQUIRED_PACKAGES.append("nvidia-cudnn-cu12")
REQUIRED_PACKAGES.append("nvidia-cublas-cu12")
REQUIRED_PACKAGES.append("nvidia-nccl-cu12")
REQUIRED_PACKAGES.append("nvidia-cusparse-cu12")
REQUIRED_PACKAGES.append("nvidia-cufft-cu12")

# If the Python version is older than 3.7, the dataclasses backport must be installed via pip
if sys.version_info.minor < 7:
Expand Down
Loading