diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json
similarity index 57%
rename from .devcontainer/cuda12.5-conda/devcontainer.json
rename to .devcontainer/cuda12.8-conda/devcontainer.json
index 244c624b871..3977a1c5f86 100644
--- a/.devcontainer/cuda12.5-conda/devcontainer.json
+++ b/.devcontainer/cuda12.8-conda/devcontainer.json
@@ -3,7 +3,7 @@
     "context": "${localWorkspaceFolder}/.devcontainer",
     "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
     "args": {
-      "CUDA": "12.5",
+      "CUDA": "12.8",
       "PYTHON_PACKAGE_MANAGER": "conda",
       "BASE": "rapidsai/devcontainers:25.04-cpp-mambaforge-ubuntu22.04"
     }
@@ -11,39 +11,49 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.5-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.8-conda"
   ],
-  "hostRequirements": {"gpu": "optional"},
+  "hostRequirements": {
+    "gpu": "optional"
+  },
   "features": {
     "ghcr.io/rapidsai/devcontainers/features/cuda:25.4": {
-      "version": "12.5",
-      "installCompilers": false,
-      "installProfilers": true,
-      "installDevPackages": false,
-      "installcuDNN": false,
-      "installcuTensor": false,
-      "installNCCL": false,
-      "installCUDARuntime": false,
-      "installNVRTC": false,
-      "installOpenCL": false,
-      "installcuBLAS": false,
-      "installcuSPARSE": false,
-      "installcuFFT": false,
-      "installcuFile": false,
-      "installcuRAND": false,
-      "installcuSOLVER": false,
-      "installNPP": false,
-      "installnvJPEG": false,
-      "pruneStaticLibs": true
-    },
+      "version": "12.8",
+      "installCompilers": false,
+      "installProfilers": true,
+      "installDevPackages": false,
+      "installcuDNN": false,
+      "installcuTensor": false,
+      "installNCCL": false,
+      "installCUDARuntime": false,
+      "installNVRTC": false,
+      "installOpenCL": false,
+      "installcuBLAS": false,
+      "installcuSPARSE": false,
+      "installcuFFT": false,
+      "installcuFile": false,
+      "installcuRAND": false,
+      "installcuSOLVER": false,
+      "installNPP": false,
+      "installnvJPEG": false,
+      "pruneStaticLibs": true
+    },
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
-  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
-  "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+  "initializeCommand": [
+    "/bin/bash",
+    "-c",
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"
+  ],
+  "postAttachCommand": [
+    "/bin/bash",
+    "-c",
+    "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"
rapids-post-attach-command; fi" + ], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent", "mounts": [ @@ -51,7 +61,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 67% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index a4b0c1acb7d..ab83fa88300 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,32 +3,42 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.04-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.04-cpp-cuda12.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.8-pip" ], - "hostRequirements": {"gpu": "optional"}, + "hostRequirements": { + "gpu": "optional" + }, "features": { "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], - "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi" + ], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent", "mounts": [ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 9c0bb7d6840..65356ec8b73 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -133,6 +133,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.04 with: build_type: pull-request + node_type: "cpu16" cpp-linters: secrets: inherit needs: checks @@ -227,6 +228,7 @@ jobs: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) build_type: pull-request + node_type: "cpu16" script: "ci/build_wheel_libcudf.sh" wheel-build-pylibcudf: needs: [checks, wheel-build-libcudf] @@ -302,8 +304,9 @@ jobs: needs: telemetry-setup uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04 with: + node_type: "cpu32" arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' build_command: | sccache -z; build-all -DBUILD_BENCHMARKS=ON --verbose; @@ -336,7 +339,7 @@ jobs: needs: pandas-tests uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 with: - node_type: cpu4 + node_type: "cpu4" build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3db1ed35294..a6790032017 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -105,7 +105,7 @@ Instructions for a minimal build environment without conda are included below. 
 # create the conda environment (assuming in base `cudf` directory)
 # note: RAPIDS currently doesn't support `channel_priority: strict`;
 # use `channel_priority: flexible` instead
-conda env create --name cudf_dev --file conda/environments/all_cuda-125_arch-x86_64.yaml
+conda env create --name cudf_dev --file conda/environments/all_cuda-128_arch-x86_64.yaml
 # activate the environment
 conda activate cudf_dev
 ```
diff --git a/README.md b/README.md
index 20b1d64a5e0..a240d6c2aa9 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge
 
 ```bash
 conda install -c rapidsai -c conda-forge -c nvidia \
-    cudf=25.04 python=3.12 cuda-version=12.5
+    cudf=25.04 python=3.12 cuda-version=12.8
 ```
 
 We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
similarity index 97%
rename from conda/environments/all_cuda-125_arch-x86_64.yaml
rename to conda/environments/all_cuda-128_arch-x86_64.yaml
index f822169990f..e719fd51573 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -23,7 +23,7 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-python>=12.6.2,<13.0a0
 - cuda-sanitizer-api
-- cuda-version=12.5
+- cuda-version=12.8
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.3
@@ -99,4 +99,4 @@ dependencies:
 - transformers==4.39.3
 - typing_extensions>=4.0.0
 - zlib>=1.2.13
-name: all_cuda-125_arch-x86_64
+name: all_cuda-128_arch-x86_64
diff --git a/cpp/src/groupby/hash/compute_aggregations.cuh b/cpp/src/groupby/hash/compute_aggregations.cuh
index 9c9a4c97bff..df8fcf4690f 100644
--- a/cpp/src/groupby/hash/compute_aggregations.cuh
+++ b/cpp/src/groupby/hash/compute_aggregations.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -67,13 +67,17 @@ rmm::device_uvector compute_aggregations(
 
   auto const grid_size = max_occupancy_grid_size>(num_rows);
   auto const available_shmem_size = get_available_shared_memory_size(grid_size);
-  auto const has_sufficient_shmem =
-    available_shmem_size > (compute_shmem_offsets_size(flattened_values.num_columns()) * 2);
-  auto const has_dictionary_request = std::any_of(
-    requests.begin(), requests.end(), [](cudf::groupby::aggregation_request const& request) {
-      return cudf::is_dictionary(request.values.type());
+  auto const offsets_buffer_size = compute_shmem_offsets_size(flattened_values.num_columns()) * 2;
+  auto const data_buffer_size    = available_shmem_size - offsets_buffer_size;
+  auto const is_shared_memory_compatible = std::all_of(
+    requests.begin(), requests.end(), [&](cudf::groupby::aggregation_request const& request) {
+      if (cudf::is_dictionary(request.values.type())) { return false; }
+      // Ensure there is enough buffer space to store local aggregations up to the max cardinality
+      // for shared memory aggregations
+      auto const size = cudf::type_dispatcher(request.values.type(),
+                                              size_of_functor{});
+      return static_cast(data_buffer_size) >= (size * GROUPBY_CARDINALITY_THRESHOLD);
     });
-  auto const is_shared_memory_compatible = !has_dictionary_request and has_sufficient_shmem;
 
   // Performs naive global memory aggregations when the workload is not compatible with shared
   // memory, such as when aggregating dictionary columns or when there is insufficient dynamic
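
Note on the hunk above: the old all-or-nothing shared-memory check is replaced by a per-request budget test. The two shared-memory offset buffers are subtracted from the available dynamic shared memory first, and each aggregation request must then fit one local aggregation slot per distinct key, up to the cardinality threshold that gates the shared-memory path; dictionary columns always fall back to the global-memory path. The standalone sketch below only illustrates that arithmetic — it is not the cudf implementation, and the ~47 KiB budget, the 4-column offsets estimate, and the 128-entry threshold are hypothetical stand-ins for get_available_shared_memory_size, compute_shmem_offsets_size, and GROUPBY_CARDINALITY_THRESHOLD.

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
  // Hypothetical dynamic shared memory available per block (~47 KiB).
  std::size_t const available_shmem_size = 47 * 1024;

  // Two offset buffers are carved out first (stand-in for
  // compute_shmem_offsets_size(num_columns) * 2, assuming 4 value columns).
  std::size_t const offsets_buffer_size = 2 * 4 * sizeof(int);
  std::size_t const data_buffer_size    = available_shmem_size - offsets_buffer_size;

  // Stand-in for the cardinality threshold: the maximum number of distinct keys
  // a block may accumulate locally on the shared-memory path.
  std::size_t const cardinality_threshold = 128;

  // Element sizes a size-of functor would report for each request's value column.
  std::vector<std::size_t> const request_value_sizes{sizeof(double), sizeof(long long)};

  bool is_shared_memory_compatible = true;
  for (auto const size : request_value_sizes) {
    // Every request must be able to hold one local aggregation per key in the
    // remaining data buffer; otherwise fall back to global-memory aggregation.
    if (data_buffer_size < size * cardinality_threshold) { is_shared_memory_compatible = false; }
  }

  std::printf("use shared-memory aggregation: %s\n", is_shared_memory_compatible ? "yes" : "no");
  return 0;
}
```
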
diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu
index f0361ccced2..ae7584da483 100644
--- a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu
+++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,15 +35,6 @@ namespace cudf::groupby::detail::hash {
 
 namespace {
 
-/// Functor used by type dispatcher returning the size of the underlying C++ type
-struct size_of_functor {
-  template <typename T>
-  __device__ constexpr cudf::size_type operator()()
-  {
-    return sizeof(T);
-  }
-};
-
 /// Shared memory data alignment
 CUDF_HOST_DEVICE cudf::size_type constexpr ALIGNMENT = 8;
 
diff --git a/cpp/src/groupby/hash/single_pass_functors.cuh b/cpp/src/groupby/hash/single_pass_functors.cuh
index 048c9252773..c02087072a0 100644
--- a/cpp/src/groupby/hash/single_pass_functors.cuh
+++ b/cpp/src/groupby/hash/single_pass_functors.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,15 @@
 #include
 
 namespace cudf::groupby::detail::hash {
+/// Functor used by type dispatcher returning the size of the underlying C++ type
+struct size_of_functor {
+  template <typename T>
+  CUDF_HOST_DEVICE constexpr cudf::size_type operator()()
+  {
+    return sizeof(T);
+  }
+};
+
 // TODO: TO BE REMOVED issue tracked via #17171
 template
 __device__ constexpr bool is_supported()
diff --git a/dependencies.yaml b/dependencies.yaml
index ebc2284048f..b1378fae6d7 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -3,7 +3,7 @@ files:
   all:
     output: conda
     matrix:
-      cuda: ["11.8", "12.5"]
+      cuda: ["11.8", "12.8"]
       arch: [x86_64]
     includes:
       # Note that clang-tidy is not included here because cudf's preferred
@@ -525,6 +525,10 @@ dependencies:
               cuda: "12.5"
             packages:
               - cuda-version=12.5
+          - matrix:
+              cuda: "12.8"
+            packages:
+              - cuda-version=12.8
   cuda:
     specific:
       - output_types: conda
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
index 56f0586f89a..977d25184b5 100644
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
@@ -156,9 +156,9 @@ dependencies:
             packages:
               - cuda-version=12.5
           - matrix:
-              cuda: "12"
+              cuda: "12.8"
             packages:
-              - cuda-version=12.5
+              - cuda-version=12.8
   py_version:
     specific:
       - output_types: conda
diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml
index 9c54864ccc8..7d3b6c09c61 100644
--- a/python/libcudf/pyproject.toml
+++ b/python/libcudf/pyproject.toml
@@ -54,7 +54,7 @@ select = [
 ]
 
 # PyPI limit is 600 MiB, fail CI before we get too close to that
-max_allowed_size_compressed = '525M'
+max_allowed_size_compressed = '575M'
 
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
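
Note on the size_of_functor relocation above (compute_shared_memory_aggs.cu → single_pass_functors.cuh): the functor was previously __device__-only, but the new compatibility check in compute_aggregations.cuh also invokes it from host code through cudf::type_dispatcher, hence the CUDF_HOST_DEVICE annotation. The toy dispatcher below sketches the same pattern — mapping a runtime type tag to a compile-time type and asking a templated functor for sizeof(T). It is illustrative only: the enum, dispatch_type helper, and plain constexpr qualifier are stand-ins, not cudf's type_id, type_dispatcher, or CUDF_HOST_DEVICE.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

enum class type_id { INT32, INT64, FLOAT64 };

// Same shape as the size_of_functor in the diff: report sizeof(T) for a
// dispatched type. Plain constexpr here stands in for CUDF_HOST_DEVICE.
struct size_of_functor {
  template <typename T>
  constexpr std::size_t operator()() const
  {
    return sizeof(T);
  }
};

// Toy dispatcher: map a runtime type tag to a compile-time type and invoke the
// functor's templated call operator.
template <typename Functor>
constexpr std::size_t dispatch_type(type_id id, Functor f)
{
  switch (id) {
    case type_id::INT32: return f.template operator()<std::int32_t>();
    case type_id::INT64: return f.template operator()<std::int64_t>();
    case type_id::FLOAT64: return f.template operator()<double>();
  }
  return 0;
}

int main()
{
  std::printf("INT64 element size: %zu bytes\n", dispatch_type(type_id::INT64, size_of_functor{}));
  return 0;
}
```
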