From ece789dd27a4e745ff41242206248fd0b6072e31 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Thu, 27 Jun 2024 16:47:23 -0400 Subject: [PATCH] Tweak rmm configuration for C++ unit tests (#4503) We are seeing intermittent failures in CI from having trouble allocating the RMM pool allocator. Dropping the memory usage by default from 1/6 to 1/10. Added an option `maxpool` that will use 1/2 of the available memory, since we use the unit tests in larger configurations to do scale testing of algorithms. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cugraph/pull/4503 --- cpp/tests/utilities/base_fixture.hpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index cb302674a25..25011c0c97a 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -68,14 +68,18 @@ inline auto make_cuda() { return std::make_shared inline auto make_managed() { return std::make_shared(); } -inline auto make_pool() +// use_max set to true will use half of available GPU memory for RMM, +// otherwise we'll use 1/10. +inline auto make_pool(bool use_max = false) { - // Reduce the default pool allocation to 1/6th of the GPU memory so that we can + // Reduce the default pool allocation to 1/10 of GPU memory so that we can // run more than 2 tests in parallel at the same time. Changes to this value could // effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt` // `_CUGRAPH_TEST_PERCENT` default value will need to be audited. auto const [free, total] = rmm::available_device_memory(); - auto const min_alloc = rmm::align_down(std::min(free, total / 6), rmm::CUDA_ALLOCATION_ALIGNMENT); + auto const min_alloc = + use_max ? 
rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT) + : rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT); return rmm::mr::make_owning_wrapper(make_cuda(), min_alloc); } @@ -99,7 +103,8 @@ inline auto make_binning() * @throw cugraph::logic_error if the `allocation_mode` is unsupported. * * @param allocation_mode String identifies which resource type. - * Accepted types are "pool", "cuda", and "managed" only. + * Accepted types are "binning", "cuda", "pool", "maxpool" and + * "managed" only. * @return Memory resource instance */ inline std::shared_ptr create_memory_resource( @@ -108,6 +113,7 @@ inline std::shared_ptr create_memory_resource( if (allocation_mode == "binning") return make_binning(); if (allocation_mode == "cuda") return make_cuda(); if (allocation_mode == "pool") return make_pool(); + if (allocation_mode == "maxpool") return make_pool(true); if (allocation_mode == "managed") return make_managed(); CUGRAPH_FAIL("Invalid RMM allocation mode"); }