From ece789dd27a4e745ff41242206248fd0b6072e31 Mon Sep 17 00:00:00 2001
From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com>
Date: Thu, 27 Jun 2024 16:47:23 -0400
Subject: [PATCH] Tweak rmm configuration for C++ unit tests (#4503)

We are seeing intermittent CI failures caused by trouble allocating the RMM memory pool.

Dropping the default pool size from 1/6 to 1/10 of GPU memory.

Added an option `maxpool` that will use 1/2 of the available memory, since we use the unit tests in larger configurations to do scale testing of algorithms.

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cugraph/pull/4503
---
 cpp/tests/utilities/base_fixture.hpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index cb302674a25..25011c0c97a 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -68,14 +68,18 @@ inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>
 
 inline auto make_managed() { return std::make_shared<rmm::mr::managed_memory_resource>(); }
 
-inline auto make_pool()
+// use_max set to true will use half of available GPU memory for RMM;
+// otherwise we'll use 1/10.
+inline auto make_pool(bool use_max = false)
 {
-  // Reduce the default pool allocation to 1/6th of the GPU memory so that we can
+  // Reduce the default pool allocation to 1/10 of GPU memory so that we can
   // run more than 2 tests in parallel at the same time. Changes to this value could
   // effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt`
   // `_CUGRAPH_TEST_PERCENT` default value will need to be audited.
   auto const [free, total] = rmm::available_device_memory();
-  auto const min_alloc = rmm::align_down(std::min(free, total / 6), rmm::CUDA_ALLOCATION_ALIGNMENT);
+  auto const min_alloc =
+    use_max ? rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT)
+            : rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT);
   return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), min_alloc);
 }
 
@@ -99,7 +103,8 @@ inline auto make_binning()
  * @throw cugraph::logic_error if the `allocation_mode` is unsupported.
  *
  * @param allocation_mode String identifies which resource type.
- *        Accepted types are "pool", "cuda", and "managed" only.
+ *        Accepted types are "binning", "cuda", "pool", "maxpool" and
+ *        "managed" only.
  * @return Memory resource instance
  */
 inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
@@ -108,6 +113,7 @@ inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
   if (allocation_mode == "binning") return make_binning();
   if (allocation_mode == "cuda") return make_cuda();
   if (allocation_mode == "pool") return make_pool();
+  if (allocation_mode == "maxpool") return make_pool(true);
   if (allocation_mode == "managed") return make_managed();
   CUGRAPH_FAIL("Invalid RMM allocation mode");
 }