Skip to content

Commit f198c8b

Browse files
authored
Merge pull request #324 from LLNL/v0.15.1-rc
v0.15.1 RC
2 parents aff9eea + fb9c77d commit f198c8b

22 files changed

+443
-214
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ cmake_minimum_required(VERSION 3.23)
1717

1818
project(CARE
1919
LANGUAGES C CXX
20-
VERSION 0.15.0)
20+
VERSION 0.15.1)
2121

2222
include(${PROJECT_SOURCE_DIR}/cmake/Setup.cmake)
2323

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
[comment]: # (SPDX-License-Identifier: BSD-3-Clause)
66
[comment]: # (#################################################################)
77

8-
# CARE v0.15.0
8+
# CARE v0.15.1
99

1010
CARE: CHAI and RAJA Extensions
1111
===============================
@@ -21,7 +21,7 @@ cmake ../ # May need to pass -DCMAKE_INSTALL_PREFIX=/path/to/install/in if the
2121
make -j install
2222
```
2323

24-
If desired, external libraries can be used instead of submodules. For example, an external CHAI can be specified with `-DCHAI_DIR=<path to CHAI install directory or directory containing chai-config.cmake>`. Note that if using an external CHAI, it must be configured with `-DENABLE_PICK=ON -DENABLE_PINNED=ON`.
24+
If desired, external libraries can be used instead of submodules. For example, an external CHAI can be specified with `-DCHAI_DIR=<path to CHAI install directory or directory containing chai-config.cmake>`. Note that if using an external CHAI, it must be configured with `-DENABLE_PINNED=ON`.
2525

2626
To build with CUDA support, use `-DENABLE_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda/toolkit`. If using external libraries, note that Umpire, RAJA, and CHAI must also be configured with those options.
2727

RELEASE_NOTES.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,28 @@ in this file.
1212

1313
The format of this file is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1414

15+
## [Version 0.15.1] - Release date 2025-04-07
16+
17+
### Added
18+
- Added support for RAJA MultiReducers (Min/Max/Sum).
19+
20+
### Changed
21+
- Changed RAJA reduce policy for CUDA to RAJA::cuda\_reduce\_atomic.
22+
- Rearranged template parameters of care::sortKeyValueArrays (used by care::KeyValueSorter) for ease of use
23+
24+
### Fixed
25+
- Enabled calls to cub::DeviceMergeSort only when it is available (used by care::sortArray and care::KeyValueSorter when the type is not arithmetic)
26+
- Fixed inputs to [hip]cub::DeviceMergeSort::StableSortKeys (used by care::sortArray when the type is not arithmetic)
27+
- Avoided hardcoding one overload of care::sortArray to use [hip]cub::DeviceRadixSort
28+
- Fixed a case where care::sort\_uniq modified the input array when it should not have
29+
- Miscellaneous fixes for care::host\_device\_map
30+
- Clarified documentation for care::BinarySearch
31+
- Added missing attributes to functions for building as a shared library on Windows
32+
- Moved helper function to be accessible when the loop fuser is disabled
33+
34+
### Removed
35+
- Removed dead ENABLE\_PICK option (corresponding option has been removed from CHAI)
36+
1537
## [Version 0.15.0] - Release date 2025-03-20
1638

1739
### Added

cmake/SetupDependencies.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ if(NOT TARGET chai)
8383
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/tpl/chai/CMakeLists.txt)
8484
message(FATAL_ERROR "CARE: CHAI submodule not initialized. Run 'git submodule update --init' in the git repository or set CHAI_DIR to use an external build of CHAI.")
8585
else()
86-
set(CHAI_ENABLE_PICK ${ENABLE_PICK} CACHE BOOL "Enable picks/sets in chai::ManagedArray")
8786
set(CHAI_ENABLE_PINNED ${ENABLE_PINNED} CACHE BOOL "Enable pinned memory support in CHAI")
8887

8988
set(CHAI_ENABLE_TESTS ${CARE_ENABLE_SUBMODULE_TESTS} CACHE BOOL "Enable CHAI tests")

cmake/SetupOptions.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ include(CMakeDependentOption)
1010
# Advanced configuration options
1111
# TODO: If these are disabled, the code will not compile or run correctly.
1212
# Fix those issues so that these options are actually configurable.
13-
option(ENABLE_PICK "Enable pick and set methods on ManagedArrays" ON)
1413
option(ENABLE_PINNED "Enable pinned memory space" ON)
1514
option(CARE_ENABLE_PINNED_MEMORY_FOR_SCANS "Use pinned memory for scan lengths" ON)
1615
option(CARE_GPU_MEMORY_IS_ACCESSIBLE_ON_CPU "Allows default memory spaces for ZERO_COPY and PAGEABLE to be the GPU memory space" OFF)

docs/sphinx/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
# The short X.Y version.
5858
version = '0.15'
5959
# The full version, including alpha/beta/rc tags.
60-
release = '0.15.0'
60+
release = '0.15.1'
6161

6262
# The language for content autogenerated by Sphinx. Refer to documentation
6363
# for a list of supported languages.

scripts/make_release_tarball.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
##############################################################################
99

1010
TAR_CMD=gtar
11-
VERSION=0.15.0
11+
VERSION=0.15.1
1212

1313
git archive --prefix=care-${VERSION}/ -o care-${VERSION}.tar HEAD 2> /dev/null
1414

src/care/DebugPlugin.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,6 @@ namespace care{
2727
// Prepare to record CHAI data
2828
if (CHAICallback::isActive()) {
2929
PluginData::clearActivePointers();
30-
31-
#if defined(CARE_GPUCC) && defined(CARE_DEBUG)
32-
GPUWatchpoint::setOrCheckWatchpoint<int>();
33-
#endif // defined(CARE_GPUCC) && defined(CARE_DEBUG)
3430
}
3531
#endif // !defined(CHAI_DISABLE_RM)
3632
}
@@ -57,10 +53,6 @@ namespace care{
5753

5854
if (CHAICallback::isActive()) {
5955
writeLoopData(space, PluginData::getFileName(), PluginData::getLineNumber());
60-
61-
#if defined(CARE_GPUCC) && defined(CARE_DEBUG)
62-
GPUWatchpoint::setOrCheckWatchpoint<int>();
63-
#endif // defined(CARE_GPUCC) && defined(CARE_DEBUG)
6456
}
6557

6658
if (PluginData::isParallelContext()) {

src/care/DebugPlugin.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#ifndef _CARE_DebugPlugin_H_
99
#define _CARE_DebugPlugin_H_
1010

11+
#include "care/config.h"
1112
#include "RAJA/util/PluginStrategy.hpp"
1213
#include "chai/ExecutionSpaces.hpp"
1314

@@ -18,7 +19,7 @@ namespace care{
1819
public:
1920
DebugPlugin() = default;
2021

21-
static void registerPlugin();
22+
CARE_DLL_API static void registerPlugin();
2223

2324
void preLaunch(const RAJA::util::PluginContext& p) override;
2425

src/care/ExecutionSpace.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,18 @@ namespace care {
3131
// CHAI and CARE are configured.
3232
extern CARE_DLL_API care::ExecutionSpace DEFAULT;
3333
} // namespace care
34+
3435
namespace chai {
35-
3636
// the ZERO_COPY memory space. Typically PINNED memory, but may be a different space depending on
3737
// how CHAI and CARE are configured.
38-
extern chai::ExecutionSpace ZERO_COPY;
38+
extern CARE_DLL_API chai::ExecutionSpace ZERO_COPY;
3939
// the PAGEABLE memory space. Typically UM, but may be a different space depending on how
4040
// CHAI and CARE are configured.
41-
extern chai::ExecutionSpace PAGEABLE;
41+
extern CARE_DLL_API chai::ExecutionSpace PAGEABLE;
4242
// the DEFAULT memory space. Typically GPU for GPU platforms and CPU for CPU platforms, but may be a different space depending on how
4343
// CHAI and CARE are configured.
44-
extern chai::ExecutionSpace DEFAULT;
45-
46-
}
44+
extern CARE_DLL_API chai::ExecutionSpace DEFAULT;
45+
} // namespace chai
4746

4847
#endif // !defined(_CARE_EXECUTION_SPACE_H_)
4948

src/care/GPUMacros.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#define _CARE_GPUMACROS_H_
1010

1111
#include "chai/config.hpp"
12+
#include "care/config.h"
1213

1314
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
1415
#define CARE_DEVICE_COMPILE
@@ -58,10 +59,10 @@
5859
#define gpuMemGetInfo cudaMemGetInfo
5960

6061
#define gpuDeviceSetLimit cudaDeviceSetLimit
61-
#define gpuDeviceSynchronize cudaDeviceSynchronize
6262
#define gpuLimitStackSize cudaLimitStackSize
6363
#define gpuLimitMallocHeapSize cudaLimitMallocHeapSize
6464

65+
#define gpuDeviceSynchronize cudaDeviceSynchronize
6566
#define gpuPeekAtLastError cudaPeekAtLastError
6667

6768
#define gpuSuccess cudaSuccess
@@ -81,15 +82,21 @@
8182
#define gpuMemGetInfo hipMemGetInfo
8283

8384
#define gpuDeviceSetLimit hipDeviceSetLimit
84-
#define gpuDeviceSynchronize hipDeviceSynchronize
8585
#define gpuLimitStackSize hipLimitStackSize
8686
#define gpuLimitMallocHeapSize hipLimitMallocHeapSize
8787

88+
#define gpuDeviceSynchronize hipDeviceSynchronize
8889
#define gpuPeekAtLastError hipPeekAtLastError
8990

9091
#define gpuSuccess hipSuccess
9192

92-
#endif // end __HIPCC__ case
93+
#elif CARE_ENABLE_GPU_SIMULATION_MODE
94+
95+
#define gpuMemcpyKind int
96+
#define gpuSimNoop()
97+
#define gpuPeekAtLastError gpuSimNoop
98+
99+
#endif // #if defined(__CUDACC__) #elif defined(__HIPCC__)
93100

94101
#endif // !defined(_CARE_GPUMACROS_H_)
95102

src/care/KeyValueSorter_decl.h

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,21 @@ using LocalKeyValueSorter = KeyValueSorter<KeyType, ValueType, Exec> ;
5555
/// have bugs!
5656
/// @return void
5757
///////////////////////////////////////////////////////////////////////////
58-
template <typename KeyT, typename ValueT, typename Exec=RAJADeviceExec>
59-
void sortKeyValueArrays(host_device_ptr<KeyT> & keys,
60-
host_device_ptr<ValueT> & values,
61-
const size_t start, const size_t len,
62-
const bool noCopy=false) ;
58+
template <typename Exec, typename KeyT, typename ValueT>
59+
std::enable_if_t<std::is_arithmetic<typename CHAIDataGetter<KeyT, RAJADeviceExec>::raw_type>::value, void>
60+
sortKeyValueArrays(host_device_ptr<KeyT> & keys,
61+
host_device_ptr<ValueT> & values,
62+
const size_t start, const size_t len,
63+
const bool noCopy=false);
64+
65+
#if defined(__HIPCC__) || (defined(__CUDACC__) && defined(CUB_MAJOR_VERSION) && defined(CUB_MINOR_VERSION) && (CUB_MAJOR_VERSION >= 2 || (CUB_MAJOR_VERSION == 1 && CUB_MINOR_VERSION >= 14)))
66+
template <typename Exec, typename KeyT, typename ValueT>
67+
std::enable_if_t<!std::is_arithmetic<typename CHAIDataGetter<KeyT, RAJADeviceExec>::raw_type>::value, void>
68+
sortKeyValueArrays(host_device_ptr<KeyT> & keys,
69+
host_device_ptr<ValueT> & values,
70+
const size_t start, const size_t len,
71+
const bool noCopy=false);
72+
#endif
6373

6474
///////////////////////////////////////////////////////////////////////////
6575
/// @author Benjamin Liu after Alan Dayton
@@ -358,7 +368,7 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJADeviceExec> {
358368
/// TODO: add bounds checking
359369
///////////////////////////////////////////////////////////////////////////
360370
void sort(const size_t start, const size_t len) {
361-
sortKeyValueArrays<ValueType, KeyType, RAJADeviceExec>(m_values, m_keys, start, len, false);
371+
sortKeyValueArrays<RAJADeviceExec>(m_values, m_keys, start, len, false);
362372
}
363373

364374
///////////////////////////////////////////////////////////////////////////
@@ -377,7 +387,7 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJADeviceExec> {
377387
/// @return void
378388
///////////////////////////////////////////////////////////////////////////
379389
void sort() {
380-
sortKeyValueArrays<ValueType, KeyType, RAJADeviceExec>(m_values, m_keys, 0, m_len, true);
390+
sortKeyValueArrays<RAJADeviceExec>(m_values, m_keys, 0, m_len, true);
381391
}
382392

383393
///////////////////////////////////////////////////////////////////////////
@@ -389,7 +399,7 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJADeviceExec> {
389399
/// TODO: add bounds checking
390400
///////////////////////////////////////////////////////////////////////////
391401
void sortByKey(const size_t start, const size_t len) {
392-
sortKeyValueArrays<KeyType, ValueType, RAJADeviceExec>(m_keys, m_values, start, len, false);
402+
sortKeyValueArrays<RAJADeviceExec>(m_keys, m_values, start, len, false);
393403
}
394404

395405
///////////////////////////////////////////////////////////////////////////
@@ -408,7 +418,7 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJADeviceExec> {
408418
/// @return void
409419
///////////////////////////////////////////////////////////////////////////
410420
void sortByKey() {
411-
sortKeyValueArrays(m_keys, m_values, 0, m_len, true);
421+
sortKeyValueArrays<RAJADeviceExec>(m_keys, m_values, 0, m_len, true);
412422
}
413423

414424
///////////////////////////////////////////////////////////////////////////
@@ -421,7 +431,7 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJADeviceExec> {
421431
/// TODO: add bounds checking
422432
///////////////////////////////////////////////////////////////////////////
423433
void stableSort(const size_t start, const size_t len) {
424-
sortKeyValueArrays(m_values, m_keys, start, len, false);
434+
sortKeyValueArrays<RAJADeviceExec>(m_values, m_keys, start, len, false);
425435
}
426436

427437
///////////////////////////////////////////////////////////////////////////
@@ -441,7 +451,7 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJADeviceExec> {
441451
/// TODO: investigate whether radix device sort is a stable sort
442452
///////////////////////////////////////////////////////////////////////////
443453
void stableSort() {
444-
sortKeyValueArrays(m_values, m_keys, 0, m_len, true);
454+
sortKeyValueArrays<RAJADeviceExec>(m_values, m_keys, 0, m_len, true);
445455
}
446456

447457
///////////////////////////////////////////////////////////////////////////
@@ -1181,11 +1191,12 @@ class CARE_DLL_API KeyValueSorter<KeyType, ValueType, RAJA::seq_exec> {
11811191
#endif // !CARE_ENABLE_GPU_SIMULATION_MODE
11821192

11831193

1194+
// Return the keys for each KVS where their values are the same
11841195
#ifdef CARE_PARALLEL_DEVICE
11851196
template <typename KeyType, typename ValueType>
11861197
void IntersectKeyValueSorters(RAJADeviceExec exec, KeyValueSorter<KeyType, ValueType, RAJADeviceExec> sorter1, int size1,
11871198
KeyValueSorter<KeyType, ValueType, RAJADeviceExec> sorter2, int size2,
1188-
host_device_ptr<int> &matches1, host_device_ptr<int>& matches2,
1199+
host_device_ptr<KeyType> &matches1, host_device_ptr<KeyType>& matches2,
11891200
int & numMatches) ;
11901201
#endif // defined(CARE_PARALLEL_DEVICE)
11911202

@@ -1197,7 +1208,7 @@ template <typename KeyType, typename ValueType>
11971208
void IntersectKeyValueSorters(RAJA::seq_exec exec,
11981209
KeyValueSorter<KeyType, ValueType, RAJA::seq_exec> sorter1, int size1,
11991210
KeyValueSorter<KeyType, ValueType, RAJA::seq_exec> sorter2, int size2,
1200-
host_device_ptr<int> &matches1, host_device_ptr<int>& matches2, int & numMatches) ;
1211+
host_device_ptr<KeyType> &matches1, host_device_ptr<KeyType>& matches2, int & numMatches) ;
12011212

12021213
} // namespace care
12031214

0 commit comments

Comments
 (0)