Skip to content

Commit a8ee387

Browse files
authored
Merge pull request #8330 from edgargabriel/pr/rocm-v1.13-fixes2
GTEST/ROCM and UCM/ROCM: fixes for the v1.13 release
2 parents ad4b171 + ee3aee9 commit a8ee387

File tree

4 files changed

+73
-63
lines changed

4 files changed

+73
-63
lines changed

src/ucm/rocm/rocmmem.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -111,24 +111,14 @@ hsa_status_t ucm_hsa_amd_memory_pool_allocate(
111111
hsa_amd_memory_pool_t memory_pool, size_t size,
112112
uint32_t flags, void** ptr)
113113
{
114-
ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM;
115-
uint32_t pool_flags = 0;
116114
hsa_status_t status;
117115

118-
status = hsa_amd_memory_pool_get_info(memory_pool,
119-
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
120-
&pool_flags);
121-
if (status == HSA_STATUS_SUCCESS &&
122-
!(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) {
123-
type = UCS_MEMORY_TYPE_ROCM_MANAGED;
124-
}
125-
126116
ucm_event_enter();
127117

128118
status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr);
129119
if (status == HSA_STATUS_SUCCESS) {
130120
ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size);
131-
ucm_dispatch_mem_type_alloc(*ptr, size, type);
121+
ucm_dispatch_mem_type_alloc(*ptr, size, UCS_MEMORY_TYPE_UNKNOWN);
132122
}
133123

134124
ucm_event_leave();

src/uct/rocm/copy/rocm_copy_iface.c

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,61 @@ static uct_iface_ops_t uct_rocm_copy_iface_ops = {
130130
.iface_is_reachable = uct_rocm_copy_iface_is_reachable,
131131
};
132132

133+
134+
/*
 * Performance-estimation callback for the rocm_copy interface.
 *
 * Fills in only those members of perf_attr whose corresponding
 * UCT_PERF_ATTR_FIELD_* bit is set in perf_attr->field_mask; members that
 * were not requested are left untouched.
 *
 * tl_iface   Not referenced by this function.
 * perf_attr  In: field_mask (and operation, when
 *            UCT_PERF_ATTR_FIELD_OPERATION is set).
 *            Out: the requested attributes.
 *
 * Always returns UCS_OK.
 */
static ucs_status_t
135+
uct_rocm_copy_estimate_perf(uct_iface_h tl_iface, uct_perf_attr_t *perf_attr)
136+
{
137+
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_BANDWIDTH) {
138+
perf_attr->bandwidth.dedicated = 0;
139+
/* Without a specified operation there is no per-operation figure to
 * report, so the shared bandwidth is set to 0. */
if (!(perf_attr->field_mask & UCT_PERF_ATTR_FIELD_OPERATION)) {
140+
perf_attr->bandwidth.shared = 0;
141+
} else {
142+
/* Hard-coded per-operation values, expressed as multiples of UCS_MBYTE.
 * NOTE(review): presumably bytes per second, and presumably measured
 * empirically - confirm units and origin. */
switch (perf_attr->operation) {
143+
case UCT_EP_OP_GET_SHORT:
144+
perf_attr->bandwidth.shared = 2000.0 * UCS_MBYTE;
145+
break;
146+
case UCT_EP_OP_GET_ZCOPY:
147+
perf_attr->bandwidth.shared = 8000.0 * UCS_MBYTE;
148+
break;
149+
case UCT_EP_OP_PUT_SHORT:
150+
perf_attr->bandwidth.shared = 10500.0 * UCS_MBYTE;
151+
break;
152+
case UCT_EP_OP_PUT_ZCOPY:
153+
perf_attr->bandwidth.shared = 9500.0 * UCS_MBYTE;
154+
break;
155+
default:
156+
/* Any operation other than the four above reports 0. */
perf_attr->bandwidth.shared = 0;
157+
break;
158+
}
159+
}
160+
}
161+
162+
/* All three overhead attributes are reported as 0 when requested. */
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_SEND_PRE_OVERHEAD) {
163+
perf_attr->send_pre_overhead = 0;
164+
}
165+
166+
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_SEND_POST_OVERHEAD) {
167+
perf_attr->send_post_overhead = 0;
168+
}
169+
170+
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_RECV_OVERHEAD) {
171+
perf_attr->recv_overhead = 0;
172+
}
173+
174+
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_LATENCY) {
175+
/* NOTE(review): presumably a constant 10e-6 term with a zero
 * size-dependent term - confirm the argument order of
 * ucs_linear_func_make. */
perf_attr->latency = ucs_linear_func_make(10e-6, 0);
176+
}
177+
178+
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_MAX_INFLIGHT_EPS) {
179+
/* SIZE_MAX: presumably means "no limit" - confirm against the UCT API. */
perf_attr->max_inflight_eps = SIZE_MAX;
180+
}
181+
182+
return UCS_OK;
183+
}
184+
185+
133186
static uct_iface_internal_ops_t uct_rocm_copy_iface_internal_ops = {
134-
.iface_estimate_perf = uct_base_iface_estimate_perf,
187+
.iface_estimate_perf = uct_rocm_copy_estimate_perf,
135188
.iface_vfs_refresh = (uct_iface_vfs_refresh_func_t)ucs_empty_function,
136189
.ep_query = (uct_ep_query_func_t)ucs_empty_function_return_unsupported,
137190
.ep_invalidate = (uct_ep_invalidate_func_t)ucs_empty_function_return_unsupported

test/gtest/common/mem_buffer.cc

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,22 @@ bool mem_buffer::is_gpu_supported()
7676
bool mem_buffer::is_rocm_managed_supported()
7777
{
7878
#if HAVE_ROCM
79-
int device_id, has_managed_mem;
80-
return ((hipGetDevice(&device_id) == hipSuccess) &&
81-
(hipDeviceGetAttribute(&has_managed_mem,
82-
hipDeviceAttributeManagedMemory,
83-
device_id) == hipSuccess) &&
84-
has_managed_mem);
79+
hipError_t ret;
80+
void *dptr;
81+
hipPointerAttribute_t attr;
82+
83+
ret = hipMallocManaged(&dptr, 64);
84+
if (ret != hipSuccess) {
85+
return false;
86+
}
87+
88+
ret = hipPointerGetAttributes(&attr, dptr);
89+
if (ret != hipSuccess) {
90+
return false;
91+
}
92+
93+
hipFree(dptr);
94+
return attr.memoryType == hipMemoryTypeUnified;
8595
#else
8696
return false;
8797
#endif

test/gtest/ucm/rocm_hooks.cc

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ class rocm_hooks : public ucs::test {
6969
int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
7070
ASSERT_EQ(ptr, alloc_event.mem_type.address);
7171
ASSERT_EQ(size, alloc_event.mem_type.size);
72-
ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type);
72+
EXPECT_TRUE((alloc_event.mem_type.mem_type == expect_mem_type) ||
73+
(alloc_event.mem_type.mem_type == UCS_MEMORY_TYPE_UNKNOWN));
7374
}
7475

7576
void check_mem_free_events(void *ptr, size_t size,
@@ -148,47 +149,3 @@ UCS_TEST_F(rocm_hooks, test_hipMallocPitch) {
148149
ASSERT_EQ(ret, hipSuccess);
149150
check_mem_free_events((void *)dptr, 0);
150151
}
151-
152-
UCS_TEST_F(rocm_hooks, test_hip_Malloc_Free) {
153-
hipError_t ret;
154-
void *ptr, *ptr1;
155-
156-
/* small allocation */
157-
ret = hipMalloc(&ptr, 64);
158-
ASSERT_EQ(ret, hipSuccess);
159-
check_mem_alloc_events(ptr, 64);
160-
161-
ret = hipFree(ptr);
162-
ASSERT_EQ(ret, hipSuccess);
163-
check_mem_free_events(ptr, 64);
164-
165-
/* large allocation */
166-
ret = hipMalloc(&ptr, (256 * UCS_MBYTE));
167-
ASSERT_EQ(ret, hipSuccess);
168-
check_mem_alloc_events(ptr, (256 * UCS_MBYTE));
169-
170-
ret = hipFree(ptr);
171-
ASSERT_EQ(ret, hipSuccess);
172-
check_mem_free_events(ptr, (256 * UCS_MBYTE));
173-
174-
/* multiple allocations, rocmfree in reverse order */
175-
ret = hipMalloc(&ptr, (1 * UCS_MBYTE));
176-
ASSERT_EQ(ret, hipSuccess);
177-
check_mem_alloc_events(ptr, (1 * UCS_MBYTE));
178-
179-
ret = hipMalloc(&ptr1, (1 * UCS_MBYTE));
180-
ASSERT_EQ(ret, hipSuccess);
181-
check_mem_alloc_events(ptr1, (1 * UCS_MBYTE));
182-
183-
ret = hipFree(ptr1);
184-
ASSERT_EQ(ret, hipSuccess);
185-
check_mem_free_events(ptr1, (1 * UCS_MBYTE));
186-
187-
ret = hipFree(ptr);
188-
ASSERT_EQ(ret, hipSuccess);
189-
check_mem_free_events(ptr, (1 * UCS_MBYTE));
190-
191-
/* hipFree with NULL */
192-
ret = hipFree(NULL);
193-
ASSERT_EQ(ret, hipSuccess);
194-
}

0 commit comments

Comments
 (0)