Skip to content

Commit b18e6ea

Browse files
author
Andrew Lamzed-Short
authored
[SYCL] atomic_memory_order_capabilities query for device and context (#8517)
This patch implements the `atomic_memory_order_capabilities` query in the OpenCL and Level Zero backends/plugins for `device` and `context`. Specifically: - OpenCL <2.0 returns the minimum required capability set (`relaxed`) defined in [Section 4.2 of the OpenCL 3.0 specification](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES). - OpenCL 2.x (>=2.0 and <3.0) and Level Zero backends return all memory order capabilities. - OpenCL >=3.0 queries the actual device to get the supported memory order capabilities. The E2E tests have also been updated to reflect these changes: intel/llvm-test-suite#1627
1 parent 7663dc2 commit b18e6ea

File tree

8 files changed

+166
-22
lines changed

8 files changed

+166
-22
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <set>
2121
#include <sstream>
2222
#include <string>
23+
#include <sycl/detail/pi.h>
2324
#include <sycl/detail/spinlock.hpp>
2425
#include <thread>
2526
#include <utility>
@@ -2308,6 +2309,13 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
23082309
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
23092310
// 2D USM fill and memset is not supported.
23102311
return ReturnValue(pi_bool{false});
2312+
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
2313+
pi_memory_order_capabilities capabilities =
2314+
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
2315+
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
2316+
PI_MEMORY_ORDER_SEQ_CST;
2317+
return ReturnValue(capabilities);
2318+
}
23112319
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
23122320
default:
23132321
// TODO: implement other parameters

sycl/plugins/opencl/pi_opencl.cpp

+58-1
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,65 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
282282
// For details about Intel UUID extension, see
283283
// sycl/doc/extensions/supported/sycl_ext_intel_device_info.md
284284
case PI_DEVICE_INFO_UUID:
285-
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
286285
return PI_ERROR_INVALID_VALUE;
286+
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
287+
// This query is missing before OpenCL 3.0
288+
// Check version and handle appropriately
289+
OCLV::OpenCLVersion devVer;
290+
cl_device_id deviceID = cast<cl_device_id>(device);
291+
cl_int ret_err = getDeviceVersion(deviceID, devVer);
292+
if (ret_err != CL_SUCCESS) {
293+
return cast<pi_result>(ret_err);
294+
}
295+
296+
// Minimum required capability to be returned
297+
// For OpenCL 1.2, this is all that is required
298+
pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED;
299+
300+
if (devVer >= OCLV::V3_0) {
301+
// For OpenCL >=3.0, the query should be implemented
302+
cl_device_atomic_capabilities cl_capabilities = 0;
303+
cl_int ret_err = clGetDeviceInfo(
304+
deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
305+
sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr);
306+
if (ret_err != CL_SUCCESS)
307+
return cast<pi_result>(ret_err);
308+
309+
// Mask operation to only consider atomic_memory_order* capabilities
310+
cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
311+
CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
312+
CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
313+
cl_capabilities &= mask;
314+
315+
// The memory order capabilities are hierarchical, if one is implied, all
316+
// preceding capabilities are implied as well. Especially in the case of
317+
// ACQ_REL.
318+
if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
319+
capabilities |= PI_MEMORY_ORDER_SEQ_CST;
320+
}
321+
if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
322+
capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE |
323+
PI_MEMORY_ORDER_RELEASE;
324+
}
325+
} else if (devVer >= OCLV::V2_0) {
326+
// For OpenCL 2.x, return all capabilities
327+
// (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model)
328+
capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE |
329+
PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST;
330+
}
331+
332+
if (paramValue) {
333+
if (paramValueSize < sizeof(pi_memory_order_capabilities))
334+
return static_cast<pi_result>(CL_INVALID_VALUE);
335+
336+
std::memcpy(paramValue, &capabilities, sizeof(capabilities));
337+
}
338+
339+
if (paramValueSizeRet)
340+
*paramValueSizeRet = sizeof(capabilities);
341+
342+
return static_cast<pi_result>(CL_SUCCESS);
343+
}
287344
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
288345
// Initialize result to minimum mandated capabilities according to
289346
// SYCL2020 4.6.3.2

sycl/plugins/unified_runtime/pi2ur.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
485485
(ur_device_info_t)UR_DEVICE_INFO_BFLOAT16},
486486
{PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
487487
(ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES},
488+
{PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES,
489+
(ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES},
488490
};
489491

490492
auto InfoType = InfoMapping.find(ParamName);

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
11861186
return ReturnValue(result);
11871187
}
11881188

1189+
case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
1190+
ur_memory_order_capability_flags_t capabilities =
1191+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
1192+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
1193+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
1194+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL |
1195+
UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
1196+
return ReturnValue(capabilities);
1197+
}
1198+
1199+
// TODO: Implement.
11891200
default:
11901201
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
11911202
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);
@@ -1716,7 +1727,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition(
17161727
// Currently supported partitioning (by affinity domain/numa) would always
17171728
// partition to all sub-devices.
17181729
//
1719-
if (NumDevices !=0)
1730+
if (NumDevices != 0)
17201731
PI_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE);
17211732

17221733
for (uint32_t I = 0; I < NumDevices; I++) {

sycl/source/detail/context_impl.cpp

+20-9
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <sycl/property_list.hpp>
2424
#include <sycl/stl.hpp>
2525

26+
#include <algorithm>
27+
2628
namespace sycl {
2729
__SYCL_INLINE_VER_NAMESPACE(_V1) {
2830
namespace detail {
@@ -166,17 +168,26 @@ template <>
166168
std::vector<sycl::memory_order>
167169
context_impl::get_info<info::context::atomic_memory_order_capabilities>()
168170
const {
171+
std::vector<sycl::memory_order> CapabilityList{
172+
sycl::memory_order::relaxed, sycl::memory_order::acquire,
173+
sycl::memory_order::release, sycl::memory_order::acq_rel,
174+
sycl::memory_order::seq_cst};
169175
if (is_host())
170-
return {sycl::memory_order::relaxed, sycl::memory_order::acquire,
171-
sycl::memory_order::release, sycl::memory_order::acq_rel,
172-
sycl::memory_order::seq_cst};
176+
return CapabilityList;
177+
178+
for (const sycl::device &Device : MDevices) {
179+
std::vector<sycl::memory_order> NewCapabilityList(CapabilityList.size());
180+
std::vector<sycl::memory_order> DeviceCapabilities =
181+
Device.get_info<info::device::atomic_memory_order_capabilities>();
182+
std::set_intersection(
183+
CapabilityList.begin(), CapabilityList.end(),
184+
DeviceCapabilities.begin(), DeviceCapabilities.end(),
185+
std::inserter(NewCapabilityList, NewCapabilityList.begin()));
186+
CapabilityList = NewCapabilityList;
187+
}
188+
CapabilityList.shrink_to_fit();
173189

174-
pi_memory_order_capabilities Result;
175-
getPlugin().call<PiApiKind::piContextGetInfo>(
176-
MContext,
177-
PiInfoCode<info::context::atomic_memory_order_capabilities>::value,
178-
sizeof(Result), &Result, nullptr);
179-
return readMemoryOrderBitfield(Result);
190+
return CapabilityList;
180191
}
181192
template <>
182193
std::vector<sycl::memory_scope>

sycl/source/detail/context_info.hpp

-11
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,6 @@ typename Param::return_type get_context_info(RT::PiContext Ctx,
2929
return Result;
3030
}
3131

32-
// Specialization for atomic_memory_order_capabilities, PI returns a bitfield
33-
template <>
34-
std::vector<sycl::memory_order>
35-
get_context_info<info::context::atomic_memory_order_capabilities>(
36-
RT::PiContext Ctx, const plugin &Plugin) {
37-
pi_memory_order_capabilities Result;
38-
Plugin.call<PiApiKind::piContextGetInfo>(
39-
Ctx, PiInfoCode<info::context::atomic_memory_order_capabilities>::value,
40-
sizeof(Result), &Result, nullptr);
41-
return readMemoryOrderBitfield(Result);
42-
}
4332
} // namespace detail
4433
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
4534
} // namespace sycl
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//==---- AtomicMemoryOrderCapabilities.cpp --- memory order query test -----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <algorithm>
10+
#include <gtest/gtest.h>
11+
#include <helpers/PiMock.hpp>
12+
#include <sycl/sycl.hpp>
13+
14+
using namespace sycl;
15+
16+
namespace {
17+
18+
static constexpr size_t expectedCapabilityVecSize = 5;
19+
static thread_local bool deviceGetInfoCalled = false;
20+
21+
static bool has_capability(const std::vector<memory_order> &deviceCapabilities,
22+
memory_order capabilityToFind) {
23+
return std::find(deviceCapabilities.begin(), deviceCapabilities.end(),
24+
capabilityToFind) != deviceCapabilities.end();
25+
}
26+
27+
// Mock hook for piDeviceGetInfo. For the atomic memory order capabilities
// query it records that the query happened and, when an output buffer is
// supplied, writes a bitfield with all five memory orders set. Every other
// query is left untouched. Always returns PI_SUCCESS.
pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name,
                                 size_t param_value_size, void *param_value,
                                 size_t *param_value_size_ret) {
  if (param_name != PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
    return PI_SUCCESS;

  deviceGetInfoCalled = true;

  // A null buffer means there is nowhere to write the capabilities.
  if (!param_value)
    return PI_SUCCESS;

  auto *Out = static_cast<pi_memory_order_capabilities *>(param_value);
  *Out = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
         PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
         PI_MEMORY_ORDER_SEQ_CST;
  return PI_SUCCESS;
}
42+
43+
// Checks that the device-level atomic_memory_order_capabilities query goes
// through piDeviceGetInfo and that the all-orders bitfield written by
// redefinedDeviceGetInfo is decoded into exactly the five memory orders.
TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) {
  unittest::PiMock Mock;
  platform MockPlatform = Mock.getPlatform();

  // Install the hook so it runs after the mock's default piDeviceGetInfo.
  Mock.redefineAfter<detail::PiApiKind::piDeviceGetInfo>(
      redefinedDeviceGetInfo);

  const device MockDevice = MockPlatform.get_devices()[0];
  context Ctx{MockDevice};

  const std::vector<memory_order> Orders =
      MockDevice.get_info<info::device::atomic_memory_order_capabilities>();

  EXPECT_TRUE(deviceGetInfoCalled);
  EXPECT_EQ(Orders.size(), expectedCapabilityVecSize);

  EXPECT_TRUE(has_capability(Orders, memory_order::relaxed));
  EXPECT_TRUE(has_capability(Orders, memory_order::acquire));
  EXPECT_TRUE(has_capability(Orders, memory_order::release));
  EXPECT_TRUE(has_capability(Orders, memory_order::acq_rel));
  EXPECT_TRUE(has_capability(Orders, memory_order::seq_cst));
}
64+
65+
} // namespace

sycl/unittests/SYCL2020/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_sycl_unittest(SYCL2020Tests OBJECT
1010
IsCompatible.cpp
1111
DeviceGetInfoAspects.cpp
1212
DeviceAspectTraits.cpp
13+
AtomicMemoryOrderCapabilities.cpp
1314
AtomicMemoryScopeCapabilities.cpp
1415
)
1516

0 commit comments

Comments
 (0)