From 5d788c609f1c5cfde37414af49fbc8996bf2a712 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 1 Feb 2023 18:11:15 +0000 Subject: [PATCH 01/75] [SYCL][CUDA] Decouple CUDA contexts from PI contexts This patch moves the CUDA context from the PI context to the PI device, and switches to always using the primary context. CUDA contexts are different from SYCL contexts in that they're tied to a single device, and that they are required to be active on a thread for most calls to the CUDA driver API. As shown in intel/llvm#8124 and intel/llvm#7526, the current mapping of CUDA context to PI context causes issues for device-based entry points that still need to call the CUDA APIs. We have workarounds to solve that, but they're a bit hacky, inefficient, and have a lot of edge-case issues. The peer-to-peer interface proposal in intel/llvm#6104 is also device-based, but enabling peer-to-peer for CUDA is done on the CUDA contexts, so the current mapping would make it difficult to implement. So this patch solves most of these issues by decoupling the CUDA context from the SYCL context, and simply managing the CUDA contexts in the devices. It also changes the CUDA context management to always use the primary context. This approach has a number of advantages: * Use of the primary context is recommended by Nvidia * Simplifies the CUDA context management in the plugin * Available CUDA context in device-based entry points * Likely more efficient in the general case, with fewer opportunities to accidentally cause costly CUDA context switches. * Easier and likely more efficient interactions with CUDA runtime applications. * Easier to expose P2P capabilities * Easier to support multiple devices in a SYCL context It does have a few drawbacks compared to the previous approach: * Drops support for `make_context` interop, as there is no sensible "native handle" to pass in (`get_native` is still supported fine). * No opportunity for users to separate their work into different CUDA contexts. 
It's unclear if there's any actual use case for this, it seems very uncommon in CUDA codebases to have multiple CUDA contexts for a single CUDA device in the same process. So overall I believe this should be a net benefit in general, and we could revisit if we run into an edge case that would need more fine grained CUDA context management. --- .../include/sycl/detail/properties_traits.def | 4 +- .../backend/backend_traits_cuda.hpp | 2 +- .../sycl/properties/context_properties.hpp | 8 +- sycl/plugins/cuda/pi_cuda.cpp | 107 ++---------------- sycl/plugins/cuda/pi_cuda.hpp | 28 ++--- sycl/test/basic_tests/interop-cuda.cpp | 2 - sycl/unittests/pi/cuda/test_base_objects.cpp | 90 --------------- 7 files changed, 31 insertions(+), 210 deletions(-) diff --git a/sycl/include/sycl/detail/properties_traits.def b/sycl/include/sycl/detail/properties_traits.def index 61101b9985d04..581380cc51f05 100644 --- a/sycl/include/sycl/detail/properties_traits.def +++ b/sycl/include/sycl/detail/properties_traits.def @@ -11,9 +11,9 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::property::no_init) __SYCL_PARAM_TRAITS_SPEC( sycl::property::context::cuda::use_primary_context) // Deprecated __SYCL_PARAM_TRAITS_SPEC( - sycl::ext::oneapi::cuda::property::context::use_primary_context) + sycl::ext::oneapi::cuda::property::context::use_primary_context) // Deprecated __SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order) __SYCL_PARAM_TRAITS_SPEC(sycl::property::reduction::initialize_to_identity) __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_low) __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_high) -__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal) \ No newline at end of file +__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp index 
52d8cc81366dd..370866eb126d5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -114,7 +114,7 @@ template <> struct BackendReturn { template <> struct InteropFeatureSupportMap { static constexpr bool MakePlatform = false; static constexpr bool MakeDevice = true; - static constexpr bool MakeContext = true; + static constexpr bool MakeContext = false; static constexpr bool MakeQueue = true; static constexpr bool MakeEvent = true; static constexpr bool MakeBuffer = false; diff --git a/sycl/include/sycl/properties/context_properties.hpp b/sycl/include/sycl/properties/context_properties.hpp index 443448684e001..68b3d37dba5b4 100644 --- a/sycl/include/sycl/properties/context_properties.hpp +++ b/sycl/include/sycl/properties/context_properties.hpp @@ -14,10 +14,14 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext::oneapi::cuda::property::context { +namespace ext::oneapi::cuda::property { +namespace __SYCL_DEPRECATED( + "the primary contexts are now always used") context { class use_primary_context : public ::sycl::detail::DataLessProperty< ::sycl::detail::UsePrimaryContext> {}; -} // namespace ext::oneapi::cuda::property::context +} // namespace __SYCL_DEPRECATED("the primary contexts are now always + // used")context +} // namespace ext::oneapi::cuda::property namespace property::context { namespace __SYCL2020_DEPRECATED( diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b7c64ef9f9a58..2a1168f0b2067 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -917,8 +917,11 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, for (int i = 0; i < numDevices; ++i) { CUdevice device; err = PI_CHECK_ERROR(cuDeviceGet(&device, i)); + CUcontext context; + err = PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(&context, device)); + platformIds[i].devices_.emplace_back( - 
new _pi_device{device, &platformIds[i]}); + new _pi_device{device, context, &platformIds[i]}); { const auto &dev = platformIds[i].devices_.back().get(); @@ -1183,6 +1186,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, assert(device != nullptr); + ScopedContext active(device->get_context()); + switch (param_name) { case PI_DEVICE_INFO_TYPE: { return getInfo(param_value_size, param_value, param_value_size_ret, @@ -1961,7 +1966,6 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: { - ScopedContext active(device); size_t FreeMemory = 0; size_t TotalMemory = 0; sycl::detail::pi::assertion(cuMemGetInfo(&FreeMemory, &TotalMemory) == @@ -2121,50 +2125,10 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, assert(retcontext != nullptr); pi_result errcode_ret = PI_SUCCESS; - // Parse properties. - bool property_cuda_primary = false; - while (properties && (0 != *properties)) { - // Consume property ID. - pi_context_properties id = *properties; - ++properties; - // Consume property value. - pi_context_properties value = *properties; - ++properties; - switch (id) { - case __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY: - assert(value == PI_FALSE || value == PI_TRUE); - property_cuda_primary = static_cast(value); - break; - default: - // Unknown property. - sycl::detail::pi::die( - "Unknown piContextCreate property in property list"); - return PI_ERROR_INVALID_VALUE; - } - } - std::unique_ptr<_pi_context> piContextPtr{nullptr}; try { - CUcontext current = nullptr; - - if (property_cuda_primary) { - // Use the CUDA primary context and assume that we want to use it - // immediately as we want to forge context switches. 
- CUcontext Ctxt; - errcode_ret = - PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(&Ctxt, devices[0]->get())); - piContextPtr = std::unique_ptr<_pi_context>( - new _pi_context{_pi_context::kind::primary, Ctxt, *devices}); - errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt)); - } else { - // Create a scoped context. - CUcontext newContext; - PI_CHECK_ERROR(cuCtxGetCurrent(¤t)); - errcode_ret = PI_CHECK_ERROR( - cuCtxCreate(&newContext, CU_CTX_MAP_HOST, devices[0]->get())); - piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ - _pi_context::kind::user_defined, newContext, *devices}); - } + piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{*devices}); + static std::once_flag initFlag; std::call_once( initFlag, @@ -2176,14 +2140,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, }, errcode_ret); - // For non-primary scoped contexts keep the last active on top of the stack - // as `cuCtxCreate` replaces it implicitly otherwise. - // Primary contexts are kept on top of the stack, so the previous context - // is not queried and therefore not recovered. 
- if (current != nullptr) { - PI_CHECK_ERROR(cuCtxSetCurrent(current)); - } - *retcontext = piContextPtr.release(); } catch (pi_result err) { errcode_ret = err; @@ -2194,7 +2150,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, } pi_result cuda_piContextRelease(pi_context ctxt) { - assert(ctxt != nullptr); if (ctxt->decrement_reference_count() > 0) { @@ -2204,29 +2159,7 @@ pi_result cuda_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; - if (!ctxt->backend_has_ownership()) - return PI_SUCCESS; - - if (!ctxt->is_primary()) { - CUcontext cuCtxt = ctxt->get(); - CUcontext current = nullptr; - cuCtxGetCurrent(¤t); - if (cuCtxt != current) { - PI_CHECK_ERROR(cuCtxPushCurrent(cuCtxt)); - } - PI_CHECK_ERROR(cuCtxSynchronize()); - cuCtxGetCurrent(¤t); - if (cuCtxt == current) { - PI_CHECK_ERROR(cuCtxPopCurrent(¤t)); - } - return PI_CHECK_ERROR(cuCtxDestroy(cuCtxt)); - } - - // Primary context is not destroyed, but released - CUdevice cuDev = ctxt->get_device()->get(); - CUcontext current; - cuCtxPopCurrent(¤t); - return PI_CHECK_ERROR(cuDevicePrimaryCtxRelease(cuDev)); + return PI_SUCCESS; } /// Gets the native CUDA handle of a PI context object @@ -2253,29 +2186,15 @@ pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, const pi_device *devices, bool ownNativeHandle, pi_context *piContext) { + (void)nativeHandle; (void)num_devices; (void)devices; (void)ownNativeHandle; + (void)piContext; assert(piContext != nullptr); assert(ownNativeHandle == false); - CUcontext newContext = reinterpret_cast(nativeHandle); - - ScopedContext active(newContext); - - // Get context's native device - CUdevice cu_device; - pi_result retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device)); - - // Create a SYCL device from the ctx device - pi_device device = nullptr; - retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device); - - // Create sycl context - *piContext = new 
_pi_context{_pi_context::kind::user_defined, newContext, - device, /*backend_owns*/ false}; - - return retErr; + return PI_ERROR_INVALID_OPERATION; } /// Creates a PI Memory object using a CUDA memory allocation. @@ -2469,8 +2388,6 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, std::unique_ptr<_pi_mem> retMemObj{nullptr}; try { - ScopedContext active(context); - retMemObj = std::unique_ptr<_pi_mem>{new _pi_mem{ context, parent_buffer, allocMode, ptr, hostPtr, bufferRegion.size}}; } catch (pi_result err) { diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index b4949b03ad046..a957b8df603c7 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -86,28 +86,29 @@ struct _pi_device { using native_type = CUdevice; native_type cuDevice_; + CUcontext cuContext_; std::atomic_uint32_t refCount_; pi_platform platform_; - pi_context context_; static constexpr pi_uint32 max_work_item_dimensions = 3u; size_t max_work_item_sizes[max_work_item_dimensions]; int max_work_group_size; public: - _pi_device(native_type cuDevice, pi_platform platform) - : cuDevice_(cuDevice), refCount_{1}, platform_(platform) {} + _pi_device(native_type cuDevice, CUcontext cuContext, pi_platform platform) + : cuDevice_(cuDevice), cuContext_(cuContext), refCount_{1}, + platform_(platform) {} + + ~_pi_device() { cuDevicePrimaryCtxRelease(cuDevice_); } native_type get() const noexcept { return cuDevice_; }; + CUcontext get_context() const noexcept { return cuContext_; }; + pi_uint32 get_reference_count() const noexcept { return refCount_; } pi_platform get_platform() const noexcept { return platform_; }; - void set_context(pi_context ctx) { context_ = ctx; }; - - pi_context get_context() { return context_; }; - void save_max_work_item_sizes(size_t size, size_t *save_max_work_item_sizes) noexcept { memcpy(max_work_item_sizes, save_max_work_item_sizes, size); @@ -174,16 +175,12 @@ struct _pi_context { using native_type = 
CUcontext; - enum class kind { primary, user_defined } kind_; native_type cuContext_; _pi_device *deviceId_; std::atomic_uint32_t refCount_; - _pi_context(kind k, CUcontext ctxt, _pi_device *devId, - bool backend_owns = true) - : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1}, - has_ownership{backend_owns} { - deviceId_->set_context(this); + _pi_context(_pi_device *devId) + : cuContext_{devId->get_context()}, deviceId_{devId}, refCount_{1} { cuda_piDeviceRetain(deviceId_); }; @@ -206,20 +203,15 @@ struct _pi_context { native_type get() const noexcept { return cuContext_; } - bool is_primary() const noexcept { return kind_ == kind::primary; } - pi_uint32 increment_reference_count() noexcept { return ++refCount_; } pi_uint32 decrement_reference_count() noexcept { return --refCount_; } pi_uint32 get_reference_count() const noexcept { return refCount_; } - bool backend_has_ownership() const noexcept { return has_ownership; } - private: std::mutex mutex_; std::vector extended_deleters_; - const bool has_ownership; }; /// PI Mem mapping to CUDA memory allocations, both data and texture/surface. 
diff --git a/sycl/test/basic_tests/interop-cuda.cpp b/sycl/test/basic_tests/interop-cuda.cpp index b2e1cb3d9026f..234f1ed9e1da0 100644 --- a/sycl/test/basic_tests/interop-cuda.cpp +++ b/sycl/test/basic_tests/interop-cuda.cpp @@ -87,8 +87,6 @@ int main() { backend_input_t InteropContextInput{ cu_context[0]}; - context InteropContext = - make_context(InteropContextInput); event InteropEvent = make_event(cu_event, Context); queue InteropQueue = make_queue(cu_queue, Context); diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index 15f7f7d2651b1..9bcc9e9f24d56 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -79,96 +79,6 @@ TEST_F(CudaBaseObjectsTest, piContextCreate) { cuCtxGetApiVersion(cudaContext, &version); EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - CUresult cuErr = cuCtxDestroy(cudaContext); - ASSERT_EQ(cuErr, CUDA_SUCCESS); -} - -TEST_F(CudaBaseObjectsTest, piContextCreatePrimaryTrue) { - pi_uint32 numPlatforms = 0; - pi_platform platform; - pi_device device; - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS); - pi_context_properties properties[] = { - __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, PI_TRUE, 0}; - - pi_context ctxt; - ASSERT_EQ((plugin->call_nocheck( - properties, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS); - EXPECT_NE(ctxt, nullptr); - EXPECT_EQ(ctxt->get_device(), device); - EXPECT_TRUE(ctxt->is_primary()); - - // Retrieve the cuCtxt to check information is correct - CUcontext cudaContext = ctxt->get(); - unsigned int version = 0; - CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version); - ASSERT_EQ(cuErr, 
CUDA_SUCCESS); - EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - - // Current context in the stack? - CUcontext current; - cuErr = cuCtxGetCurrent(¤t); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - ASSERT_EQ(current, cudaContext); - ASSERT_EQ((plugin->call_nocheck(ctxt)), - PI_SUCCESS); -} - -TEST_F(CudaBaseObjectsTest, piContextCreatePrimaryFalse) { - pi_uint32 numPlatforms = 0; - pi_platform platform; - pi_device device; - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS); - pi_context_properties properties[] = { - __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, PI_FALSE, 0}; - - pi_context ctxt; - ASSERT_EQ((plugin->call_nocheck( - properties, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS); - EXPECT_NE(ctxt, nullptr); - EXPECT_EQ(ctxt->get_device(), device); - EXPECT_FALSE(ctxt->is_primary()); - - // Retrieve the cuCtxt to check information is correct - CUcontext cudaContext = ctxt->get(); - unsigned int version = 0; - CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - - // Current context in the stack? 
- CUcontext current; - cuErr = cuCtxGetCurrent(¤t); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - ASSERT_EQ(current, cudaContext); ASSERT_EQ((plugin->call_nocheck(ctxt)), PI_SUCCESS); } From 868547506fe323f69fc263c385b85012b08eff63 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 3 Feb 2023 14:51:46 +0000 Subject: [PATCH 02/75] [SYCL][CUDA] Move deprecation warning to class Older versions of gcc struggle with attributes on namespaces --- sycl/include/sycl/properties/context_properties.hpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/properties/context_properties.hpp b/sycl/include/sycl/properties/context_properties.hpp index 68b3d37dba5b4..d0764493fb063 100644 --- a/sycl/include/sycl/properties/context_properties.hpp +++ b/sycl/include/sycl/properties/context_properties.hpp @@ -14,14 +14,11 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext::oneapi::cuda::property { -namespace __SYCL_DEPRECATED( - "the primary contexts are now always used") context { -class use_primary_context : public ::sycl::detail::DataLessProperty< - ::sycl::detail::UsePrimaryContext> {}; -} // namespace __SYCL_DEPRECATED("the primary contexts are now always - // used")context -} // namespace ext::oneapi::cuda::property +namespace ext::oneapi::cuda::property::context { +class __SYCL_DEPRECATED("the primary contexts are now always used") + use_primary_context : public ::sycl::detail::DataLessProperty< + ::sycl::detail::UsePrimaryContext> {}; +} // namespace ext::oneapi::cuda::property::context namespace property::context { namespace __SYCL2020_DEPRECATED( From de16f88261d17789cf87e47e906558208671c5b5 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 6 Feb 2023 17:24:01 +0000 Subject: [PATCH 03/75] Initial P2P impl. 
Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.def | 1 + sycl/include/sycl/detail/pi.h | 2 ++ sycl/include/sycl/device.hpp | 2 ++ sycl/plugins/cuda/pi_cuda.cpp | 14 ++++++++++++++ sycl/plugins/level_zero/pi_level_zero.cpp | 7 +++++++ sycl/source/device.cpp | 8 ++++++++ 6 files changed, 34 insertions(+) diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index eda09035c883e..1d8443c968b58 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -141,6 +141,7 @@ _PI_API(piPluginGetLastError) _PI_API(piTearDown) +_PI_API(piextEnablePeer) _PI_API(piextUSMEnqueueFill2D) _PI_API(piextUSMEnqueueMemset2D) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 56b8b33fae583..b148934d67096 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1033,6 +1033,8 @@ __SYCL_EXPORT pi_result piDevicesGet(pi_platform platform, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices); +__SYCL_EXPORT pi_result piextEnablePeer(pi_device command_device, pi_device peer_device); + /// Returns requested info for provided native device /// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for /// PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 0501cdf5c4d71..a345d087fd26d 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -89,6 +89,8 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { device &operator=(device &&rhs) = default; +bool ext_oneapi_enable_peer_access(const device &peer); + /// Get instance of device /// /// \return a valid cl_device_id instance in accordance with the requirements diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 2a1168f0b2067..b339460644ffb 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5617,6 +5617,19 @@ pi_result 
cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } +pi_result cuda_piextEnablePeer(pi_device command_device, pi_device peer_device){ + + pi_result result = PI_SUCCESS; +try { + ScopedContext active(command_device->get_context()); +result = PI_CHECK_ERROR(cuCtxEnablePeerAccess(peer_device->get_context(), 0)); + + } catch (pi_result err) { + result = err; + } +return result; +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5771,6 +5784,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) + _PI_CL(piextEnablePeer, cuda_piextEnablePeer) #undef _PI_CL diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 0b8cde1d1b295..161c686e6117d 100755 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9443,4 +9443,11 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, } return PI_SUCCESS; } + +pi_result piextEnablePeer(pi_device command_device, pi_device peer_device){ + +die("piextEnablePeer not " + "implemented \n"); +} + } // extern "C" diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 8202c302a08b0..8acf4b0332af7 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -200,5 +200,13 @@ pi_native_handle device::getNative() const { return impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } +bool device::ext_oneapi_enable_peer_access(const device &peer) { + const RT::PiDevice Device = impl->getHandleRef(); + const detail::plugin &Plugin = impl->getPlugin(); + const RT::PiDevice PeerPi = peer.impl->getHandleRef(); + Plugin.call(Device, PeerPi ); + return true; +} + } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl From 
b5f94813ad6a9565b4e4ca8e923caf4e74ab978f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 8 Feb 2023 07:04:26 -0800 Subject: [PATCH 04/75] added ext_oneapi_disable_peer_access and ext_oneapi_can_access_peer. Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.def | 6 ++-- sycl/include/sycl/detail/pi.h | 7 ++++- sycl/include/sycl/device.hpp | 12 +++++++- sycl/plugins/cuda/pi_cuda.cpp | 37 +++++++++++++++++++++++ sycl/plugins/level_zero/pi_level_zero.cpp | 19 ++++++++++-- sycl/source/device.cpp | 34 ++++++++++++++++++--- 6 files changed, 104 insertions(+), 11 deletions(-) diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 1d8443c968b58..6424e92ca93e2 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -141,8 +141,6 @@ _PI_API(piPluginGetLastError) _PI_API(piTearDown) -_PI_API(piextEnablePeer) - _PI_API(piextUSMEnqueueFill2D) _PI_API(piextUSMEnqueueMemset2D) _PI_API(piextUSMEnqueueMemcpy2D) @@ -153,4 +151,8 @@ _PI_API(piGetDeviceAndHostTimer) _PI_API(piextEnqueueDeviceGlobalVariableWrite) _PI_API(piextEnqueueDeviceGlobalVariableRead) +_PI_API(piextEnablePeer) +_PI_API(piextDisablePeer) +_PI_API(piextCanAccessPeer) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index b148934d67096..f86f5643517c8 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1033,7 +1033,12 @@ __SYCL_EXPORT pi_result piDevicesGet(pi_platform platform, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices); -__SYCL_EXPORT pi_result piextEnablePeer(pi_device command_device, pi_device peer_device); +__SYCL_EXPORT pi_result piextEnablePeer(pi_device command_device, + pi_device peer_device); +__SYCL_EXPORT pi_result piextDisablePeer(pi_device command_device, + pi_device peer_device); +__SYCL_EXPORT pi_result piextCanAccessPeer(pi_device command_device, + pi_device peer_device, int access_type); /// Returns requested info for provided native 
device /// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index a345d087fd26d..7cf341d8800bf 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -40,6 +40,12 @@ enum class aspect; namespace ext::oneapi { // Forward declaration class filter_selector; + +enum class peer_access { + access_supported, + atomics_supported, +}; + } // namespace ext::oneapi /// The SYCL device class encapsulates a single SYCL device on which kernels @@ -89,7 +95,11 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { device &operator=(device &&rhs) = default; -bool ext_oneapi_enable_peer_access(const device &peer); +void ext_oneapi_enable_peer_access(const device &peer); +void ext_oneapi_disable_peer_access(const device &peer); +bool ext_oneapi_can_access_peer(const device &peer, + ext::oneapi::peer_access value = + ext::oneapi::peer_access::access_supported); /// Get instance of device /// diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b339460644ffb..06f4dd2f4130f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5630,6 +5630,41 @@ result = PI_CHECK_ERROR(cuCtxEnablePeerAccess(peer_device->get_context(), 0)); return result; } +pi_result cuda_piextDisablePeer(pi_device command_device, pi_device peer_device){ + + pi_result result = PI_SUCCESS; +try { + ScopedContext active(command_device->get_context()); +result = PI_CHECK_ERROR(cuCtxDisablePeerAccess(peer_device->get_context())); + + } catch (pi_result err) { + result = err; + } +return result; +} + +pi_result cuda_piextCanAccessPeer(pi_device command_device, + pi_device peer_device, int access_type) { + + int res; + pi_result result = PI_SUCCESS; + + CUdevice_P2PAttribute attr = + access_type == 0 ? 
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED + : CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; + try { + ScopedContext active(command_device->get_context()); + PI_CHECK_ERROR(cuDeviceGetP2PAttribute(&res, attr, command_device->get(), + peer_device->get())); + } catch (pi_result err) { + result = err; + } + if (res == 0) { + return PI_ERROR_INVALID_OPERATION; + } + return result; +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5785,6 +5820,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) _PI_CL(piextEnablePeer, cuda_piextEnablePeer) + _PI_CL(piextDisablePeer, cuda_piextDisablePeer) + _PI_CL(piextCanAccessPeer, cuda_piextCanAccessPeer) #undef _PI_CL diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 161c686e6117d..8e3b97d0967a3 100755 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9444,10 +9444,23 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } -pi_result piextEnablePeer(pi_device command_device, pi_device peer_device){ +pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { -die("piextEnablePeer not " - "implemented \n"); + die("piextEnablePeer not " + "implemented \n"); +} + +pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { + + die("piextDisablePeer not " + "implemented \n"); +} + +pi_result piextCanAccessPeer(pi_device command_device, pi_device peer_device, + int access_type) { + + die("piextCanAccessPeer not " + "implemented \n"); } } // extern "C" diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 8acf4b0332af7..3d8620aacdc64 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -200,11 +200,37 @@ pi_native_handle device::getNative() const { return 
impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } -bool device::ext_oneapi_enable_peer_access(const device &peer) { +void device::ext_oneapi_enable_peer_access(const device &peer) { const RT::PiDevice Device = impl->getHandleRef(); - const detail::plugin &Plugin = impl->getPlugin(); - const RT::PiDevice PeerPi = peer.impl->getHandleRef(); - Plugin.call(Device, PeerPi ); + const RT::PiDevice PeerDevice = peer.impl->getHandleRef(); + if (Device != PeerDevice) { + const detail::plugin &Plugin = impl->getPlugin(); + Plugin.call(Device, PeerDevice); + } +} + +void device::ext_oneapi_disable_peer_access(const device &peer) { + const RT::PiDevice Device = impl->getHandleRef(); + const RT::PiDevice PeerDevice = peer.impl->getHandleRef(); + if (Device != PeerDevice) { + const detail::plugin Plugin = impl->getPlugin(); + Plugin.call(Device, PeerDevice); + } +} + +bool device::ext_oneapi_can_access_peer(const device &peer, + ext::oneapi::peer_access value) { + const RT::PiDevice Device = impl->getHandleRef(); + const RT::PiDevice PeerDevice = peer.impl->getHandleRef(); + if (Device != PeerDevice) { + const detail::plugin Plugin = impl->getPlugin(); + RT::PiResult Err = Plugin.call_nocheck( + Device, PeerDevice, static_cast(value)); + + if (Err != PI_SUCCESS) { + return false; + } + } return true; } From 64ecf256ebe6cc4f2f018b1d629ef28e696ca9b0 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 10 Feb 2023 13:40:59 +0000 Subject: [PATCH 05/75] Introduced pi_peer_attr. 
Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 8 ++++- sycl/include/sycl/detail/pi.hpp | 1 + sycl/include/sycl/device.hpp | 6 ++-- sycl/plugins/cuda/pi_cuda.cpp | 5 ++-- sycl/plugins/hip/pi_hip.cpp | 36 +++++++++++++++++++++++ sycl/plugins/level_zero/pi_level_zero.cpp | 21 ++++++++++--- sycl/source/device.cpp | 21 ++++++------- 7 files changed, 79 insertions(+), 19 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index f86f5643517c8..5f35a1ebe7fc3 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -973,9 +973,15 @@ typedef struct { using pi_image_format = _pi_image_format; using pi_image_desc = _pi_image_desc; +typedef enum { + access_supported, + atomics_supported, +} _pi_peer_attr; + typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; using pi_mem_info = _pi_mem_info; +using pi_peer_attr = _pi_peer_attr; // // Following section contains SYCL RT Plugin Interface (PI) functions. @@ -1038,7 +1044,7 @@ __SYCL_EXPORT pi_result piextEnablePeer(pi_device command_device, __SYCL_EXPORT pi_result piextDisablePeer(pi_device command_device, pi_device peer_device); __SYCL_EXPORT pi_result piextCanAccessPeer(pi_device command_device, - pi_device peer_device, int access_type); + pi_device peer_device, pi_peer_attr attr); /// Returns requested info for provided native device /// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 5d1272a2724d9..a311733cb9554 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -143,6 +143,7 @@ using PiMemImageInfo = ::pi_image_info; using PiMemObjectType = ::pi_mem_type; using PiMemImageChannelOrder = ::pi_image_channel_order; using PiMemImageChannelType = ::pi_image_channel_type; +using PIPeerAttr = ::pi_peer_attr; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter 
func, diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 7cf341d8800bf..5d587f95ee354 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -40,11 +40,13 @@ enum class aspect; namespace ext::oneapi { // Forward declaration class filter_selector; - +/* enum class peer_access { access_supported, atomics_supported, -}; +};*/ + +using peer_access = RT::PIPeerAttr; } // namespace ext::oneapi diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 06f4dd2f4130f..689dcc7a62ccc 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5644,13 +5644,13 @@ return result; } pi_result cuda_piextCanAccessPeer(pi_device command_device, - pi_device peer_device, int access_type) { + pi_device peer_device, pi_peer_attr value) { int res; pi_result result = PI_SUCCESS; CUdevice_P2PAttribute attr = - access_type == 0 ? CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED + value == access_supported ? CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED : CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; try { ScopedContext active(command_device->get_context()); @@ -5819,6 +5819,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) + // Peer to Peer _PI_CL(piextEnablePeer, cuda_piextEnablePeer) _PI_CL(piextDisablePeer, cuda_piextDisablePeer) _PI_CL(piextCanAccessPeer, cuda_piextCanAccessPeer) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 37284eadbb81a..b08e94cd104be 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5356,6 +5356,38 @@ pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } +pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { + + std::ignore = command_device; + std::ignore = peer_device; + + 
die("hip_piextEnablePeer not " + "implemented"); + return {}; +} + +pi_result hip_piextDisablePeer(pi_device command_device, pi_device peer_device) { + + std::ignore = command_device; + std::ignore = peer_device; + + die("hip_piextDisablePeer not " + "implemented"); + return {}; +} + +pi_result hip_piextCanAccessPeer(pi_device command_device, pi_device peer_device, + pi_peer_attr attr) { + + std::ignore = command_device; + std::ignore = peer_device; + std::ignore = attr; + + die("hip_piextCanAccessPeer not " + "implemented"); + return {}; +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5504,6 +5536,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, hip_piPluginGetLastError) _PI_CL(piTearDown, hip_piTearDown) _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) + // Peer to Peer + _PI_CL(piextEnablePeer, hip_piextEnablePeer) + _PI_CL(piextDisablePeer, hip_piextDisablePeer) + _PI_CL(piextCanAccessPeer, hip_piextCanAccessPeer) #undef _PI_CL diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8e3b97d0967a3..102e4af1f8b1e 100755 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9446,21 +9446,34 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { + std::ignore = command_device; + std::ignore = peer_device; + die("piextEnablePeer not " - "implemented \n"); + "implemented"); + return {}; } pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { + std::ignore = command_device; + std::ignore = peer_device; + die("piextDisablePeer not " - "implemented \n"); + "implemented"); + return {}; } pi_result piextCanAccessPeer(pi_device command_device, pi_device peer_device, - int access_type) { + pi_peer_attr attr) { + + std::ignore = command_device; 
+ std::ignore = peer_device; + std::ignore = attr; die("piextCanAccessPeer not " - "implemented \n"); + "implemented"); + return {}; } } // extern "C" diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 3d8620aacdc64..66196740b4c56 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -202,30 +202,31 @@ bool device::has(aspect Aspect) const { return impl->has(Aspect); } void device::ext_oneapi_enable_peer_access(const device &peer) { const RT::PiDevice Device = impl->getHandleRef(); - const RT::PiDevice PeerDevice = peer.impl->getHandleRef(); - if (Device != PeerDevice) { + const RT::PiDevice Peer = peer.impl->getHandleRef(); + if (Device != Peer) { const detail::plugin &Plugin = impl->getPlugin(); - Plugin.call(Device, PeerDevice); + Plugin.call(Device, Peer); } } void device::ext_oneapi_disable_peer_access(const device &peer) { const RT::PiDevice Device = impl->getHandleRef(); - const RT::PiDevice PeerDevice = peer.impl->getHandleRef(); - if (Device != PeerDevice) { + const RT::PiDevice Peer = peer.impl->getHandleRef(); + if (Device != Peer) { const detail::plugin Plugin = impl->getPlugin(); - Plugin.call(Device, PeerDevice); + Plugin.call(Device, Peer); } } bool device::ext_oneapi_can_access_peer(const device &peer, - ext::oneapi::peer_access value) { + ext::oneapi::peer_access attr) { const RT::PiDevice Device = impl->getHandleRef(); - const RT::PiDevice PeerDevice = peer.impl->getHandleRef(); - if (Device != PeerDevice) { + const RT::PiDevice Peer = peer.impl->getHandleRef(); + //const RT::PIPeerAttr Attr = attr == ext::oneapi::peer_access::access_supported ? 
access_supported : atomics_supported; + if (Device != Peer) { const detail::plugin Plugin = impl->getPlugin(); RT::PiResult Err = Plugin.call_nocheck( - Device, PeerDevice, static_cast(value)); + Device, Peer, attr); if (Err != PI_SUCCESS) { return false; From 15d4bf6f124dc17b8d92de7968c54c9e5638a89a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 10 Feb 2023 15:21:20 +0000 Subject: [PATCH 06/75] Format. Signed-off-by: JackAKirk --- sycl/include/sycl/device.hpp | 5 ----- sycl/plugins/cuda/pi_cuda.cpp | 40 +++++++++++++++++++---------------- sycl/source/device.cpp | 2 +- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 5d587f95ee354..3d2293fe9cae5 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -40,11 +40,6 @@ enum class aspect; namespace ext::oneapi { // Forward declaration class filter_selector; -/* -enum class peer_access { - access_supported, - atomics_supported, -};*/ using peer_access = RT::PIPeerAttr; diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 689dcc7a62ccc..ce1083cc4d23f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5617,49 +5617,53 @@ pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } -pi_result cuda_piextEnablePeer(pi_device command_device, pi_device peer_device){ +pi_result cuda_piextEnablePeer(pi_device command_device, + pi_device peer_device) { - pi_result result = PI_SUCCESS; -try { + pi_result result = PI_SUCCESS; + try { ScopedContext active(command_device->get_context()); -result = PI_CHECK_ERROR(cuCtxEnablePeerAccess(peer_device->get_context(), 0)); + result = + PI_CHECK_ERROR(cuCtxEnablePeerAccess(peer_device->get_context(), 0)); } catch (pi_result err) { result = err; } -return result; + return result; } -pi_result cuda_piextDisablePeer(pi_device command_device, pi_device peer_device){ +pi_result 
cuda_piextDisablePeer(pi_device command_device, + pi_device peer_device) { - pi_result result = PI_SUCCESS; -try { + pi_result result = PI_SUCCESS; + try { ScopedContext active(command_device->get_context()); -result = PI_CHECK_ERROR(cuCtxDisablePeerAccess(peer_device->get_context())); + result = PI_CHECK_ERROR(cuCtxDisablePeerAccess(peer_device->get_context())); } catch (pi_result err) { result = err; } -return result; + return result; } pi_result cuda_piextCanAccessPeer(pi_device command_device, - pi_device peer_device, pi_peer_attr value) { + pi_device peer_device, pi_peer_attr attr) { - int res; + int value; pi_result result = PI_SUCCESS; - CUdevice_P2PAttribute attr = - value == access_supported ? CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED - : CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; + CUdevice_P2PAttribute CUattr = + attr == access_supported + ? CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED + : CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; try { ScopedContext active(command_device->get_context()); - PI_CHECK_ERROR(cuDeviceGetP2PAttribute(&res, attr, command_device->get(), - peer_device->get())); + PI_CHECK_ERROR(cuDeviceGetP2PAttribute( + &value, CUattr, command_device->get(), peer_device->get())); } catch (pi_result err) { result = err; } - if (res == 0) { + if (value == 0) { return PI_ERROR_INVALID_OPERATION; } return result; diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 66196740b4c56..a3ed9d8e837f1 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -222,7 +222,7 @@ bool device::ext_oneapi_can_access_peer(const device &peer, ext::oneapi::peer_access attr) { const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = peer.impl->getHandleRef(); - //const RT::PIPeerAttr Attr = attr == ext::oneapi::peer_access::access_supported ? 
access_supported : atomics_supported; + if (Device != Peer) { const detail::plugin Plugin = impl->getPlugin(); RT::PiResult Err = Plugin.call_nocheck( From df55a69cbc8e2d7c40fc8d0d046976f306dba6db Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 3 Mar 2023 09:34:57 -0800 Subject: [PATCH 07/75] Format. Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 2 +- sycl/include/sycl/device.hpp | 10 +++++----- sycl/plugins/cuda/pi_cuda.cpp | 2 +- sycl/plugins/hip/pi_hip.cpp | 7 ++++--- sycl/source/device.cpp | 5 +++-- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 5f35a1ebe7fc3..c24ddd759bfca 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -974,7 +974,7 @@ using pi_image_format = _pi_image_format; using pi_image_desc = _pi_image_desc; typedef enum { - access_supported, + access_supported, atomics_supported, } _pi_peer_attr; diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 3d2293fe9cae5..43e4d5cbb43aa 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -92,11 +92,11 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { device &operator=(device &&rhs) = default; -void ext_oneapi_enable_peer_access(const device &peer); -void ext_oneapi_disable_peer_access(const device &peer); -bool ext_oneapi_can_access_peer(const device &peer, - ext::oneapi::peer_access value = - ext::oneapi::peer_access::access_supported); + void ext_oneapi_enable_peer_access(const device &peer); + void ext_oneapi_disable_peer_access(const device &peer); + bool ext_oneapi_can_access_peer(const device &peer, + ext::oneapi::peer_access value = + ext::oneapi::peer_access::access_supported); /// Get instance of device /// diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index eb9ea7c3403e5..5c40b62651153 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5831,7 
+5831,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) - // Peer to Peer + // Peer to Peer _PI_CL(piextEnablePeer, cuda_piextEnablePeer) _PI_CL(piextDisablePeer, cuda_piextDisablePeer) _PI_CL(piextCanAccessPeer, cuda_piextCanAccessPeer) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index b08e94cd104be..7748e4e23ba41 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5366,7 +5366,8 @@ pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { return {}; } -pi_result hip_piextDisablePeer(pi_device command_device, pi_device peer_device) { +pi_result hip_piextDisablePeer(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -5376,8 +5377,8 @@ pi_result hip_piextDisablePeer(pi_device command_device, pi_device peer_device) return {}; } -pi_result hip_piextCanAccessPeer(pi_device command_device, pi_device peer_device, - pi_peer_attr attr) { +pi_result hip_piextCanAccessPeer(pi_device command_device, + pi_device peer_device, pi_peer_attr attr) { std::ignore = command_device; std::ignore = peer_device; diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index a3ed9d8e837f1..c2471c51afd4e 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -225,8 +225,9 @@ bool device::ext_oneapi_can_access_peer(const device &peer, if (Device != Peer) { const detail::plugin Plugin = impl->getPlugin(); - RT::PiResult Err = Plugin.call_nocheck( - Device, Peer, attr); + RT::PiResult Err = + Plugin.call_nocheck(Device, Peer, + attr); if (Err != PI_SUCCESS) { return false; From ddca3c34d819a8f862e2abbf0c40e80246fe891f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 3 Mar 2023 09:40:00 -0800 Subject: [PATCH 08/75] Format. 
Signed-off-by: JackAKirk --- sycl/include/sycl/device.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 43e4d5cbb43aa..01fd3384e4e0b 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -94,9 +94,10 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { void ext_oneapi_enable_peer_access(const device &peer); void ext_oneapi_disable_peer_access(const device &peer); - bool ext_oneapi_can_access_peer(const device &peer, - ext::oneapi::peer_access value = - ext::oneapi::peer_access::access_supported); + bool + ext_oneapi_can_access_peer(const device &peer, + ext::oneapi::peer_access value = + ext::oneapi::peer_access::access_supported); /// Get instance of device /// From c3a20098908bfd9533f5c680560a2a77557126d4 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 3 Mar 2023 09:44:01 -0800 Subject: [PATCH 09/75] Format. Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index c24ddd759bfca..d8f9b7f6ddef1 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1044,7 +1044,8 @@ __SYCL_EXPORT pi_result piextEnablePeer(pi_device command_device, __SYCL_EXPORT pi_result piextDisablePeer(pi_device command_device, pi_device peer_device); __SYCL_EXPORT pi_result piextCanAccessPeer(pi_device command_device, - pi_device peer_device, pi_peer_attr attr); + pi_device peer_device, + pi_peer_attr attr); /// Returns requested info for provided native device /// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for From 1855367b7096784b8452f53c9d498d96f9971e44 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 3 Mar 2023 12:29:10 -0800 Subject: [PATCH 10/75] Corrected hip pi die function. 
Signed-off-by: JackAKirk --- sycl/plugins/hip/pi_hip.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 3f8f727002608..e27491d6aced6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5368,8 +5368,8 @@ pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; - die("hip_piextEnablePeer not " - "implemented"); + sycl::detail::pi::die("hip_piextEnablePeer not " + "implemented"); return {}; } @@ -5379,8 +5379,8 @@ pi_result hip_piextDisablePeer(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - die("hip_piextDisablePeer not " - "implemented"); + sycl::detail::pi::die("hip_piextDisablePeer not " + "implemented"); return {}; } @@ -5391,8 +5391,8 @@ pi_result hip_piextCanAccessPeer(pi_device command_device, std::ignore = peer_device; std::ignore = attr; - die("hip_piextCanAccessPeer not " - "implemented"); + sycl::detail::pi::die("hip_piextCanAccessPeer not " + "implemented"); return {}; } From 644c880396402c7f9a841d612d6324f2e96a7313 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 6 Mar 2023 02:59:07 -0800 Subject: [PATCH 11/75] Added esimd p2p pi functions. 
Signed-off-by: JackAKirk --- .../esimd_emulator/pi_esimd_emulator.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 0fc2a5a10f4f9..6f97b23254cb3 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2081,6 +2081,33 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_ERROR_INVALID_VALUE; } +pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { + + std::ignore = command_device; + std::ignore = peer_device; + + DIE_NO_IMPLEMENTATION; +} + +pi_result hip_piextDisablePeer(pi_device command_device, + pi_device peer_device) { + + std::ignore = command_device; + std::ignore = peer_device; + + DIE_NO_IMPLEMENTATION; +} + +pi_result hip_piextCanAccessPeer(pi_device command_device, + pi_device peer_device, pi_peer_attr attr) { + + std::ignore = command_device; + std::ignore = peer_device; + std::ignore = attr; + + DIE_NO_IMPLEMENTATION; +} + // Check that the major version matches in PiVersion and SupportedVersion _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); From e5b421e86ab74697d55b170759b21755ec16a2eb Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 6 Mar 2023 03:14:10 -0800 Subject: [PATCH 12/75] fix mistake in last commit. 
Signed-off-by: JackAKirk --- .../esimd_emulator/pi_esimd_emulator.cpp | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 6f97b23254cb3..ae885da484055 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2081,6 +2081,32 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_ERROR_INVALID_VALUE; } + // Check that the major version matches in PiVersion and SupportedVersion + _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); + + size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); + if (strlen(_PI_H_VERSION_STRING) >= PluginVersionSize) { + return PI_ERROR_INVALID_VALUE; + } + strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); + + PiESimdDeviceAccess = new sycl::detail::ESIMDEmuPluginOpaqueData(); + // 'version' to be compared with 'ESIMD_EMULATOR_DEVICE_REQUIRED_VER' defined + // in device interface file + PiESimdDeviceAccess->version = ESIMDEmuPluginDataVersion; + PiESimdDeviceAccess->data = + reinterpret_cast(new sycl::detail::ESIMDDeviceInterface()); + + // Registering pre-defined surface index dedicated for SLM + (*PiESimdSurfaceMap)[__ESIMD_DNS::SLM_BTI] = nullptr; + +#define _PI_API(api) \ + (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); +#include + + return PI_SUCCESS; +} + pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { std::ignore = command_device; @@ -2108,32 +2134,6 @@ pi_result hip_piextCanAccessPeer(pi_device command_device, DIE_NO_IMPLEMENTATION; } - // Check that the major version matches in PiVersion and SupportedVersion - _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); - - size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - if (strlen(_PI_H_VERSION_STRING) >= PluginVersionSize) { - return 
PI_ERROR_INVALID_VALUE; - } - strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); - - PiESimdDeviceAccess = new sycl::detail::ESIMDEmuPluginOpaqueData(); - // 'version' to be compared with 'ESIMD_EMULATOR_DEVICE_REQUIRED_VER' defined - // in device interface file - PiESimdDeviceAccess->version = ESIMDEmuPluginDataVersion; - PiESimdDeviceAccess->data = - reinterpret_cast(new sycl::detail::ESIMDDeviceInterface()); - - // Registering pre-defined surface index dedicated for SLM - (*PiESimdSurfaceMap)[__ESIMD_DNS::SLM_BTI] = nullptr; - -#define _PI_API(api) \ - (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include - - return PI_SUCCESS; -} - #ifdef _WIN32 #define __SYCL_PLUGIN_DLL_NAME "pi_esimd_emulator.dll" #include "../common_win_pi_trace/common_win_pi_trace.hpp" From 6f45d538b84f5c84f3871788baf982c8bf0755b1 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 6 Mar 2023 04:41:14 -0800 Subject: [PATCH 13/75] corrected pi function names. Signed-off-by: JackAKirk --- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index ae885da484055..a322b9037fd71 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2107,7 +2107,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } -pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { +pi_result piextEnablePeer(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -2115,8 +2116,8 @@ pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { DIE_NO_IMPLEMENTATION; } -pi_result hip_piextDisablePeer(pi_device command_device, - pi_device peer_device) { +pi_result piextDisablePeer(pi_device command_device, + pi_device 
peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -2124,8 +2125,8 @@ pi_result hip_piextDisablePeer(pi_device command_device, DIE_NO_IMPLEMENTATION; } -pi_result hip_piextCanAccessPeer(pi_device command_device, - pi_device peer_device, pi_peer_attr attr) { +pi_result piextCanAccessPeer(pi_device command_device, + pi_device peer_device, pi_peer_attr attr) { std::ignore = command_device; std::ignore = peer_device; From 1849619c529c8c77bc565ad52c603b71967c68a5 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 6 Mar 2023 05:04:44 -0800 Subject: [PATCH 14/75] format. Signed-off-by: JackAKirk --- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index a322b9037fd71..ba24a1569f5e6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2107,8 +2107,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } -pi_result piextEnablePeer(pi_device command_device, - pi_device peer_device) { +pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -2116,8 +2115,7 @@ pi_result piextEnablePeer(pi_device command_device, DIE_NO_IMPLEMENTATION; } -pi_result piextDisablePeer(pi_device command_device, - pi_device peer_device) { +pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -2125,8 +2123,8 @@ pi_result piextDisablePeer(pi_device command_device, DIE_NO_IMPLEMENTATION; } -pi_result piextCanAccessPeer(pi_device command_device, - pi_device peer_device, pi_peer_attr attr) { +pi_result piextCanAccessPeer(pi_device command_device, pi_device peer_device, + pi_peer_attr attr) { std::ignore = command_device; std::ignore = peer_device; From 
aa7a7ebaae5b84a6fc3ac4b3e25fd8f407365739 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 7 Mar 2023 09:41:56 +0000 Subject: [PATCH 15/75] Update sycl/plugins/cuda/pi_cuda.cpp Co-authored-by: Steffen Larsen --- sycl/plugins/cuda/pi_cuda.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 774a74d6cc9cf..123e8455fc67e 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5635,7 +5635,7 @@ pi_result cuda_piextCanAccessPeer(pi_device command_device, } catch (pi_result err) { result = err; } - if (value == 0) { + if (value != 1) { return PI_ERROR_INVALID_OPERATION; } return result; From 3380230013395e9e2c34a50d61d5751cf2de8096 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 7 Mar 2023 09:50:04 +0000 Subject: [PATCH 16/75] Update sycl/plugins/level_zero/pi_level_zero.cpp Co-authored-by: Steffen Larsen --- sycl/plugins/level_zero/pi_level_zero.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index e002323400d02..807fd33f64eb0 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8379,8 +8379,7 @@ pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; - die("piextEnablePeer not " - "implemented"); + die("piextEnablePeer not implemented"); return {}; } From 5f6360c611feb4171ed579d3f69bef525c3c6e55 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 7 Mar 2023 09:51:39 +0000 Subject: [PATCH 17/75] Early exit in ext_oneapi_can_access_peer Co-authored-by: Steffen Larsen --- sycl/source/device.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index c2471c51afd4e..f2a25083554d2 100644 --- a/sycl/source/device.cpp +++ 
b/sycl/source/device.cpp @@ -223,17 +223,10 @@ bool device::ext_oneapi_can_access_peer(const device &peer, const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = peer.impl->getHandleRef(); - if (Device != Peer) { - const detail::plugin Plugin = impl->getPlugin(); - RT::PiResult Err = - Plugin.call_nocheck(Device, Peer, - attr); - - if (Err != PI_SUCCESS) { - return false; - } - } - return true; + if (Device == Peer) + return true; + const detail::plugin Plugin = impl->getPlugin(); + return Plugin.call_nocheck(Device, Peer, attr) == PI_SUCCESS; } } // __SYCL_INLINE_VER_NAMESPACE(_V1) From e33a5782f9ed23f06031c6aa2ddfc27684440736 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 10 Mar 2023 09:31:09 -0800 Subject: [PATCH 18/75] Switched piextCanAccessPeer to more general piextPeerAccessGetInfo. Added pi peer function to mock pi. Introduced runtime enum ext::oneapi::peer_access that is separate from _pi_peer_attr. Other suggested changes applied. Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.def | 6 +-- sycl/include/sycl/detail/pi.h | 24 +++++----- sycl/include/sycl/detail/pi.hpp | 2 +- sycl/include/sycl/device.hpp | 5 +- sycl/plugins/cuda/pi_cuda.cpp | 47 ++++++++++--------- .../esimd_emulator/pi_esimd_emulator.cpp | 15 ++++-- sycl/plugins/hip/pi_hip.cpp | 16 +++++-- sycl/plugins/level_zero/pi_level_zero.cpp | 15 ++++-- sycl/source/device.cpp | 23 +++++++-- sycl/unittests/helpers/PiMockPlugin.hpp | 24 ++++++++++ 10 files changed, 122 insertions(+), 55 deletions(-) diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 6424e92ca93e2..7dd6d2f04f4b6 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -151,8 +151,8 @@ _PI_API(piGetDeviceAndHostTimer) _PI_API(piextEnqueueDeviceGlobalVariableWrite) _PI_API(piextEnqueueDeviceGlobalVariableRead) -_PI_API(piextEnablePeer) -_PI_API(piextDisablePeer) -_PI_API(piextCanAccessPeer) +_PI_API(piextEnablePeerAccess) 
+_PI_API(piextDisablePeerAccess) +_PI_API(piextPeerAccessGetInfo) #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index d8f9b7f6ddef1..44597250c7d15 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -79,7 +79,7 @@ // piextEnqueueDeviceGlobalVariableRead functions. #define _PI_H_VERSION_MAJOR 12 -#define _PI_H_VERSION_MINOR 23 +#define _PI_H_VERSION_MINOR 24 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -973,13 +973,13 @@ typedef struct { using pi_image_format = _pi_image_format; using pi_image_desc = _pi_image_desc; +typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; + typedef enum { - access_supported, - atomics_supported, + PI_PEER_ACCESS_SUPPORTED, + PI_PEER_ATOMICS_SUPPORTED } _pi_peer_attr; -typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; - using pi_mem_info = _pi_mem_info; using pi_peer_attr = _pi_peer_attr; @@ -1039,13 +1039,13 @@ __SYCL_EXPORT pi_result piDevicesGet(pi_platform platform, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices); -__SYCL_EXPORT pi_result piextEnablePeer(pi_device command_device, - pi_device peer_device); -__SYCL_EXPORT pi_result piextDisablePeer(pi_device command_device, - pi_device peer_device); -__SYCL_EXPORT pi_result piextCanAccessPeer(pi_device command_device, - pi_device peer_device, - pi_peer_attr attr); +__SYCL_EXPORT pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device); +__SYCL_EXPORT pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device); +__SYCL_EXPORT pi_result piextPeerAccessGetInfo( + pi_device command_device, pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); /// Returns requested info for provided native device /// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for diff --git a/sycl/include/sycl/detail/pi.hpp 
b/sycl/include/sycl/detail/pi.hpp index b15119e1b19de..eb85c5836f8b1 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -145,7 +145,7 @@ using PiMemImageInfo = ::pi_image_info; using PiMemObjectType = ::pi_mem_type; using PiMemImageChannelOrder = ::pi_image_channel_order; using PiMemImageChannelType = ::pi_image_channel_type; -using PIPeerAttr = ::pi_peer_attr; +using PiPeerAttr = ::pi_peer_attr; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter func, diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 01fd3384e4e0b..9549c2c88673f 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -41,7 +41,10 @@ namespace ext::oneapi { // Forward declaration class filter_selector; -using peer_access = RT::PIPeerAttr; +enum class peer_access { + access_supported, + atomics_supported, +}; } // namespace ext::oneapi diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 123e8455fc67e..02cad55efe74f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5589,8 +5589,8 @@ pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } -pi_result cuda_piextEnablePeer(pi_device command_device, - pi_device peer_device) { +pi_result cuda_piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { pi_result result = PI_SUCCESS; try { @@ -5604,8 +5604,8 @@ pi_result cuda_piextEnablePeer(pi_device command_device, return result; } -pi_result cuda_piextDisablePeer(pi_device command_device, - pi_device peer_device) { +pi_result cuda_piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { pi_result result = PI_SUCCESS; try { @@ -5618,27 +5618,32 @@ pi_result cuda_piextDisablePeer(pi_device command_device, return result; } -pi_result cuda_piextCanAccessPeer(pi_device command_device, - pi_device peer_device, pi_peer_attr attr) { - 
+pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { int value; - pi_result result = PI_SUCCESS; - - CUdevice_P2PAttribute CUattr = - attr == access_supported - ? CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED - : CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; + CUdevice_P2PAttribute CUattr; try { ScopedContext active(command_device->get_context()); + switch (attr) { + case PI_PEER_ACCESS_SUPPORTED: { + CUattr = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; + break; + } + case PI_PEER_ATOMICS_SUPPORTED: { + CUattr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; + break; + } + default: { __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(attr); } + } PI_CHECK_ERROR(cuDeviceGetP2PAttribute( &value, CUattr, command_device->get(), peer_device->get())); } catch (pi_result err) { - result = err; - } - if (value != 1) { - return PI_ERROR_INVALID_OPERATION; + return err; } - return result; + return getInfo(param_value_size, param_value, param_value_size_ret, value); } const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; @@ -5796,9 +5801,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) // Peer to Peer - _PI_CL(piextEnablePeer, cuda_piextEnablePeer) - _PI_CL(piextDisablePeer, cuda_piextDisablePeer) - _PI_CL(piextCanAccessPeer, cuda_piextCanAccessPeer) + _PI_CL(piextEnablePeerAccess, cuda_piextEnablePeerAccess) + _PI_CL(piextDisablePeerAccess, cuda_piextDisablePeerAccess) + _PI_CL(piextPeerAccessGetInfo, cuda_piextPeerAccessGetInfo) #undef _PI_CL diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index ba24a1569f5e6..e38ac0d5b5332 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2107,7 +2107,8 @@ pi_result 
piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } -pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { +pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -2115,7 +2116,8 @@ pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { DIE_NO_IMPLEMENTATION; } -pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { +pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -2123,12 +2125,17 @@ pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { DIE_NO_IMPLEMENTATION; } -pi_result piextCanAccessPeer(pi_device command_device, pi_device peer_device, - pi_peer_attr attr) { +pi_result piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { std::ignore = command_device; std::ignore = peer_device; std::ignore = attr; + std::ignore = param_value_size; + std::ignore = param_value; + std::ignore = param_value_size_ret; DIE_NO_IMPLEMENTATION; } diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index e27491d6aced6..7f2cf7a4d1829 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5363,7 +5363,8 @@ pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } -pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { +pi_result hip_piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -5373,8 +5374,8 @@ pi_result hip_piextEnablePeer(pi_device command_device, pi_device peer_device) { return {}; } -pi_result hip_piextDisablePeer(pi_device command_device, - pi_device peer_device) { +pi_result 
hip_piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -5384,12 +5385,17 @@ pi_result hip_piextDisablePeer(pi_device command_device, return {}; } -pi_result hip_piextCanAccessPeer(pi_device command_device, - pi_device peer_device, pi_peer_attr attr) { +pi_result hip_piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { std::ignore = command_device; std::ignore = peer_device; std::ignore = attr; + std::ignore = param_value_size; + std::ignore = param_value; + std::ignore = param_value_size_ret; sycl::detail::pi::die("hip_piextCanAccessPeer not " "implemented"); diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 807fd33f64eb0..e3db35c483487 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8374,7 +8374,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_SUCCESS; } -pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { +pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -8383,7 +8384,8 @@ pi_result piextEnablePeer(pi_device command_device, pi_device peer_device) { return {}; } -pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { +pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { std::ignore = command_device; std::ignore = peer_device; @@ -8393,12 +8395,17 @@ pi_result piextDisablePeer(pi_device command_device, pi_device peer_device) { return {}; } -pi_result piextCanAccessPeer(pi_device command_device, pi_device peer_device, - pi_peer_attr attr) { +pi_result piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr 
attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { std::ignore = command_device; std::ignore = peer_device; std::ignore = attr; + std::ignore = param_value_size; + std::ignore = param_value; + std::ignore = param_value_size_ret; die("piextCanAccessPeer not " "implemented"); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index f2a25083554d2..d6b15b71a34d2 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -205,7 +205,7 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { const detail::plugin &Plugin = impl->getPlugin(); - Plugin.call(Device, Peer); + Plugin.call(Device, Peer); } } @@ -214,7 +214,7 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { const detail::plugin Plugin = impl->getPlugin(); - Plugin.call(Device, Peer); + Plugin.call(Device, Peer); } } @@ -222,11 +222,26 @@ bool device::ext_oneapi_can_access_peer(const device &peer, ext::oneapi::peer_access attr) { const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = peer.impl->getHandleRef(); - + RT::PiPeerAttr PiAttr; + size_t return_size; + int value; if (Device == Peer) return true; + + switch (attr) { + case ext::oneapi::peer_access::access_supported: { + PiAttr = PI_PEER_ACCESS_SUPPORTED; + break; + } + case ext::oneapi::peer_access::atomics_supported: { + PiAttr = PI_PEER_ATOMICS_SUPPORTED; + break; + } + } const detail::plugin Plugin = impl->getPlugin(); - return Plugin.call_nocheck(Device, Peer, attr) == PI_SUCCESS; + Plugin.call_nocheck( + Device, Peer, PiAttr, sizeof(int), &value, &return_size); + return value == 1; } } // __SYCL_INLINE_VER_NAMESPACE(_V1) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index c2ac5e6863b8b..5b0597b9af71b 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp 
+++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1138,3 +1138,27 @@ inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, } return PI_SUCCESS; } + +inline pi_result mock_piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + + return PI_SUCCESS; +} + +inline pi_result mock_piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + + return PI_SUCCESS; +} + +inline pi_result +mock_piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, + pi_peer_attr attr, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) { + + if (param_value) + *static_cast(param_value) = 1; + if (param_value_size_ret) + *param_value_size_ret = sizeof(pi_int32); + return PI_SUCCESS; +} From 32d714a040dd9fc03055676e23395e990b768d2e Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 10 Mar 2023 09:43:30 -0800 Subject: [PATCH 19/75] format. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 4 +++- sycl/source/device.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 02cad55efe74f..632a578b609c8 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5636,7 +5636,9 @@ pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, CUattr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; break; } - default: { __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(attr); } + default: { + __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(attr); + } } PI_CHECK_ERROR(cuDeviceGetP2PAttribute( &value, CUattr, command_device->get(), peer_device->get())); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index d6b15b71a34d2..3e0406857fe3a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -222,11 +222,13 @@ bool device::ext_oneapi_can_access_peer(const device &peer, ext::oneapi::peer_access attr) { const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = 
peer.impl->getHandleRef(); + + if (Device == Peer) + return true; + RT::PiPeerAttr PiAttr; size_t return_size; int value; - if (Device == Peer) - return true; switch (attr) { case ext::oneapi::peer_access::access_supported: { From 7d12cd73e644f571b70a5aa3c324a44d63158fb6 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 17 Mar 2023 13:03:10 -0700 Subject: [PATCH 20/75] Added unittest, fixed hip pi names. Signed-off-by: JackAKirk --- sycl/plugins/hip/pi_hip.cpp | 6 ++-- sycl/unittests/Extensions/CMakeLists.txt | 1 + sycl/unittests/Extensions/USMP2P.cpp | 37 ++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 sycl/unittests/Extensions/USMP2P.cpp diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 7f2cf7a4d1829..d077d2999b624 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5551,9 +5551,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piTearDown, hip_piTearDown) _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) // Peer to Peer - _PI_CL(piextEnablePeer, hip_piextEnablePeer) - _PI_CL(piextDisablePeer, hip_piextDisablePeer) - _PI_CL(piextCanAccessPeer, hip_piextCanAccessPeer) + _PI_CL(piextEnablePeerAccess, hip_piextEnablePeerAccess) + _PI_CL(piextDisablePeerAccess, hip_piextDisablePeerAccess) + _PI_CL(piextPeerAccessGetInfo, hip_piextPeerAccessGetInfo) #undef _PI_CL diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index d144d3641b1db..d21cd867c8097 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -8,5 +8,6 @@ add_sycl_unittest(ExtensionsTests OBJECT USMMemcpy2D.cpp DeviceGlobal.cpp OneAPISubGroupMask.cpp + USMP2P.cpp ) diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp new file mode 100644 index 0000000000000..54fb934b04e6f --- /dev/null +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -0,0 +1,37 @@ 
+//==------------------------- USMP2P.cpp -----------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include + + +#include + +TEST(USMP2PTest, USMP2PTest) { + using namespace sycl::detail; + using namespace sycl::unittest; + + sycl::unittest::PiMock Mock1; + sycl::platform Plt1 = Mock1.getPlatform(); + + sycl::unittest::PiMock Mock2; + sycl::platform Plt2 = Mock2.getPlatform(); + + auto Dev1 = Plt1.get_devices()[0]; + auto Dev2 = Plt2.get_devices()[0]; + + ASSERT_TRUE(Dev1.ext_oneapi_can_access_peer( + Dev2, sycl::ext::oneapi::peer_access::access_supported)); + ASSERT_TRUE(Dev1.ext_oneapi_can_access_peer( + Dev2, sycl::ext::oneapi::peer_access::atomics_supported)); + + Dev1.ext_oneapi_enable_peer_access(Dev2); + Dev1.ext_oneapi_disable_peer_access(Dev2); +} From 4cf59990a9aaee234d42cc6a6ff27c3baaa62e47 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 17 Mar 2023 13:14:00 -0700 Subject: [PATCH 21/75] Format. Signed-off-by: JackAKirk --- sycl/unittests/Extensions/USMP2P.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index 54fb934b04e6f..c386531f8bb23 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -11,7 +11,6 @@ #include #include - #include TEST(USMP2PTest, USMP2PTest) { From 63f23e5fa3f77a2cd59471756f3e3e7aca6df0f9 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 17 Mar 2023 14:04:16 -0700 Subject: [PATCH 22/75] Add missing abi symbols to test. 
Signed-off-by: JackAKirk --- sycl/test/abi/sycl_symbols_linux.dump | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 1a38f705fa2b7..00c63638d3713 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4350,6 +4350,9 @@ _ZNK4sycl3_V16device8get_infoINS0_4info6device8atomic64EEENS0_6detail19is_device _ZNK4sycl3_V16device8get_infoINS0_4info6device8platformEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device9vendor_idEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device9getNativeEv +_ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_ +_ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_ +_ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE _ZNK4sycl3_V16kernel11get_backendEv _ZNK4sycl3_V16kernel11get_contextEv _ZNK4sycl3_V16kernel13getNativeImplEv From aaec28642ef3be1edaa6a246fa736dbb8687aeb6 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 17 Mar 2023 14:46:03 -0700 Subject: [PATCH 23/75] Update l0 abi test. Signed-off-by: JackAKirk --- sycl/test/abi/pi_level_zero_symbol_check.dump | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index fbefe601f3675..8657f79c25522 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -119,3 +119,6 @@ piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc piextUSMSharedAlloc +piextEnablePeerAccess +piextDisablePeerAccess +piextPeerAccessGetInfo From 76245f29a0b55619d72b4802d756813cabd8e974 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 20 Mar 2023 05:19:00 -0700 Subject: [PATCH 24/75] Corrected die error messages. 
Signed-off-by: JackAKirk --- sycl/plugins/hip/pi_hip.cpp | 6 +++--- sycl/plugins/level_zero/pi_level_zero.cpp | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d077d2999b624..1f641420d620a 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5369,7 +5369,7 @@ pi_result hip_piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - sycl::detail::pi::die("hip_piextEnablePeer not " + sycl::detail::pi::die("hip_piextEnablePeerAccess not " "implemented"); return {}; } @@ -5380,7 +5380,7 @@ pi_result hip_piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - sycl::detail::pi::die("hip_piextDisablePeer not " + sycl::detail::pi::die("hip_piextDisablePeerAccess not " "implemented"); return {}; } @@ -5397,7 +5397,7 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, std::ignore = param_value; std::ignore = param_value_size_ret; - sycl::detail::pi::die("hip_piextCanAccessPeer not " + sycl::detail::pi::die("hip_piextPeerAccessGetInfo not " "implemented"); return {}; } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index e3db35c483487..57f97390fc4cf 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8380,7 +8380,7 @@ pi_result piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - die("piextEnablePeer not implemented"); + die("piextEnablePeerAccess not implemented in L0"); return {}; } @@ -8390,8 +8390,8 @@ pi_result piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - die("piextDisablePeer not " - "implemented"); + die("piextDisablePeerAccess not " + "implemented in L0"); return {}; } @@ -8407,8 +8407,8 @@ pi_result 
piextPeerAccessGetInfo(pi_device command_device, std::ignore = param_value; std::ignore = param_value_size_ret; - die("piextCanAccessPeer not " - "implemented"); + die("piextPeerAccessGetInfo not " + "implemented in L0"); return {}; } From 55a9b6a712dfcc44117002e9471d8cd37e63d2a4 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 20 Mar 2023 06:08:18 -0700 Subject: [PATCH 25/75] cleanup test. Signed-off-by: JackAKirk --- sycl/unittests/Extensions/USMP2P.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index c386531f8bb23..3641cda28233e 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -7,15 +7,10 @@ //===----------------------------------------------------------------------===// #include - -#include #include - #include TEST(USMP2PTest, USMP2PTest) { - using namespace sycl::detail; - using namespace sycl::unittest; sycl::unittest::PiMock Mock1; sycl::platform Plt1 = Mock1.getPlatform(); From 80dfb3a7033eea5af04288c0d908aa7301e25b17 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 20 Mar 2023 14:37:42 -0700 Subject: [PATCH 26/75] Add global var check that pi is reached. Redefine pi function to return two devices from get_platform. 
Signed-off-by: JackAKirk --- sycl/unittests/Extensions/USMP2P.cpp | 73 +++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index 3641cda28233e..cd4b50d238a5d 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -6,26 +6,83 @@ // //===----------------------------------------------------------------------===// -#include -#include #include +#include +#include + +int check = 0; + +pi_result redefinedDevicesGet(pi_platform platform, + pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices) { + if (num_devices) + *num_devices = 2; + if (devices && num_entries > 0) + { + devices[0] = reinterpret_cast(1); + devices[1] = reinterpret_cast(2); + } + return PI_SUCCESS; +} + +pi_result redefinedEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + check = 3; + return PI_SUCCESS; +} + +pi_result redefinedDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + check = 4; + return PI_SUCCESS; +} + +pi_result redefinedPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + + if (param_value) + *static_cast(param_value) = 1; + if (param_value_size_ret) + *param_value_size_ret = sizeof(pi_int32); + + if (attr == PI_PEER_ACCESS_SUPPORTED) { + check = 1; + } else if (attr == PI_PEER_ACCESS_SUPPORTED) { + check = 2; + } + return PI_SUCCESS; +} TEST(USMP2PTest, USMP2PTest) { - sycl::unittest::PiMock Mock1; - sycl::platform Plt1 = Mock1.getPlatform(); + sycl::unittest::PiMock Mock; + + Mock.redefine( + redefinedDevicesGet); + Mock.redefine( + redefinedEnablePeerAccess); + Mock.redefine( + redefinedDisablePeerAccess); + Mock.redefine( + redefinedPeerAccessGetInfo); - sycl::unittest::PiMock Mock2; - sycl::platform Plt2 = Mock2.getPlatform(); + 
sycl::platform Plt = Mock.getPlatform(); - auto Dev1 = Plt1.get_devices()[0]; - auto Dev2 = Plt2.get_devices()[0]; + auto Dev1 = Plt.get_devices()[0]; + auto Dev2 = Plt.get_devices()[1]; ASSERT_TRUE(Dev1.ext_oneapi_can_access_peer( Dev2, sycl::ext::oneapi::peer_access::access_supported)); + ASSERT_EQ(check, 1); ASSERT_TRUE(Dev1.ext_oneapi_can_access_peer( Dev2, sycl::ext::oneapi::peer_access::atomics_supported)); + ASSERT_EQ(check, 2); Dev1.ext_oneapi_enable_peer_access(Dev2); + ASSERT_EQ(check, 3); Dev1.ext_oneapi_disable_peer_access(Dev2); + ASSERT_EQ(check, 4); } From 76f6772a13884f5dc1387d461524403f7920d7b9 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 20 Mar 2023 14:47:28 -0700 Subject: [PATCH 27/75] Format. Signed-off-by: JackAKirk --- sycl/unittests/Extensions/USMP2P.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index cd4b50d238a5d..3faab3f826996 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -12,14 +12,12 @@ int check = 0; -pi_result redefinedDevicesGet(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { +pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices) { if (num_devices) *num_devices = 2; - if (devices && num_entries > 0) - { + if (devices && num_entries > 0) { devices[0] = reinterpret_cast(1); devices[1] = reinterpret_cast(2); } @@ -60,8 +58,7 @@ TEST(USMP2PTest, USMP2PTest) { sycl::unittest::PiMock Mock; - Mock.redefine( - redefinedDevicesGet); + Mock.redefine(redefinedDevicesGet); Mock.redefine( redefinedEnablePeerAccess); Mock.redefine( From 85949f7b4021b0853ec0421741e86be44ff26f7f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 21 Mar 2023 06:30:49 -0700 Subject: [PATCH 28/75] Fix copy/paste error. 
Signed-off-by: JackAKirk --- sycl/unittests/Extensions/USMP2P.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index 3faab3f826996..ac44bb6ddd54e 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -48,7 +48,7 @@ pi_result redefinedPeerAccessGetInfo(pi_device command_device, if (attr == PI_PEER_ACCESS_SUPPORTED) { check = 1; - } else if (attr == PI_PEER_ACCESS_SUPPORTED) { + } else if (attr == PI_PEER_ATOMICS_SUPPORTED) { check = 2; } return PI_SUCCESS; From 65aa452f80635ad5539a67115e48922aee3f82c0 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 9 May 2023 03:12:24 -0700 Subject: [PATCH 29/75] trying to scope up meaningful error mech. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 32 +++++++++++++++++++++++++++----- sycl/source/device.cpp | 24 ++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 632a578b609c8..8549967d4796c 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -58,7 +58,8 @@ pi_result map_error(CUresult result) { case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return PI_ERROR_OUT_OF_RESOURCES; default: - return PI_ERROR_UNKNOWN; + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + //return PI_ERROR_UNKNOWN; } } @@ -138,7 +139,7 @@ pi_result check_error(CUresult result, const char *function, int line, return PI_SUCCESS; } - if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) { + /*if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) { const char *errorString = nullptr; const char *errorName = nullptr; cuGetErrorName(result, &errorName); @@ -156,7 +157,7 @@ pi_result check_error(CUresult result, const char *function, int line, if (std::getenv("PI_CUDA_ABORT") != nullptr) { std::abort(); - } + }*/ throw map_error(result); } @@ -5595,8 +5596,17 @@ pi_result 
cuda_piextEnablePeerAccess(pi_device command_device, pi_result result = PI_SUCCESS; try { ScopedContext active(command_device->get_context()); - result = - PI_CHECK_ERROR(cuCtxEnablePeerAccess(peer_device->get_context(), 0)); + + auto curesult = cuCtxEnablePeerAccess(peer_device->get_context(), 0); + if (curesult != CUDA_SUCCESS) { + const char *errorString = nullptr; + unused atm const char *errorName = nullptr; + cuGetErrorName(curesult, &errorName); + cuGetErrorString(curesult, &errorString); + + setErrorMessage(errorString, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + } + result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } catch (pi_result err) { result = err; @@ -5612,6 +5622,18 @@ pi_result cuda_piextDisablePeerAccess(pi_device command_device, ScopedContext active(command_device->get_context()); result = PI_CHECK_ERROR(cuCtxDisablePeerAccess(peer_device->get_context())); + auto curesult = cuCtxDisablePeerAccess(peer_device->get_context()); + if (curesult != CUDA_SUCCESS) { + const char *errorString; + // unused atm + const char *errorName; + cuGetErrorName(curesult, &errorName); + cuGetErrorString(curesult, &errorString); + + setErrorMessage(errorString, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + } + result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; + } catch (pi_result err) { result = err; } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 3e0406857fe3a..70277f6992f64 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -205,7 +205,16 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { const detail::plugin &Plugin = impl->getPlugin(); - Plugin.call(Device, Peer); + pi_result result = + Plugin.call_nocheck(Device, + Peer); + // if (result == PI_ERROR_PLUGIN_SPECIFIC_ERROR) { + if (result != PI_SUCCESS) { + char *message = nullptr; + auto err = Plugin.call_nocheck( + &message); + throw sycl::exception(make_error_code(errc::runtime), message); + } } } @@ -214,7 +223,18 @@ 
void device::ext_oneapi_disable_peer_access(const device &peer) { const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { const detail::plugin Plugin = impl->getPlugin(); - Plugin.call(Device, Peer); + pi_result result = + Plugin.call_nocheck(Device, + Peer); + + // if (result == PI_ERROR_PLUGIN_SPECIFIC_ERROR) { + if (result != PI_SUCCESS) { + + char *message = nullptr; + auto err = Plugin.call_nocheck( + &message); + throw sycl::exception(make_error_code(errc::runtime), message); + } } } From ea7e855e90bb5354eeaa9c63a83ef792af76fa17 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 9 May 2023 03:44:10 -0700 Subject: [PATCH 30/75] added some comments. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 8549967d4796c..a1895bd6d89a9 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -133,12 +133,14 @@ pi_result forLatestEvents(const pi_event *event_wait_list, /// \return PI_SUCCESS if \param result was CUDA_SUCCESS. /// \throw pi_error exception (integer) if input was not success. /// +//TODO this needs some thought! pi_result check_error(CUresult result, const char *function, int line, const char *file) { if (result == CUDA_SUCCESS || result == CUDA_ERROR_DEINITIALIZED) { return PI_SUCCESS; } +// can't throw cuda errors from UR!! 
/*if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) { const char *errorString = nullptr; const char *errorName = nullptr; @@ -5600,7 +5602,8 @@ pi_result cuda_piextEnablePeerAccess(pi_device command_device, auto curesult = cuCtxEnablePeerAccess(peer_device->get_context(), 0); if (curesult != CUDA_SUCCESS) { const char *errorString = nullptr; - unused atm const char *errorName = nullptr; + //unused atm + const char *errorName = nullptr; cuGetErrorName(curesult, &errorName); cuGetErrorString(curesult, &errorString); From eae9b128e7bab40e67ec6bb6388841dd40f74a92 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 02:28:24 -0700 Subject: [PATCH 31/75] working impl throwing exceptions in all backends. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 76 +++++++++++++------ .../esimd_emulator/pi_esimd_emulator.cpp | 15 +++- sycl/plugins/hip/pi_hip.cpp | 20 +++-- sycl/plugins/level_zero/pi_level_zero.cpp | 20 +++-- sycl/source/device.cpp | 22 +++--- 5 files changed, 99 insertions(+), 54 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index a1895bd6d89a9..4bbf760cfd1f5 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5594,24 +5594,30 @@ pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, pi_result cuda_piextEnablePeerAccess(pi_device command_device, pi_device peer_device) { - pi_result result = PI_SUCCESS; try { ScopedContext active(command_device->get_context()); - auto curesult = cuCtxEnablePeerAccess(peer_device->get_context(), 0); - if (curesult != CUDA_SUCCESS) { - const char *errorString = nullptr; - //unused atm - const char *errorName = nullptr; - cuGetErrorName(curesult, &errorName); - cuGetErrorString(curesult, &errorString); + CUresult cu_res = cuCtxEnablePeerAccess(peer_device->get_context(), 0); + if (cu_res != CUDA_SUCCESS) { + const char *error_string = nullptr; + const char *error_name = nullptr; + cuGetErrorName(cu_res, 
&error_name); + cuGetErrorString(cu_res, &error_string); + char *message = + (char *)malloc(strlen(error_string) + strlen(error_name) + 2); + strcpy(message, error_name); + strcat(message, "\n"); + strcat(message, error_string); + + setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + free(message); - setErrorMessage(errorString, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } - result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } catch (pi_result err) { + setErrorMessage("", err); result = err; } return result; @@ -5619,25 +5625,30 @@ pi_result cuda_piextEnablePeerAccess(pi_device command_device, pi_result cuda_piextDisablePeerAccess(pi_device command_device, pi_device peer_device) { - pi_result result = PI_SUCCESS; try { + ScopedContext active(command_device->get_context()); - result = PI_CHECK_ERROR(cuCtxDisablePeerAccess(peer_device->get_context())); - auto curesult = cuCtxDisablePeerAccess(peer_device->get_context()); - if (curesult != CUDA_SUCCESS) { - const char *errorString; - // unused atm - const char *errorName; - cuGetErrorName(curesult, &errorName); - cuGetErrorString(curesult, &errorString); + CUresult cu_res = cuCtxDisablePeerAccess(peer_device->get_context()); + if (cu_res != CUDA_SUCCESS) { + const char *error_string; + const char *error_name; + cuGetErrorName(cu_res, &error_name); + cuGetErrorString(cu_res, &error_string); + char *message = + (char *)malloc(strlen(error_string) + strlen(error_name) + 2); + strcpy(message, error_name); + strcat(message, "\n"); + strcat(message, error_string); - setErrorMessage(errorString, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + free(message); + result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } - result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } catch (pi_result err) { + setErrorMessage("", err); result = err; } return result; @@ -5661,13 +5672,28 @@ pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, CUattr = 
CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; break; } - default: { - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(attr); + default: { __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(attr); } } + + CUresult cu_res = cuDeviceGetP2PAttribute( + &value, CUattr, command_device->get(), peer_device->get()); + if (cu_res != CUDA_SUCCESS) { + const char *error_string; + const char *error_name; + cuGetErrorName(cu_res, &error_name); + cuGetErrorString(cu_res, &error_string); + char *message = + (char *)malloc(2 + strlen(error_string) + strlen(error_name)); + strcpy(message, error_name); + strcat(message, "\n"); + strcat(message, error_string); + + setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + free(message); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } - PI_CHECK_ERROR(cuDeviceGetP2PAttribute( - &value, CUattr, command_device->get(), peer_device->get())); } catch (pi_result err) { + setErrorMessage("", err); return err; } return getInfo(param_value_size, param_value, param_value_size_ret, value); diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index e38ac0d5b5332..640b4d99635a6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2113,7 +2113,10 @@ pi_result piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - DIE_NO_IMPLEMENTATION; + setErrorMessage("piextEnablePeerAccess not " + "implemented in esimd_emulator backend", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } pi_result piextDisablePeerAccess(pi_device command_device, @@ -2122,7 +2125,10 @@ pi_result piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - DIE_NO_IMPLEMENTATION; + setErrorMessage("piextDisablePeerAccess not " + "implemented in esimd_emulator backend", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } pi_result 
piextPeerAccessGetInfo(pi_device command_device, @@ -2137,7 +2143,10 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, std::ignore = param_value; std::ignore = param_value_size_ret; - DIE_NO_IMPLEMENTATION; + setErrorMessage("piextPeerAccessGetInfo not " + "implemented in esimd_emulator backend", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } #ifdef _WIN32 diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 1f641420d620a..1b2af43efe8d0 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5369,9 +5369,10 @@ pi_result hip_piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - sycl::detail::pi::die("hip_piextEnablePeerAccess not " - "implemented"); - return {}; + setErrorMessage("piextEnablePeerAccess not " + "implemented in hip backend", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } pi_result hip_piextDisablePeerAccess(pi_device command_device, @@ -5380,8 +5381,10 @@ pi_result hip_piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - sycl::detail::pi::die("hip_piextDisablePeerAccess not " - "implemented"); + setErrorMessage("piextDisablePeerAccess not " + "implemented in hip backend", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; return {}; } @@ -5397,9 +5400,10 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, std::ignore = param_value; std::ignore = param_value_size_ret; - sycl::detail::pi::die("hip_piextPeerAccessGetInfo not " - "implemented"); - return {}; + setErrorMessage("piextPeerAccessGetInfo not " + "implemented in hip backend", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 
57f97390fc4cf..369ed934a5d76 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8380,8 +8380,10 @@ pi_result piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - die("piextEnablePeerAccess not implemented in L0"); - return {}; + setErrorMessage("piextEnablePeerAccess not " + "implemented in L0", + ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } pi_result piextDisablePeerAccess(pi_device command_device, @@ -8390,9 +8392,10 @@ pi_result piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - die("piextDisablePeerAccess not " - "implemented in L0"); - return {}; + setErrorMessage("piextDisablePeerAccess not " + "implemented in L0", + ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } pi_result piextPeerAccessGetInfo(pi_device command_device, @@ -8407,9 +8410,10 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, std::ignore = param_value; std::ignore = param_value_size_ret; - die("piextPeerAccessGetInfo not " - "implemented in L0"); - return {}; + setErrorMessage("piextPeerAccessGetInfo not " + "implemented in L0", + ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } #ifdef _WIN32 diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 70277f6992f64..853cf700031da 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -208,11 +208,9 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { pi_result result = Plugin.call_nocheck(Device, Peer); - // if (result == PI_ERROR_PLUGIN_SPECIFIC_ERROR) { if (result != PI_SUCCESS) { char *message = nullptr; - auto err = Plugin.call_nocheck( - &message); + Plugin.call(&message); throw sycl::exception(make_error_code(errc::runtime), message); } } @@ -226,13 +224,9 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { 
pi_result result = Plugin.call_nocheck(Device, Peer); - - // if (result == PI_ERROR_PLUGIN_SPECIFIC_ERROR) { if (result != PI_SUCCESS) { - char *message = nullptr; - auto err = Plugin.call_nocheck( - &message); + Plugin.call(&message); throw sycl::exception(make_error_code(errc::runtime), message); } } @@ -261,8 +255,16 @@ bool device::ext_oneapi_can_access_peer(const device &peer, } } const detail::plugin Plugin = impl->getPlugin(); - Plugin.call_nocheck( - Device, Peer, PiAttr, sizeof(int), &value, &return_size); + pi_result result = + Plugin.call_nocheck( + Device, Peer, PiAttr, sizeof(int), &value, &return_size); + + if (result != PI_SUCCESS) { + char *message = nullptr; + Plugin.call(&message); + throw sycl::exception(make_error_code(errc::runtime), message); + } + return value == 1; } From c825b9c0719fccb8bcd4625ad37bfebd1bffc7f1 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 02:34:10 -0700 Subject: [PATCH 32/75] remove comments. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 4bbf760cfd1f5..f3da29fa8edeb 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -58,8 +58,7 @@ pi_result map_error(CUresult result) { case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return PI_ERROR_OUT_OF_RESOURCES; default: - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - //return PI_ERROR_UNKNOWN; + return PI_ERROR_UNKNOWN; } } @@ -133,15 +132,13 @@ pi_result forLatestEvents(const pi_event *event_wait_list, /// \return PI_SUCCESS if \param result was CUDA_SUCCESS. /// \throw pi_error exception (integer) if input was not success. /// -//TODO this needs some thought! pi_result check_error(CUresult result, const char *function, int line, const char *file) { if (result == CUDA_SUCCESS || result == CUDA_ERROR_DEINITIALIZED) { return PI_SUCCESS; } -// can't throw cuda errors from UR!! 
- /*if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) { + if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) { const char *errorString = nullptr; const char *errorName = nullptr; cuGetErrorName(result, &errorName); @@ -159,7 +156,7 @@ pi_result check_error(CUresult result, const char *function, int line, if (std::getenv("PI_CUDA_ABORT") != nullptr) { std::abort(); - }*/ + } throw map_error(result); } From 72aca2e30cf18939332348568b0e8dc8dc9ddd05 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 03:02:36 -0700 Subject: [PATCH 33/75] format. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 1 - sycl/plugins/hip/pi_hip.cpp | 2 +- sycl/unittests/helpers/PiMockPlugin.hpp | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 747fb374a6d72..bd2aebf9a14e2 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -6124,7 +6124,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextDisablePeerAccess, cuda_piextDisablePeerAccess) _PI_CL(piextPeerAccessGetInfo, cuda_piextPeerAccessGetInfo) - #undef _PI_CL return PI_SUCCESS; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 80ab471771e18..b727ba47e7de1 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5803,7 +5803,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) _PI_CL(piPluginGetBackendOption, hip_piPluginGetBackendOption) - + // Peer to Peer _PI_CL(piextEnablePeerAccess, hip_piextEnablePeerAccess) _PI_CL(piextDisablePeerAccess, hip_piextDisablePeerAccess) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 0c76c7416bd4e..3e992116381cc 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1224,6 +1224,6 @@ mock_piextPeerAccessGetInfo(pi_device 
command_device, pi_device peer_device, *static_cast(param_value) = 1; if (param_value_size_ret) *param_value_size_ret = sizeof(pi_int32); - + return PI_SUCCESS; -} \ No newline at end of file +} From c2579aa87383b154e0caf7a8dffb891c0d73973f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 03:48:08 -0700 Subject: [PATCH 34/75] fixes for recent sycl branch changes. Signed-off-by: JackAKirk --- sycl/plugins/level_zero/pi_level_zero.cpp | 6 +++--- sycl/source/device.cpp | 25 +++++++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c73475b775e96..d00da49d3ff7c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8854,7 +8854,7 @@ pi_result piextEnablePeerAccess(pi_device command_device, setErrorMessage("piextEnablePeerAccess not " "implemented in L0", - ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); + UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -8866,7 +8866,7 @@ pi_result piextDisablePeerAccess(pi_device command_device, setErrorMessage("piextDisablePeerAccess not " "implemented in L0", - ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); + UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -8884,7 +8884,7 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, setErrorMessage("piextPeerAccessGetInfo not " "implemented in L0", - ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); + UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 3683146c56cd6..d6e0fe9cc07c5 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -212,13 +212,13 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { - const 
detail::plugin &Plugin = impl->getPlugin(); + auto Plugin = impl->getPlugin(); pi_result result = - Plugin.call_nocheck(Device, + Plugin->call_nocheck(Device, Peer); if (result != PI_SUCCESS) { char *message = nullptr; - Plugin.call(&message); + Plugin->call(&message); throw sycl::exception(make_error_code(errc::runtime), message); } } @@ -228,13 +228,13 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { - const detail::plugin Plugin = impl->getPlugin(); + auto Plugin = impl->getPlugin(); pi_result result = - Plugin.call_nocheck(Device, + Plugin->call_nocheck(Device, Peer); if (result != PI_SUCCESS) { char *message = nullptr; - Plugin.call(&message); + Plugin->call(&message); throw sycl::exception(make_error_code(errc::runtime), message); } } @@ -246,7 +246,14 @@ bool device::ext_oneapi_can_access_peer(const device &peer, const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device == Peer) + { return true; + } + + if (peer.get_backend() != backend::ext_oneapi_cuda) + { + return false; + } RT::PiPeerAttr PiAttr; size_t return_size; @@ -262,14 +269,14 @@ bool device::ext_oneapi_can_access_peer(const device &peer, break; } } - const detail::plugin Plugin = impl->getPlugin(); + auto Plugin = impl->getPlugin(); pi_result result = - Plugin.call_nocheck( + Plugin->call_nocheck( Device, Peer, PiAttr, sizeof(int), &value, &return_size); if (result != PI_SUCCESS) { char *message = nullptr; - Plugin.call(&message); + Plugin->call(&message); throw sycl::exception(make_error_code(errc::runtime), message); } From 0c874058fca8922c71c3e5a3822d313cdc7c2c87 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 03:54:12 -0700 Subject: [PATCH 35/75] Format. 
Signed-off-by: JackAKirk --- sycl/source/device.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index d6e0fe9cc07c5..1882e46bb32cc 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -215,7 +215,7 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { auto Plugin = impl->getPlugin(); pi_result result = Plugin->call_nocheck(Device, - Peer); + Peer); if (result != PI_SUCCESS) { char *message = nullptr; Plugin->call(&message); @@ -231,7 +231,7 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { auto Plugin = impl->getPlugin(); pi_result result = Plugin->call_nocheck(Device, - Peer); + Peer); if (result != PI_SUCCESS) { char *message = nullptr; Plugin->call(&message); @@ -245,13 +245,11 @@ bool device::ext_oneapi_can_access_peer(const device &peer, const RT::PiDevice Device = impl->getHandleRef(); const RT::PiDevice Peer = peer.impl->getHandleRef(); - if (Device == Peer) - { + if (Device == Peer) { return true; } - if (peer.get_backend() != backend::ext_oneapi_cuda) - { + if (peer.get_backend() != backend::ext_oneapi_cuda) { return false; } From dfcb01c15fdfcccb152d7cb1c4e11e5ab936721f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 06:43:38 -0700 Subject: [PATCH 36/75] Added P2P USM tests. 
Signed-off-by: JackAKirk --- sycl/test-e2e/USM/P2P/p2p_access.cpp | 75 +++++++++++++++++++++++ sycl/test-e2e/USM/P2P/p2p_atomics.cpp | 85 +++++++++++++++++++++++++++ sycl/test-e2e/USM/P2P/p2p_copy.cpp | 78 ++++++++++++++++++++++++ sycl/unittests/Extensions/USMP2P.cpp | 2 +- 4 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 sycl/test-e2e/USM/P2P/p2p_access.cpp create mode 100644 sycl/test-e2e/USM/P2P/p2p_atomics.cpp create mode 100644 sycl/test-e2e/USM/P2P/p2p_copy.cpp diff --git a/sycl/test-e2e/USM/P2P/p2p_access.cpp b/sycl/test-e2e/USM/P2P/p2p_access.cpp new file mode 100644 index 0000000000000..fd5dcae4bf0e7 --- /dev/null +++ b/sycl/test-e2e/USM/P2P/p2p_access.cpp @@ -0,0 +1,75 @@ +// REQUIRES: cuda +// RUN: %{build} -o %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s + +#include +#include + +using namespace sycl; + +int main() { + + // Note that this code will largely be removed: it is temporary due to the + // temporary lack of multiple devices per sycl context in the Nvidia backend. + // A portable implementation, using a single gpu platform, should be possible + // once the Nvidia context issues are resolved. + //////////////////////////////////////////////////////////////////////// + std::vector Devs; + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) + Devs.push_back(plt.get_devices()[0]); + } + if (Devs.size() < 2) { + std::cout << "Cannot test P2P capabilities, at least two devices are " + "required, exiting." + << std::endl; + return 0; + } + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + //////////////////////////////////////////////////////////////////////// + + if (!Devs[0].ext_oneapi_can_access_peer( + Devs[1], sycl::ext::oneapi::peer_access::access_supported)) { + std::cout << "P2P access is not supported by devices, exiting." 
+ << std::endl; + return 0; + } + + // Enables Devs[0] to access Devs[1] memory. + Devs[0].ext_oneapi_enable_peer_access(Devs[1]); + + auto *arr1 = malloc(2, Queues[1], usm::alloc::device); + + // Calling fill on Devs[1] data with Devs[0] queue requires P2P enabled. + Queues[0].fill(arr1, 2, 2).wait(); + + // Access/write Devs[1] data with Devs[0] queue. + Queues[0] + .submit([&](handler &cgh) { + auto myRange = range<1>(1); + auto myKernel = ([=](id<1> idx) { arr1[0] *= 2; }); + + cgh.parallel_for(myRange, myKernel); + }) + .wait(); + + int2 out; + + Queues[0].memcpy(&out, arr1, 2 * sizeof(int)).wait(); + assert(out[0] == 4); + assert(out[1] == 2); + + sycl::free(arr1, Queues[1]); + + Devs[0].ext_oneapi_disable_peer_access(Devs[1]); + + return 0; +} + +// CHECK: ---> piextPeerAccessGetInfo( +// CHECK: ---> piextEnablePeerAccess( +// CHECK: ---> piextDisablePeerAccess( diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp new file mode 100644 index 0000000000000..527a248dfa6f8 --- /dev/null +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -0,0 +1,85 @@ +// REQUIRES: cuda +// RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %} +// RUN: %if ext_oneapi_cuda %{ env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s %} + +#include +#include +#include +#include + +using namespace sycl; + +// number of atomic operations +constexpr size_t N = 512; + +int main() { + + // Note that this code will largely be removed: it is temporary due to the + // temporary lack of multiple devices per sycl context in the Nvidia backend. + // A portable implementation, using a single gpu platform, should be possible + // once the Nvidia context issues are resolved. 
+ //////////////////////////////////////////////////////////////////////// + std::vector Devs; + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) + Devs.push_back(plt.get_devices()[0]); + } + if (Devs.size() < 2) { + std::cout << "Cannot test P2P capabilities, at least two devices are " + "required, exiting." + << std::endl; + return 0; + } + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + //////////////////////////////////////////////////////////////////////// + + if (!Devs[1].ext_oneapi_can_access_peer( + Devs[0], sycl::ext::oneapi::peer_access::atomics_supported)) { + std::cout << "P2P atomics are not supported by devices, exiting." + << std::endl; + return 0; + } + + // Enables Devs[1] to access Devs[0] memory. + Devs[1].ext_oneapi_enable_peer_access(Devs[0]); + + std::vector input(N); + std::iota(input.begin(), input.end(), 0); + + double h_sum = 0.; + for (const auto &value : input) { + h_sum += value; + } + + double *d_sum = malloc_shared(1, Queues[0]); + double *d_in = malloc_device(N, Queues[0]); + + Queues[0].memcpy(d_in, &input[0], N * sizeof(double)); + Queues[0].wait(); + + range global_range{N}; + + *d_sum = 0.; + Queues[1].submit([&](handler &h) { + h.parallel_for(global_range, [=](id<1> i) { + sycl::atomic_ref(*d_sum) += d_in[i]; + }); + }); + Queues[1].wait(); + + assert(*d_sum == h_sum); + + free(d_sum, Queues[0]); + free(d_in, Queues[0]); + + return 0; +} + +// CHECK: ---> piextPeerAccessGetInfo( +// CHECK: ---> piextEnablePeerAccess( diff --git a/sycl/test-e2e/USM/P2P/p2p_copy.cpp b/sycl/test-e2e/USM/P2P/p2p_copy.cpp new file mode 100644 index 0000000000000..4c22866a966b7 --- /dev/null +++ b/sycl/test-e2e/USM/P2P/p2p_copy.cpp @@ -0,0 +1,78 @@ +// REQUIRES: cuda +// RUN: %{build} -o %t.out +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s + +#include +#include 
+#include +#include + +using namespace sycl; + +// Array size to copy +constexpr int N = 100; + +int main() { + + // Note that this code will largely be removed: it is temporary due to the + // temporary lack of multiple devices per sycl context in the Nvidia backend. + // A portable implementation, using a single gpu platform, should be possible + // once the Nvidia context issues are resolved. + //////////////////////////////////////////////////////////////////////// + std::vector Devs; + for (const auto &plt : sycl::platform::get_platforms()) { + + if (plt.get_backend() == sycl::backend::ext_oneapi_cuda) + Devs.push_back(plt.get_devices()[0]); + } + if (Devs.size() < 2) { + std::cout << "Cannot test P2P capabilities, at least two devices are " + "required, exiting." + << std::endl; + return 0; + } + + std::vector Queues; + std::transform(Devs.begin(), Devs.end(), std::back_inserter(Queues), + [](const sycl::device &D) { return sycl::queue{D}; }); + //////////////////////////////////////////////////////////////////////// + + if (!Devs[0].ext_oneapi_can_access_peer( + Devs[1], sycl::ext::oneapi::peer_access::access_supported)) { + std::cout << "P2P access is not supported by devices, exiting." + << std::endl; + return 0; + } + + // Enables Devs[0] to access Devs[1] memory. + Devs[0].ext_oneapi_enable_peer_access(Devs[1]); + + std::vector input(N); + std::iota(input.begin(), input.end(), 0); + + int *arr0 = malloc(N, Queues[0], usm::alloc::device); + Queues[0].memcpy(arr0, &input[0], N * sizeof(int)); + + int *arr1 = malloc(N, Queues[1], usm::alloc::device); + // P2P copy performed here: + Queues[1].copy(arr0, arr1, N).wait(); + + int out[N]; + Queues[1].copy(arr1, out, N).wait(); + + sycl::free(arr0, Queues[0]); + sycl::free(arr1, Queues[1]); + + bool ok = true; + for (int i = 0; i < N; i++) { + if (out[i] != input[i]) { + printf("%d %d\n", out[i], input[i]); + ok = false; + break; + } + } + + printf("%s\n", ok ? 
"PASS" : "FAIL"); + + return 0; +} diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index ac44bb6ddd54e..0af41478603ee 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -56,7 +56,7 @@ pi_result redefinedPeerAccessGetInfo(pi_device command_device, TEST(USMP2PTest, USMP2PTest) { - sycl::unittest::PiMock Mock; + sycl::unittest::PiMock Mock(sycl::backend::ext_oneapi_cuda); Mock.redefine(redefinedDevicesGet); Mock.redefine( From b84da4dbd8c309490c788bcb75e8abbeeb0ddf67 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 15:29:16 +0100 Subject: [PATCH 37/75] Update sycl/source/device.cpp Co-authored-by: Steffen Larsen --- sycl/source/device.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 1882e46bb32cc..f9432f17adc6f 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -253,20 +253,19 @@ bool device::ext_oneapi_can_access_peer(const device &peer, return false; } - RT::PiPeerAttr PiAttr; size_t return_size; int value; - switch (attr) { - case ext::oneapi::peer_access::access_supported: { - PiAttr = PI_PEER_ACCESS_SUPPORTED; - break; - } - case ext::oneapi::peer_access::atomics_supported: { - PiAttr = PI_PEER_ATOMICS_SUPPORTED; - break; - } - } + RT::PiPeerAttr PiAttr = [&]() { + switch (attr) { + case ext::oneapi::peer_access::access_supported: + return PI_PEER_ACCESS_SUPPORTED; + case ext::oneapi::peer_access::atomics_supported: + return PI_PEER_ATOMICS_SUPPORTED; + } + throw sycl::exception(make_error_code(errc::invalid), + "Unrecognized peer access attribute."); + }(); auto Plugin = impl->getPlugin(); pi_result result = Plugin->call_nocheck( From 9e5408b94e220c6a4a543d5310d2da740ec9e373 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 07:36:52 -0700 Subject: [PATCH 38/75] Address review comments. 
Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 6 ++++-- sycl/include/sycl/device.hpp | 4 ++-- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 13 ++----------- sycl/plugins/hip/pi_hip.cpp | 13 +------------ sycl/plugins/level_zero/pi_level_zero.cpp | 13 ++----------- sycl/source/device.cpp | 8 ++------ sycl/unittests/Extensions/USMP2P.cpp | 2 +- 7 files changed, 14 insertions(+), 45 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 0c3a3e04f56ce..0d697b92f1c86 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -92,6 +92,8 @@ // 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query. // 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device // info query. +// 12.32 Added piextEnablePeerAccess, piextDisablePeerAccess, +// piextPeerAccessGetInfo, and pi_peer_attr enum. #define _PI_H_VERSION_MAJOR 12 #define _PI_H_VERSION_MINOR 32 @@ -1023,8 +1025,8 @@ using pi_image_desc = _pi_image_desc; typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; typedef enum { - PI_PEER_ACCESS_SUPPORTED, - PI_PEER_ATOMICS_SUPPORTED + PI_PEER_ACCESS_SUPPORTED = 0x0, + PI_PEER_ATOMICS_SUPPORTED = 0x1 } _pi_peer_attr; using pi_mem_info = _pi_mem_info; diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 6613192bd5e9d..8c8556a2c723a 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -42,8 +42,8 @@ namespace ext::oneapi { class filter_selector; enum class peer_access { - access_supported, - atomics_supported, + access_supported = 0x0, + atomics_supported = 0x1, }; } // namespace ext::oneapi diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 85acc135b2b45..2cf58315eabb4 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2200,17 +2200,8 @@ pi_result 
piextPeerAccessGetInfo(pi_device command_device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - std::ignore = command_device; - std::ignore = peer_device; - std::ignore = attr; - std::ignore = param_value_size; - std::ignore = param_value; - std::ignore = param_value_size_ret; - - setErrorMessage("piextPeerAccessGetInfo not " - "implemented in esimd_emulator backend", - PI_ERROR_PLUGIN_SPECIFIC_ERROR); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); + return ReturnValue(0); } #ifdef _WIN32 diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index b727ba47e7de1..97711a47cd5af 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5631,18 +5631,7 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - - std::ignore = command_device; - std::ignore = peer_device; - std::ignore = attr; - std::ignore = param_value_size; - std::ignore = param_value; - std::ignore = param_value_size_ret; - - setErrorMessage("piextPeerAccessGetInfo not " - "implemented in hip backend", - PI_ERROR_PLUGIN_SPECIFIC_ERROR); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + return getInfo(param_value_size, param_value, param_value_size_ret, 0); } const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index d00da49d3ff7c..14b556883c834 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8875,17 +8875,8 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - std::ignore = command_device; - std::ignore = peer_device; - std::ignore = attr; - std::ignore = param_value_size; - std::ignore = 
param_value; - std::ignore = param_value_size_ret; - - setErrorMessage("piextPeerAccessGetInfo not " - "implemented in L0", - UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); + return ReturnValue(0); } #ifdef _WIN32 diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index f9432f17adc6f..da410f9871fa0 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -249,11 +249,7 @@ bool device::ext_oneapi_can_access_peer(const device &peer, return true; } - if (peer.get_backend() != backend::ext_oneapi_cuda) { - return false; - } - - size_t return_size; + size_t returnSize; int value; RT::PiPeerAttr PiAttr = [&]() { @@ -269,7 +265,7 @@ bool device::ext_oneapi_can_access_peer(const device &peer, auto Plugin = impl->getPlugin(); pi_result result = Plugin->call_nocheck( - Device, Peer, PiAttr, sizeof(int), &value, &return_size); + Device, Peer, PiAttr, sizeof(int), &value, &returnSize); if (result != PI_SUCCESS) { char *message = nullptr; diff --git a/sycl/unittests/Extensions/USMP2P.cpp b/sycl/unittests/Extensions/USMP2P.cpp index 0af41478603ee..ac44bb6ddd54e 100644 --- a/sycl/unittests/Extensions/USMP2P.cpp +++ b/sycl/unittests/Extensions/USMP2P.cpp @@ -56,7 +56,7 @@ pi_result redefinedPeerAccessGetInfo(pi_device command_device, TEST(USMP2PTest, USMP2PTest) { - sycl::unittest::PiMock Mock(sycl::backend::ext_oneapi_cuda); + sycl::unittest::PiMock Mock; Mock.redefine(redefinedDevicesGet); Mock.redefine( From 57c5747e92c7d3ab1ba3ae2382427d7a5ca82915 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 16 May 2023 09:44:57 -0700 Subject: [PATCH 39/75] Just use Plugin::call. 
Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 6 ---- .../esimd_emulator/pi_esimd_emulator.cpp | 7 +++-- sycl/plugins/hip/pi_hip.cpp | 3 ++ sycl/plugins/level_zero/pi_level_zero.cpp | 7 +++-- sycl/source/device.cpp | 29 +++---------------- 5 files changed, 17 insertions(+), 35 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index bd2aebf9a14e2..70607794bc92f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5868,9 +5868,7 @@ pi_result cuda_piextEnablePeerAccess(pi_device command_device, result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } - } catch (pi_result err) { - setErrorMessage("", err); result = err; } return result; @@ -5880,7 +5878,6 @@ pi_result cuda_piextDisablePeerAccess(pi_device command_device, pi_device peer_device) { pi_result result = PI_SUCCESS; try { - ScopedContext active(command_device->get_context()); CUresult cu_res = cuCtxDisablePeerAccess(peer_device->get_context()); @@ -5899,9 +5896,7 @@ pi_result cuda_piextDisablePeerAccess(pi_device command_device, free(message); result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } - } catch (pi_result err) { - setErrorMessage("", err); result = err; } return result; @@ -5948,7 +5943,6 @@ pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } } catch (pi_result err) { - setErrorMessage("", err); return err; } return getInfo(param_value_size, param_value, param_value_size_ret, value); diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 2cf58315eabb4..a7c85399afe36 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2197,8 +2197,11 @@ pi_result piextDisablePeerAccess(pi_device command_device, pi_result piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, - size_t 
*param_value_size_ret) { + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + std::ignore = command_device; + std::ignore = peer_device; + std::ignore = attr; ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); return ReturnValue(0); diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 97711a47cd5af..19a5205d17927 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5631,6 +5631,9 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { + std::ignore = command_device; + std::ignore = peer_device; + std::ignore = attr; return getInfo(param_value_size, param_value, param_value_size_ret, 0); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 14b556883c834..fced18f85f024 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8872,8 +8872,11 @@ pi_result piextDisablePeerAccess(pi_device command_device, pi_result piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + std::ignore = command_device; + std::ignore = peer_device; + std::ignore = attr; ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); return ReturnValue(0); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index da410f9871fa0..5f89f02b19595 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -213,14 +213,7 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); - pi_result result = - Plugin->call_nocheck(Device, - Peer); - if 
(result != PI_SUCCESS) { - char *message = nullptr; - Plugin->call(&message); - throw sycl::exception(make_error_code(errc::runtime), message); - } + Plugin->call(Device, Peer); } } @@ -229,14 +222,7 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { const RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); - pi_result result = - Plugin->call_nocheck(Device, - Peer); - if (result != PI_SUCCESS) { - char *message = nullptr; - Plugin->call(&message); - throw sycl::exception(make_error_code(errc::runtime), message); - } + Plugin->call(Device, Peer); } } @@ -263,15 +249,8 @@ bool device::ext_oneapi_can_access_peer(const device &peer, "Unrecognized peer access attribute."); }(); auto Plugin = impl->getPlugin(); - pi_result result = - Plugin->call_nocheck( - Device, Peer, PiAttr, sizeof(int), &value, &returnSize); - - if (result != PI_SUCCESS) { - char *message = nullptr; - Plugin->call(&message); - throw sycl::exception(make_error_code(errc::runtime), message); - } + Plugin->call( + Device, Peer, PiAttr, sizeof(int), &value, &returnSize); return value == 1; } From 029c1fe87f09c8e1221c0b4d0c0409a1cc04945a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 17 May 2023 01:30:36 -0700 Subject: [PATCH 40/75] Remove trace checks CI doesn't support. 
Signed-off-by: JackAKirk --- sycl/test-e2e/USM/P2P/p2p_access.cpp | 6 +----- sycl/test-e2e/USM/P2P/p2p_atomics.cpp | 5 +---- sycl/test-e2e/USM/P2P/p2p_copy.cpp | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/USM/P2P/p2p_access.cpp b/sycl/test-e2e/USM/P2P/p2p_access.cpp index fd5dcae4bf0e7..ef32c0c07b2f8 100644 --- a/sycl/test-e2e/USM/P2P/p2p_access.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_access.cpp @@ -1,6 +1,6 @@ // REQUIRES: cuda // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: %if ext_oneapi_cuda %{ %{run} %t.out %} #include #include @@ -69,7 +69,3 @@ int main() { return 0; } - -// CHECK: ---> piextPeerAccessGetInfo( -// CHECK: ---> piextEnablePeerAccess( -// CHECK: ---> piextDisablePeerAccess( diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp index 527a248dfa6f8..09cf048a2de37 100644 --- a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -1,6 +1,6 @@ // REQUIRES: cuda // RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %} -// RUN: %if ext_oneapi_cuda %{ env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s %} +// RUN: %if ext_oneapi_cuda %{ %{run} %t.out %} #include #include @@ -80,6 +80,3 @@ int main() { return 0; } - -// CHECK: ---> piextPeerAccessGetInfo( -// CHECK: ---> piextEnablePeerAccess( diff --git a/sycl/test-e2e/USM/P2P/p2p_copy.cpp b/sycl/test-e2e/USM/P2P/p2p_copy.cpp index 4c22866a966b7..99aab3e6c7d25 100644 --- a/sycl/test-e2e/USM/P2P/p2p_copy.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_copy.cpp @@ -1,6 +1,6 @@ // REQUIRES: cuda // RUN: %{build} -o %t.out -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s +// RUN: %if ext_oneapi_cuda %{ %{run} %t.out %} #include #include From 8d412407cd5cd6c95597938b2f72ef45276f54dd Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 17 May 2023 02:04:48 -0700 Subject: [PATCH 41/75] Added 
comments, updating namings. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 8 ++++---- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 1 + sycl/plugins/hip/pi_hip.cpp | 1 + sycl/plugins/level_zero/pi_level_zero.cpp | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 70607794bc92f..5007553c9506b 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5908,16 +5908,16 @@ pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, void *param_value, size_t *param_value_size_ret) { int value; - CUdevice_P2PAttribute CUattr; + CUdevice_P2PAttribute cu_attr; try { ScopedContext active(command_device->get_context()); switch (attr) { case PI_PEER_ACCESS_SUPPORTED: { - CUattr = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; + cu_attr = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; break; } case PI_PEER_ATOMICS_SUPPORTED: { - CUattr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; + cu_attr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; break; } default: { @@ -5926,7 +5926,7 @@ pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, } CUresult cu_res = cuDeviceGetP2PAttribute( - &value, CUattr, command_device->get(), peer_device->get()); + &value, cu_attr, command_device->get(), peer_device->get()); if (cu_res != CUDA_SUCCESS) { const char *error_string; const char *error_name; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index a7c85399afe36..ecf52b7fee0b1 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2204,6 +2204,7 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, std::ignore = attr; ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); + // Zero return value indicates that all of the queries currently return false. 
return ReturnValue(0); } diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 19a5205d17927..90e3d42276039 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5634,6 +5634,7 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; std::ignore = attr; + // Zero return value indicates that all of the queries currently return false. return getInfo(param_value_size, param_value, param_value_size_ret, 0); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index fced18f85f024..cc7064de59f68 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -8879,6 +8879,7 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, std::ignore = attr; ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); + // Zero return value indicates that all of the queries currently return false. return ReturnValue(0); } From b86954d902583d4decf524721b211e7b50227126 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 17 May 2023 02:55:15 -0700 Subject: [PATCH 42/75] Added windows symbols. 
Signed-off-by: JackAKirk --- sycl/test/abi/sycl_symbols_linux.dump | 2 +- sycl/test/abi/sycl_symbols_windows.dump | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 3fb81a1e2c66e..80aa057270952 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4413,9 +4413,9 @@ _ZNK4sycl3_V16device8get_infoINS0_4info6device8atomic64EEENS0_6detail19is_device _ZNK4sycl3_V16device8get_infoINS0_4info6device8platformEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device9vendor_idEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device9getNativeEv +_ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE _ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_ _ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_ -_ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE _ZNK4sycl3_V16kernel11get_backendEv _ZNK4sycl3_V16kernel11get_contextEv _ZNK4sycl3_V16kernel13getNativeImplEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2cb94e1270c38..6f5e8e8bf108c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -888,7 +888,10 @@ ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ +?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z +?ext_oneapi_disable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_empty@queue@_V1@sycl@@QEBA_NXZ +?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z 
?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_memcpy2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z From a6f760244bf68300a9774ec3de7c9a6a3eaffaab Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 17 May 2023 05:02:00 -0700 Subject: [PATCH 43/75] Refactor common code. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 57 +++++++++++------------------------ 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 5007553c9506b..6a79d73f20219 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -75,6 +75,21 @@ static void setErrorMessage(const char *message, pi_result error_code) { ErrorMessageCode = error_code; } +void setPluginSpecificMessage(CUresult cu_res) { + const char *error_string; + const char *error_name; + cuGetErrorName(cu_res, &error_name); + cuGetErrorString(cu_res, &error_string); + char *message = + (char *)malloc(strlen(error_string) + strlen(error_name) + 2); + strcpy(message, error_name); + strcat(message, "\n"); + strcat(message, error_string); + + setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + free(message); +} + // Returns plugin specific error and warning messages pi_result cuda_piPluginGetLastError(char **message) { *message = &ErrorMessage[0]; @@ -5850,22 +5865,9 @@ pi_result cuda_piextEnablePeerAccess(pi_device command_device, pi_result result = PI_SUCCESS; try { ScopedContext active(command_device->get_context()); - CUresult cu_res = cuCtxEnablePeerAccess(peer_device->get_context(), 0); if (cu_res != CUDA_SUCCESS) { - const char *error_string = nullptr; - const char *error_name = nullptr; - cuGetErrorName(cu_res, &error_name); - cuGetErrorString(cu_res, &error_string); - char *message = - (char *)malloc(strlen(error_string) + strlen(error_name) + 2); - strcpy(message, error_name); - strcat(message, "\n"); - 
strcat(message, error_string); - - setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); - free(message); - + setPluginSpecificMessage(cu_res); result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } } catch (pi_result err) { @@ -5879,21 +5881,9 @@ pi_result cuda_piextDisablePeerAccess(pi_device command_device, pi_result result = PI_SUCCESS; try { ScopedContext active(command_device->get_context()); - CUresult cu_res = cuCtxDisablePeerAccess(peer_device->get_context()); if (cu_res != CUDA_SUCCESS) { - const char *error_string; - const char *error_name; - cuGetErrorName(cu_res, &error_name); - cuGetErrorString(cu_res, &error_string); - char *message = - (char *)malloc(strlen(error_string) + strlen(error_name) + 2); - strcpy(message, error_name); - strcat(message, "\n"); - strcat(message, error_string); - - setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); - free(message); + setPluginSpecificMessage(cu_res); result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; } } catch (pi_result err) { @@ -5928,18 +5918,7 @@ pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, CUresult cu_res = cuDeviceGetP2PAttribute( &value, cu_attr, command_device->get(), peer_device->get()); if (cu_res != CUDA_SUCCESS) { - const char *error_string; - const char *error_name; - cuGetErrorName(cu_res, &error_name); - cuGetErrorString(cu_res, &error_string); - char *message = - (char *)malloc(2 + strlen(error_string) + strlen(error_name)); - strcpy(message, error_name); - strcat(message, "\n"); - strcat(message, error_string); - - setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); - free(message); + setPluginSpecificMessage(cu_res); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } } catch (pi_result err) { From cc5eb638cd441a217f326c1fe894ad39b3d7f1ab Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 17 May 2023 05:52:36 -0700 Subject: [PATCH 44/75] Format. 
Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 6a79d73f20219..d157ad1e1ac82 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -76,18 +76,17 @@ static void setErrorMessage(const char *message, pi_result error_code) { } void setPluginSpecificMessage(CUresult cu_res) { - const char *error_string; - const char *error_name; - cuGetErrorName(cu_res, &error_name); - cuGetErrorString(cu_res, &error_string); - char *message = - (char *)malloc(strlen(error_string) + strlen(error_name) + 2); - strcpy(message, error_name); - strcat(message, "\n"); - strcat(message, error_string); - - setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); - free(message); + const char *error_string; + const char *error_name; + cuGetErrorName(cu_res, &error_name); + cuGetErrorString(cu_res, &error_string); + char *message = (char *)malloc(strlen(error_string) + strlen(error_name) + 2); + strcpy(message, error_name); + strcat(message, "\n"); + strcat(message, error_string); + + setErrorMessage(message, PI_ERROR_PLUGIN_SPECIFIC_ERROR); + free(message); } // Returns plugin specific error and warning messages From 716bf30b5e8d7aa85a6cd5ef6aa72eb956ebf616 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 18 May 2023 02:38:08 -0700 Subject: [PATCH 45/75] Some final corrections. 
Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 3 ++- sycl/plugins/hip/pi_hip.cpp | 2 -- sycl/unittests/helpers/PiMockPlugin.hpp | 3 --- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d157ad1e1ac82..ab4b1b11fce79 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -42,6 +42,8 @@ std::string getCudaVersionString() { return stream.str(); } + + pi_result map_error(CUresult result) { switch (result) { case CUDA_SUCCESS: @@ -6088,7 +6090,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) - _PI_CL(piPluginGetBackendOption, cuda_piPluginGetBackendOption) // Peer to Peer diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 90e3d42276039..afddf0c570c7a 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5624,7 +5624,6 @@ pi_result hip_piextDisablePeerAccess(pi_device command_device, "implemented in hip backend", PI_ERROR_PLUGIN_SPECIFIC_ERROR); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - return {}; } pi_result hip_piextPeerAccessGetInfo(pi_device command_device, @@ -5794,7 +5793,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, hip_piPluginGetLastError) _PI_CL(piTearDown, hip_piTearDown) _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) - _PI_CL(piPluginGetBackendOption, hip_piPluginGetBackendOption) // Peer to Peer diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 3e992116381cc..0d84096a9dc6e 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1205,13 +1205,11 @@ inline pi_result mock_piextEnqueueWriteHostPipe( inline pi_result mock_piextEnablePeerAccess(pi_device command_device, pi_device peer_device) { - 
return PI_SUCCESS; } inline pi_result mock_piextDisablePeerAccess(pi_device command_device, pi_device peer_device) { - return PI_SUCCESS; } @@ -1219,7 +1217,6 @@ inline pi_result mock_piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - if (param_value) *static_cast(param_value) = 1; if (param_value_size_ret) From 484cf252246a958b089a8e94e35b14bd791a213c Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 18 May 2023 02:56:53 -0700 Subject: [PATCH 46/75] Format. Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index ab4b1b11fce79..c6b9f304fb1ac 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -42,8 +42,6 @@ std::string getCudaVersionString() { return stream.str(); } - - pi_result map_error(CUresult result) { switch (result) { case CUDA_SUCCESS: From 308f45a65e1325b23bb194dab7990c8b6fc2a084 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 22 May 2023 09:05:51 -0700 Subject: [PATCH 47/75] Correct version comment order. Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 4a4355f076750..ae95fb6c72702 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -92,10 +92,10 @@ // 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query. // 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device // info query. -// 12.32 Added piextEnablePeerAccess, piextDisablePeerAccess, -// piextPeerAccessGetInfo, and pi_peer_attr enum. 
-// 13.33 Removed backwards compatibility of piextQueueCreateWithNativeHandle and +// 12.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and // piextQueueGetNativeHandle +// 13.33 Added piextEnablePeerAccess, piextDisablePeerAccess, +// piextPeerAccessGetInfo, and pi_peer_attr enum. #define _PI_H_VERSION_MAJOR 13 #define _PI_H_VERSION_MINOR 33 From b2b6fcbfaaafd697618cfb0e2809c50e0b66e741 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 31 May 2023 12:10:20 +0100 Subject: [PATCH 48/75] Removed broken error message. Signed-off-by: JackAKirk --- sycl/plugins/level_zero/pi_level_zero.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 4e792b52a7223..f9cd5e0b1e3aa 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1116,9 +1116,6 @@ pi_result piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - setErrorMessage("piextEnablePeerAccess not " - "implemented in L0", - UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -1128,9 +1125,6 @@ pi_result piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; - setErrorMessage("piextDisablePeerAccess not " - "implemented in L0", - UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } From 2bea80bd984c2f474393dfdc488935a3349a0906 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 31 May 2023 21:11:48 +0100 Subject: [PATCH 49/75] Addressed review comments. 
Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 7 +++++-- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 2 +- sycl/plugins/hip/pi_hip.cpp | 2 +- sycl/plugins/level_zero/pi_level_zero.cpp | 10 +++++++++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index ae95fb6c72702..2743ac81e515e 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1027,8 +1027,11 @@ using pi_image_desc = _pi_image_desc; typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; typedef enum { - PI_PEER_ACCESS_SUPPORTED = 0x0, - PI_PEER_ATOMICS_SUPPORTED = 0x1 + PI_PEER_ACCESS_SUPPORTED = 0x0, ///< returns 1 if P2P Access is supported + ///< otherwise P2P Access is not supported. + PI_PEER_ATOMICS_SUPPORTED = + 0x1 ///< returns 1 if Atomic operations are supported over the P2P link, + ///< otherwise such operations are not supported. } _pi_peer_attr; using pi_mem_info = _pi_mem_info; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 50b87c1b1dbc8..0eb1b034e4727 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2194,7 +2194,7 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); // Zero return value indicates that all of the queries currently return false. - return ReturnValue(0); + return ReturnValue(pi_int32{0}); } #ifdef _WIN32 diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index abc47db374266..9642da08ad2c0 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5634,7 +5634,7 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, std::ignore = peer_device; std::ignore = attr; // Zero return value indicates that all of the queries currently return false. 
- return getInfo(param_value_size, param_value, param_value_size_ret, 0); + return getInfo(param_value_size, param_value, param_value_size_ret, pi_int32{0}); } const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f9cd5e0b1e3aa..ed8b4b4b7830a 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1116,6 +1116,10 @@ pi_result piextEnablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; + setErrorMessage("piextEnablePeerAccess not " + "implemented in L0", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -1125,6 +1129,10 @@ pi_result piextDisablePeerAccess(pi_device command_device, std::ignore = command_device; std::ignore = peer_device; + setErrorMessage("piextDisablePeerAccess not " + "implemented in L0", + PI_ERROR_PLUGIN_SPECIFIC_ERROR); + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -1138,7 +1146,7 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); // Zero return value indicates that all of the queries currently return false. - return ReturnValue(0); + return ReturnValue(pi_int32{0}); } #ifdef _WIN32 From 04b8610e78c28f3d263d9b7e40e238e81a32daac Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 31 May 2023 21:16:08 +0100 Subject: [PATCH 50/75] Format. 
Signed-off-by: JackAKirk --- sycl/plugins/hip/pi_hip.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 9642da08ad2c0..e5f7be7f0d76a 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5634,7 +5634,8 @@ pi_result hip_piextPeerAccessGetInfo(pi_device command_device, std::ignore = peer_device; std::ignore = attr; // Zero return value indicates that all of the queries currently return false. - return getInfo(param_value_size, param_value, param_value_size_ret, pi_int32{0}); + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_int32{0}); } const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; From d0b138c2a97db731047b35a1e7612cd4b0eb03d9 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 1 Jun 2023 11:10:47 +0100 Subject: [PATCH 51/75] Try UR invalid value error. Signed-off-by: JackAKirk --- sycl/plugins/level_zero/pi_level_zero.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ed8b4b4b7830a..a167f6bd5d2c9 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1118,7 +1118,7 @@ pi_result piextEnablePeerAccess(pi_device command_device, setErrorMessage("piextEnablePeerAccess not " "implemented in L0", - PI_ERROR_PLUGIN_SPECIFIC_ERROR); + UR_RESULT_ERROR_INVALID_VALUE); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -1131,7 +1131,7 @@ pi_result piextDisablePeerAccess(pi_device command_device, setErrorMessage("piextDisablePeerAccess not " "implemented in L0", - PI_ERROR_PLUGIN_SPECIFIC_ERROR); + UR_RESULT_ERROR_INVALID_VALUE); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } From 3e488c4fa986c1aa90356de934185607ebfed832 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 6 Jun 2023 14:30:36 +0100 Subject: [PATCH 52/75] Stated return type of queries in comment. 
Signed-off-by: JackAKirk --- sycl/include/sycl/detail/pi.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 2743ac81e515e..f42a6c85ca84e 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1027,11 +1027,12 @@ using pi_image_desc = _pi_image_desc; typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; typedef enum { - PI_PEER_ACCESS_SUPPORTED = 0x0, ///< returns 1 if P2P Access is supported - ///< otherwise P2P Access is not supported. + PI_PEER_ACCESS_SUPPORTED = + 0x0, ///< returns a uint32_t: 1 if P2P Access is supported + ///< otherwise P2P Access is not supported. PI_PEER_ATOMICS_SUPPORTED = - 0x1 ///< returns 1 if Atomic operations are supported over the P2P link, - ///< otherwise such operations are not supported. + 0x1 ///< returns a uint32_t: 1 if Atomic operations are supported over the + ///< P2P link, otherwise such operations are not supported. } _pi_peer_attr; using pi_mem_info = _pi_mem_info; From 19d2a7da242213b02eac4531fb360a40a99acd37 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 23 Jun 2023 12:58:32 +0100 Subject: [PATCH 53/75] Integrate UR usm-p2p interfaces. 
Signed-off-by: JackAKirk --- sycl/plugins/cuda/CMakeLists.txt | 2 + sycl/plugins/cuda/pi_cuda.cpp | 74 +---------------- sycl/plugins/level_zero/CMakeLists.txt | 2 + sycl/plugins/level_zero/pi_level_zero.cpp | 27 ++---- sycl/plugins/unified_runtime/CMakeLists.txt | 6 +- sycl/plugins/unified_runtime/pi2ur.hpp | 56 +++++++++++++ .../unified_runtime/pi_unified_runtime.cpp | 25 ++++++ .../ur/adapters/cuda/common.cpp | 14 ++++ .../ur/adapters/cuda/common.hpp | 2 + .../ur/adapters/cuda/ur_interface_loader.cpp | 13 +++ .../ur/adapters/cuda/usm_p2p.cpp | 82 +++++++++++++++++++ .../ur/adapters/cuda/usm_p2p.hpp | 10 +++ .../level_zero/ur_level_zero_common.cpp | 5 ++ .../level_zero/ur_level_zero_common.hpp | 18 ++++ .../level_zero/ur_level_zero_usm_p2p.cpp | 47 +++++++++++ .../level_zero/ur_level_zero_usm_p2p.hpp | 16 ++++ .../level_zero/ur_loader_interface.cpp | 13 +++ sycl/source/device.cpp | 14 ++-- 18 files changed, 325 insertions(+), 101 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt index 2570b6f7e7348..531b0a99f5983 100644 --- a/sycl/plugins/cuda/CMakeLists.txt +++ b/sycl/plugins/cuda/CMakeLists.txt @@ -79,6 +79,8 @@ add_sycl_plugin(cuda "../unified_runtime/ur/adapters/cuda/tracing.cpp" "../unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp" "../unified_runtime/ur/adapters/cuda/usm.cpp" + "../unified_runtime/ur/adapters/cuda/usm_p2p.hpp" + "../unified_runtime/ur/adapters/cuda/usm_p2p.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" "${sycl_inc_dir}/sycl/detail/pi.hpp" diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 95ad7dc8f858e..25b2b8c076fa8 
100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -21,74 +21,6 @@ void enableCUDATracing(); //-- PI API implementation extern "C" { - -pi_result cuda_piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - pi_result result = PI_SUCCESS; - try { - ScopedContext active(command_device->get_context()); - CUresult cu_res = cuCtxEnablePeerAccess(peer_device->get_context(), 0); - if (cu_res != CUDA_SUCCESS) { - setPluginSpecificMessage(cu_res); - result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; - } - } catch (pi_result err) { - result = err; - } - return result; -} - -pi_result cuda_piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - pi_result result = PI_SUCCESS; - try { - ScopedContext active(command_device->get_context()); - CUresult cu_res = cuCtxDisablePeerAccess(peer_device->get_context()); - if (cu_res != CUDA_SUCCESS) { - setPluginSpecificMessage(cu_res); - result = PI_ERROR_PLUGIN_SPECIFIC_ERROR; - } - } catch (pi_result err) { - result = err; - } - return result; -} - -pi_result cuda_piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - int value; - CUdevice_P2PAttribute cu_attr; - try { - ScopedContext active(command_device->get_context()); - switch (attr) { - case PI_PEER_ACCESS_SUPPORTED: { - cu_attr = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; - break; - } - case PI_PEER_ATOMICS_SUPPORTED: { - cu_attr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; - break; - } - default: { - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(attr); - } - } - - CUresult cu_res = cuDeviceGetP2PAttribute( - &value, cu_attr, command_device->get(), peer_device->get()); - if (cu_res != CUDA_SUCCESS) { - setPluginSpecificMessage(cu_res); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - } - } catch (pi_result err) { - return err; - } - return getInfo(param_value_size, param_value, param_value_size_ret, value); 
-} - const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -254,9 +186,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption) // Peer to Peer - _PI_CL(piextEnablePeerAccess, cuda_piextEnablePeerAccess) - _PI_CL(piextDisablePeerAccess, cuda_piextDisablePeerAccess) - _PI_CL(piextPeerAccessGetInfo, cuda_piextPeerAccessGetInfo) + _PI_CL(piextEnablePeerAccess, pi2ur::piextEnablePeerAccess) + _PI_CL(piextDisablePeerAccess, pi2ur::piextDisablePeerAccess) + _PI_CL(piextPeerAccessGetInfo, pi2ur::piextPeerAccessGetInfo) #undef _PI_CL diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt index 242fafecd7395..d7857e50337c2 100755 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ b/sycl/plugins/level_zero/CMakeLists.txt @@ -125,6 +125,8 @@ add_sycl_plugin(level_zero "../unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp" "../unified_runtime/ur/adapters/level_zero/ur_level_zero_sampler.cpp" "../unified_runtime/ur/adapters/level_zero/ur_level_zero_usm.cpp" + "../unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp" + "../unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp" # Following are the PI Level-Zero Plugin only codes. 
"pi_level_zero.cpp" "pi_level_zero.hpp" diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 464cfdbc9935f..b080f8f924f0c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1114,40 +1114,23 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, pi_result piextEnablePeerAccess(pi_device command_device, pi_device peer_device) { - std::ignore = command_device; - std::ignore = peer_device; - - setErrorMessage("piextEnablePeerAccess not " - "implemented in L0", - UR_RESULT_ERROR_INVALID_VALUE); - - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + return pi2ur::piextEnablePeerAccess(command_device, peer_device); } pi_result piextDisablePeerAccess(pi_device command_device, pi_device peer_device) { - std::ignore = command_device; - std::ignore = peer_device; - - setErrorMessage("piextDisablePeerAccess not " - "implemented in L0", - UR_RESULT_ERROR_INVALID_VALUE); - - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + return pi2ur::piextDisablePeerAccess(command_device, peer_device); } pi_result piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - std::ignore = command_device; - std::ignore = peer_device; - std::ignore = attr; - ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); - // Zero return value indicates that all of the queries currently return false. 
- return ReturnValue(pi_int32{0}); + return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, + ParamValueSize, ParamValue, + ParamValueSizeRet); } #ifdef _WIN32 diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 318059da5eaf9..0955314a672b7 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -4,7 +4,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG 4136fbb19c37a8aa9d368559a738e2e7cc35033e) + set(UNIFIED_RUNTIME_TAG 7f62c15ced06bc72520d38a2edca0a113b06ad8e) message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime @@ -109,6 +109,8 @@ add_sycl_library("ur_adapter_level_zero" SHARED "ur/adapters/level_zero/ur_level_zero_queue.cpp" "ur/adapters/level_zero/ur_level_zero_sampler.cpp" "ur/adapters/level_zero/ur_level_zero_usm.cpp" + "ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp" + "ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp" INCLUDE_DIRS ${sycl_inc_dir} LIBRARIES @@ -155,6 +157,8 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) "ur/adapters/cuda/tracing.cpp" "ur/adapters/cuda/ur_interface_loader.cpp" "ur/adapters/cuda/usm.cpp" + "ur/adapters/cuda/usm_p2p.hpp" + "ur/adapters/cuda/usm_p2p.cpp" INCLUDE_DIRS ${sycl_inc_dir} LIBRARIES diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index fbff734ecaf60..3732f9534f144 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3949,4 +3949,60 @@ inline pi_result piSamplerRelease(pi_sampler Sampler) { // Sampler /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// usm-p2p + +pi_result 
piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + auto commandDevice = reinterpret_cast(command_device); + auto peerDevice = reinterpret_cast(peer_device); + + HANDLE_ERRORS(urUsmP2PEnablePeerAccessExp(commandDevice, peerDevice)); + + return PI_SUCCESS; +} + +pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + auto commandDevice = reinterpret_cast(command_device); + auto peerDevice = reinterpret_cast(peer_device); + + HANDLE_ERRORS(urUsmP2PDisablePeerAccessExp(commandDevice, peerDevice)); + + return PI_SUCCESS; +} + +pi_result piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + auto commandDevice = reinterpret_cast(command_device); + auto peerDevice = reinterpret_cast(peer_device); + + ur_exp_peer_info_t propName; + switch (attr) { + case PI_PEER_ACCESS_SUPPORTED: { + propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; + break; + } + case PI_PEER_ATOMICS_SUPPORTED: { + propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; + break; + } + default: { + return PI_ERROR_UNKNOWN; + ; + } + } + + HANDLE_ERRORS(urUsmP2PPeerAccessGetInfoExp( + commandDevice, peerDevice, propName, param_value_size, param_value, + param_value_size_ret)); + + return PI_SUCCESS; +} + +// p2p-usm +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 20fe7384a9c63..3ae7c0ad48216 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -986,6 +986,26 @@ __SYCL_EXPORT pi_result piPluginGetBackendOption(pi_platform platform, backend_option); } +__SYCL_EXPORT pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + + return 
pi2ur::piextEnablePeerAccess(command_device, peer_device); +} + +__SYCL_EXPORT pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + + return pi2ur::piextDisablePeerAccess(command_device, peer_device); +} + +__SYCL_EXPORT pi_result piextPeerAccessGetInfo( + pi_device command_device, pi_device peer_device, pi_peer_attr attr, + size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + // This interface is not in Unified Runtime currently __SYCL_EXPORT pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); @@ -1136,6 +1156,11 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { _PI_API(piSamplerRetain) _PI_API(piSamplerRelease) + // Peer to Peer + _PI_API(piextEnablePeerAccess) + _PI_API(piextDisablePeerAccess) + _PI_API(piextPeerAccessGetInfo) + _PI_API(piextPluginGetOpaqueData) _PI_API(piTearDown) diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp index 86975e5097257..392498973f768 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp @@ -98,6 +98,20 @@ thread_local char ErrorMessage[MaxMessageSize]; ErrorMessageCode = ErrorCode; } +void setPluginSpecificMessage(CUresult cu_res) { + const char *error_string; + const char *error_name; + cuGetErrorName(cu_res, &error_name); + cuGetErrorString(cu_res, &error_string); + char *message = (char *)malloc(strlen(error_string) + strlen(error_name) + 2); + strcpy(message, error_name); + strcat(message, "\n"); + strcat(message, error_string); + + setErrorMessage(message, UR_RESULT_ERROR_ADAPTER_SPECIFIC); + free(message); +} + // Returns plugin specific error and warning messages; common implementation // that can be shared between adapters ur_result_t 
urGetLastResult(ur_platform_handle_t, const char **ppMessage) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp index 5cfa609018b29..1af46cbef00cd 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp @@ -36,6 +36,8 @@ extern thread_local char ErrorMessage[MaxMessageSize]; [[maybe_unused]] void setErrorMessage(const char *pMessage, ur_result_t ErrorCode); +void setPluginSpecificMessage(CUresult cu_res); + /// ------ Error handling, matching OpenCL plugin semantics. namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp index f8e806b0626a0..4f8eec96d5f5b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp @@ -258,6 +258,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( return UR_RESULT_SUCCESS; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; + + return retVal; +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp new file mode 100644 index 0000000000000..e87e8a53f020e --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp @@ -0,0 +1,82 @@ +//===--------- usm_p2p.cpp - CUDA 
Adapter---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------===// + +#include "usm_p2p.hpp" +#include "common.hpp" +#include "context.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( + ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { + + ur_result_t result = UR_RESULT_SUCCESS; + try { + ScopedContext active(commandDevice->getContext()); + CUresult cu_res = cuCtxEnablePeerAccess(peerDevice->getContext(), 0); + if (cu_res != CUDA_SUCCESS) { + setPluginSpecificMessage(cu_res); + result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + } catch (ur_result_t err) { + result = err; + } + return result; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( + ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { + + ur_result_t result = UR_RESULT_SUCCESS; + try { + ScopedContext active(commandDevice->getContext()); + CUresult cu_res = cuCtxDisablePeerAccess(peerDevice->getContext()); + if (cu_res != CUDA_SUCCESS) { + setPluginSpecificMessage(cu_res); + result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + } catch (ur_result_t err) { + result = err; + } + return result; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( + ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + int value; + CUdevice_P2PAttribute cu_attr; + try { + ScopedContext active(commandDevice->getContext()); + switch (propName) { + case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: { + cu_attr = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; + break; + } + case UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED: { 
+ cu_attr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; + break; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + CUresult cu_res = cuDeviceGetP2PAttribute( + &value, cu_attr, commandDevice->get(), peerDevice->get()); + if (cu_res != CUDA_SUCCESS) { + setPluginSpecificMessage(cu_res); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + } catch (ur_result_t err) { + return err; + } + return ReturnValue(value); +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp new file mode 100644 index 0000000000000..f927860839860 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp @@ -0,0 +1,10 @@ +//===--------- usm_p2p.hpp - CUDA Adapter---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------===// + +#pragma once +#include diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp index 4603fbe741354..d48746c3fb1da 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp @@ -281,3 +281,8 @@ ur_result_t zerPluginGetLastError(char **message) { *message = &ErrorMessage[0]; return ErrorMessageCode; } + +/*void sycl::detail::ur::die(const char *Message) { + std::cerr << "ur_die: " << Message << std::endl; + std::terminate(); +}*/ diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp index ed269665cd99b..30bc748deba55 100644 --- 
a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp @@ -466,3 +466,21 @@ extern thread_local char ErrorMessage[MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, ur_result_t error_code); + +/// ------ Error handling, matching OpenCL plugin semantics. +/*namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +namespace ur { + +// Report error and no return (keeps compiler from printing warnings). +// TODO: Probably change that to throw a catchable exception, +// but for now it is useful to see every failure. +// +[[noreturn]] void die(const char *Message); + +} // namespace ur +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl +*/ diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp new file mode 100644 index 0000000000000..a008fc3ae2fc8 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp @@ -0,0 +1,47 @@ +//===--------- ur_level_zero_usm_p2p.cpp - L0 Adapter +//----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "ur_level_zero_usm_p2p.hpp" +#include "ur_level_zero.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( + ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { + + std::ignore = commandDevice; + std::ignore = peerDevice; + + //sycl::detail::ur::die("Experimental USM-P2P feature is not " + // "implemented in the L0 adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( + ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { + + std::ignore = commandDevice; + std::ignore = peerDevice; + + //sycl::detail::ur::die("Experimental USM-P2P feature is not " + // "implemented in the L0 adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( + ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + + std::ignore = commandDevice; + std::ignore = peerDevice; + std::ignore = propName; + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + // Zero return value indicates that all of the queries currently return false. + return ReturnValue(u_int32_t{0}); +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp new file mode 100644 index 0000000000000..f198174737467 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp @@ -0,0 +1,16 @@ +//===--------- ur_level_zero_usm_p2p.hpp - L0 Adapter-----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include "ur_level_zero_common.hpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp index 280c9d025d702..57e0f6473878d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp @@ -300,3 +300,16 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( return retVal; } + +UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; + + return retVal; +} diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 5f89f02b19595..24048892cd0ac 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -209,8 +209,8 @@ pi_native_handle device::getNative() const { return impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } void device::ext_oneapi_enable_peer_access(const device &peer) { - const RT::PiDevice Device = impl->getHandleRef(); - const RT::PiDevice Peer = peer.impl->getHandleRef(); + const detail::RT::PiDevice Device = impl->getHandleRef(); + const detail::RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); Plugin->call(Device, Peer); @@ -218,8 +218,8 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { } void 
device::ext_oneapi_disable_peer_access(const device &peer) { - const RT::PiDevice Device = impl->getHandleRef(); - const RT::PiDevice Peer = peer.impl->getHandleRef(); + const detail::RT::PiDevice Device = impl->getHandleRef(); + const detail::RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); Plugin->call(Device, Peer); @@ -228,8 +228,8 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { bool device::ext_oneapi_can_access_peer(const device &peer, ext::oneapi::peer_access attr) { - const RT::PiDevice Device = impl->getHandleRef(); - const RT::PiDevice Peer = peer.impl->getHandleRef(); + const detail::RT::PiDevice Device = impl->getHandleRef(); + const detail::RT::PiDevice Peer = peer.impl->getHandleRef(); if (Device == Peer) { return true; @@ -238,7 +238,7 @@ bool device::ext_oneapi_can_access_peer(const device &peer, size_t returnSize; int value; - RT::PiPeerAttr PiAttr = [&]() { + detail::RT::PiPeerAttr PiAttr = [&]() { switch (attr) { case ext::oneapi::peer_access::access_supported: return PI_PEER_ACCESS_SUPPORTED; From f2a2026c16de51b68de60b503414f93aa43fb699 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 23 Jun 2023 16:37:52 +0100 Subject: [PATCH 54/75] Attempt to fix getLastError. 
Signed-off-by: JackAKirk --- sycl/plugins/cuda/pi_cuda.cpp | 1 + sycl/plugins/unified_runtime/pi2ur.hpp | 4 +--- sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp | 2 +- .../ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp | 8 ++++---- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 25b2b8c076fa8..9bead08d961f3 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -21,6 +21,7 @@ void enableCUDATracing(); //-- PI API implementation extern "C" { + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 3732f9534f144..9a4a50a951b0c 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -681,8 +681,7 @@ inline pi_result piDeviceRelease(pi_device Device) { } inline pi_result piPluginGetLastError(char **message) { - std::ignore = message; - return PI_SUCCESS; + return HANDLE_ERRORS(urGetLastResult(message)); } inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, @@ -3991,7 +3990,6 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, } default: { return PI_ERROR_UNKNOWN; - ; } } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp index e87e8a53f020e..fa02e3cec3cf2 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp @@ -48,7 +48,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); int value; diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp index a008fc3ae2fc8..24b7684ca49e7 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp @@ -16,8 +16,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( std::ignore = commandDevice; std::ignore = peerDevice; - //sycl::detail::ur::die("Experimental USM-P2P feature is not " - // "implemented in the L0 adapter."); + sycl::detail::ur::die("Experimental USM-P2P feature is not " + "implemented in the L0 adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -27,8 +27,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( std::ignore = commandDevice; std::ignore = peerDevice; - //sycl::detail::ur::die("Experimental USM-P2P feature is not " - // "implemented in the L0 adapter."); + sycl::detail::ur::die("Experimental USM-P2P feature is not " + "implemented in the L0 adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 37aecb11e768fdb4ad3c590d83b3ac4f5e03db7b Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 26 Jun 2023 07:37:12 -0700 Subject: [PATCH 55/75] Switch to UR_CHECK_ERROR usage. Added UR::die for l0 feature not implemented. 
Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 3 ++- .../unified_runtime/ur/adapters/cuda/usm_p2p.cpp | 12 ++---------- .../ur/adapters/level_zero/ur_level_zero_common.cpp | 4 ++-- .../ur/adapters/level_zero/ur_level_zero_common.hpp | 4 ++-- sycl/test-e2e/USM/P2P/p2p_access.cpp | 2 +- sycl/test-e2e/USM/P2P/p2p_atomics.cpp | 4 ++-- 6 files changed, 11 insertions(+), 18 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 9a4a50a951b0c..9ee721ad3c10f 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -681,7 +681,8 @@ inline pi_result piDeviceRelease(pi_device Device) { } inline pi_result piPluginGetLastError(char **message) { - return HANDLE_ERRORS(urGetLastResult(message)); + std::ignore = message; + return PI_SUCCESS; } inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp index fa02e3cec3cf2..e2532a0bf22bb 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp @@ -16,11 +16,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( ur_result_t result = UR_RESULT_SUCCESS; try { ScopedContext active(commandDevice->getContext()); - CUresult cu_res = cuCtxEnablePeerAccess(peerDevice->getContext(), 0); - if (cu_res != CUDA_SUCCESS) { - setPluginSpecificMessage(cu_res); - result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } + UR_CHECK_ERROR(cuCtxEnablePeerAccess(peerDevice->getContext(), 0)); } catch (ur_result_t err) { result = err; } @@ -33,11 +29,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( ur_result_t result = UR_RESULT_SUCCESS; try { ScopedContext active(commandDevice->getContext()); - CUresult cu_res = cuCtxDisablePeerAccess(peerDevice->getContext()); - if (cu_res != 
CUDA_SUCCESS) { - setPluginSpecificMessage(cu_res); - result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } + UR_CHECK_ERROR(cuCtxDisablePeerAccess(peerDevice->getContext())); } catch (ur_result_t err) { result = err; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp index d48746c3fb1da..3397470369a77 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp @@ -282,7 +282,7 @@ ur_result_t zerPluginGetLastError(char **message) { return ErrorMessageCode; } -/*void sycl::detail::ur::die(const char *Message) { +void sycl::detail::ur::die(const char *Message) { std::cerr << "ur_die: " << Message << std::endl; std::terminate(); -}*/ +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp index 30bc748deba55..bd01c8f2122ce 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp @@ -22,6 +22,7 @@ #include #include +#include struct _ur_platform_handle_t; @@ -468,7 +469,7 @@ extern thread_local char ErrorMessage[MaxMessageSize]; ur_result_t error_code); /// ------ Error handling, matching OpenCL plugin semantics. 
-/*namespace sycl { +namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { namespace ur { @@ -483,4 +484,3 @@ namespace ur { } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl -*/ diff --git a/sycl/test-e2e/USM/P2P/p2p_access.cpp b/sycl/test-e2e/USM/P2P/p2p_access.cpp index ef32c0c07b2f8..d1b3c2f0b2f40 100644 --- a/sycl/test-e2e/USM/P2P/p2p_access.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_access.cpp @@ -66,6 +66,6 @@ int main() { sycl::free(arr1, Queues[1]); Devs[0].ext_oneapi_disable_peer_access(Devs[1]); - + std::cout << "PASS" << std::endl; return 0; } diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp index 09cf048a2de37..2f58c063c3c62 100644 --- a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -1,5 +1,5 @@ // REQUIRES: cuda -// RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %} +// RUN: %if any-device-is-cuda %{ %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %} // RUN: %if ext_oneapi_cuda %{ %{run} %t.out %} #include @@ -77,6 +77,6 @@ int main() { free(d_sum, Queues[0]); free(d_in, Queues[0]); - + std::cout << "PASS" << std::endl; return 0; } From e80ff9b29d686907149051bfaf3b441a33fd9921 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 26 Jun 2023 07:41:01 -0700 Subject: [PATCH 56/75] Format. 
Signed-off-by: JackAKirk --- .../ur/adapters/level_zero/ur_level_zero_common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp index bd01c8f2122ce..11fe36ef10fff 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp @@ -21,8 +21,8 @@ #include #include -#include #include +#include struct _ur_platform_handle_t; From b987dbabed694b62251de850b7000f638b6f8878 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 26 Jun 2023 08:05:27 -0700 Subject: [PATCH 57/75] RT:: -> sycl::detail::pi:: Signed-off-by: JackAKirk --- sycl/source/device.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 2ded10de5bed0..5d84f101cbe0a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -209,8 +209,8 @@ pi_native_handle device::getNative() const { return impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } void device::ext_oneapi_enable_peer_access(const device &peer) { - const detail::RT::PiDevice Device = impl->getHandleRef(); - const detail::RT::PiDevice Peer = peer.impl->getHandleRef(); + const sycl::detail::pi::PiDevice Device = impl->getHandleRef(); + const sycl::detail::pi::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); Plugin->call(Device, Peer); @@ -218,8 +218,8 @@ void device::ext_oneapi_enable_peer_access(const device &peer) { } void device::ext_oneapi_disable_peer_access(const device &peer) { - const detail::RT::PiDevice Device = impl->getHandleRef(); - const detail::RT::PiDevice Peer = peer.impl->getHandleRef(); + const sycl::detail::pi::PiDevice Device = impl->getHandleRef(); + const 
sycl::detail::pi::PiDevice Peer = peer.impl->getHandleRef(); if (Device != Peer) { auto Plugin = impl->getPlugin(); Plugin->call(Device, Peer); @@ -228,8 +228,8 @@ void device::ext_oneapi_disable_peer_access(const device &peer) { bool device::ext_oneapi_can_access_peer(const device &peer, ext::oneapi::peer_access attr) { - const detail::RT::PiDevice Device = impl->getHandleRef(); - const detail::RT::PiDevice Peer = peer.impl->getHandleRef(); + const sycl::detail::pi::PiDevice Device = impl->getHandleRef(); + const sycl::detail::pi::PiDevice Peer = peer.impl->getHandleRef(); if (Device == Peer) { return true; @@ -238,7 +238,7 @@ bool device::ext_oneapi_can_access_peer(const device &peer, size_t returnSize; int value; - detail::RT::PiPeerAttr PiAttr = [&]() { + sycl::detail::pi::PiPeerAttr PiAttr = [&]() { switch (attr) { case ext::oneapi::peer_access::access_supported: return PI_PEER_ACCESS_SUPPORTED; From 6724a40b63807980fe695c472a22cadaa65118f1 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 26 Jun 2023 08:25:27 -0700 Subject: [PATCH 58/75] u_int32_t -> uint32_t Added missing UR_CHECK_ERROR use. 
Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp | 8 ++------ .../ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp index e2532a0bf22bb..eb98111b1429d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp @@ -61,12 +61,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( } } - CUresult cu_res = cuDeviceGetP2PAttribute( - &value, cu_attr, commandDevice->get(), peerDevice->get()); - if (cu_res != CUDA_SUCCESS) { - setPluginSpecificMessage(cu_res); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } + UR_CHECK_ERROR(cuDeviceGetP2PAttribute( + &value, cu_attr, commandDevice->get(), peerDevice->get())); } catch (ur_result_t err) { return err; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp index 24b7684ca49e7..5a29c002f87e9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp @@ -43,5 +43,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // Zero return value indicates that all of the queries currently return false. - return ReturnValue(u_int32_t{0}); + return ReturnValue(uint32_t{0}); } From 69a65067932e4111f77ab4e1fe4985a43a38e140 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 26 Jun 2023 08:41:53 -0700 Subject: [PATCH 59/75] Removed unnecessary includes. 
Signed-off-by: JackAKirk --- .../ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp index f198174737467..27d2199920d7f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp @@ -9,8 +9,5 @@ #pragma once #include -#include -#include -#include #include "ur_level_zero_common.hpp" From 8b9c2ab5aa985f8dd95c89224f116542b6e24baa Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 3 Jul 2023 14:07:43 +0100 Subject: [PATCH 60/75] PI_ERROR_UNKNOWN -> return PI_ERROR_INVALID_VALUE Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index b8735439930d9..a82f28434cf66 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4116,7 +4116,7 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, break; } default: { - return PI_ERROR_UNKNOWN; + return PI_ERROR_INVALID_VALUE; } } From e5e967f4414ad85a2454f61f10fc6b745367bbc6 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 4 Jul 2023 08:12:20 -0700 Subject: [PATCH 61/75] Conflict fix. 
Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/CMakeLists.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 931ba19330f75..c5ae533f6ca6d 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -98,6 +98,7 @@ add_sycl_library("ur_adapter_level_zero" SHARED "ur/adapters/level_zero/ur_level_zero_sampler.hpp" "ur/adapters/level_zero/ur_level_zero_usm.hpp" "ur/adapters/level_zero/ur_level_zero_command_buffer.hpp" + "ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp" "ur/adapters/level_zero/ur_level_zero.cpp" "ur/adapters/level_zero/ur_level_zero_common.cpp" "ur/adapters/level_zero/ur_level_zero_context.cpp" @@ -110,12 +111,8 @@ add_sycl_library("ur_adapter_level_zero" SHARED "ur/adapters/level_zero/ur_level_zero_queue.cpp" "ur/adapters/level_zero/ur_level_zero_sampler.cpp" "ur/adapters/level_zero/ur_level_zero_usm.cpp" -<<<<<<< HEAD - "ur/adapters/level_zero/ur_level_zero_usm_p2p.hpp" - "ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp" -======= "ur/adapters/level_zero/ur_level_zero_command_buffer.cpp" ->>>>>>> sycl + "ur/adapters/level_zero/ur_level_zero_usm_p2p.cpp" INCLUDE_DIRS ${sycl_inc_dir} LIBRARIES From f0993b926e7398f3c3be7550ca65d09535b89e7a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Jul 2023 05:14:39 -0700 Subject: [PATCH 62/75] Reverted pi2ur.hpp due to problem diff. 
Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 55 -------------------------- 1 file changed, 55 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 2f01c917ff6cc..bf0dad1b17e4d 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4322,59 +4322,4 @@ inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, // Command-buffer extension /////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -// usm-p2p - -pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { - auto commandDevice = reinterpret_cast(command_device); - auto peerDevice = reinterpret_cast(peer_device); - - HANDLE_ERRORS(urUsmP2PEnablePeerAccessExp(commandDevice, peerDevice)); - - return PI_SUCCESS; -} - -pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { - auto commandDevice = reinterpret_cast(command_device); - auto peerDevice = reinterpret_cast(peer_device); - - HANDLE_ERRORS(urUsmP2PDisablePeerAccessExp(commandDevice, peerDevice)); - - return PI_SUCCESS; -} - -pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - auto commandDevice = reinterpret_cast(command_device); - auto peerDevice = reinterpret_cast(peer_device); - - ur_exp_peer_info_t propName; - switch (attr) { - case PI_PEER_ACCESS_SUPPORTED: { - propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; - break; - } - case PI_PEER_ATOMICS_SUPPORTED: { - propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; - break; - } - default: { - return PI_ERROR_INVALID_VALUE; - } - } - - HANDLE_ERRORS(urUsmP2PPeerAccessGetInfoExp( - commandDevice, peerDevice, propName, param_value_size, param_value, - 
param_value_size_ret)); - - return PI_SUCCESS; -} - -// usm-p2p -/////////////////////////////////////////////////////////////////////////////// - } // namespace pi2ur From 28028ac148eacae4e413a02657f8ddc1d0786eb4 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Jul 2023 05:20:11 -0700 Subject: [PATCH 63/75] same as last commit. Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 8650 ++++++++++++------------ 1 file changed, 4325 insertions(+), 4325 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index bf0dad1b17e4d..79b6d44a4d0d8 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1,4325 +1,4325 @@ -//===---------------- pi2ur.hpp - PI API to UR API --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===------------------------------------------------------------------===// -#pragma once - -#include "ur_api.h" -#include -#include -#include -#include - -// Map of UR error codes to PI error codes -static pi_result ur2piResult(ur_result_t urResult) { - if (urResult == UR_RESULT_SUCCESS) - return PI_SUCCESS; - - switch (urResult) { - case UR_RESULT_ERROR_INVALID_OPERATION: - return PI_ERROR_INVALID_OPERATION; - case UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: - return PI_ERROR_INVALID_QUEUE_PROPERTIES; - case UR_RESULT_ERROR_INVALID_QUEUE: - return PI_ERROR_INVALID_QUEUE; - case UR_RESULT_ERROR_INVALID_VALUE: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_CONTEXT: - return PI_ERROR_INVALID_CONTEXT; - case UR_RESULT_ERROR_INVALID_PLATFORM: - return PI_ERROR_INVALID_PLATFORM; - case UR_RESULT_ERROR_INVALID_BINARY: - return PI_ERROR_INVALID_BINARY; - case UR_RESULT_ERROR_INVALID_PROGRAM: - return PI_ERROR_INVALID_PROGRAM; - case 
UR_RESULT_ERROR_INVALID_SAMPLER: - return PI_ERROR_INVALID_SAMPLER; - case UR_RESULT_ERROR_INVALID_MEM_OBJECT: - return PI_ERROR_INVALID_MEM_OBJECT; - case UR_RESULT_ERROR_INVALID_EVENT: - return PI_ERROR_INVALID_EVENT; - case UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: - return PI_ERROR_INVALID_EVENT_WAIT_LIST; - case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: - return PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET; - case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: - return PI_ERROR_INVALID_WORK_GROUP_SIZE; - case UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE: - return PI_ERROR_COMPILER_NOT_AVAILABLE; - case UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: - return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE; - case UR_RESULT_ERROR_DEVICE_NOT_FOUND: - return PI_ERROR_DEVICE_NOT_FOUND; - case UR_RESULT_ERROR_INVALID_DEVICE: - return PI_ERROR_INVALID_DEVICE; - case UR_RESULT_ERROR_DEVICE_REQUIRES_RESET: - case UR_RESULT_ERROR_DEVICE_LOST: - return PI_ERROR_DEVICE_NOT_AVAILABLE; - case UR_RESULT_ERROR_DEVICE_PARTITION_FAILED: - return PI_ERROR_DEVICE_PARTITION_FAILED; - case UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT: - return PI_ERROR_INVALID_DEVICE_PARTITION_COUNT; - case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE: - return PI_ERROR_INVALID_WORK_ITEM_SIZE; - case UR_RESULT_ERROR_INVALID_WORK_DIMENSION: - return PI_ERROR_INVALID_WORK_DIMENSION; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGS: - return PI_ERROR_INVALID_KERNEL_ARGS; - case UR_RESULT_ERROR_INVALID_KERNEL: - return PI_ERROR_INVALID_KERNEL; - case UR_RESULT_ERROR_INVALID_KERNEL_NAME: - return PI_ERROR_INVALID_KERNEL_NAME; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX: - return PI_ERROR_INVALID_ARG_INDEX; - case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE: - return PI_ERROR_INVALID_ARG_SIZE; - case UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_IMAGE_SIZE: - return PI_ERROR_INVALID_IMAGE_SIZE; - case UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return 
PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; - case UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED: - return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: - return PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE; - case UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE: - return PI_ERROR_INVALID_PROGRAM_EXECUTABLE; - case UR_RESULT_ERROR_UNINITIALIZED: - return PI_ERROR_UNINITIALIZED; - case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: - return PI_ERROR_OUT_OF_HOST_MEMORY; - case UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY: - case UR_RESULT_ERROR_OUT_OF_RESOURCES: - return PI_ERROR_OUT_OF_RESOURCES; - case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE: - return PI_ERROR_BUILD_PROGRAM_FAILURE; - case UR_RESULT_ERROR_PROGRAM_LINK_FAILURE: - return PI_ERROR_LINK_PROGRAM_FAILURE; - case UR_RESULT_ERROR_UNSUPPORTED_VERSION: - case UR_RESULT_ERROR_UNSUPPORTED_FEATURE: - case UR_RESULT_ERROR_INVALID_ARGUMENT: - case UR_RESULT_ERROR_INVALID_NULL_HANDLE: - case UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE: - case UR_RESULT_ERROR_INVALID_NULL_POINTER: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_SIZE: - case UR_RESULT_ERROR_UNSUPPORTED_SIZE: - return PI_ERROR_INVALID_BUFFER_SIZE; - case UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT: - case UR_RESULT_ERROR_INVALID_ENUMERATION: - case UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT: - return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - case UR_RESULT_ERROR_INVALID_NATIVE_BINARY: - return PI_ERROR_INVALID_BINARY; - case UR_RESULT_ERROR_INVALID_GLOBAL_NAME: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_FUNCTION_NAME: - return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; - case UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION: - return PI_ERROR_INVALID_WORK_DIMENSION; - case UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION: - return PI_ERROR_INVALID_VALUE; - - case 
UR_RESULT_ERROR_PROGRAM_UNLINKED: - return PI_ERROR_INVALID_PROGRAM_EXECUTABLE; - case UR_RESULT_ERROR_OVERLAPPING_REGIONS: - return PI_ERROR_MEM_COPY_OVERLAP; - case UR_RESULT_ERROR_INVALID_HOST_PTR: - return PI_ERROR_INVALID_HOST_PTR; - case UR_RESULT_ERROR_INVALID_USM_SIZE: - return PI_ERROR_INVALID_BUFFER_SIZE; - case UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE: - return PI_ERROR_OUT_OF_RESOURCES; - case UR_RESULT_ERROR_ADAPTER_SPECIFIC: - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - case UR_RESULT_ERROR_UNKNOWN: - default: - return PI_ERROR_UNKNOWN; - }; -} - -// Helper for one-liner validation -#define PI_ASSERT(condition, error) \ - if (!(condition)) \ - return error; - -// Early exits on any error -#define HANDLE_ERRORS(urCall) \ - if (auto Result = urCall) \ - return ur2piResult(Result); - -// A version of return helper that returns pi_result and not ur_result_t -class ReturnHelper : public UrReturnHelper { -public: - using UrReturnHelper::UrReturnHelper; - - template pi_result operator()(const T &t) { - return ur2piResult(UrReturnHelper::operator()(t)); - } - // Array return value - template pi_result operator()(const T *t, size_t s) { - return ur2piResult(UrReturnHelper::operator()(t, s)); - } - // Array return value where element type is differrent from T - template pi_result operator()(const T *t, size_t s) { - return ur2piResult(UrReturnHelper::operator()(t, s)); - } -}; - -// A version of return helper that supports conversion through a map -class ConvertHelper : public ReturnHelper { - using ReturnHelper::ReturnHelper; - -public: - // Convert the value using a conversion map - template - pi_result convert(std::function Func) { - *param_value_size_ret = sizeof(TypePI); - - // There is no value to convert. 
- if (!param_value) - return PI_SUCCESS; - - auto pValueUR = static_cast(param_value); - auto pValuePI = static_cast(param_value); - - // Cannot convert to a smaller storage type - PI_ASSERT(sizeof(TypePI) >= sizeof(TypeUR), PI_ERROR_UNKNOWN); - - *pValuePI = Func(*pValueUR); - return PI_SUCCESS; - } - - // Convert the array (0-terminated) using a conversion map - template - pi_result convertArray(std::function Func) { - // Cannot convert to a smaller element storage type - PI_ASSERT(sizeof(TypePI) >= sizeof(TypeUR), PI_ERROR_UNKNOWN); - *param_value_size_ret *= sizeof(TypePI) / sizeof(TypeUR); - - // There is no value to convert. Adjust to a possibly bigger PI storage. - if (!param_value) - return PI_SUCCESS; - - PI_ASSERT(*param_value_size_ret % sizeof(TypePI) == 0, PI_ERROR_UNKNOWN); - - // Make a copy of the input UR array as we may possibly overwrite - // following elements while converting previous ones (if extending). - auto ValueUR = new char[*param_value_size_ret]; - auto pValueUR = reinterpret_cast(ValueUR); - auto pValuePI = static_cast(param_value); - memcpy(pValueUR, param_value, *param_value_size_ret); - - while (pValueUR) { - if (*pValueUR == 0) { - *pValuePI = 0; - break; - } - - *pValuePI = Func(*pValueUR); - ++pValuePI; - ++pValueUR; - } - - delete[] ValueUR; - return PI_SUCCESS; - } - - // Convert the bitset using a conversion map - template - pi_result convertBitSet(std::function Func) { - // There is no value to convert. 
- if (!param_value) - return PI_SUCCESS; - - auto pValuePI = static_cast(param_value); - auto pValueUR = static_cast(param_value); - - // Cannot handle biteset large than size_t - PI_ASSERT(sizeof(TypeUR) <= sizeof(size_t), PI_ERROR_UNKNOWN); - size_t In = *pValueUR; - TypePI Out = 0; - - size_t Val; - while ((Val = In & -In)) { // Val is the rightmost set bit in In - In &= In - 1; // Reset the rightmost set bit - - // Convert the Val alone and merge it into Out - *pValueUR = TypeUR(Val); - if (auto Res = convert(Func)) - return Res; - Out |= *pValuePI; - } - *pValuePI = TypePI(Out); - return PI_SUCCESS; - } -}; - -// Translate UR platform info values to PI info values -inline pi_result ur2piPlatformInfoValue(ur_platform_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - - switch (ParamName) { - case UR_PLATFORM_INFO_EXTENSIONS: - case UR_PLATFORM_INFO_NAME: - case UR_PLATFORM_INFO_PROFILE: - case UR_PLATFORM_INFO_VENDOR_NAME: - case UR_PLATFORM_INFO_VERSION: - // These ones do not need ur2pi translations - break; - case UR_PLATFORM_INFO_BACKEND: { - auto ConvertFunc = [](ur_platform_backend_t UrValue) { - switch (UrValue) { - case UR_PLATFORM_BACKEND_UNKNOWN: - return PI_EXT_PLATFORM_BACKEND_UNKNOWN; - case UR_PLATFORM_BACKEND_LEVEL_ZERO: - return PI_EXT_PLATFORM_BACKEND_LEVEL_ZERO; - case UR_PLATFORM_BACKEND_OPENCL: - return PI_EXT_PLATFORM_BACKEND_OPENCL; - case UR_PLATFORM_BACKEND_CUDA: - return PI_EXT_PLATFORM_BACKEND_CUDA; - case UR_PLATFORM_BACKEND_HIP: - return PI_EXT_PLATFORM_BACKEND_CUDA; - default: - die("UR_PLATFORM_INFO_BACKEND: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } - default: - return PI_ERROR_UNKNOWN; - } - - if (ParamValueSizePI && ParamValueSizePI != *ParamValueSizeUR) { - fprintf(stderr, "UR PlatformInfoType=%d PI=%d but UR=%d\n", ParamName, - (int)ParamValueSizePI, (int)*ParamValueSizeUR); - 
die("ur2piPlatformInfoValue: size mismatch"); - } - return PI_SUCCESS; -} - -// Translate UR device info values to PI info values -inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - - if (ParamName == UR_DEVICE_INFO_TYPE) { - auto ConvertFunc = [](ur_device_type_t UrValue) { - switch (UrValue) { - case UR_DEVICE_TYPE_CPU: - return PI_DEVICE_TYPE_CPU; - case UR_DEVICE_TYPE_GPU: - return PI_DEVICE_TYPE_GPU; - case UR_DEVICE_TYPE_FPGA: - return PI_DEVICE_TYPE_ACC; - default: - die("UR_DEVICE_INFO_TYPE: unhandled value"); - } - }; - return Value.convert(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_QUEUE_PROPERTIES) { - auto ConvertFunc = [](ur_queue_flag_t UrValue) { - switch (UrValue) { - case UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE: - return PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - case UR_QUEUE_FLAG_PROFILING_ENABLE: - return PI_QUEUE_FLAG_PROFILING_ENABLE; - case UR_QUEUE_FLAG_ON_DEVICE: - return PI_QUEUE_FLAG_ON_DEVICE; - case UR_QUEUE_FLAG_ON_DEVICE_DEFAULT: - return PI_QUEUE_FLAG_ON_DEVICE_DEFAULT; - case UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM: - return static_cast(__SYCL_PI_CUDA_SYNC_WITH_DEFAULT); - case UR_QUEUE_FLAG_USE_DEFAULT_STREAM: - return static_cast(__SYCL_PI_CUDA_USE_DEFAULT_STREAM); - default: - die("UR_DEVICE_INFO_QUEUE_PROPERTIES: unhandled value"); - } - }; - return Value.convertBitSet( - ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_EXECUTION_CAPABILITIES) { - auto ConvertFunc = [](ur_device_exec_capability_flag_t UrValue) { - switch (UrValue) { - case UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL: - return PI_DEVICE_EXEC_CAPABILITIES_KERNEL; - case UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL: - return PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL; - default: - die("UR_DEVICE_INFO_EXECUTION_CAPABILITIES: unhandled value"); - } - }; - return Value - .convertBitSet( 
- ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - auto ConvertFunc = [](ur_device_affinity_domain_flag_t UrValue) { - switch (UrValue) { - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: - return PI_DEVICE_AFFINITY_DOMAIN_NUMA; - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: - return PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; - default: - die("UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: unhandled value"); - } - }; - return Value.convertBitSet(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_PARTITION_TYPE) { - auto ConvertFunc = [](ur_device_partition_t UrValue) { - if (UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN == UrValue) - return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - else if (UR_DEVICE_PARTITION_BY_CSLICE == UrValue) - return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; - else if ((ur_device_partition_t) - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE == UrValue) - return (pi_device_partition_property) - PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; - die("UR_DEVICE_INFO_PARTITION_TYPE: unhandled value"); - }; - return Value - .convertArray( - ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { - auto ConvertFunc = [](ur_device_partition_t UrValue) { - switch (UrValue) { - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: - return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - case UR_DEVICE_PARTITION_BY_CSLICE: - return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; - default: - die("UR_DEVICE_INFO_SUPPORTED_PARTITIONS: unhandled value"); - } - }; - return Value - .convertArray( - ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_LOCAL_MEM_TYPE) { - auto ConvertFunc = [](ur_device_local_mem_type_t UrValue) { - switch (UrValue) { - case UR_DEVICE_LOCAL_MEM_TYPE_LOCAL: - return PI_DEVICE_LOCAL_MEM_TYPE_LOCAL; - case UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL: - return PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL; - default: - die("UR_DEVICE_INFO_LOCAL_MEM_TYPE: unhandled value"); - } - }; - return Value.convert( - 
ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES || - ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { - auto ConvertFunc = [](ur_memory_order_capability_flag_t UrValue) { - switch (UrValue) { - case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED: - return PI_MEMORY_ORDER_RELAXED; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE: - return PI_MEMORY_ORDER_ACQUIRE; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE: - return PI_MEMORY_ORDER_RELEASE; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL: - return PI_MEMORY_ORDER_ACQ_REL; - case UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST: - return PI_MEMORY_ORDER_SEQ_CST; - default: - die("UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: unhandled " - "value"); - } - }; - return Value.convertBitSet(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES || - ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { - auto ConvertFunc = [](ur_memory_scope_capability_flag_t UrValue) { - switch (UrValue) { - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM: - return PI_MEMORY_SCOPE_WORK_ITEM; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP: - return PI_MEMORY_SCOPE_SUB_GROUP; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP: - return PI_MEMORY_SCOPE_WORK_GROUP; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE: - return PI_MEMORY_SCOPE_DEVICE; - case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM: - return PI_MEMORY_SCOPE_SYSTEM; - default: - die("UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: unhandled " - "value"); - } - }; - return Value.convertBitSet(ConvertFunc); - } else { - // TODO: what else needs a UR-PI translation? 
- } - - if (ParamValueSizePI && ParamValueSizePI != *ParamValueSizeUR) { - fprintf(stderr, "UR DeviceInfoType=%d PI=%d but UR=%d\n", ParamName, - (int)ParamValueSizePI, (int)*ParamValueSizeUR); - die("ur2piDeviceInfoValue: size mismatch"); - } - return PI_SUCCESS; -} - -inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - switch (ParamName) { - case UR_SAMPLER_INFO_ADDRESSING_MODE: { - auto ConvertFunc = [](ur_sampler_addressing_mode_t UrValue) { - switch (UrValue) { - case UR_SAMPLER_ADDRESSING_MODE_CLAMP: - return PI_SAMPLER_ADDRESSING_MODE_CLAMP; - case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: - return PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; - case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: - return PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; - case UR_SAMPLER_ADDRESSING_MODE_NONE: - return PI_SAMPLER_ADDRESSING_MODE_NONE; - case UR_SAMPLER_ADDRESSING_MODE_REPEAT: - return PI_SAMPLER_ADDRESSING_MODE_REPEAT; - - default: - die("UR_SAMPLER_ADDRESSING_MODE_TYPE: unhandled value"); - } - }; - return Value - .convert( - ConvertFunc); - } - case UR_SAMPLER_INFO_FILTER_MODE: { - auto ConvertFunc = [](ur_sampler_filter_mode_t UrValue) { - switch (UrValue) { - case UR_SAMPLER_FILTER_MODE_LINEAR: - return PI_SAMPLER_FILTER_MODE_LINEAR; - case UR_SAMPLER_FILTER_MODE_NEAREST: - return PI_SAMPLER_FILTER_MODE_NEAREST; - default: - die("UR_SAMPLER_FILTER_MODE: unhandled value"); - } - }; - return Value.convert( - ConvertFunc); - } - default: - return PI_SUCCESS; - } -} - -// Translate UR device info values to PI info values -inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, - size_t ParamValueSizePI, - size_t *ParamValueSizeUR, - void *ParamValue) { - ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - - if (ParamName == UR_USM_ALLOC_INFO_TYPE) { - auto ConvertFunc = 
[](ur_usm_type_t UrValue) { - switch (UrValue) { - case UR_USM_TYPE_UNKNOWN: - return PI_MEM_TYPE_UNKNOWN; - case UR_USM_TYPE_HOST: - return PI_MEM_TYPE_HOST; - case UR_USM_TYPE_DEVICE: - return PI_MEM_TYPE_DEVICE; - case UR_USM_TYPE_SHARED: - return PI_MEM_TYPE_SHARED; - default: - die("UR_USM_ALLOC_INFO_TYPE: unhandled value"); - } - }; - return Value.convert(ConvertFunc); - } - - return PI_SUCCESS; -} - -// Handle mismatched PI and UR type return sizes for info queries -inline pi_result fixupInfoValueTypes(size_t ParamValueSizeRetUR, - size_t *ParamValueSizeRetPI, - size_t ParamValueSize, void *ParamValue) { - if (ParamValueSizeRetUR == 1 && ParamValueSize == 4) { - // extend bool to pi_bool (uint32_t) - if (ParamValue) { - auto *ValIn = static_cast(ParamValue); - auto *ValOut = static_cast(ParamValue); - *ValOut = static_cast(*ValIn); - } - if (ParamValueSizeRetPI) { - *ParamValueSizeRetPI = sizeof(pi_bool); - } - } - - return PI_SUCCESS; -} - -inline ur_result_t -mapPIMetadataToUR(const pi_device_binary_property *pi_metadata, - ur_program_metadata_t *ur_metadata) { - ur_metadata->pName = (*pi_metadata)->Name; - ur_metadata->size = (*pi_metadata)->ValSize; - switch ((*pi_metadata)->Type) { - case PI_PROPERTY_TYPE_UINT32: - ur_metadata->type = UR_PROGRAM_METADATA_TYPE_UINT32; - ur_metadata->value.data32 = (*pi_metadata)->ValSize; - return UR_RESULT_SUCCESS; - case PI_PROPERTY_TYPE_BYTE_ARRAY: - ur_metadata->type = UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY; - ur_metadata->value.pData = (*pi_metadata)->ValAddr; - return UR_RESULT_SUCCESS; - case PI_PROPERTY_TYPE_STRING: - ur_metadata->type = UR_PROGRAM_METADATA_TYPE_STRING; - ur_metadata->value.pString = - reinterpret_cast((*pi_metadata)->ValAddr); - return UR_RESULT_SUCCESS; - default: - return UR_RESULT_ERROR_INVALID_VALUE; - } -} - -namespace pi2ur { - -inline pi_result piTearDown(void *PluginParameter) { - std::ignore = PluginParameter; - // TODO: Dont check for errors in urTearDown, since - // when using Level Zero 
plugin, the second urTearDown - // will fail as ur_loader.so has already been unloaded, - urTearDown(nullptr); - return PI_SUCCESS; -} - -/////////////////////////////////////////////////////////////////////////////// -// Platform -inline pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, - pi_uint32 *NumPlatforms) { - - urInit(0); - auto phPlatforms = reinterpret_cast(Platforms); - HANDLE_ERRORS(urPlatformGet(NumEntries, phPlatforms, NumPlatforms)); - return PI_SUCCESS; -} - -inline pi_result piextPlatformGetNativeHandle(pi_platform Platform, - pi_native_handle *NativeHandle) { - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - auto UrPlatform = reinterpret_cast(Platform); - - ur_native_handle_t UrNativeHandle{}; - HANDLE_ERRORS(urPlatformGetNativeHandle(UrPlatform, &UrNativeHandle)); - - *NativeHandle = reinterpret_cast(UrNativeHandle); - - return PI_SUCCESS; -} - -inline pi_result -piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform *Platform) { - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_platform_handle_t UrPlatform{}; - ur_native_handle_t UrNativeHandle = - reinterpret_cast(NativeHandle); - ur_platform_native_properties_t UrProperties{}; - urPlatformCreateWithNativeHandle(UrNativeHandle, &UrProperties, &UrPlatform); - - *Platform = reinterpret_cast(UrPlatform); - - return PI_SUCCESS; -} - -inline pi_result piPlatformGetInfo(pi_platform Platform, - pi_platform_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - - ur_platform_info_t UrParamName = {}; - switch (ParamName) { - case PI_PLATFORM_INFO_EXTENSIONS: { - UrParamName = UR_PLATFORM_INFO_EXTENSIONS; - break; - } - case PI_PLATFORM_INFO_NAME: { - UrParamName = UR_PLATFORM_INFO_NAME; - break; - } - case PI_PLATFORM_INFO_PROFILE: { - UrParamName = 
UR_PLATFORM_INFO_PROFILE; - break; - } - case PI_PLATFORM_INFO_VENDOR: { - UrParamName = UR_PLATFORM_INFO_VENDOR_NAME; - break; - } - case PI_PLATFORM_INFO_VERSION: { - UrParamName = UR_PLATFORM_INFO_VERSION; - break; - } - case PI_EXT_PLATFORM_INFO_BACKEND: { - UrParamName = UR_PLATFORM_INFO_BACKEND; - break; - } - default: - die("urGetContextInfo: unsuppported ParamName."); - } - - size_t UrParamValueSizeRet; - auto UrPlatform = reinterpret_cast(Platform); - HANDLE_ERRORS(urPlatformGetInfo(UrPlatform, UrParamName, ParamValueSize, - ParamValue, &UrParamValueSizeRet)); - - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - ur2piPlatformInfoValue(UrParamName, ParamValueSize, &ParamValueSize, - ParamValue); - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - - return PI_SUCCESS; -} - -inline pi_result piextPluginGetOpaqueData(void *opaque_data_param, - void **opaque_data_return) { - (void)opaque_data_param; - (void)opaque_data_return; - return PI_ERROR_UNKNOWN; -} - -inline pi_result piPluginGetBackendOption(pi_platform Platform, - const char *FrontendOption, - const char **PlatformOption) { - - auto UrPlatform = reinterpret_cast(Platform); - HANDLE_ERRORS( - urPlatformGetBackendOption(UrPlatform, FrontendOption, PlatformOption)); - - return PI_SUCCESS; -} - -// Platform -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Device -inline pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, - pi_uint32 NumEntries, pi_device *Devices, - pi_uint32 *NumDevices) { - ur_device_type_t Type; - switch (DeviceType) { - case PI_DEVICE_TYPE_ALL: - Type = UR_DEVICE_TYPE_ALL; - break; - case PI_DEVICE_TYPE_GPU: - Type = UR_DEVICE_TYPE_GPU; - break; - case PI_DEVICE_TYPE_CPU: - Type = UR_DEVICE_TYPE_CPU; - break; - case PI_DEVICE_TYPE_ACC: - Type = UR_DEVICE_TYPE_FPGA; - break; - 
default: - return PI_ERROR_UNKNOWN; - } - - PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); - - auto UrPlatform = reinterpret_cast(Platform); - auto UrDevices = reinterpret_cast(Devices); - HANDLE_ERRORS( - urDeviceGet(UrPlatform, Type, NumEntries, UrDevices, NumDevices)); - - return PI_SUCCESS; -} - -inline pi_result piDeviceRetain(pi_device Device) { - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrDevice = reinterpret_cast(Device); - HANDLE_ERRORS(urDeviceRetain(UrDevice)); - return PI_SUCCESS; -} - -inline pi_result piDeviceRelease(pi_device Device) { - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - auto UrDevice = reinterpret_cast(Device); - HANDLE_ERRORS(urDeviceRelease(UrDevice)); - return PI_SUCCESS; -} - -inline pi_result piPluginGetLastError(char **message) { - std::ignore = message; - return PI_SUCCESS; -} - -inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - ur_device_info_t InfoType; - switch (ParamName) { - case PI_DEVICE_INFO_TYPE: - InfoType = UR_DEVICE_INFO_TYPE; - break; - case PI_DEVICE_INFO_PARENT_DEVICE: - InfoType = UR_DEVICE_INFO_PARENT_DEVICE; - break; - case PI_DEVICE_INFO_PLATFORM: - InfoType = UR_DEVICE_INFO_PLATFORM; - break; - case PI_DEVICE_INFO_VENDOR_ID: - InfoType = UR_DEVICE_INFO_VENDOR_ID; - break; - case PI_DEVICE_INFO_UUID: - InfoType = UR_DEVICE_INFO_UUID; - break; - case PI_DEVICE_INFO_ATOMIC_64: - InfoType = UR_DEVICE_INFO_ATOMIC_64; - break; - case PI_DEVICE_INFO_EXTENSIONS: - InfoType = UR_DEVICE_INFO_EXTENSIONS; - break; - case PI_DEVICE_INFO_NAME: - InfoType = UR_DEVICE_INFO_NAME; - break; - case PI_DEVICE_INFO_COMPILER_AVAILABLE: - InfoType = UR_DEVICE_INFO_COMPILER_AVAILABLE; - break; - case PI_DEVICE_INFO_LINKER_AVAILABLE: - InfoType = UR_DEVICE_INFO_LINKER_AVAILABLE; - break; - case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: - InfoType = UR_DEVICE_INFO_MAX_COMPUTE_UNITS; - break; - case 
PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: - InfoType = UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS; - break; - case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: - InfoType = UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE; - break; - case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - InfoType = UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES; - break; - case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: - InfoType = UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY; - break; - case PI_DEVICE_INFO_ADDRESS_BITS: - InfoType = UR_DEVICE_INFO_ADDRESS_BITS; - break; - case PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - InfoType = UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_SIZE; - break; - case PI_DEVICE_INFO_LOCAL_MEM_SIZE: - InfoType = UR_DEVICE_INFO_LOCAL_MEM_SIZE; - break; - case PI_DEVICE_INFO_IMAGE_SUPPORT: - InfoType = UR_DEVICE_INFO_IMAGE_SUPPORTED; - break; - case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: - InfoType = UR_DEVICE_INFO_HOST_UNIFIED_MEMORY; - break; - case PI_DEVICE_INFO_AVAILABLE: - InfoType = UR_DEVICE_INFO_AVAILABLE; - break; - case PI_DEVICE_INFO_VENDOR: - InfoType = UR_DEVICE_INFO_VENDOR; - break; - case PI_DEVICE_INFO_DRIVER_VERSION: - InfoType = UR_DEVICE_INFO_DRIVER_VERSION; - break; - case PI_DEVICE_INFO_VERSION: - InfoType = UR_DEVICE_INFO_VERSION; - break; - case PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: - InfoType = UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES; - break; - case PI_DEVICE_INFO_REFERENCE_COUNT: - InfoType = UR_DEVICE_INFO_REFERENCE_COUNT; - break; - case PI_DEVICE_INFO_PARTITION_PROPERTIES: - InfoType = UR_DEVICE_INFO_SUPPORTED_PARTITIONS; - break; - case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: - InfoType = UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN; - break; - case PI_DEVICE_INFO_PARTITION_TYPE: - InfoType = UR_DEVICE_INFO_PARTITION_TYPE; - break; - case PI_DEVICE_INFO_OPENCL_C_VERSION: - InfoType = UR_EXT_DEVICE_INFO_OPENCL_C_VERSION; - break; - case PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: - InfoType = 
UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC; - break; - case PI_DEVICE_INFO_PRINTF_BUFFER_SIZE: - InfoType = UR_DEVICE_INFO_PRINTF_BUFFER_SIZE; - break; - case PI_DEVICE_INFO_PROFILE: - InfoType = UR_DEVICE_INFO_PROFILE; - break; - case PI_DEVICE_INFO_BUILT_IN_KERNELS: - InfoType = UR_DEVICE_INFO_BUILT_IN_KERNELS; - break; - case PI_DEVICE_INFO_QUEUE_PROPERTIES: - InfoType = UR_DEVICE_INFO_QUEUE_PROPERTIES; - break; - case PI_DEVICE_INFO_EXECUTION_CAPABILITIES: - InfoType = UR_DEVICE_INFO_EXECUTION_CAPABILITIES; - break; - case PI_DEVICE_INFO_ENDIAN_LITTLE: - InfoType = UR_DEVICE_INFO_ENDIAN_LITTLE; - break; - case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: - InfoType = UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT; - break; - case PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: - InfoType = UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION; - break; - case PI_DEVICE_INFO_LOCAL_MEM_TYPE: - InfoType = UR_DEVICE_INFO_LOCAL_MEM_TYPE; - break; - case PI_DEVICE_INFO_MAX_CONSTANT_ARGS: - InfoType = UR_DEVICE_INFO_MAX_CONSTANT_ARGS; - break; - case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: - InfoType = UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE; - break; - case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE; - break; - case PI_DEVICE_INFO_MAX_PARAMETER_SIZE: - InfoType = UR_DEVICE_INFO_MAX_PARAMETER_SIZE; - break; - case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: - InfoType = UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN; - break; - case PI_DEVICE_INFO_MAX_SAMPLERS: - InfoType = UR_DEVICE_INFO_MAX_SAMPLERS; - break; - case PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS: - InfoType = UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS; - break; - case PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: - InfoType = UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS; - break; - case PI_DEVICE_INFO_SINGLE_FP_CONFIG: - 
InfoType = UR_DEVICE_INFO_SINGLE_FP_CONFIG; - break; - case PI_DEVICE_INFO_HALF_FP_CONFIG: - InfoType = UR_DEVICE_INFO_HALF_FP_CONFIG; - break; - case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: - InfoType = UR_DEVICE_INFO_DOUBLE_FP_CONFIG; - break; - case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: - InfoType = UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH; - break; - case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: - InfoType = UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT; - break; - case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: - InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH; - break; - case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: - InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT; - break; - case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: - InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH; - break; - case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: - InfoType = UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT; - break; - case 
PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE; - break; - case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: - InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF; - break; - case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: - InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF; - break; - case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: - InfoType = UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS; - break; - case PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: - InfoType = UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; - break; - case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: - InfoType = UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL; - break; - case PI_DEVICE_INFO_IL_VERSION: - InfoType = UR_DEVICE_INFO_IL_VERSION; - break; - case PI_DEVICE_INFO_USM_HOST_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_HOST_SUPPORT; - break; - case PI_DEVICE_INFO_USM_DEVICE_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_DEVICE_SUPPORT; - break; - case PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT; - break; - case PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT; - break; - case PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: - InfoType = UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT; - break; - case PI_DEVICE_INFO_PCI_ADDRESS: - InfoType = UR_DEVICE_INFO_PCI_ADDRESS; - break; - case PI_DEVICE_INFO_GPU_EU_COUNT: - InfoType = UR_DEVICE_INFO_GPU_EU_COUNT; - break; - case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: - InfoType = UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH; - break; - case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: - InfoType = UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE; - break; - case PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION: - InfoType = UR_DEVICE_INFO_IP_VERSION; - break; - case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: - InfoType = UR_DEVICE_INFO_BUILD_ON_SUBDEVICE; - break; - case 
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: - InfoType = UR_DEVICE_INFO_MAX_WORK_GROUPS_3D; - break; - case PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: - InfoType = UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE; - break; - case PI_DEVICE_INFO_DEVICE_ID: - InfoType = UR_DEVICE_INFO_DEVICE_ID; - break; - case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: - InfoType = UR_DEVICE_INFO_GLOBAL_MEM_FREE; - break; - case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: - InfoType = UR_DEVICE_INFO_MEMORY_CLOCK_RATE; - break; - case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: - InfoType = UR_DEVICE_INFO_MEMORY_BUS_WIDTH; - break; - case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: - InfoType = UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES; - break; - case PI_DEVICE_INFO_GPU_SLICES: - InfoType = UR_DEVICE_INFO_GPU_EU_SLICES; - break; - case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: - InfoType = UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE; - break; - case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - InfoType = UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU; - break; - case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: - InfoType = UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH; - break; - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: - InfoType = UR_DEVICE_INFO_BFLOAT16; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES; - break; - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: - InfoType = UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES; - break; - case PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT: - InfoType = UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT; - break; - case PI_DEVICE_INFO_IMAGE_SRGB: - InfoType = UR_DEVICE_INFO_IMAGE_SRGB; - break; - case PI_DEVICE_INFO_BACKEND_VERSION: { - InfoType = 
UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION; - break; - } - case PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: { - InfoType = UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP; - break; - } - default: - return PI_ERROR_UNKNOWN; - }; - - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - size_t UrParamValueSizeRet; - auto UrDevice = reinterpret_cast(Device); - - HANDLE_ERRORS(urDeviceGetInfo(UrDevice, InfoType, ParamValueSize, ParamValue, - &UrParamValueSizeRet)); - - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - ur2piDeviceInfoValue(InfoType, ParamValueSize, &ParamValueSize, ParamValue); - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - - return PI_SUCCESS; -} - -inline pi_result piextDeviceGetNativeHandle(pi_device Device, - pi_native_handle *NativeHandle) { - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - auto UrDevice = reinterpret_cast(Device); - - ur_native_handle_t UrNativeHandle{}; - HANDLE_ERRORS(urDeviceGetNativeHandle(UrDevice, &UrNativeHandle)); - *NativeHandle = reinterpret_cast(UrNativeHandle); - return PI_SUCCESS; -} - -inline pi_result -piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_platform Platform, pi_device *Device) { - - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_native_handle_t UrNativeDevice = - reinterpret_cast(NativeHandle); - ur_platform_handle_t UrPlatform = - reinterpret_cast(Platform); - auto UrDevice = reinterpret_cast(Device); - ur_device_native_properties_t UrProperties{}; - HANDLE_ERRORS(urDeviceCreateWithNativeHandle(UrNativeDevice, UrPlatform, - &UrProperties, UrDevice)); - - return PI_SUCCESS; -} - -inline pi_result piDevicePartition( - pi_device Device, const pi_device_partition_property *Properties, - pi_uint32 NumEntries, pi_device *SubDevices, pi_uint32 *NumSubDevices) { - - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - if 
(!Properties || !Properties[0]) - return PI_ERROR_INVALID_VALUE; - - ur_device_partition_t Property; - switch (Properties[0]) { - case PI_DEVICE_PARTITION_EQUALLY: - Property = UR_DEVICE_PARTITION_EQUALLY; - break; - case PI_DEVICE_PARTITION_BY_COUNTS: - Property = UR_DEVICE_PARTITION_BY_COUNTS; - break; - case PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: - Property = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - break; - case PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE: - Property = UR_DEVICE_PARTITION_BY_CSLICE; - break; - default: - return PI_ERROR_UNKNOWN; - } - - // Some partitioning types require a value - auto Value = uint32_t(Properties[1]); - if (Property == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - switch (Properties[1]) { - case PI_DEVICE_AFFINITY_DOMAIN_NUMA: - Value = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - break; - case PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE: - Value = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; - break; - default: - return PI_ERROR_UNKNOWN; - } - } - - // Translate partitioning properties from PI-way - // (array of uintptr_t values) to UR-way - // (array of {uint32_t, uint32_t} pairs) - // - // TODO: correctly terminate the UR properties, see: - // https://github.com/oneapi-src/unified-runtime/issues/183 - // - ur_device_partition_property_t UrProperty; - UrProperty.type = Property; - UrProperty.value.equally = Value; - - ur_device_partition_properties_t UrProperties{ - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, - nullptr, - &UrProperty, - 1, - }; - - auto UrDevice = reinterpret_cast(Device); - auto UrSubDevices = reinterpret_cast(SubDevices); - HANDLE_ERRORS(urDevicePartition(UrDevice, &UrProperties, NumEntries, - UrSubDevices, NumSubDevices)); - return PI_SUCCESS; -} - -inline pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - auto UrDevice = reinterpret_cast(Device); - HANDLE_ERRORS(urDeviceGetGlobalTimestamps(UrDevice, DeviceTime, HostTime)); - return PI_SUCCESS; -} - -inline 
pi_result -piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? - pi_device_binary *Binaries, pi_uint32 NumBinaries, - pi_uint32 *SelectedBinaryInd) { - - auto UrDevice = reinterpret_cast(Device); - std::vector UrBinaries(NumBinaries); - - for (uint32_t BinaryCount = 0; BinaryCount < NumBinaries; BinaryCount++) { - if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_UNKNOWN) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_UNKNOWN; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV32) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV32; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_NVPTX64; - else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN) == 0) - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_AMDGCN; - else - UrBinaries[BinaryCount].pDeviceTargetSpec = - UR_DEVICE_BINARY_TARGET_UNKNOWN; - } - - 
HANDLE_ERRORS(urDeviceSelectBinary(UrDevice, UrBinaries.data(), NumBinaries, - SelectedBinaryInd)); - return PI_SUCCESS; -} - -// Device -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Context -inline pi_result piContextCreate(const pi_context_properties *Properties, - pi_uint32 NumDevices, const pi_device *Devices, - void (*PFnNotify)(const char *ErrInfo, - const void *PrivateInfo, - size_t CB, void *UserData), - void *UserData, pi_context *RetContext) { - std::ignore = Properties; - std::ignore = PFnNotify; - std::ignore = UserData; - auto UrDevices = reinterpret_cast(Devices); - - ur_context_handle_t *UrContext = - reinterpret_cast(RetContext); - // TODO: Parse PI Context Properties into UR - ur_context_properties_t UrProperties{}; - HANDLE_ERRORS( - urContextCreate(NumDevices, UrDevices, &UrProperties, UrContext)); - return PI_SUCCESS; -} - -inline pi_result piextContextSetExtendedDeleter( - pi_context Context, pi_context_extended_deleter Function, void *UserData) { - auto hContext = reinterpret_cast(Context); - - HANDLE_ERRORS(urContextSetExtendedDeleter(hContext, Function, UserData)); - - return PI_SUCCESS; -} - -inline pi_result piextContextGetNativeHandle(pi_context Context, - pi_native_handle *NativeHandle) { - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_native_handle_t UrNativeHandle{}; - HANDLE_ERRORS(urContextGetNativeHandle(UrContext, &UrNativeHandle)); - *NativeHandle = reinterpret_cast(UrNativeHandle); - return PI_SUCCESS; -} - -inline pi_result piextContextCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_uint32 NumDevices, - const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) { - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Devices, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(RetContext, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NumDevices, PI_ERROR_INVALID_VALUE); - - 
ur_native_handle_t NativeContext = - reinterpret_cast(NativeHandle); - const ur_device_handle_t *UrDevices = - reinterpret_cast(Devices); - ur_context_handle_t *UrContext = - reinterpret_cast(RetContext); - - ur_context_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urContextCreateWithNativeHandle( - NativeContext, NumDevices, UrDevices, &Properties, UrContext)); - - return PI_SUCCESS; -} - -inline pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_context_handle_t hContext = reinterpret_cast(Context); - ur_context_info_t ContextInfoType{}; - - switch (ParamName) { - case PI_CONTEXT_INFO_DEVICES: { - ContextInfoType = UR_CONTEXT_INFO_DEVICES; - break; - } - case PI_CONTEXT_INFO_NUM_DEVICES: { - ContextInfoType = UR_CONTEXT_INFO_NUM_DEVICES; - break; - } - case PI_CONTEXT_INFO_REFERENCE_COUNT: { - ContextInfoType = UR_CONTEXT_INFO_REFERENCE_COUNT; - break; - } - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: { - ContextInfoType = UR_CONTEXT_INFO_USM_FILL2D_SUPPORT; - break; - } - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: { - ContextInfoType = UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT; - break; - } - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // These queries should be dealt with in context_impl.cpp by calling the - // queries of each device separately and building the intersection set. 
- die("These queries should have never come here"); - } - default: { - die("piContextGetInfo: unsuppported ParamName."); - } - } - - size_t UrParamValueSizeRet; - HANDLE_ERRORS(urContextGetInfo(hContext, ContextInfoType, ParamValueSize, - ParamValue, &UrParamValueSizeRet)); - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - return PI_SUCCESS; -} - -inline pi_result piContextRetain(pi_context Context) { - ur_context_handle_t hContext = reinterpret_cast(Context); - - HANDLE_ERRORS(urContextRetain(hContext)); - - return PI_SUCCESS; -} - -inline pi_result piContextRelease(pi_context Context) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - HANDLE_ERRORS(urContextRelease(UrContext)); - return PI_SUCCESS; -} -// Context -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Queue -inline pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, - pi_queue *Queue) { - - PI_ASSERT(Properties, PI_ERROR_INVALID_VALUE); - // Expect flags mask to be passed first. - PI_ASSERT(Properties[0] == PI_QUEUE_FLAGS, PI_ERROR_INVALID_VALUE); - - PI_ASSERT(Properties[2] == 0 || - (Properties[2] == PI_QUEUE_COMPUTE_INDEX && Properties[4] == 0), - PI_ERROR_INVALID_VALUE); - - // Check that unexpected bits are not set. 
- PI_ASSERT(!(Properties[1] & - ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | - PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | - PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH)), - PI_ERROR_INVALID_VALUE); - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_queue_properties_t UrProperties{}; - UrProperties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; - if (Properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_PROFILING_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE_DEFAULT) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) - UrProperties.flags |= UR_QUEUE_FLAG_DISCARD_EVENTS; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; - if (Properties[1] & __SYCL_PI_CUDA_SYNC_WITH_DEFAULT) - UrProperties.flags |= UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; - if (Properties[1] & __SYCL_PI_CUDA_USE_DEFAULT_STREAM) - UrProperties.flags |= UR_QUEUE_FLAG_USE_DEFAULT_STREAM; - - ur_queue_index_properties_t IndexProperties{}; - IndexProperties.stype = UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES; - if (Properties[2] != 0) { - IndexProperties.computeIndex = Properties[3]; - } - - UrProperties.pNext = &IndexProperties; - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_queue_handle_t *UrQueue = 
reinterpret_cast(Queue); - HANDLE_ERRORS(urQueueCreate(UrContext, UrDevice, &UrProperties, UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Flags, pi_queue *Queue) { - pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; - return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); -} - -inline pi_result piextQueueCreateWithNativeHandle( - pi_native_handle NativeHandle, int32_t NativeHandleDesc, pi_context Context, - pi_device Device, bool OwnNativeHandle, pi_queue_properties *Properties, - pi_queue *Queue) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_device_handle_t UrDevice = reinterpret_cast(Device); - ur_native_handle_t UrNativeHandle = - reinterpret_cast(NativeHandle); - ur_queue_handle_t *UrQueue = reinterpret_cast(Queue); - ur_queue_native_properties_t UrNativeProperties{}; - UrNativeProperties.isNativeHandleOwned = OwnNativeHandle; - - ur_queue_properties_t UrProperties{}; - UrProperties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; - if (Properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_PROFILING_ENABLE) - UrProperties.flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE; - if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE_DEFAULT) - UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) - UrProperties.flags |= UR_QUEUE_FLAG_DISCARD_EVENTS; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; - if (Properties[1] & 
PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) - UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; - - ur_queue_native_desc_t UrNativeDesc{}; - UrNativeDesc.stype = UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC; - UrNativeDesc.pNativeData = &NativeHandleDesc; - - UrProperties.pNext = &UrNativeDesc; - UrNativeProperties.pNext = &UrProperties; - - HANDLE_ERRORS(urQueueCreateWithNativeHandle( - UrNativeHandle, UrContext, UrDevice, &UrNativeProperties, UrQueue)); - return PI_SUCCESS; -} - -inline pi_result piextQueueGetNativeHandle(pi_queue Queue, - pi_native_handle *NativeHandle, - int32_t *NativeHandleDesc) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_queue_native_desc_t UrNativeDesc{}; - UrNativeDesc.pNativeData = NativeHandleDesc; - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_native_handle_t UrNativeQueue{}; - HANDLE_ERRORS(urQueueGetNativeHandle(UrQueue, &UrNativeDesc, &UrNativeQueue)); - - *NativeHandle = reinterpret_cast(UrNativeQueue); - - return PI_SUCCESS; -} - -inline pi_result piQueueRelease(pi_queue Queue) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueRelease(UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueFinish(pi_queue Queue) { - // Wait until command lists attached to the command queue are executed. 
- PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueFinish(UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_queue_info_t UrParamName{}; - - switch (ParamName) { - case PI_QUEUE_INFO_CONTEXT: { - UrParamName = UR_QUEUE_INFO_CONTEXT; - break; - } - case PI_QUEUE_INFO_DEVICE: { - UrParamName = UR_QUEUE_INFO_DEVICE; - break; - } - case PI_QUEUE_INFO_DEVICE_DEFAULT: { - UrParamName = UR_QUEUE_INFO_DEVICE_DEFAULT; - break; - } - case PI_QUEUE_INFO_PROPERTIES: { - UrParamName = UR_QUEUE_INFO_FLAGS; - break; - } - case PI_QUEUE_INFO_REFERENCE_COUNT: { - UrParamName = UR_QUEUE_INFO_REFERENCE_COUNT; - break; - } - case PI_QUEUE_INFO_SIZE: { - UrParamName = UR_QUEUE_INFO_SIZE; - break; - } - case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: { - UrParamName = UR_QUEUE_INFO_EMPTY; - break; - } - default: { - die("Unsupported ParamName in piQueueGetInfo"); - return PI_ERROR_INVALID_VALUE; - } - } - - HANDLE_ERRORS(urQueueGetInfo(UrQueue, UrParamName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piQueueRetain(pi_queue Queue) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueRetain(UrQueue)); - - return PI_SUCCESS; -} - -inline pi_result piQueueFlush(pi_queue Queue) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - HANDLE_ERRORS(urQueueFlush(UrQueue)); - - return PI_SUCCESS; -} - -// Queue -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Program - -inline pi_result 
piProgramCreate(pi_context Context, const void *ILBytes, - size_t Length, pi_program *Program) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(ILBytes && Length, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_program_properties_t UrProperties{}; - ur_program_handle_t *UrProgram = - reinterpret_cast(Program); - HANDLE_ERRORS(urProgramCreateWithIL(UrContext, ILBytes, Length, &UrProperties, - UrProgram)); - - return PI_SUCCESS; -} - -inline pi_result piProgramCreateWithBinary( - pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, - const size_t *Lengths, const unsigned char **Binaries, - size_t NumMetadataEntries, const pi_device_binary_property *Metadata, - pi_int32 *BinaryStatus, pi_program *Program) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(DeviceList && NumDevices, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Binaries && Lengths, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - // For now we support only one device. 
- if (NumDevices != 1) { - die("piProgramCreateWithBinary: level_zero supports only one device."); - return PI_ERROR_INVALID_VALUE; - } - if (!Binaries[0] || !Lengths[0]) { - if (BinaryStatus) - *BinaryStatus = PI_ERROR_INVALID_VALUE; - return PI_ERROR_INVALID_VALUE; - } - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(DeviceList[0]); - - ur_program_properties_t Properties = {}; - Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; - Properties.pNext = nullptr; - Properties.count = NumMetadataEntries; - - std::unique_ptr pMetadatas; - if (NumMetadataEntries) { - pMetadatas.reset(new ur_program_metadata_t[NumMetadataEntries]); - for (unsigned i = 0; i < NumMetadataEntries; i++) { - HANDLE_ERRORS(mapPIMetadataToUR(&Metadata[i], &pMetadatas[i])); - } - - Properties.pMetadatas = pMetadatas.get(); - } - - ur_program_handle_t *UrProgram = - reinterpret_cast(Program); - HANDLE_ERRORS(urProgramCreateWithBinary(UrContext, UrDevice, Lengths[0], - Binaries[0], &Properties, UrProgram)); - - if (BinaryStatus) - *BinaryStatus = PI_SUCCESS; - - return PI_SUCCESS; -} - -inline pi_result piclProgramCreateWithSource(pi_context Context, - pi_uint32 Count, - const char **Strings, - const size_t *Lengths, - pi_program *RetProgram) { - std::ignore = Context; - std::ignore = Count; - std::ignore = Strings; - std::ignore = Lengths; - std::ignore = RetProgram; - die("piclProgramCreateWithSource: not supported in UR\n"); - return PI_ERROR_INVALID_OPERATION; -} - -inline pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - ur_program_info_t PropName{}; - - switch (ParamName) { - case PI_PROGRAM_INFO_REFERENCE_COUNT: { - PropName = UR_PROGRAM_INFO_REFERENCE_COUNT; - break; - } - case PI_PROGRAM_INFO_CONTEXT: { - PropName = 
UR_PROGRAM_INFO_CONTEXT; - break; - } - case PI_PROGRAM_INFO_NUM_DEVICES: { - PropName = UR_PROGRAM_INFO_NUM_DEVICES; - break; - } - case PI_PROGRAM_INFO_DEVICES: { - PropName = UR_PROGRAM_INFO_DEVICES; - break; - } - case PI_PROGRAM_INFO_SOURCE: { - PropName = UR_PROGRAM_INFO_SOURCE; - break; - } - case PI_PROGRAM_INFO_BINARY_SIZES: { - PropName = UR_PROGRAM_INFO_BINARY_SIZES; - break; - } - case PI_PROGRAM_INFO_BINARIES: { - PropName = UR_PROGRAM_INFO_BINARIES; - break; - } - case PI_PROGRAM_INFO_NUM_KERNELS: { - PropName = UR_PROGRAM_INFO_NUM_KERNELS; - break; - } - case PI_PROGRAM_INFO_KERNEL_NAMES: { - PropName = UR_PROGRAM_INFO_KERNEL_NAMES; - break; - } - default: { - die("urProgramGetInfo: not implemented"); - } - } - - HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result -piProgramLink(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - pi_uint32 NumInputPrograms, const pi_program *InputPrograms, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData, pi_program *RetProgram) { - // We only support one device with Level Zero currently. - if (NumDevices != 1) { - die("piProgramLink: level_zero supports only one device."); - return PI_ERROR_INVALID_VALUE; - } - - // Validate input parameters. 
- PI_ASSERT(DeviceList, PI_ERROR_INVALID_DEVICE); - PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); - if (NumInputPrograms == 0 || InputPrograms == nullptr) - return PI_ERROR_INVALID_VALUE; - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - const ur_program_handle_t *UrInputPrograms = - reinterpret_cast(InputPrograms); - ur_program_handle_t *UrProgram = - reinterpret_cast(RetProgram); - - HANDLE_ERRORS(urProgramLink(UrContext, NumInputPrograms, UrInputPrograms, - Options, UrProgram)); - - return PI_SUCCESS; -} - -inline pi_result piProgramCompile( - pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, - const char *Options, pi_uint32 NumInputHeaders, - const pi_program *InputHeaders, const char **HeaderIncludeNames, - void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { - - std::ignore = NumInputHeaders; - std::ignore = InputHeaders; - std::ignore = HeaderIncludeNames; - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) - return PI_ERROR_INVALID_VALUE; - - // These aren't supported. - PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - ur_program_info_t PropName = UR_PROGRAM_INFO_CONTEXT; - ur_context_handle_t UrContext{}; - HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, sizeof(&UrContext), - &UrContext, nullptr)); - - HANDLE_ERRORS(urProgramCompile(UrContext, UrProgram, Options)); - - return PI_SUCCESS; -} - -inline pi_result -piProgramBuild(pi_program Program, pi_uint32 NumDevices, - const pi_device *DeviceList, const char *Options, - void (*PFnNotify)(pi_program Program, void *UserData), - void *UserData) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) { - return PI_ERROR_INVALID_VALUE; - } - - // We only support build to one device with Level Zero now. 
- // TODO: we should eventually build to the possibly multiple root - // devices in the context. - if (NumDevices != 1) { - die("piProgramBuild: level_zero supports only one device."); - return PI_ERROR_INVALID_VALUE; - } - - // These aren't supported. - PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_program_info_t PropName = UR_PROGRAM_INFO_CONTEXT; - ur_context_handle_t UrContext{}; - HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, sizeof(&UrContext), - &UrContext, nullptr)); - - HANDLE_ERRORS(urProgramBuild(UrContext, UrProgram, Options)); - - return PI_SUCCESS; -} - -inline pi_result piextProgramSetSpecializationConstant(pi_program Program, - pi_uint32 SpecID, - size_t Size, - const void *SpecValue) { - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - uint32_t Count = 1; - ur_specialization_constant_info_t SpecConstant{}; - SpecConstant.id = SpecID; - SpecConstant.size = Size; - SpecConstant.pValue = SpecValue; - HANDLE_ERRORS( - urProgramSetSpecializationConstants(UrProgram, Count, &SpecConstant)); - - return PI_SUCCESS; -} - -inline pi_result piKernelCreate(pi_program Program, const char *KernelName, - pi_kernel *RetKernel) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(RetKernel, PI_ERROR_INVALID_VALUE); - PI_ASSERT(KernelName, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_kernel_handle_t *UrKernel = - reinterpret_cast(RetKernel); - - HANDLE_ERRORS(urKernelCreate(UrProgram, KernelName, UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, const void *FillColor, - const size_t *Origin, const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - - std::ignore = Image; - std::ignore = FillColor; - std::ignore = Origin; - std::ignore = Region; - std::ignore = NumEventsInWaitList; - std::ignore = 
EventsWaitList; - std::ignore = Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piEnqueueMemImageFill: not implemented"); - return PI_SUCCESS; -} - -inline pi_result -piEnqueueNativeKernel(pi_queue Queue, void (*UserFunc)(void *), void *Args, - size_t CbArgs, pi_uint32 NumMemObjects, - const pi_mem *MemList, const void **ArgsMemLoc, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *Event) { - std::ignore = UserFunc; - std::ignore = Args; - std::ignore = CbArgs; - std::ignore = NumMemObjects; - std::ignore = MemList; - std::ignore = ArgsMemLoc; - std::ignore = NumEventsInWaitList; - std::ignore = EventsWaitList; - std::ignore = Event; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - die("piEnqueueNativeKernel: not implemented"); - return PI_SUCCESS; -} - -inline pi_result piextGetDeviceFunctionPointer(pi_device Device, - pi_program Program, - const char *FunctionName, - pi_uint64 *FunctionPointerRet) { - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - auto UrDevice = reinterpret_cast(Device); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - void **FunctionPointer = reinterpret_cast(FunctionPointerRet); - - HANDLE_ERRORS(urProgramGetFunctionPointer(UrDevice, UrProgram, FunctionName, - FunctionPointer)); - return PI_SUCCESS; -} - -// Special version of piKernelSetArg to accept pi_mem. -inline pi_result -piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem_obj_property *ArgProperties, - const pi_mem *ArgValue) { - - // TODO: the better way would probably be to add a new PI API for - // extracting native PI object from PI handle, and have SYCL - // RT pass that directly to the regular piKernelSetArg (and - // then remove this piextKernelSetArgMemObj). - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_mem_handle_t UrMemory{}; - if (ArgValue) - UrMemory = reinterpret_cast(*ArgValue); - - // We don't yet know the device where this kernel will next be run on. 
- // Thus we can't know the actual memory allocation that needs to be used. - // Remember the memory object being used as an argument for this kernel - // to process it later when the device is known (at the kernel enqueue). - // - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - // the only applicable type, just ignore anything else - if (ArgProperties && ArgProperties->type == PI_KERNEL_ARG_MEM_OBJ_ACCESS) { - // following structure layout checks to be replaced with - // std::is_layout_compatible after move to C++20 - static_assert(sizeof(pi_mem_obj_property) == - sizeof(ur_kernel_arg_mem_obj_properties_t)); - static_assert(sizeof(pi_mem_obj_property::type) == - sizeof(ur_kernel_arg_mem_obj_properties_t::stype)); - static_assert(sizeof(pi_mem_obj_property::pNext) == - sizeof(ur_kernel_arg_mem_obj_properties_t::pNext)); - static_assert(sizeof(pi_mem_obj_property::mem_access) == - sizeof(ur_kernel_arg_mem_obj_properties_t::memoryAccess)); - - static_assert(uint32_t(PI_ACCESS_READ_WRITE) == - uint32_t(UR_MEM_FLAG_READ_WRITE)); - static_assert(uint32_t(PI_ACCESS_READ_ONLY) == - uint32_t(UR_MEM_FLAG_READ_ONLY)); - static_assert(uint32_t(PI_ACCESS_WRITE_ONLY) == - uint32_t(UR_MEM_FLAG_WRITE_ONLY)); - static_assert(uint32_t(PI_KERNEL_ARG_MEM_OBJ_ACCESS) == - uint32_t(UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES)); - - const ur_kernel_arg_mem_obj_properties_t *UrMemProperties = - reinterpret_cast( - ArgProperties); - HANDLE_ERRORS( - urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemProperties, UrMemory)); - } else { - HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, nullptr, UrMemory)); - } - - return PI_SUCCESS; -} - -inline pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS( - urKernelSetArgValue(UrKernel, ArgIndex, ArgSize, nullptr, ArgValue)); - return PI_SUCCESS; -} - 
-inline pi_result piKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, const void *ArgValue) { - std::ignore = ArgSize; - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - HANDLE_ERRORS(urKernelSetArgPointer(UrKernel, ArgIndex, nullptr, ArgValue)); - - return PI_SUCCESS; -} - -inline pi_result -piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, pi_program Program, - bool OwnNativeHandle, pi_kernel *Kernel) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_native_handle_t UrNativeKernel = - reinterpret_cast(NativeHandle); - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_kernel_handle_t *UrKernel = reinterpret_cast(Kernel); - ur_kernel_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urKernelCreateWithNativeHandle( - UrNativeKernel, UrContext, UrProgram, &Properties, UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result piProgramRetain(pi_program Program) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - HANDLE_ERRORS( - urProgramRetain(reinterpret_cast(UrProgram))); - - return PI_SUCCESS; -} - -inline pi_result piKernelSetExecInfo(pi_kernel Kernel, - pi_kernel_exec_info ParamName, - size_t ParamValueSize, - const void *ParamValue) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(ParamValue, PI_ERROR_INVALID_VALUE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_kernel_exec_info_t PropName{}; - uint64_t PropValue{}; - switch (ParamName) { - case PI_USM_INDIRECT_ACCESS: { - PropName = UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS; - PropValue = *(static_cast(const_cast(ParamValue))); - break; - } - case PI_USM_PTRS: { - 
PropName = UR_KERNEL_EXEC_INFO_USM_PTRS; - break; - } - case PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG: { - PropName = UR_KERNEL_EXEC_INFO_CACHE_CONFIG; - auto Param = (*(static_cast(ParamValue))); - if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM) { - PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_LARGE_SLM); - } else if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA) { - PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_LARGE_DATA); - break; - } else if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT) { - PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_DEFAULT); - } else { - die("piKernelSetExecInfo: unsupported ParamValue\n"); - } - break; - } - default: - die("piKernelSetExecInfo: unsupported ParamName\n"); - } - HANDLE_ERRORS(urKernelSetExecInfo(UrKernel, PropName, ParamValueSize, nullptr, - &PropValue)); - - return PI_SUCCESS; -} - -inline pi_result piextProgramGetNativeHandle(pi_program Program, - pi_native_handle *NativeHandle) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - ur_native_handle_t NativeProgram{}; - HANDLE_ERRORS(urProgramGetNativeHandle(UrProgram, &NativeProgram)); - - *NativeHandle = reinterpret_cast(NativeProgram); - - return PI_SUCCESS; -} - -inline pi_result -piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, bool OwnNativeHandle, - pi_program *Program) { - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_native_handle_t NativeProgram = - reinterpret_cast(NativeHandle); - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_program_handle_t *UrProgram = - reinterpret_cast(Program); - ur_program_native_properties_t UrProperties{}; - UrProperties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urProgramCreateWithNativeHandle(NativeProgram, UrContext, - &UrProperties, 
UrProgram)); - return PI_SUCCESS; -} - -inline pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_kernel_info_t UrParamName{}; - switch (ParamName) { - case PI_KERNEL_INFO_FUNCTION_NAME: { - UrParamName = UR_KERNEL_INFO_FUNCTION_NAME; - break; - } - case PI_KERNEL_INFO_NUM_ARGS: { - UrParamName = UR_KERNEL_INFO_NUM_ARGS; - break; - } - case PI_KERNEL_INFO_REFERENCE_COUNT: { - UrParamName = UR_KERNEL_INFO_REFERENCE_COUNT; - break; - } - case PI_KERNEL_INFO_CONTEXT: { - UrParamName = UR_KERNEL_INFO_CONTEXT; - break; - } - case PI_KERNEL_INFO_PROGRAM: { - UrParamName = UR_KERNEL_INFO_PROGRAM; - break; - } - case PI_KERNEL_INFO_ATTRIBUTES: { - UrParamName = UR_KERNEL_INFO_ATTRIBUTES; - break; - } - default: - return PI_ERROR_INVALID_PROPERTY; - } - - HANDLE_ERRORS(urKernelGetInfo(UrKernel, UrParamName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, - pi_kernel_group_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - auto UrDevice = reinterpret_cast(Device); - - ur_kernel_group_info_t UrParamName{}; - switch (ParamName) { - case PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE; - break; - } - case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE; - 
break; - } - case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - UrParamName = UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; - break; - } - case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { - UrParamName = UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE; - break; - } - // The number of registers used by the compiled kernel (device specific) - case PI_KERNEL_GROUP_INFO_NUM_REGS: { - HANDLE_ERRORS(urKernelGetInfo(UrKernel, UR_KERNEL_INFO_NUM_REGS, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; - } - default: { - die("Unknown ParamName in piKernelGetGroupInfo"); - return PI_ERROR_INVALID_VALUE; - } - } - - HANDLE_ERRORS(urKernelGetGroupInfo(UrKernel, UrDevice, UrParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piKernelRetain(pi_kernel Kernel) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS(urKernelRetain(UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result piKernelRelease(pi_kernel Kernel) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS(urKernelRelease(UrKernel)); - - return PI_SUCCESS; -} - -inline pi_result piProgramRelease(pi_program Program) { - - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - - HANDLE_ERRORS(urProgramRelease(UrProgram)); - - return PI_SUCCESS; -} - -inline pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, - size_t ArgSize, - const void *ArgValue) { - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS( - urKernelSetArgValue(UrKernel, ArgIndex, ArgSize, nullptr, ArgValue)); - - return PI_SUCCESS; -} - -inline pi_result piKernelGetSubGroupInfo( - pi_kernel Kernel, pi_device Device, pi_kernel_sub_group_info ParamName, - size_t InputValueSize, const void *InputValue, size_t 
ParamValueSize, - void *ParamValue, size_t *ParamValueSizeRet) { - - std::ignore = InputValueSize; - std::ignore = InputValue; - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - auto UrDevice = reinterpret_cast(Device); - - ur_kernel_sub_group_info_t PropName{}; - switch (ParamName) { - case PI_KERNEL_MAX_SUB_GROUP_SIZE: { - PropName = UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE; - break; - } - case PI_KERNEL_MAX_NUM_SUB_GROUPS: { - PropName = UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS; - break; - } - case PI_KERNEL_COMPILE_NUM_SUB_GROUPS: { - PropName = UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS; - break; - } - case PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: { - PropName = UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL; - break; - } - } - HANDLE_ERRORS(urKernelGetSubGroupInfo(UrKernel, UrDevice, PropName, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, - pi_program_build_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - auto UrDevice = reinterpret_cast(Device); - - ur_program_build_info_t PropName{}; - switch (ParamName) { - case PI_PROGRAM_BUILD_INFO_STATUS: { - PropName = UR_PROGRAM_BUILD_INFO_STATUS; - break; - } - case PI_PROGRAM_BUILD_INFO_OPTIONS: { - PropName = UR_PROGRAM_BUILD_INFO_OPTIONS; - break; - } - case PI_PROGRAM_BUILD_INFO_LOG: { - PropName = UR_PROGRAM_BUILD_INFO_LOG; - break; - } - case PI_PROGRAM_BUILD_INFO_BINARY_TYPE: { - PropName = UR_PROGRAM_BUILD_INFO_BINARY_TYPE; - break; - } - default: { - die("piProgramGetBuildInfo: not implemented"); - } - } - HANDLE_ERRORS(urProgramGetBuildInfo(UrProgram, UrDevice, PropName, - ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piextKernelGetNativeHandle(pi_kernel Kernel, - pi_native_handle *NativeHandle) { - PI_ASSERT(Kernel, 
PI_ERROR_INVALID_KERNEL); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_native_handle_t NativeKernel{}; - HANDLE_ERRORS(urKernelGetNativeHandle(UrKernel, &NativeKernel)); - - *NativeHandle = reinterpret_cast(NativeKernel); - - return PI_SUCCESS; -} - -/// API for writing data from host to a device global variable. -/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingWrite is true if the write should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Src is a pointer to where the data must be copied from -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -inline pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, - size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - HANDLE_ERRORS(urEnqueueDeviceGlobalVariableWrite( - UrQueue, UrProgram, Name, BlockingWrite, Count, Offset, Src, - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -/// API reading data from a device global variable to host. 
-/// -/// \param Queue is the queue -/// \param Program is the program containing the device global variable -/// \param Name is the unique identifier for the device global variable -/// \param BlockingRead is true if the read should block -/// \param Count is the number of bytes to copy -/// \param Offset is the byte offset into the device global variable to start -/// copying -/// \param Dst is a pointer to where the data must be copied to -/// \param NumEventsInWaitList is a number of events in the wait list -/// \param EventWaitList is the wait list -/// \param Event is the resulting event -inline pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, - size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_program_handle_t UrProgram = - reinterpret_cast(Program); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueDeviceGlobalVariableRead( - UrQueue, UrProgram, Name, BlockingRead, Count, Offset, Dst, - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -// Program -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Memory -inline pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, - size_t Size, void *HostPtr, pi_mem *RetMem, - const pi_mem_properties *properties) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetMem, PI_ERROR_INVALID_VALUE); - - if (properties != nullptr) { - die("piMemBufferCreate: no mem properties goes to Level-Zero RT yet"); - } - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - 
ur_mem_flags_t UrBufferFlags{}; - if (Flags & PI_MEM_FLAGS_ACCESS_RW) { - UrBufferFlags |= UR_MEM_FLAG_READ_WRITE; - } - if (Flags & PI_MEM_ACCESS_READ_ONLY) { - UrBufferFlags |= UR_MEM_FLAG_READ_ONLY; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { - UrBufferFlags |= UR_MEM_FLAG_USE_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { - UrBufferFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { - UrBufferFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - } - - ur_buffer_properties_t UrProps{}; - UrProps.stype = UR_STRUCTURE_TYPE_BUFFER_PROPERTIES; - UrProps.pHost = HostPtr; - ur_mem_handle_t *UrBuffer = reinterpret_cast(RetMem); - HANDLE_ERRORS( - urMemBufferCreate(UrContext, UrBufferFlags, Size, &UrProps, UrBuffer)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - - std::ignore = Properties; - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_usm_desc_t USMDesc{}; - USMDesc.align = Alignment; - ur_usm_pool_handle_t Pool{}; - HANDLE_ERRORS(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, ResultPtr)); - return PI_SUCCESS; -} - -inline pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - PI_ASSERT(Mem, PI_ERROR_INVALID_VALUE); - // piMemImageGetInfo must be used for images - - ur_mem_handle_t UrMemory = reinterpret_cast(Mem); - ur_mem_info_t MemInfoType{}; - switch (ParamName) { - case PI_MEM_CONTEXT: { - MemInfoType = UR_MEM_INFO_CONTEXT; - break; - } - case PI_MEM_SIZE: { - MemInfoType = UR_MEM_INFO_SIZE; - break; - } - default: { - die("piMemGetInfo: unsuppported ParamName."); - } - } - HANDLE_ERRORS(urMemGetInfo(UrMemory, MemInfoType, ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; -} - -static void pi2urImageDesc(const pi_image_format *ImageFormat, - const pi_image_desc 
*ImageDesc, - ur_image_format_t *UrFormat, - ur_image_desc_t *UrDesc) { - - switch (ImageFormat->image_channel_data_type) { - case PI_IMAGE_CHANNEL_TYPE_SNORM_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SNORM_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SNORM_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SNORM_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_INT_101010; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; - break; - } - case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; - break; - } - case PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT; - break; - } - case PI_IMAGE_CHANNEL_TYPE_FLOAT: { - UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_FLOAT; - break; - } - default: { - die("piMemImageCreate: unsuppported image_channel_data_type."); - } 
- } - switch (ImageFormat->image_channel_order) { - case PI_IMAGE_CHANNEL_ORDER_A: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_A; - break; - } - case PI_IMAGE_CHANNEL_ORDER_R: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_R; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RG: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RG; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RA; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGB: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGB; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGBA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGBA; - break; - } - case PI_IMAGE_CHANNEL_ORDER_BGRA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_BGRA; - break; - } - case PI_IMAGE_CHANNEL_ORDER_ARGB: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_ARGB; - break; - } - case PI_IMAGE_CHANNEL_ORDER_ABGR: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_ABGR; - break; - } - case PI_IMAGE_CHANNEL_ORDER_INTENSITY: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_INTENSITY; - break; - } - case PI_IMAGE_CHANNEL_ORDER_LUMINANCE: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_LUMINANCE; - break; - } - case PI_IMAGE_CHANNEL_ORDER_Rx: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RX; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGx: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGX; - break; - } - case PI_IMAGE_CHANNEL_ORDER_RGBx: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGBX; - break; - } - case PI_IMAGE_CHANNEL_ORDER_sRGBA: { - UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_SRGBA; - break; - } - default: { - die("piMemImageCreate: unsuppported image_channel_data_type."); - } - } - - UrDesc->stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc->arraySize = ImageDesc->image_array_size; - UrDesc->depth = ImageDesc->image_depth; - UrDesc->height = ImageDesc->image_height; - UrDesc->numMipLevel = ImageDesc->num_mip_levels; - UrDesc->numSamples = 
ImageDesc->num_samples; - UrDesc->rowPitch = ImageDesc->image_row_pitch; - UrDesc->slicePitch = ImageDesc->image_slice_pitch; - switch (ImageDesc->image_type) { - case PI_MEM_TYPE_BUFFER: { - UrDesc->type = UR_MEM_TYPE_BUFFER; - break; - } - case PI_MEM_TYPE_IMAGE2D: { - UrDesc->type = UR_MEM_TYPE_IMAGE2D; - break; - } - case PI_MEM_TYPE_IMAGE3D: { - UrDesc->type = UR_MEM_TYPE_IMAGE3D; - break; - } - case PI_MEM_TYPE_IMAGE2D_ARRAY: { - UrDesc->type = UR_MEM_TYPE_IMAGE2D_ARRAY; - break; - } - case PI_MEM_TYPE_IMAGE1D: { - UrDesc->type = UR_MEM_TYPE_IMAGE1D; - break; - } - case PI_MEM_TYPE_IMAGE1D_ARRAY: { - UrDesc->type = UR_MEM_TYPE_IMAGE1D_ARRAY; - break; - } - case PI_MEM_TYPE_IMAGE1D_BUFFER: { - UrDesc->type = UR_MEM_TYPE_IMAGE1D_BUFFER; - break; - } - default: { - die("piMemImageCreate: unsuppported image_type."); - } - } - UrDesc->width = ImageDesc->image_width; - UrDesc->arraySize = ImageDesc->image_array_size; - UrDesc->arraySize = ImageDesc->image_array_size; -} - -inline pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, - const pi_image_format *ImageFormat, - const pi_image_desc *ImageDesc, void *HostPtr, - pi_mem *RetImage) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetImage, PI_ERROR_INVALID_VALUE); - PI_ASSERT(ImageFormat, PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_mem_flags_t UrFlags{}; - if (Flags & PI_MEM_FLAGS_ACCESS_RW) { - UrFlags |= UR_MEM_FLAG_READ_WRITE; - } - if (Flags & PI_MEM_ACCESS_READ_ONLY) { - UrFlags |= UR_MEM_FLAG_READ_ONLY; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { - UrFlags |= UR_MEM_FLAG_USE_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { - UrFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { - UrFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - } - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); 
- - // TODO: UrDesc doesn't have something for ImageDesc->buffer - - ur_mem_handle_t *UrMem = reinterpret_cast(RetImage); - HANDLE_ERRORS( - urMemImageCreate(UrContext, UrFlags, &UrFormat, &UrDesc, HostPtr, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *RetImage) { - - PI_ASSERT(RetImage, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_native_handle_t UrNativeMem = - reinterpret_cast(NativeHandle); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_mem_handle_t *UrMem = reinterpret_cast(RetImage); - ur_mem_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - - ur_image_format_t UrFormat{}; - ur_image_desc_t UrDesc{}; - pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); - - HANDLE_ERRORS(urMemImageCreateWithNativeHandle( - UrNativeMem, UrContext, &UrFormat, &UrDesc, &Properties, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, - pi_buffer_create_type BufferCreateType, - void *BufferCreateInfo, pi_mem *RetMem) { - - PI_ASSERT(BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION && - BufferCreateInfo && RetMem, - PI_ERROR_INVALID_VALUE); - - auto Region = (pi_buffer_region)BufferCreateInfo; - PI_ASSERT(Region->size != 0u, PI_ERROR_INVALID_BUFFER_SIZE); - PI_ASSERT(Region->origin <= (Region->origin + Region->size), - PI_ERROR_INVALID_VALUE); - - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - - ur_mem_flags_t UrFlags{}; - if (Flags & PI_MEM_FLAGS_ACCESS_RW) { - UrFlags |= UR_MEM_FLAG_READ_WRITE; - } - if (Flags & PI_MEM_ACCESS_READ_ONLY) { - UrFlags |= UR_MEM_FLAG_READ_ONLY; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { - UrFlags |= UR_MEM_FLAG_USE_HOST_POINTER; 
- } - if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { - UrFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; - } - if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { - UrFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; - } - - ur_buffer_create_type_t UrBufferCreateType{}; - if (BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION) { - UrBufferCreateType = UR_BUFFER_CREATE_TYPE_REGION; - } - - ur_buffer_region_t UrBufferCreateInfo{}; - UrBufferCreateInfo.origin = Region->origin; - UrBufferCreateInfo.size = Region->size; - ur_mem_handle_t *UrMem = reinterpret_cast(RetMem); - HANDLE_ERRORS(urMemBufferPartition(UrBuffer, UrFlags, UrBufferCreateType, - &UrBufferCreateInfo, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piextMemGetNativeHandle(pi_mem Mem, - pi_native_handle *NativeHandle) { - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - ur_native_handle_t NativeMem{}; - HANDLE_ERRORS(urMemGetNativeHandle(UrMem, &NativeMem)); - - *NativeHandle = reinterpret_cast(NativeMem); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, - pi_image_offset SrcOrigin, pi_image_offset DstOrigin, - pi_image_region Region, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_mem_handle_t UrImageSrc = reinterpret_cast(SrcImage); - ur_mem_handle_t UrImageDst = reinterpret_cast(DstImage); - - ur_rect_offset_t UrSrcOrigin{SrcOrigin->x, SrcOrigin->y, SrcOrigin->z}; - ur_rect_offset_t UrDstOrigin{DstOrigin->x, DstOrigin->y, DstOrigin->z}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth; - UrRegion.height = Region->height; - UrRegion.width = Region->width; - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemImageCopy( - 
UrQueue, UrImageSrc, UrImageDst, UrSrcOrigin, UrDstOrigin, UrRegion, - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_mem *Mem) { - PI_ASSERT(Mem, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_native_handle_t UrNativeMem = - reinterpret_cast(NativeHandle); - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_mem_handle_t *UrMem = reinterpret_cast(Mem); - ur_mem_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urMemBufferCreateWithNativeHandle(UrNativeMem, UrContext, - &Properties, UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - - std::ignore = Properties; - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_usm_desc_t USMDesc{}; - USMDesc.align = Alignment; - ur_usm_pool_handle_t Pool{}; - HANDLE_ERRORS( - urUSMDeviceAlloc(UrContext, UrDevice, &USMDesc, Pool, Size, ResultPtr)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, - pi_device Device, - pi_usm_mem_properties *Properties, - size_t Size, pi_uint32 Alignment) { - - std::ignore = Properties; - if (Properties && *Properties != 0) { - PI_ASSERT(*(Properties) == PI_MEM_ALLOC_FLAGS && *(Properties + 2) == 0, - PI_ERROR_INVALID_VALUE); - } - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - auto UrDevice = reinterpret_cast(Device); - - ur_usm_desc_t USMDesc{}; - ur_usm_device_desc_t UsmDeviceDesc{}; - UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; - ur_usm_host_desc_t UsmHostDesc{}; - UsmHostDesc.stype = 
UR_STRUCTURE_TYPE_USM_HOST_DESC; - if (Properties) { - if (Properties[0] == PI_MEM_ALLOC_FLAGS) { - if (Properties[1] == PI_MEM_ALLOC_WRTITE_COMBINED) { - UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED; - } - if (Properties[1] == PI_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE) { - UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT; - } - if (Properties[1] == PI_MEM_ALLOC_INITIAL_PLACEMENT_HOST) { - UsmHostDesc.flags |= UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT; - } - if (Properties[1] == PI_MEM_ALLOC_DEVICE_READ_ONLY) { - UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; - } - } - } - UsmDeviceDesc.pNext = &UsmHostDesc; - USMDesc.pNext = &UsmDeviceDesc; - - USMDesc.align = Alignment; - - ur_usm_pool_handle_t Pool{}; - HANDLE_ERRORS( - urUSMSharedAlloc(UrContext, UrDevice, &USMDesc, Pool, Size, ResultPtr)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMFree(pi_context Context, void *Ptr) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - HANDLE_ERRORS(urUSMFree(UrContext, Ptr)); - return PI_SUCCESS; -} - -inline pi_result piMemRetain(pi_mem Mem) { - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - - HANDLE_ERRORS(urMemRetain(UrMem)); - - return PI_SUCCESS; -} - -inline pi_result piMemRelease(pi_mem Mem) { - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - - HANDLE_ERRORS(urMemRelease(UrMem)); - - return PI_SUCCESS; -} - -/// Hint to migrate memory to the device -/// -/// @param Queue is the queue to submit to -/// @param Ptr points to the memory to migrate -/// @param Size is the number of bytes to migrate -/// @param Flags is a bitfield used to specify memory migration options -/// @param NumEventsInWaitList is the number of events to wait on -/// @param EventsWaitList is an array of events to wait on -/// @param Event is the event that represents this operation -inline pi_result piextUSMEnqueuePrefetch(pi_queue 
Queue, const void *Ptr, - size_t Size, - pi_usm_migration_flags Flags, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - // flags is currently unused so fail if set - PI_ASSERT(Flags == 0, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - // TODO: to map from pi_usm_migration_flags to - // ur_usm_migration_flags_t - // once we have those defined - ur_usm_migration_flags_t UrFlags{}; - HANDLE_ERRORS(urEnqueueUSMPrefetch(UrQueue, Ptr, Size, UrFlags, - NumEventsInWaitList, UrEventsWaitList, - UrEvent)); - - return PI_SUCCESS; -} - -/// USM memadvise API to govern behavior of automatic migration mechanisms -/// -/// @param Queue is the queue to submit to -/// @param Ptr is the data to be advised -/// @param Length is the size in bytes of the meory to advise -/// @param Advice is device specific advice -/// @param Event is the event that represents this operation -/// -inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, - size_t Length, pi_mem_advice Advice, - pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - ur_usm_advice_flags_t UrAdvice{}; - if (Advice & PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY; - } - if (Advice & PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION) { - UrAdvice |= UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION; - } - if (Advice & PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION) { - UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION; - } - if (Advice & PI_MEM_ADVICE_RESET) { - UrAdvice 
|= UR_USM_ADVICE_FLAG_DEFAULT; - } - - HANDLE_ERRORS(urEnqueueUSMAdvise(UrQueue, Ptr, Length, UrAdvice, UrEvent)); - - return PI_SUCCESS; -} - -/// USM 2D Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -inline pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, size_t Pitch, - size_t PatternSize, const void *Pattern, - size_t Width, size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - - auto hQueue = reinterpret_cast(Queue); - auto phEventWaitList = - reinterpret_cast(EventsWaitList); - auto phEvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urEnqueueUSMFill2D(hQueue, Ptr, Pitch, PatternSize, Pattern, - Width, Height, NumEventsWaitList, - phEventWaitList, phEvent)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, - size_t Pitch, int Value, size_t Width, - size_t Height, - pi_uint32 NumEventsWaitList, - const pi_event *EventsWaitList, - pi_event *Event) { - std::ignore = Queue; - std::ignore = Ptr; - std::ignore = Pitch; - std::ignore = Value; - std::ignore = Width; - std::ignore = Height; - std::ignore = NumEventsWaitList; - std::ignore = EventsWaitList; - std::ignore = Event; - die("piextUSMEnqueueMemset2D: not implemented"); - return PI_SUCCESS; -} - -inline pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, - pi_mem_alloc_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t 
*ParamValueSizeRet) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_usm_alloc_info_t UrParamName{}; - switch (ParamName) { - case PI_MEM_ALLOC_TYPE: { - UrParamName = UR_USM_ALLOC_INFO_TYPE; - break; - } - case PI_MEM_ALLOC_BASE_PTR: { - UrParamName = UR_USM_ALLOC_INFO_BASE_PTR; - break; - } - case PI_MEM_ALLOC_SIZE: { - UrParamName = UR_USM_ALLOC_INFO_SIZE; - break; - } - case PI_MEM_ALLOC_DEVICE: { - UrParamName = UR_USM_ALLOC_INFO_DEVICE; - break; - } - default: { - die("piextUSMGetMemAllocInfo: unsuppported ParamName."); - } - } - - size_t SizeInOut = ParamValueSize; - HANDLE_ERRORS(urUSMGetMemAllocInfo(UrContext, Ptr, UrParamName, - ParamValueSize, ParamValue, - ParamValueSizeRet)) - ur2piUSMAllocInfoValue(UrParamName, ParamValueSize, &SizeInOut, ParamValue); - return PI_SUCCESS; -} - -inline pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - auto hMem = reinterpret_cast(Image); - - ur_image_info_t UrParamName{}; - switch (ParamName) { - case PI_IMAGE_INFO_FORMAT: { - UrParamName = UR_IMAGE_INFO_FORMAT; - break; - } - case PI_IMAGE_INFO_ELEMENT_SIZE: { - UrParamName = UR_IMAGE_INFO_ELEMENT_SIZE; - break; - } - case PI_IMAGE_INFO_ROW_PITCH: { - UrParamName = UR_IMAGE_INFO_ROW_PITCH; - break; - } - case PI_IMAGE_INFO_SLICE_PITCH: { - UrParamName = UR_IMAGE_INFO_SLICE_PITCH; - break; - } - case PI_IMAGE_INFO_WIDTH: { - UrParamName = UR_IMAGE_INFO_WIDTH; - break; - } - case PI_IMAGE_INFO_HEIGHT: { - UrParamName = UR_IMAGE_INFO_HEIGHT; - break; - } - case PI_IMAGE_INFO_DEPTH: { - UrParamName = UR_IMAGE_INFO_DEPTH; - break; - } - default: - return PI_ERROR_UNKNOWN; - } - - HANDLE_ERRORS(urMemImageGetInfo(hMem, UrParamName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - return PI_SUCCESS; -} - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is 
whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param dst_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -inline pi_result piextUSMEnqueueMemcpy2D(pi_queue Queue, pi_bool Blocking, - void *DstPtr, size_t DstPitch, - const void *SrcPtr, size_t SrcPitch, - size_t Width, size_t Height, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - if (!DstPtr || !SrcPtr) - return PI_ERROR_INVALID_VALUE; - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueUSMMemcpy2D( - UrQueue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -// Memory -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Enqueue - -inline pi_result -piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - PI_ASSERT((WorkDim > 0) && (WorkDim < 4), 
PI_ERROR_INVALID_WORK_DIMENSION); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueKernelLaunch( - UrQueue, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, pi_bool BlockingWrite, - pi_image_offset Origin, pi_image_region Region, - size_t InputRowPitch, size_t InputSlicePitch, - const void *Ptr, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrImage = reinterpret_cast(Image); - ur_rect_offset_t UrOrigin{Origin->x, Origin->y, Origin->z}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth; - UrRegion.height = Region->height; - UrRegion.width = Region->width; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemImageWrite( - UrQueue, UrImage, BlockingWrite, UrOrigin, UrRegion, InputRowPitch, - InputSlicePitch, const_cast(Ptr), NumEventsInWaitList, - UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result -piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, pi_bool BlockingRead, - pi_image_offset Origin, pi_image_region Region, - size_t RowPitch, size_t SlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrImage = reinterpret_cast(Image); - ur_rect_offset_t UrOrigin{Origin->x, Origin->y, 
Origin->z}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth; - UrRegion.height = Region->height; - UrRegion.width = Region->width; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemImageRead( - UrQueue, UrImage, BlockingRead, UrOrigin, UrRegion, RowPitch, SlicePitch, - Ptr, NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferMap( - pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, pi_map_flags MapFlags, - size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent, void **RetMap) { - // TODO: we don't implement read-only or write-only, always read-write. - // assert((map_flags & PI_MAP_READ) != 0); - // assert((map_flags & PI_MAP_WRITE) != 0); - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - - ur_map_flags_t UrMapFlags{}; - if (MapFlags & PI_MAP_READ) - UrMapFlags |= UR_MAP_FLAG_READ; - if (MapFlags & PI_MAP_WRITE) - UrMapFlags |= UR_MAP_FLAG_WRITE; - if (MapFlags & PI_MAP_WRITE_INVALIDATE_REGION) - UrMapFlags |= UR_MAP_FLAG_WRITE_INVALIDATE_REGION; - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferMap(UrQueue, UrMem, BlockingMap, UrMapFlags, - Offset, Size, NumEventsInWaitList, - UrEventsWaitList, UrEvent, RetMap)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = 
reinterpret_cast(Queue); - ur_mem_handle_t UrMem = reinterpret_cast(Mem); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemUnmap(UrQueue, UrMem, MappedPtr, - NumEventsInWaitList, UrEventsWaitList, - UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferFill(UrQueue, UrBuffer, Pattern, PatternSize, - Offset, Size, NumEventsInWaitList, - UrEventsWaitList, UrEvent)); - return PI_SUCCESS; -} - -inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - if (!Ptr) { - return PI_ERROR_INVALID_VALUE; - } - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - size_t PatternSize = 1; - HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, &Value, Count, - NumEventsInWaitList, UrEventsWaitList, - UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferCopyRect( - pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, - pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, - size_t 
SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, pi_event *OutEvent) { - - PI_ASSERT(SrcMem && DstMem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBufferSrc = reinterpret_cast(SrcMem); - ur_mem_handle_t UrBufferDst = reinterpret_cast(DstMem); - ur_rect_offset_t UrSrcOrigin{SrcOrigin->x_bytes, SrcOrigin->y_scalar, - SrcOrigin->z_scalar}; - ur_rect_offset_t UrDstOrigin{DstOrigin->x_bytes, DstOrigin->y_scalar, - DstOrigin->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferCopyRect( - UrQueue, UrBufferSrc, UrBufferDst, UrSrcOrigin, UrDstOrigin, UrRegion, - SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, - pi_mem DstMem, size_t SrcOffset, - size_t DstOffset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(SrcMem && DstMem, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBufferSrc = reinterpret_cast(SrcMem); - ur_mem_handle_t UrBufferDst = reinterpret_cast(DstMem); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferCopy( - UrQueue, UrBufferSrc, UrBufferDst, SrcOffset, DstOffset, Size, - NumEventsInWaitList, UrEventsWaitList, 
UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, - void *DstPtr, const void *SrcPtr, - size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueUSMMemcpy(UrQueue, Blocking, DstPtr, SrcPtr, Size, - NumEventsInWaitList, UrEventsWaitList, - UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferWriteRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferWriteRect( - UrQueue, UrBuffer, BlockingWrite, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, - const_cast(Ptr), NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return 
PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, - pi_bool BlockingWrite, size_t Offset, - size_t Size, const void *Ptr, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferWrite( - UrQueue, UrBuffer, BlockingWrite, Offset, Size, const_cast(Ptr), - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferReadRect( - pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferReadRect( - UrQueue, UrBuffer, BlockingRead, UrBufferOffset, 
UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumEventsInWaitList, UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, - pi_bool BlockingRead, size_t Offset, - size_t Size, void *Dst, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - PI_ASSERT(Src, PI_ERROR_INVALID_MEM_OBJECT); - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - ur_mem_handle_t UrBuffer = reinterpret_cast(Src); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueMemBufferRead(UrQueue, UrBuffer, BlockingRead, Offset, - Size, Dst, NumEventsInWaitList, - UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueEventsWaitWithBarrier(UrQueue, NumEventsInWaitList, - UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEnqueueEventsWait(pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { - - PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); - if (EventsWaitList) { - PI_ASSERT(NumEventsInWaitList > 0, PI_ERROR_INVALID_VALUE); - } - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); - - HANDLE_ERRORS(urEnqueueEventsWait(UrQueue, 
NumEventsInWaitList, - UrEventsWaitList, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result -piextEnqueueReadHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, void *ptr, - size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - auto hQueue = reinterpret_cast(queue); - auto hProgram = reinterpret_cast(program); - auto phEventWaitList = - reinterpret_cast(events_waitlist); - auto phEvent = reinterpret_cast(event); - - HANDLE_ERRORS(urEnqueueReadHostPipe(hQueue, hProgram, pipe_symbol, blocking, - ptr, size, num_events_in_waitlist, - phEventWaitList, phEvent)); - - return PI_SUCCESS; -} - -inline pi_result -piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, void *ptr, - size_t size, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - auto hQueue = reinterpret_cast(queue); - auto hProgram = reinterpret_cast(program); - auto phEventWaitList = - reinterpret_cast(events_waitlist); - auto phEvent = reinterpret_cast(event); - - HANDLE_ERRORS(urEnqueueWriteHostPipe(hQueue, hProgram, pipe_symbol, blocking, - ptr, size, num_events_in_waitlist, - phEventWaitList, phEvent)); - - return PI_SUCCESS; -} -// Enqueue -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Events -inline pi_result piEventsWait(pi_uint32 NumEvents, - const pi_event *EventsWaitList) { - if (NumEvents && !EventsWaitList) { - return PI_ERROR_INVALID_EVENT; - } - - const ur_event_handle_t *UrEventsWaitList = - reinterpret_cast(EventsWaitList); - - HANDLE_ERRORS(urEventWait(NumEvents, UrEventsWaitList)); - - return PI_SUCCESS; -} - -inline pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Event, 
PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UrEvent = reinterpret_cast(Event); - - ur_event_info_t PropName{}; - if (ParamName == PI_EVENT_INFO_COMMAND_QUEUE) { - PropName = UR_EVENT_INFO_COMMAND_QUEUE; - } else if (ParamName == PI_EVENT_INFO_CONTEXT) { - PropName = UR_EVENT_INFO_CONTEXT; - } else if (ParamName == PI_EVENT_INFO_COMMAND_TYPE) { - PropName = UR_EVENT_INFO_COMMAND_TYPE; - } else if (ParamName == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - PropName = UR_EVENT_INFO_COMMAND_EXECUTION_STATUS; - } else if (ParamName == PI_EVENT_INFO_REFERENCE_COUNT) { - PropName = UR_EVENT_INFO_REFERENCE_COUNT; - } else { - return PI_ERROR_INVALID_VALUE; - } - - HANDLE_ERRORS(urEventGetInfo(UrEvent, PropName, ParamValueSize, ParamValue, - ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piextEventGetNativeHandle(pi_event Event, - pi_native_handle *NativeHandle) { - - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_event_handle_t UrEvent = reinterpret_cast(Event); - - ur_native_handle_t *UrNativeEvent = - reinterpret_cast(NativeHandle); - HANDLE_ERRORS(urEventGetNativeHandle(UrEvent, UrNativeEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEventGetProfilingInfo(pi_event Event, - pi_profiling_info ParamName, - size_t ParamValueSize, - void *ParamValue, - size_t *ParamValueSizeRet) { - - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UrEvent = reinterpret_cast(Event); - - ur_profiling_info_t PropName{}; - switch (ParamName) { - case PI_PROFILING_INFO_COMMAND_QUEUED: { - PropName = UR_PROFILING_INFO_COMMAND_QUEUED; - break; - } - case PI_PROFILING_INFO_COMMAND_SUBMIT: { - PropName = UR_PROFILING_INFO_COMMAND_SUBMIT; - break; - } - case PI_PROFILING_INFO_COMMAND_START: { - PropName = UR_PROFILING_INFO_COMMAND_START; - break; - } - case PI_PROFILING_INFO_COMMAND_END: { - PropName = UR_PROFILING_INFO_COMMAND_END; - break; - } - default: - return PI_ERROR_INVALID_PROPERTY; - } - - 
HANDLE_ERRORS(urEventGetProfilingInfo(UrEvent, PropName, ParamValueSize, - ParamValue, ParamValueSizeRet)); - - return PI_SUCCESS; -} - -inline pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_event_handle_t *UrEvent = reinterpret_cast(RetEvent); - // pass null for the hNativeHandle to use urEventCreateWithNativeHandle - // as urEventCreate - ur_event_native_properties_t Properties{}; - HANDLE_ERRORS( - urEventCreateWithNativeHandle(nullptr, UrContext, &Properties, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, - pi_context Context, - bool OwnNativeHandle, - pi_event *Event) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - - ur_native_handle_t UrNativeKernel = - reinterpret_cast(NativeHandle); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - - ur_event_handle_t *UrEvent = reinterpret_cast(Event); - ur_event_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; - HANDLE_ERRORS(urEventCreateWithNativeHandle(UrNativeKernel, UrContext, - &Properties, UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEventSetCallback( - pi_event Event, pi_int32 CommandExecCallbackType, - void (*PFnNotify)(pi_event Event, pi_int32 EventCommandStatus, - void *UserData), - void *UserData) { - std::ignore = Event; - std::ignore = CommandExecCallbackType; - std::ignore = PFnNotify; - std::ignore = UserData; - die("piEventSetCallback: deprecated, to be removed"); - return PI_SUCCESS; -} - -inline pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { - std::ignore = Event; - std::ignore = ExecutionStatus; - die("piEventSetStatus: deprecated, to be removed"); - return PI_SUCCESS; -} - -inline pi_result piEventRetain(pi_event Event) { - PI_ASSERT(Event, 
PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UrEvent = reinterpret_cast(Event); - HANDLE_ERRORS(urEventRetain(UrEvent)); - - return PI_SUCCESS; -} - -inline pi_result piEventRelease(pi_event Event) { - PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); - - ur_event_handle_t UrEvent = reinterpret_cast(Event); - HANDLE_ERRORS(urEventRelease(UrEvent)); - - return PI_SUCCESS; -} - -// Events -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Sampler -inline pi_result piSamplerCreate(pi_context Context, - const pi_sampler_properties *SamplerProperties, - pi_sampler *RetSampler) { - - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(RetSampler, PI_ERROR_INVALID_VALUE); - - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_sampler_desc_t UrProps{}; - UrProps.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; - const pi_sampler_properties *CurProperty = SamplerProperties; - while (*CurProperty != 0) { - switch (*CurProperty) { - case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS: { - UrProps.normalizedCoords = ur_cast(*(++CurProperty)); - } break; - - case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE: { - pi_sampler_addressing_mode CurValueAddressingMode = - ur_cast( - ur_cast(*(++CurProperty))); - - if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; - else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_REPEAT) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_REPEAT; - else if (CurValueAddressingMode == - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; - else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_CLAMP) - UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP; - else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_NONE) - UrProps.addressingMode = 
UR_SAMPLER_ADDRESSING_MODE_NONE; - } break; - - case PI_SAMPLER_PROPERTIES_FILTER_MODE: { - pi_sampler_filter_mode CurValueFilterMode = - ur_cast(ur_cast(*(++CurProperty))); - - if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) - UrProps.filterMode = UR_SAMPLER_FILTER_MODE_NEAREST; - else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) - UrProps.filterMode = UR_SAMPLER_FILTER_MODE_LINEAR; - } break; - - default: - break; - } - CurProperty++; - } - - ur_sampler_handle_t *UrSampler = - reinterpret_cast(RetSampler); - - HANDLE_ERRORS(urSamplerCreate(UrContext, &UrProps, UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, - size_t ParamValueSize, void *ParamValue, - size_t *ParamValueSizeRet) { - ur_sampler_info_t InfoType{}; - switch (ParamName) { - case PI_SAMPLER_INFO_REFERENCE_COUNT: - InfoType = UR_SAMPLER_INFO_REFERENCE_COUNT; - break; - case PI_SAMPLER_INFO_CONTEXT: - InfoType = UR_SAMPLER_INFO_CONTEXT; - break; - case PI_SAMPLER_INFO_NORMALIZED_COORDS: - InfoType = UR_SAMPLER_INFO_NORMALIZED_COORDS; - break; - case PI_SAMPLER_INFO_ADDRESSING_MODE: - InfoType = UR_SAMPLER_INFO_ADDRESSING_MODE; - break; - case PI_SAMPLER_INFO_FILTER_MODE: - InfoType = UR_SAMPLER_INFO_FILTER_MODE; - break; - default: - return PI_ERROR_UNKNOWN; - } - - size_t UrParamValueSizeRet; - auto hSampler = reinterpret_cast(Sampler); - HANDLE_ERRORS(urSamplerGetInfo(hSampler, InfoType, ParamValueSize, ParamValue, - &UrParamValueSizeRet)); - if (ParamValueSizeRet) { - *ParamValueSizeRet = UrParamValueSizeRet; - } - ur2piSamplerInfoValue(InfoType, ParamValueSize, &ParamValueSize, ParamValue); - fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, - ParamValue); - return PI_SUCCESS; -} - -// Special version of piKernelSetArg to accept pi_sampler. 
-inline pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_sampler *ArgValue) { - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - ur_sampler_handle_t UrSampler = - reinterpret_cast(*ArgValue); - - HANDLE_ERRORS(urKernelSetArgSampler(UrKernel, ArgIndex, nullptr, UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piSamplerRetain(pi_sampler Sampler) { - PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); - - ur_sampler_handle_t UrSampler = - reinterpret_cast(Sampler); - - HANDLE_ERRORS(urSamplerRetain(UrSampler)); - - return PI_SUCCESS; -} - -inline pi_result piSamplerRelease(pi_sampler Sampler) { - PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); - - ur_sampler_handle_t UrSampler = - reinterpret_cast(Sampler); - - HANDLE_ERRORS(urSamplerRelease(UrSampler)); - - return PI_SUCCESS; -} - -// Sampler -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Command-buffer extension - -inline pi_result -piextCommandBufferCreate(pi_context Context, pi_device Device, - const pi_ext_command_buffer_desc *Desc, - pi_ext_command_buffer *RetCommandBuffer) { - ur_context_handle_t UrContext = - reinterpret_cast(Context); - ur_device_handle_t UrDevice = reinterpret_cast(Device); - const ur_exp_command_buffer_desc_t *UrDesc = - reinterpret_cast(Desc); - ur_exp_command_buffer_handle_t *UrCommandBuffer = - reinterpret_cast(RetCommandBuffer); - - HANDLE_ERRORS( - urCommandBufferCreateExp(UrContext, UrDevice, UrDesc, UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferRetainExp(UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result -piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { - ur_exp_command_buffer_handle_t 
UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferReleaseExp(UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result -piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferFinalizeExp(UrCommandBuffer)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, - const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, - const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - - HANDLE_ERRORS(urCommandBufferAppendKernelLaunchExp( - UrCommandBuffer, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, - LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemcpyUSM( - pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, - size_t Size, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - HANDLE_ERRORS(urCommandBufferAppendMemcpyUSMExp( - UrCommandBuffer, DstPtr, SrcPtr, Size, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - size_t SrcOffset, size_t DstOffset, size_t Size, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - 
reinterpret_cast(CommandBuffer); - - ur_mem_handle_t UrSrcMem = reinterpret_cast(SrcMem); - ur_mem_handle_t UrDstMem = reinterpret_cast(DstMem); - - HANDLE_ERRORS(urCommandBufferAppendMembufferCopyExp( - UrCommandBuffer, UrSrcMem, UrDstMem, SrcOffset, DstOffset, Size, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, - pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, - pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_mem_handle_t UrSrcMem = reinterpret_cast(SrcMem); - ur_mem_handle_t UrDstMem = reinterpret_cast(DstMem); - - ur_rect_offset_t UrSrcOrigin{SrcOrigin->x_bytes, SrcOrigin->y_scalar, - SrcOrigin->z_scalar}; - ur_rect_offset_t UrDstOrigin{DstOrigin->x_bytes, DstOrigin->y_scalar, - DstOrigin->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - HANDLE_ERRORS(urCommandBufferAppendMembufferCopyRectExp( - UrCommandBuffer, UrSrcMem, UrDstMem, UrSrcOrigin, UrDstOrigin, UrRegion, - SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, 
pi_ext_sync_point *SyncPoint) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - HANDLE_ERRORS(urCommandBufferAppendMembufferReadRectExp( - UrCommandBuffer, UrBuffer, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer CommandBuffer, pi_mem Src, size_t Offset, size_t Size, - void *Dst, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - PI_ASSERT(Src, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Src); - - HANDLE_ERRORS(urCommandBufferAppendMembufferReadExp( - UrCommandBuffer, UrBuffer, Offset, Size, Dst, NumSyncPointsInWaitList, - SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, - pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, - pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, - size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, - pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); 
- - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, - BufferOffset->z_scalar}; - ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, - HostOffset->z_scalar}; - ur_rect_region_t UrRegion{}; - UrRegion.depth = Region->depth_scalar; - UrRegion.height = Region->height_scalar; - UrRegion.width = Region->width_bytes; - - HANDLE_ERRORS(urCommandBufferAppendMembufferWriteRectExp( - UrCommandBuffer, UrBuffer, UrBufferOffset, UrHostOffset, UrRegion, - BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, - const_cast(Ptr), NumSyncPointsInWaitList, SyncPointWaitList, - SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, - size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, - const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { - - PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); - - HANDLE_ERRORS(urCommandBufferAppendMembufferWriteExp( - UrCommandBuffer, UrBuffer, Offset, Size, const_cast(Ptr), - NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); - - return PI_SUCCESS; -} - -inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, - pi_queue Queue, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { - - ur_exp_command_buffer_handle_t UrCommandBuffer = - reinterpret_cast(CommandBuffer); - - ur_queue_handle_t UrQueue = reinterpret_cast(Queue); - const ur_event_handle_t *UrEventWaitList = - reinterpret_cast(EventWaitList); - ur_event_handle_t *UrEvent = reinterpret_cast(Event); - - HANDLE_ERRORS(urCommandBufferEnqueueExp( - 
UrCommandBuffer, UrQueue, NumEventsInWaitList, UrEventWaitList, UrEvent)); - - return PI_SUCCESS; -} - -// Command-buffer extension -/////////////////////////////////////////////////////////////////////////////// - -} // namespace pi2ur +//===---------------- pi2ur.hpp - PI API to UR API --------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// +#pragma once + +#include "ur_api.h" +#include +#include +#include +#include + +// Map of UR error codes to PI error codes +static pi_result ur2piResult(ur_result_t urResult) { + if (urResult == UR_RESULT_SUCCESS) + return PI_SUCCESS; + + switch (urResult) { + case UR_RESULT_ERROR_INVALID_OPERATION: + return PI_ERROR_INVALID_OPERATION; + case UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: + return PI_ERROR_INVALID_QUEUE_PROPERTIES; + case UR_RESULT_ERROR_INVALID_QUEUE: + return PI_ERROR_INVALID_QUEUE; + case UR_RESULT_ERROR_INVALID_VALUE: + return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_INVALID_CONTEXT: + return PI_ERROR_INVALID_CONTEXT; + case UR_RESULT_ERROR_INVALID_PLATFORM: + return PI_ERROR_INVALID_PLATFORM; + case UR_RESULT_ERROR_INVALID_BINARY: + return PI_ERROR_INVALID_BINARY; + case UR_RESULT_ERROR_INVALID_PROGRAM: + return PI_ERROR_INVALID_PROGRAM; + case UR_RESULT_ERROR_INVALID_SAMPLER: + return PI_ERROR_INVALID_SAMPLER; + case UR_RESULT_ERROR_INVALID_MEM_OBJECT: + return PI_ERROR_INVALID_MEM_OBJECT; + case UR_RESULT_ERROR_INVALID_EVENT: + return PI_ERROR_INVALID_EVENT; + case UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + return PI_ERROR_INVALID_EVENT_WAIT_LIST; + case UR_RESULT_ERROR_MISALIGNED_SUB_BUFFER_OFFSET: + return PI_ERROR_MISALIGNED_SUB_BUFFER_OFFSET; + case UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: + return PI_ERROR_INVALID_WORK_GROUP_SIZE; + case 
UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE: + return PI_ERROR_COMPILER_NOT_AVAILABLE; + case UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: + return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE; + case UR_RESULT_ERROR_DEVICE_NOT_FOUND: + return PI_ERROR_DEVICE_NOT_FOUND; + case UR_RESULT_ERROR_INVALID_DEVICE: + return PI_ERROR_INVALID_DEVICE; + case UR_RESULT_ERROR_DEVICE_REQUIRES_RESET: + case UR_RESULT_ERROR_DEVICE_LOST: + return PI_ERROR_DEVICE_NOT_AVAILABLE; + case UR_RESULT_ERROR_DEVICE_PARTITION_FAILED: + return PI_ERROR_DEVICE_PARTITION_FAILED; + case UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT: + return PI_ERROR_INVALID_DEVICE_PARTITION_COUNT; + case UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE: + return PI_ERROR_INVALID_WORK_ITEM_SIZE; + case UR_RESULT_ERROR_INVALID_WORK_DIMENSION: + return PI_ERROR_INVALID_WORK_DIMENSION; + case UR_RESULT_ERROR_INVALID_KERNEL_ARGS: + return PI_ERROR_INVALID_KERNEL_ARGS; + case UR_RESULT_ERROR_INVALID_KERNEL: + return PI_ERROR_INVALID_KERNEL; + case UR_RESULT_ERROR_INVALID_KERNEL_NAME: + return PI_ERROR_INVALID_KERNEL_NAME; + case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX: + return PI_ERROR_INVALID_ARG_INDEX; + case UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE: + return PI_ERROR_INVALID_ARG_SIZE; + case UR_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE: + return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_INVALID_IMAGE_SIZE: + return PI_ERROR_INVALID_IMAGE_SIZE; + case UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + case UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED: + return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; + case UR_RESULT_ERROR_MEM_OBJECT_ALLOCATION_FAILURE: + return PI_ERROR_MEM_OBJECT_ALLOCATION_FAILURE; + case UR_RESULT_ERROR_INVALID_PROGRAM_EXECUTABLE: + return PI_ERROR_INVALID_PROGRAM_EXECUTABLE; + case UR_RESULT_ERROR_UNINITIALIZED: + return PI_ERROR_UNINITIALIZED; + case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: + return PI_ERROR_OUT_OF_HOST_MEMORY; + case 
UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY: + case UR_RESULT_ERROR_OUT_OF_RESOURCES: + return PI_ERROR_OUT_OF_RESOURCES; + case UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE: + return PI_ERROR_BUILD_PROGRAM_FAILURE; + case UR_RESULT_ERROR_PROGRAM_LINK_FAILURE: + return PI_ERROR_LINK_PROGRAM_FAILURE; + case UR_RESULT_ERROR_UNSUPPORTED_VERSION: + case UR_RESULT_ERROR_UNSUPPORTED_FEATURE: + case UR_RESULT_ERROR_INVALID_ARGUMENT: + case UR_RESULT_ERROR_INVALID_NULL_HANDLE: + case UR_RESULT_ERROR_HANDLE_OBJECT_IN_USE: + case UR_RESULT_ERROR_INVALID_NULL_POINTER: + return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_INVALID_SIZE: + case UR_RESULT_ERROR_UNSUPPORTED_SIZE: + return PI_ERROR_INVALID_BUFFER_SIZE; + case UR_RESULT_ERROR_UNSUPPORTED_ALIGNMENT: + return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT: + case UR_RESULT_ERROR_INVALID_ENUMERATION: + case UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT: + return PI_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; + case UR_RESULT_ERROR_INVALID_NATIVE_BINARY: + return PI_ERROR_INVALID_BINARY; + case UR_RESULT_ERROR_INVALID_GLOBAL_NAME: + return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_INVALID_FUNCTION_NAME: + return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; + case UR_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION: + return PI_ERROR_INVALID_WORK_DIMENSION; + case UR_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION: + return PI_ERROR_INVALID_VALUE; + + case UR_RESULT_ERROR_PROGRAM_UNLINKED: + return PI_ERROR_INVALID_PROGRAM_EXECUTABLE; + case UR_RESULT_ERROR_OVERLAPPING_REGIONS: + return PI_ERROR_MEM_COPY_OVERLAP; + case UR_RESULT_ERROR_INVALID_HOST_PTR: + return PI_ERROR_INVALID_HOST_PTR; + case UR_RESULT_ERROR_INVALID_USM_SIZE: + return PI_ERROR_INVALID_BUFFER_SIZE; + case UR_RESULT_ERROR_OBJECT_ALLOCATION_FAILURE: + return PI_ERROR_OUT_OF_RESOURCES; + case UR_RESULT_ERROR_ADAPTER_SPECIFIC: + return PI_ERROR_PLUGIN_SPECIFIC_ERROR; + case 
UR_RESULT_ERROR_UNKNOWN: + default: + return PI_ERROR_UNKNOWN; + }; +} + +// Helper for one-liner validation +#define PI_ASSERT(condition, error) \ + if (!(condition)) \ + return error; + +// Early exits on any error +#define HANDLE_ERRORS(urCall) \ + if (auto Result = urCall) \ + return ur2piResult(Result); + +// A version of return helper that returns pi_result and not ur_result_t +class ReturnHelper : public UrReturnHelper { +public: + using UrReturnHelper::UrReturnHelper; + + template pi_result operator()(const T &t) { + return ur2piResult(UrReturnHelper::operator()(t)); + } + // Array return value + template pi_result operator()(const T *t, size_t s) { + return ur2piResult(UrReturnHelper::operator()(t, s)); + } + // Array return value where element type is differrent from T + template pi_result operator()(const T *t, size_t s) { + return ur2piResult(UrReturnHelper::operator()(t, s)); + } +}; + +// A version of return helper that supports conversion through a map +class ConvertHelper : public ReturnHelper { + using ReturnHelper::ReturnHelper; + +public: + // Convert the value using a conversion map + template + pi_result convert(std::function Func) { + *param_value_size_ret = sizeof(TypePI); + + // There is no value to convert. + if (!param_value) + return PI_SUCCESS; + + auto pValueUR = static_cast(param_value); + auto pValuePI = static_cast(param_value); + + // Cannot convert to a smaller storage type + PI_ASSERT(sizeof(TypePI) >= sizeof(TypeUR), PI_ERROR_UNKNOWN); + + *pValuePI = Func(*pValueUR); + return PI_SUCCESS; + } + + // Convert the array (0-terminated) using a conversion map + template + pi_result convertArray(std::function Func) { + // Cannot convert to a smaller element storage type + PI_ASSERT(sizeof(TypePI) >= sizeof(TypeUR), PI_ERROR_UNKNOWN); + *param_value_size_ret *= sizeof(TypePI) / sizeof(TypeUR); + + // There is no value to convert. Adjust to a possibly bigger PI storage. 
+ if (!param_value) + return PI_SUCCESS; + + PI_ASSERT(*param_value_size_ret % sizeof(TypePI) == 0, PI_ERROR_UNKNOWN); + + // Make a copy of the input UR array as we may possibly overwrite + // following elements while converting previous ones (if extending). + auto ValueUR = new char[*param_value_size_ret]; + auto pValueUR = reinterpret_cast(ValueUR); + auto pValuePI = static_cast(param_value); + memcpy(pValueUR, param_value, *param_value_size_ret); + + while (pValueUR) { + if (*pValueUR == 0) { + *pValuePI = 0; + break; + } + + *pValuePI = Func(*pValueUR); + ++pValuePI; + ++pValueUR; + } + + delete[] ValueUR; + return PI_SUCCESS; + } + + // Convert the bitset using a conversion map + template + pi_result convertBitSet(std::function Func) { + // There is no value to convert. + if (!param_value) + return PI_SUCCESS; + + auto pValuePI = static_cast(param_value); + auto pValueUR = static_cast(param_value); + + // Cannot handle biteset large than size_t + PI_ASSERT(sizeof(TypeUR) <= sizeof(size_t), PI_ERROR_UNKNOWN); + size_t In = *pValueUR; + TypePI Out = 0; + + size_t Val; + while ((Val = In & -In)) { // Val is the rightmost set bit in In + In &= In - 1; // Reset the rightmost set bit + + // Convert the Val alone and merge it into Out + *pValueUR = TypeUR(Val); + if (auto Res = convert(Func)) + return Res; + Out |= *pValuePI; + } + *pValuePI = TypePI(Out); + return PI_SUCCESS; + } +}; + +// Translate UR platform info values to PI info values +inline pi_result ur2piPlatformInfoValue(ur_platform_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + + switch (ParamName) { + case UR_PLATFORM_INFO_EXTENSIONS: + case UR_PLATFORM_INFO_NAME: + case UR_PLATFORM_INFO_PROFILE: + case UR_PLATFORM_INFO_VENDOR_NAME: + case UR_PLATFORM_INFO_VERSION: + // These ones do not need ur2pi translations + break; + case UR_PLATFORM_INFO_BACKEND: { + auto ConvertFunc = 
[](ur_platform_backend_t UrValue) { + switch (UrValue) { + case UR_PLATFORM_BACKEND_UNKNOWN: + return PI_EXT_PLATFORM_BACKEND_UNKNOWN; + case UR_PLATFORM_BACKEND_LEVEL_ZERO: + return PI_EXT_PLATFORM_BACKEND_LEVEL_ZERO; + case UR_PLATFORM_BACKEND_OPENCL: + return PI_EXT_PLATFORM_BACKEND_OPENCL; + case UR_PLATFORM_BACKEND_CUDA: + return PI_EXT_PLATFORM_BACKEND_CUDA; + case UR_PLATFORM_BACKEND_HIP: + return PI_EXT_PLATFORM_BACKEND_CUDA; + default: + die("UR_PLATFORM_INFO_BACKEND: unhandled value"); + } + }; + return Value.convert( + ConvertFunc); + } + default: + return PI_ERROR_UNKNOWN; + } + + if (ParamValueSizePI && ParamValueSizePI != *ParamValueSizeUR) { + fprintf(stderr, "UR PlatformInfoType=%d PI=%d but UR=%d\n", ParamName, + (int)ParamValueSizePI, (int)*ParamValueSizeUR); + die("ur2piPlatformInfoValue: size mismatch"); + } + return PI_SUCCESS; +} + +// Translate UR device info values to PI info values +inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + + if (ParamName == UR_DEVICE_INFO_TYPE) { + auto ConvertFunc = [](ur_device_type_t UrValue) { + switch (UrValue) { + case UR_DEVICE_TYPE_CPU: + return PI_DEVICE_TYPE_CPU; + case UR_DEVICE_TYPE_GPU: + return PI_DEVICE_TYPE_GPU; + case UR_DEVICE_TYPE_FPGA: + return PI_DEVICE_TYPE_ACC; + default: + die("UR_DEVICE_INFO_TYPE: unhandled value"); + } + }; + return Value.convert(ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_QUEUE_PROPERTIES) { + auto ConvertFunc = [](ur_queue_flag_t UrValue) { + switch (UrValue) { + case UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE: + return PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; + case UR_QUEUE_FLAG_PROFILING_ENABLE: + return PI_QUEUE_FLAG_PROFILING_ENABLE; + case UR_QUEUE_FLAG_ON_DEVICE: + return PI_QUEUE_FLAG_ON_DEVICE; + case UR_QUEUE_FLAG_ON_DEVICE_DEFAULT: + return PI_QUEUE_FLAG_ON_DEVICE_DEFAULT; + 
case UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM: + return static_cast(__SYCL_PI_CUDA_SYNC_WITH_DEFAULT); + case UR_QUEUE_FLAG_USE_DEFAULT_STREAM: + return static_cast(__SYCL_PI_CUDA_USE_DEFAULT_STREAM); + default: + die("UR_DEVICE_INFO_QUEUE_PROPERTIES: unhandled value"); + } + }; + return Value.convertBitSet( + ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_EXECUTION_CAPABILITIES) { + auto ConvertFunc = [](ur_device_exec_capability_flag_t UrValue) { + switch (UrValue) { + case UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL: + return PI_DEVICE_EXEC_CAPABILITIES_KERNEL; + case UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL: + return PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL; + default: + die("UR_DEVICE_INFO_EXECUTION_CAPABILITIES: unhandled value"); + } + }; + return Value + .convertBitSet( + ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { + auto ConvertFunc = [](ur_device_affinity_domain_flag_t UrValue) { + switch (UrValue) { + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: + return PI_DEVICE_AFFINITY_DOMAIN_NUMA; + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: + return PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; + default: + die("UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: unhandled value"); + } + }; + return Value.convertBitSet(ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_PARTITION_TYPE) { + auto ConvertFunc = [](ur_device_partition_t UrValue) { + if (UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN == UrValue) + return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + else if (UR_DEVICE_PARTITION_BY_CSLICE == UrValue) + return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; + else if ((ur_device_partition_t) + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE == UrValue) + return (pi_device_partition_property) + PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; + die("UR_DEVICE_INFO_PARTITION_TYPE: unhandled value"); + }; + return Value + .convertArray( + ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + auto 
ConvertFunc = [](ur_device_partition_t UrValue) { + switch (UrValue) { + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: + return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + case UR_DEVICE_PARTITION_BY_CSLICE: + return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; + default: + die("UR_DEVICE_INFO_SUPPORTED_PARTITIONS: unhandled value"); + } + }; + return Value + .convertArray( + ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_LOCAL_MEM_TYPE) { + auto ConvertFunc = [](ur_device_local_mem_type_t UrValue) { + switch (UrValue) { + case UR_DEVICE_LOCAL_MEM_TYPE_LOCAL: + return PI_DEVICE_LOCAL_MEM_TYPE_LOCAL; + case UR_DEVICE_LOCAL_MEM_TYPE_GLOBAL: + return PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL; + default: + die("UR_DEVICE_INFO_LOCAL_MEM_TYPE: unhandled value"); + } + }; + return Value.convert( + ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES || + ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES) { + auto ConvertFunc = [](ur_memory_order_capability_flag_t UrValue) { + switch (UrValue) { + case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED: + return PI_MEMORY_ORDER_RELAXED; + case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE: + return PI_MEMORY_ORDER_ACQUIRE; + case UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE: + return PI_MEMORY_ORDER_RELEASE; + case UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL: + return PI_MEMORY_ORDER_ACQ_REL; + case UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST: + return PI_MEMORY_ORDER_SEQ_CST; + default: + die("UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: unhandled " + "value"); + } + }; + return Value.convertBitSet(ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES || + ParamName == UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES) { + auto ConvertFunc = [](ur_memory_scope_capability_flag_t UrValue) { + switch (UrValue) { + case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM: + return PI_MEMORY_SCOPE_WORK_ITEM; + case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP: + return PI_MEMORY_SCOPE_SUB_GROUP; + 
case UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP: + return PI_MEMORY_SCOPE_WORK_GROUP; + case UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE: + return PI_MEMORY_SCOPE_DEVICE; + case UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM: + return PI_MEMORY_SCOPE_SYSTEM; + default: + die("UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: unhandled " + "value"); + } + }; + return Value.convertBitSet(ConvertFunc); + } else { + // TODO: what else needs a UR-PI translation? + } + + if (ParamValueSizePI && ParamValueSizePI != *ParamValueSizeUR) { + fprintf(stderr, "UR DeviceInfoType=%d PI=%d but UR=%d\n", ParamName, + (int)ParamValueSizePI, (int)*ParamValueSizeUR); + die("ur2piDeviceInfoValue: size mismatch"); + } + return PI_SUCCESS; +} + +inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + switch (ParamName) { + case UR_SAMPLER_INFO_ADDRESSING_MODE: { + auto ConvertFunc = [](ur_sampler_addressing_mode_t UrValue) { + switch (UrValue) { + case UR_SAMPLER_ADDRESSING_MODE_CLAMP: + return PI_SAMPLER_ADDRESSING_MODE_CLAMP; + case UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: + return PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + case UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT: + return PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + case UR_SAMPLER_ADDRESSING_MODE_NONE: + return PI_SAMPLER_ADDRESSING_MODE_NONE; + case UR_SAMPLER_ADDRESSING_MODE_REPEAT: + return PI_SAMPLER_ADDRESSING_MODE_REPEAT; + + default: + die("UR_SAMPLER_ADDRESSING_MODE_TYPE: unhandled value"); + } + }; + return Value + .convert( + ConvertFunc); + } + case UR_SAMPLER_INFO_FILTER_MODE: { + auto ConvertFunc = [](ur_sampler_filter_mode_t UrValue) { + switch (UrValue) { + case UR_SAMPLER_FILTER_MODE_LINEAR: + return PI_SAMPLER_FILTER_MODE_LINEAR; + case UR_SAMPLER_FILTER_MODE_NEAREST: + return PI_SAMPLER_FILTER_MODE_NEAREST; + default: + die("UR_SAMPLER_FILTER_MODE: unhandled 
value"); + } + }; + return Value.convert( + ConvertFunc); + } + default: + return PI_SUCCESS; + } +} + +// Translate UR device info values to PI info values +inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + + if (ParamName == UR_USM_ALLOC_INFO_TYPE) { + auto ConvertFunc = [](ur_usm_type_t UrValue) { + switch (UrValue) { + case UR_USM_TYPE_UNKNOWN: + return PI_MEM_TYPE_UNKNOWN; + case UR_USM_TYPE_HOST: + return PI_MEM_TYPE_HOST; + case UR_USM_TYPE_DEVICE: + return PI_MEM_TYPE_DEVICE; + case UR_USM_TYPE_SHARED: + return PI_MEM_TYPE_SHARED; + default: + die("UR_USM_ALLOC_INFO_TYPE: unhandled value"); + } + }; + return Value.convert(ConvertFunc); + } + + return PI_SUCCESS; +} + +// Handle mismatched PI and UR type return sizes for info queries +inline pi_result fixupInfoValueTypes(size_t ParamValueSizeRetUR, + size_t *ParamValueSizeRetPI, + size_t ParamValueSize, void *ParamValue) { + if (ParamValueSizeRetUR == 1 && ParamValueSize == 4) { + // extend bool to pi_bool (uint32_t) + if (ParamValue) { + auto *ValIn = static_cast(ParamValue); + auto *ValOut = static_cast(ParamValue); + *ValOut = static_cast(*ValIn); + } + if (ParamValueSizeRetPI) { + *ParamValueSizeRetPI = sizeof(pi_bool); + } + } + + return PI_SUCCESS; +} + +inline ur_result_t +mapPIMetadataToUR(const pi_device_binary_property *pi_metadata, + ur_program_metadata_t *ur_metadata) { + ur_metadata->pName = (*pi_metadata)->Name; + ur_metadata->size = (*pi_metadata)->ValSize; + switch ((*pi_metadata)->Type) { + case PI_PROPERTY_TYPE_UINT32: + ur_metadata->type = UR_PROGRAM_METADATA_TYPE_UINT32; + ur_metadata->value.data32 = (*pi_metadata)->ValSize; + return UR_RESULT_SUCCESS; + case PI_PROPERTY_TYPE_BYTE_ARRAY: + ur_metadata->type = UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY; + ur_metadata->value.pData = (*pi_metadata)->ValAddr; + return 
UR_RESULT_SUCCESS; + case PI_PROPERTY_TYPE_STRING: + ur_metadata->type = UR_PROGRAM_METADATA_TYPE_STRING; + ur_metadata->value.pString = + reinterpret_cast((*pi_metadata)->ValAddr); + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } +} + +namespace pi2ur { + +inline pi_result piTearDown(void *PluginParameter) { + std::ignore = PluginParameter; + // TODO: Dont check for errors in urTearDown, since + // when using Level Zero plugin, the second urTearDown + // will fail as ur_loader.so has already been unloaded, + urTearDown(nullptr); + return PI_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +// Platform +inline pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, + pi_uint32 *NumPlatforms) { + + urInit(0); + auto phPlatforms = reinterpret_cast(Platforms); + HANDLE_ERRORS(urPlatformGet(NumEntries, phPlatforms, NumPlatforms)); + return PI_SUCCESS; +} + +inline pi_result piextPlatformGetNativeHandle(pi_platform Platform, + pi_native_handle *NativeHandle) { + + PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + auto UrPlatform = reinterpret_cast(Platform); + + ur_native_handle_t UrNativeHandle{}; + HANDLE_ERRORS(urPlatformGetNativeHandle(UrPlatform, &UrNativeHandle)); + + *NativeHandle = reinterpret_cast(UrNativeHandle); + + return PI_SUCCESS; +} + +inline pi_result +piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_platform *Platform) { + + PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_platform_handle_t UrPlatform{}; + ur_native_handle_t UrNativeHandle = + reinterpret_cast(NativeHandle); + ur_platform_native_properties_t UrProperties{}; + urPlatformCreateWithNativeHandle(UrNativeHandle, &UrProperties, &UrPlatform); + + *Platform = reinterpret_cast(UrPlatform); + + return PI_SUCCESS; +} + +inline pi_result piPlatformGetInfo(pi_platform 
Platform, + pi_platform_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); + + ur_platform_info_t UrParamName = {}; + switch (ParamName) { + case PI_PLATFORM_INFO_EXTENSIONS: { + UrParamName = UR_PLATFORM_INFO_EXTENSIONS; + break; + } + case PI_PLATFORM_INFO_NAME: { + UrParamName = UR_PLATFORM_INFO_NAME; + break; + } + case PI_PLATFORM_INFO_PROFILE: { + UrParamName = UR_PLATFORM_INFO_PROFILE; + break; + } + case PI_PLATFORM_INFO_VENDOR: { + UrParamName = UR_PLATFORM_INFO_VENDOR_NAME; + break; + } + case PI_PLATFORM_INFO_VERSION: { + UrParamName = UR_PLATFORM_INFO_VERSION; + break; + } + case PI_EXT_PLATFORM_INFO_BACKEND: { + UrParamName = UR_PLATFORM_INFO_BACKEND; + break; + } + default: + die("urGetContextInfo: unsuppported ParamName."); + } + + size_t UrParamValueSizeRet; + auto UrPlatform = reinterpret_cast(Platform); + HANDLE_ERRORS(urPlatformGetInfo(UrPlatform, UrParamName, ParamValueSize, + ParamValue, &UrParamValueSizeRet)); + + if (ParamValueSizeRet) { + *ParamValueSizeRet = UrParamValueSizeRet; + } + ur2piPlatformInfoValue(UrParamName, ParamValueSize, &ParamValueSize, + ParamValue); + fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, + ParamValue); + + return PI_SUCCESS; +} + +inline pi_result piextPluginGetOpaqueData(void *opaque_data_param, + void **opaque_data_return) { + (void)opaque_data_param; + (void)opaque_data_return; + return PI_ERROR_UNKNOWN; +} + +inline pi_result piPluginGetBackendOption(pi_platform Platform, + const char *FrontendOption, + const char **PlatformOption) { + + auto UrPlatform = reinterpret_cast(Platform); + HANDLE_ERRORS( + urPlatformGetBackendOption(UrPlatform, FrontendOption, PlatformOption)); + + return PI_SUCCESS; +} + +// Platform +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Device 
+inline pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, + pi_uint32 NumEntries, pi_device *Devices, + pi_uint32 *NumDevices) { + ur_device_type_t Type; + switch (DeviceType) { + case PI_DEVICE_TYPE_ALL: + Type = UR_DEVICE_TYPE_ALL; + break; + case PI_DEVICE_TYPE_GPU: + Type = UR_DEVICE_TYPE_GPU; + break; + case PI_DEVICE_TYPE_CPU: + Type = UR_DEVICE_TYPE_CPU; + break; + case PI_DEVICE_TYPE_ACC: + Type = UR_DEVICE_TYPE_FPGA; + break; + default: + return PI_ERROR_UNKNOWN; + } + + PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM); + + auto UrPlatform = reinterpret_cast(Platform); + auto UrDevices = reinterpret_cast(Devices); + HANDLE_ERRORS( + urDeviceGet(UrPlatform, Type, NumEntries, UrDevices, NumDevices)); + + return PI_SUCCESS; +} + +inline pi_result piDeviceRetain(pi_device Device) { + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrDevice = reinterpret_cast(Device); + HANDLE_ERRORS(urDeviceRetain(UrDevice)); + return PI_SUCCESS; +} + +inline pi_result piDeviceRelease(pi_device Device) { + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + auto UrDevice = reinterpret_cast(Device); + HANDLE_ERRORS(urDeviceRelease(UrDevice)); + return PI_SUCCESS; +} + +inline pi_result piPluginGetLastError(char **message) { + std::ignore = message; + return PI_SUCCESS; +} + +inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + ur_device_info_t InfoType; + switch (ParamName) { + case PI_DEVICE_INFO_TYPE: + InfoType = UR_DEVICE_INFO_TYPE; + break; + case PI_DEVICE_INFO_PARENT_DEVICE: + InfoType = UR_DEVICE_INFO_PARENT_DEVICE; + break; + case PI_DEVICE_INFO_PLATFORM: + InfoType = UR_DEVICE_INFO_PLATFORM; + break; + case PI_DEVICE_INFO_VENDOR_ID: + InfoType = UR_DEVICE_INFO_VENDOR_ID; + break; + case PI_DEVICE_INFO_UUID: + InfoType = UR_DEVICE_INFO_UUID; + break; + case PI_DEVICE_INFO_ATOMIC_64: + InfoType = UR_DEVICE_INFO_ATOMIC_64; + break; + case 
PI_DEVICE_INFO_EXTENSIONS: + InfoType = UR_DEVICE_INFO_EXTENSIONS; + break; + case PI_DEVICE_INFO_NAME: + InfoType = UR_DEVICE_INFO_NAME; + break; + case PI_DEVICE_INFO_COMPILER_AVAILABLE: + InfoType = UR_DEVICE_INFO_COMPILER_AVAILABLE; + break; + case PI_DEVICE_INFO_LINKER_AVAILABLE: + InfoType = UR_DEVICE_INFO_LINKER_AVAILABLE; + break; + case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: + InfoType = UR_DEVICE_INFO_MAX_COMPUTE_UNITS; + break; + case PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + InfoType = UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS; + break; + case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + InfoType = UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE; + break; + case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + InfoType = UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES; + break; + case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + InfoType = UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY; + break; + case PI_DEVICE_INFO_ADDRESS_BITS: + InfoType = UR_DEVICE_INFO_ADDRESS_BITS; + break; + case PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + InfoType = UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE; + break; + case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: + InfoType = UR_DEVICE_INFO_GLOBAL_MEM_SIZE; + break; + case PI_DEVICE_INFO_LOCAL_MEM_SIZE: + InfoType = UR_DEVICE_INFO_LOCAL_MEM_SIZE; + break; + case PI_DEVICE_INFO_IMAGE_SUPPORT: + InfoType = UR_DEVICE_INFO_IMAGE_SUPPORTED; + break; + case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: + InfoType = UR_DEVICE_INFO_HOST_UNIFIED_MEMORY; + break; + case PI_DEVICE_INFO_AVAILABLE: + InfoType = UR_DEVICE_INFO_AVAILABLE; + break; + case PI_DEVICE_INFO_VENDOR: + InfoType = UR_DEVICE_INFO_VENDOR; + break; + case PI_DEVICE_INFO_DRIVER_VERSION: + InfoType = UR_DEVICE_INFO_DRIVER_VERSION; + break; + case PI_DEVICE_INFO_VERSION: + InfoType = UR_DEVICE_INFO_VERSION; + break; + case PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + InfoType = UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES; + break; + case PI_DEVICE_INFO_REFERENCE_COUNT: + InfoType = UR_DEVICE_INFO_REFERENCE_COUNT; + break; + case PI_DEVICE_INFO_PARTITION_PROPERTIES: + InfoType 
= UR_DEVICE_INFO_SUPPORTED_PARTITIONS; + break; + case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + InfoType = UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN; + break; + case PI_DEVICE_INFO_PARTITION_TYPE: + InfoType = UR_DEVICE_INFO_PARTITION_TYPE; + break; + case PI_DEVICE_INFO_OPENCL_C_VERSION: + InfoType = UR_EXT_DEVICE_INFO_OPENCL_C_VERSION; + break; + case PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + InfoType = UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC; + break; + case PI_DEVICE_INFO_PRINTF_BUFFER_SIZE: + InfoType = UR_DEVICE_INFO_PRINTF_BUFFER_SIZE; + break; + case PI_DEVICE_INFO_PROFILE: + InfoType = UR_DEVICE_INFO_PROFILE; + break; + case PI_DEVICE_INFO_BUILT_IN_KERNELS: + InfoType = UR_DEVICE_INFO_BUILT_IN_KERNELS; + break; + case PI_DEVICE_INFO_QUEUE_PROPERTIES: + InfoType = UR_DEVICE_INFO_QUEUE_PROPERTIES; + break; + case PI_DEVICE_INFO_EXECUTION_CAPABILITIES: + InfoType = UR_DEVICE_INFO_EXECUTION_CAPABILITIES; + break; + case PI_DEVICE_INFO_ENDIAN_LITTLE: + InfoType = UR_DEVICE_INFO_ENDIAN_LITTLE; + break; + case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + InfoType = UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT; + break; + case PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + InfoType = UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION; + break; + case PI_DEVICE_INFO_LOCAL_MEM_TYPE: + InfoType = UR_DEVICE_INFO_LOCAL_MEM_TYPE; + break; + case PI_DEVICE_INFO_MAX_CONSTANT_ARGS: + InfoType = UR_DEVICE_INFO_MAX_CONSTANT_ARGS; + break; + case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + InfoType = UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE; + break; + case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE; + break; + case PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE; + break; + case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + InfoType = UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE; + break; + case PI_DEVICE_INFO_MAX_PARAMETER_SIZE: + InfoType = UR_DEVICE_INFO_MAX_PARAMETER_SIZE; + break; + case 
PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + InfoType = UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN; + break; + case PI_DEVICE_INFO_MAX_SAMPLERS: + InfoType = UR_DEVICE_INFO_MAX_SAMPLERS; + break; + case PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + InfoType = UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS; + break; + case PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + InfoType = UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS; + break; + case PI_DEVICE_INFO_SINGLE_FP_CONFIG: + InfoType = UR_DEVICE_INFO_SINGLE_FP_CONFIG; + break; + case PI_DEVICE_INFO_HALF_FP_CONFIG: + InfoType = UR_DEVICE_INFO_HALF_FP_CONFIG; + break; + case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: + InfoType = UR_DEVICE_INFO_DOUBLE_FP_CONFIG; + break; + case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + InfoType = UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH; + break; + case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + InfoType = UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT; + break; + case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH; + break; + case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT; + break; + case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + InfoType = UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH; + break; + case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + InfoType = UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + 
InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE; + break; + case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + InfoType = UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF; + break; + case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + InfoType = UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF; + break; + case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: + InfoType = UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS; + break; + case PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: + InfoType = UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; + break; + case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + InfoType = UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL; + break; + case PI_DEVICE_INFO_IL_VERSION: + InfoType = UR_DEVICE_INFO_IL_VERSION; + break; + case PI_DEVICE_INFO_USM_HOST_SUPPORT: + InfoType = UR_DEVICE_INFO_USM_HOST_SUPPORT; + break; + case PI_DEVICE_INFO_USM_DEVICE_SUPPORT: + InfoType = UR_DEVICE_INFO_USM_DEVICE_SUPPORT; + break; + case PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + InfoType = UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT; + break; + case PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + InfoType = UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT; + break; + case PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: + InfoType = UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT; + break; + case PI_DEVICE_INFO_PCI_ADDRESS: + InfoType = UR_DEVICE_INFO_PCI_ADDRESS; + break; + case PI_DEVICE_INFO_GPU_EU_COUNT: + InfoType = UR_DEVICE_INFO_GPU_EU_COUNT; + 
break; + case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: + InfoType = UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH; + break; + case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: + InfoType = UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE; + break; + case PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION: + InfoType = UR_DEVICE_INFO_IP_VERSION; + break; + case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: + InfoType = UR_DEVICE_INFO_BUILD_ON_SUBDEVICE; + break; + case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: + InfoType = UR_DEVICE_INFO_MAX_WORK_GROUPS_3D; + break; + case PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + InfoType = UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE; + break; + case PI_DEVICE_INFO_DEVICE_ID: + InfoType = UR_DEVICE_INFO_DEVICE_ID; + break; + case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: + InfoType = UR_DEVICE_INFO_GLOBAL_MEM_FREE; + break; + case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: + InfoType = UR_DEVICE_INFO_MEMORY_CLOCK_RATE; + break; + case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: + InfoType = UR_DEVICE_INFO_MEMORY_BUS_WIDTH; + break; + case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: + InfoType = UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES; + break; + case PI_DEVICE_INFO_GPU_SLICES: + InfoType = UR_DEVICE_INFO_GPU_EU_SLICES; + break; + case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + InfoType = UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE; + break; + case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + InfoType = UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU; + break; + case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: + InfoType = UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH; + break; + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: + InfoType = UR_DEVICE_INFO_BFLOAT16; + break; + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + InfoType = UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES; + break; + case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + InfoType = UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES; + break; + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + InfoType = 
UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES; + break; + case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: + InfoType = UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES; + break; + case PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT: + InfoType = UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT; + break; + case PI_DEVICE_INFO_IMAGE_SRGB: + InfoType = UR_DEVICE_INFO_IMAGE_SRGB; + break; + case PI_DEVICE_INFO_BACKEND_VERSION: { + InfoType = UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION; + break; + } + case PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: { + InfoType = UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP; + break; + } + default: + return PI_ERROR_UNKNOWN; + }; + + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + size_t UrParamValueSizeRet; + auto UrDevice = reinterpret_cast(Device); + + HANDLE_ERRORS(urDeviceGetInfo(UrDevice, InfoType, ParamValueSize, ParamValue, + &UrParamValueSizeRet)); + + if (ParamValueSizeRet) { + *ParamValueSizeRet = UrParamValueSizeRet; + } + ur2piDeviceInfoValue(InfoType, ParamValueSize, &ParamValueSize, ParamValue); + fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, + ParamValue); + + return PI_SUCCESS; +} + +inline pi_result piextDeviceGetNativeHandle(pi_device Device, + pi_native_handle *NativeHandle) { + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + auto UrDevice = reinterpret_cast(Device); + + ur_native_handle_t UrNativeHandle{}; + HANDLE_ERRORS(urDeviceGetNativeHandle(UrDevice, &UrNativeHandle)); + *NativeHandle = reinterpret_cast(UrNativeHandle); + return PI_SUCCESS; +} + +inline pi_result +piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_platform Platform, pi_device *Device) { + + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_native_handle_t UrNativeDevice = + reinterpret_cast(NativeHandle); + ur_platform_handle_t UrPlatform = + reinterpret_cast(Platform); + auto UrDevice = 
reinterpret_cast(Device); + ur_device_native_properties_t UrProperties{}; + HANDLE_ERRORS(urDeviceCreateWithNativeHandle(UrNativeDevice, UrPlatform, + &UrProperties, UrDevice)); + + return PI_SUCCESS; +} + +inline pi_result piDevicePartition( + pi_device Device, const pi_device_partition_property *Properties, + pi_uint32 NumEntries, pi_device *SubDevices, pi_uint32 *NumSubDevices) { + + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + if (!Properties || !Properties[0]) + return PI_ERROR_INVALID_VALUE; + + ur_device_partition_t Property; + switch (Properties[0]) { + case PI_DEVICE_PARTITION_EQUALLY: + Property = UR_DEVICE_PARTITION_EQUALLY; + break; + case PI_DEVICE_PARTITION_BY_COUNTS: + Property = UR_DEVICE_PARTITION_BY_COUNTS; + break; + case PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: + Property = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + break; + case PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE: + Property = UR_DEVICE_PARTITION_BY_CSLICE; + break; + default: + return PI_ERROR_UNKNOWN; + } + + // Some partitioning types require a value + auto Value = uint32_t(Properties[1]); + if (Property == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + switch (Properties[1]) { + case PI_DEVICE_AFFINITY_DOMAIN_NUMA: + Value = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + break; + case PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE: + Value = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; + break; + default: + return PI_ERROR_UNKNOWN; + } + } + + // Translate partitioning properties from PI-way + // (array of uintptr_t values) to UR-way + // (array of {uint32_t, uint32_t} pairs) + // + // TODO: correctly terminate the UR properties, see: + // https://github.com/oneapi-src/unified-runtime/issues/183 + // + ur_device_partition_property_t UrProperty; + UrProperty.type = Property; + UrProperty.value.equally = Value; + + ur_device_partition_properties_t UrProperties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &UrProperty, + 1, + }; + + auto UrDevice = reinterpret_cast(Device); + 
auto UrSubDevices = reinterpret_cast(SubDevices); + HANDLE_ERRORS(urDevicePartition(UrDevice, &UrProperties, NumEntries, + UrSubDevices, NumSubDevices)); + return PI_SUCCESS; +} + +inline pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + auto UrDevice = reinterpret_cast(Device); + HANDLE_ERRORS(urDeviceGetGlobalTimestamps(UrDevice, DeviceTime, HostTime)); + return PI_SUCCESS; +} + +inline pi_result +piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? + pi_device_binary *Binaries, pi_uint32 NumBinaries, + pi_uint32 *SelectedBinaryInd) { + + auto UrDevice = reinterpret_cast(Device); + std::vector UrBinaries(NumBinaries); + + for (uint32_t BinaryCount = 0; BinaryCount < NumBinaries; BinaryCount++) { + if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_UNKNOWN) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_UNKNOWN; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV32) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_SPIRV32; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_SPIRV64; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, 
+ __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_NVPTX64; + else if (strcmp(Binaries[BinaryCount]->DeviceTargetSpec, + __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN) == 0) + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_AMDGCN; + else + UrBinaries[BinaryCount].pDeviceTargetSpec = + UR_DEVICE_BINARY_TARGET_UNKNOWN; + } + + HANDLE_ERRORS(urDeviceSelectBinary(UrDevice, UrBinaries.data(), NumBinaries, + SelectedBinaryInd)); + return PI_SUCCESS; +} + +// Device +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Context +inline pi_result piContextCreate(const pi_context_properties *Properties, + pi_uint32 NumDevices, const pi_device *Devices, + void (*PFnNotify)(const char *ErrInfo, + const void *PrivateInfo, + size_t CB, void *UserData), + void *UserData, pi_context *RetContext) { + std::ignore = Properties; + std::ignore = PFnNotify; + std::ignore = UserData; + auto UrDevices = reinterpret_cast(Devices); + + ur_context_handle_t *UrContext = + reinterpret_cast(RetContext); + // TODO: Parse PI Context Properties into UR + ur_context_properties_t UrProperties{}; + HANDLE_ERRORS( + urContextCreate(NumDevices, UrDevices, &UrProperties, UrContext)); + return PI_SUCCESS; +} + +inline pi_result piextContextSetExtendedDeleter( + pi_context Context, pi_context_extended_deleter Function, void *UserData) { + auto hContext = reinterpret_cast(Context); + + HANDLE_ERRORS(urContextSetExtendedDeleter(hContext, Function, UserData)); + + return PI_SUCCESS; +} + +inline pi_result piextContextGetNativeHandle(pi_context Context, + pi_native_handle *NativeHandle) { + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_native_handle_t UrNativeHandle{}; + HANDLE_ERRORS(urContextGetNativeHandle(UrContext, &UrNativeHandle)); + *NativeHandle = reinterpret_cast(UrNativeHandle); + 
return PI_SUCCESS; +} + +inline pi_result piextContextCreateWithNativeHandle( + pi_native_handle NativeHandle, pi_uint32 NumDevices, + const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) { + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Devices, PI_ERROR_INVALID_DEVICE); + PI_ASSERT(RetContext, PI_ERROR_INVALID_VALUE); + PI_ASSERT(NumDevices, PI_ERROR_INVALID_VALUE); + + ur_native_handle_t NativeContext = + reinterpret_cast(NativeHandle); + const ur_device_handle_t *UrDevices = + reinterpret_cast(Devices); + ur_context_handle_t *UrContext = + reinterpret_cast(RetContext); + + ur_context_native_properties_t Properties{}; + Properties.isNativeHandleOwned = OwnNativeHandle; + HANDLE_ERRORS(urContextCreateWithNativeHandle( + NativeContext, NumDevices, UrDevices, &Properties, UrContext)); + + return PI_SUCCESS; +} + +inline pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + + ur_context_handle_t hContext = reinterpret_cast(Context); + ur_context_info_t ContextInfoType{}; + + switch (ParamName) { + case PI_CONTEXT_INFO_DEVICES: { + ContextInfoType = UR_CONTEXT_INFO_DEVICES; + break; + } + case PI_CONTEXT_INFO_NUM_DEVICES: { + ContextInfoType = UR_CONTEXT_INFO_NUM_DEVICES; + break; + } + case PI_CONTEXT_INFO_REFERENCE_COUNT: { + ContextInfoType = UR_CONTEXT_INFO_REFERENCE_COUNT; + break; + } + case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: + case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: { + ContextInfoType = UR_CONTEXT_INFO_USM_FILL2D_SUPPORT; + break; + } + case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: { + ContextInfoType = UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT; + break; + } + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case 
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + // These queries should be dealt with in context_impl.cpp by calling the + // queries of each device separately and building the intersection set. + die("These queries should have never come here"); + } + default: { + die("piContextGetInfo: unsuppported ParamName."); + } + } + + size_t UrParamValueSizeRet; + HANDLE_ERRORS(urContextGetInfo(hContext, ContextInfoType, ParamValueSize, + ParamValue, &UrParamValueSizeRet)); + if (ParamValueSizeRet) { + *ParamValueSizeRet = UrParamValueSizeRet; + } + fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, + ParamValue); + return PI_SUCCESS; +} + +inline pi_result piContextRetain(pi_context Context) { + ur_context_handle_t hContext = reinterpret_cast(Context); + + HANDLE_ERRORS(urContextRetain(hContext)); + + return PI_SUCCESS; +} + +inline pi_result piContextRelease(pi_context Context) { + ur_context_handle_t UrContext = + reinterpret_cast(Context); + HANDLE_ERRORS(urContextRelease(UrContext)); + return PI_SUCCESS; +} +// Context +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Queue +inline pi_result piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, + pi_queue *Queue) { + + PI_ASSERT(Properties, PI_ERROR_INVALID_VALUE); + // Expect flags mask to be passed first. + PI_ASSERT(Properties[0] == PI_QUEUE_FLAGS, PI_ERROR_INVALID_VALUE); + + PI_ASSERT(Properties[2] == 0 || + (Properties[2] == PI_QUEUE_COMPUTE_INDEX && Properties[4] == 0), + PI_ERROR_INVALID_VALUE); + + // Check that unexpected bits are not set. 
+ PI_ASSERT(!(Properties[1] & + ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | + PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | + PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | + PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | + PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | + PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH)), + PI_ERROR_INVALID_VALUE); + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_queue_properties_t UrProperties{}; + UrProperties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; + if (Properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) + UrProperties.flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; + if (Properties[1] & PI_QUEUE_FLAG_PROFILING_ENABLE) + UrProperties.flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; + if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE) + UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE; + if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE_DEFAULT) + UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) + UrProperties.flags |= UR_QUEUE_FLAG_DISCARD_EVENTS; + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) + UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) + UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; + if (Properties[1] & __SYCL_PI_CUDA_SYNC_WITH_DEFAULT) + UrProperties.flags |= UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; + if (Properties[1] & __SYCL_PI_CUDA_USE_DEFAULT_STREAM) + UrProperties.flags |= UR_QUEUE_FLAG_USE_DEFAULT_STREAM; + + ur_queue_index_properties_t IndexProperties{}; + IndexProperties.stype = UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES; + if (Properties[2] != 0) { + IndexProperties.computeIndex = Properties[3]; + } + + UrProperties.pNext = &IndexProperties; + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + + ur_queue_handle_t *UrQueue = 
reinterpret_cast(Queue); + HANDLE_ERRORS(urQueueCreate(UrContext, UrDevice, &UrProperties, UrQueue)); + + return PI_SUCCESS; +} + +inline pi_result piQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties Flags, pi_queue *Queue) { + pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; + return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); +} + +inline pi_result piextQueueCreateWithNativeHandle( + pi_native_handle NativeHandle, int32_t NativeHandleDesc, pi_context Context, + pi_device Device, bool OwnNativeHandle, pi_queue_properties *Properties, + pi_queue *Queue) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + ur_native_handle_t UrNativeHandle = + reinterpret_cast(NativeHandle); + ur_queue_handle_t *UrQueue = reinterpret_cast(Queue); + ur_queue_native_properties_t UrNativeProperties{}; + UrNativeProperties.isNativeHandleOwned = OwnNativeHandle; + + ur_queue_properties_t UrProperties{}; + UrProperties.stype = UR_STRUCTURE_TYPE_QUEUE_PROPERTIES; + if (Properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) + UrProperties.flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; + if (Properties[1] & PI_QUEUE_FLAG_PROFILING_ENABLE) + UrProperties.flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; + if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE) + UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE; + if (Properties[1] & PI_QUEUE_FLAG_ON_DEVICE_DEFAULT) + UrProperties.flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) + UrProperties.flags |= UR_QUEUE_FLAG_DISCARD_EVENTS; + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) + UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; + if (Properties[1] & 
PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) + UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; + + ur_queue_native_desc_t UrNativeDesc{}; + UrNativeDesc.stype = UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC; + UrNativeDesc.pNativeData = &NativeHandleDesc; + + UrProperties.pNext = &UrNativeDesc; + UrNativeProperties.pNext = &UrProperties; + + HANDLE_ERRORS(urQueueCreateWithNativeHandle( + UrNativeHandle, UrContext, UrDevice, &UrNativeProperties, UrQueue)); + return PI_SUCCESS; +} + +inline pi_result piextQueueGetNativeHandle(pi_queue Queue, + pi_native_handle *NativeHandle, + int32_t *NativeHandleDesc) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_queue_native_desc_t UrNativeDesc{}; + UrNativeDesc.pNativeData = NativeHandleDesc; + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + ur_native_handle_t UrNativeQueue{}; + HANDLE_ERRORS(urQueueGetNativeHandle(UrQueue, &UrNativeDesc, &UrNativeQueue)); + + *NativeHandle = reinterpret_cast(UrNativeQueue); + + return PI_SUCCESS; +} + +inline pi_result piQueueRelease(pi_queue Queue) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + HANDLE_ERRORS(urQueueRelease(UrQueue)); + + return PI_SUCCESS; +} + +inline pi_result piQueueFinish(pi_queue Queue) { + // Wait until command lists attached to the command queue are executed. 
+ PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + HANDLE_ERRORS(urQueueFinish(UrQueue)); + + return PI_SUCCESS; +} + +inline pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + ur_queue_info_t UrParamName{}; + + switch (ParamName) { + case PI_QUEUE_INFO_CONTEXT: { + UrParamName = UR_QUEUE_INFO_CONTEXT; + break; + } + case PI_QUEUE_INFO_DEVICE: { + UrParamName = UR_QUEUE_INFO_DEVICE; + break; + } + case PI_QUEUE_INFO_DEVICE_DEFAULT: { + UrParamName = UR_QUEUE_INFO_DEVICE_DEFAULT; + break; + } + case PI_QUEUE_INFO_PROPERTIES: { + UrParamName = UR_QUEUE_INFO_FLAGS; + break; + } + case PI_QUEUE_INFO_REFERENCE_COUNT: { + UrParamName = UR_QUEUE_INFO_REFERENCE_COUNT; + break; + } + case PI_QUEUE_INFO_SIZE: { + UrParamName = UR_QUEUE_INFO_SIZE; + break; + } + case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: { + UrParamName = UR_QUEUE_INFO_EMPTY; + break; + } + default: { + die("Unsupported ParamName in piQueueGetInfo"); + return PI_ERROR_INVALID_VALUE; + } + } + + HANDLE_ERRORS(urQueueGetInfo(UrQueue, UrParamName, ParamValueSize, ParamValue, + ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piQueueRetain(pi_queue Queue) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + HANDLE_ERRORS(urQueueRetain(UrQueue)); + + return PI_SUCCESS; +} + +inline pi_result piQueueFlush(pi_queue Queue) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + HANDLE_ERRORS(urQueueFlush(UrQueue)); + + return PI_SUCCESS; +} + +// Queue +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Program + +inline pi_result 
piProgramCreate(pi_context Context, const void *ILBytes, + size_t Length, pi_program *Program) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(ILBytes && Length, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_program_properties_t UrProperties{}; + ur_program_handle_t *UrProgram = + reinterpret_cast(Program); + HANDLE_ERRORS(urProgramCreateWithIL(UrContext, ILBytes, Length, &UrProperties, + UrProgram)); + + return PI_SUCCESS; +} + +inline pi_result piProgramCreateWithBinary( + pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, + const size_t *Lengths, const unsigned char **Binaries, + size_t NumMetadataEntries, const pi_device_binary_property *Metadata, + pi_int32 *BinaryStatus, pi_program *Program) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(DeviceList && NumDevices, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Binaries && Lengths, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + // For now we support only one device. 
+ if (NumDevices != 1) { + die("piProgramCreateWithBinary: level_zero supports only one device."); + return PI_ERROR_INVALID_VALUE; + } + if (!Binaries[0] || !Lengths[0]) { + if (BinaryStatus) + *BinaryStatus = PI_ERROR_INVALID_VALUE; + return PI_ERROR_INVALID_VALUE; + } + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(DeviceList[0]); + + ur_program_properties_t Properties = {}; + Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; + Properties.pNext = nullptr; + Properties.count = NumMetadataEntries; + + std::unique_ptr pMetadatas; + if (NumMetadataEntries) { + pMetadatas.reset(new ur_program_metadata_t[NumMetadataEntries]); + for (unsigned i = 0; i < NumMetadataEntries; i++) { + HANDLE_ERRORS(mapPIMetadataToUR(&Metadata[i], &pMetadatas[i])); + } + + Properties.pMetadatas = pMetadatas.get(); + } + + ur_program_handle_t *UrProgram = + reinterpret_cast(Program); + HANDLE_ERRORS(urProgramCreateWithBinary(UrContext, UrDevice, Lengths[0], + Binaries[0], &Properties, UrProgram)); + + if (BinaryStatus) + *BinaryStatus = PI_SUCCESS; + + return PI_SUCCESS; +} + +inline pi_result piclProgramCreateWithSource(pi_context Context, + pi_uint32 Count, + const char **Strings, + const size_t *Lengths, + pi_program *RetProgram) { + std::ignore = Context; + std::ignore = Count; + std::ignore = Strings; + std::ignore = Lengths; + std::ignore = RetProgram; + die("piclProgramCreateWithSource: not supported in UR\n"); + return PI_ERROR_INVALID_OPERATION; +} + +inline pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + + ur_program_info_t PropName{}; + + switch (ParamName) { + case PI_PROGRAM_INFO_REFERENCE_COUNT: { + PropName = UR_PROGRAM_INFO_REFERENCE_COUNT; + break; + } + case PI_PROGRAM_INFO_CONTEXT: { + PropName = 
UR_PROGRAM_INFO_CONTEXT; + break; + } + case PI_PROGRAM_INFO_NUM_DEVICES: { + PropName = UR_PROGRAM_INFO_NUM_DEVICES; + break; + } + case PI_PROGRAM_INFO_DEVICES: { + PropName = UR_PROGRAM_INFO_DEVICES; + break; + } + case PI_PROGRAM_INFO_SOURCE: { + PropName = UR_PROGRAM_INFO_SOURCE; + break; + } + case PI_PROGRAM_INFO_BINARY_SIZES: { + PropName = UR_PROGRAM_INFO_BINARY_SIZES; + break; + } + case PI_PROGRAM_INFO_BINARIES: { + PropName = UR_PROGRAM_INFO_BINARIES; + break; + } + case PI_PROGRAM_INFO_NUM_KERNELS: { + PropName = UR_PROGRAM_INFO_NUM_KERNELS; + break; + } + case PI_PROGRAM_INFO_KERNEL_NAMES: { + PropName = UR_PROGRAM_INFO_KERNEL_NAMES; + break; + } + default: { + die("urProgramGetInfo: not implemented"); + } + } + + HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, ParamValueSize, + ParamValue, ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result +piProgramLink(pi_context Context, pi_uint32 NumDevices, + const pi_device *DeviceList, const char *Options, + pi_uint32 NumInputPrograms, const pi_program *InputPrograms, + void (*PFnNotify)(pi_program Program, void *UserData), + void *UserData, pi_program *RetProgram) { + // We only support one device with Level Zero currently. + if (NumDevices != 1) { + die("piProgramLink: level_zero supports only one device."); + return PI_ERROR_INVALID_VALUE; + } + + // Validate input parameters. 
+ PI_ASSERT(DeviceList, PI_ERROR_INVALID_DEVICE); + PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); + if (NumInputPrograms == 0 || InputPrograms == nullptr) + return PI_ERROR_INVALID_VALUE; + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + const ur_program_handle_t *UrInputPrograms = + reinterpret_cast(InputPrograms); + ur_program_handle_t *UrProgram = + reinterpret_cast(RetProgram); + + HANDLE_ERRORS(urProgramLink(UrContext, NumInputPrograms, UrInputPrograms, + Options, UrProgram)); + + return PI_SUCCESS; +} + +inline pi_result piProgramCompile( + pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, + const char *Options, pi_uint32 NumInputHeaders, + const pi_program *InputHeaders, const char **HeaderIncludeNames, + void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { + + std::ignore = NumInputHeaders; + std::ignore = InputHeaders; + std::ignore = HeaderIncludeNames; + + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) + return PI_ERROR_INVALID_VALUE; + + // These aren't supported. + PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + + ur_program_info_t PropName = UR_PROGRAM_INFO_CONTEXT; + ur_context_handle_t UrContext{}; + HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, sizeof(&UrContext), + &UrContext, nullptr)); + + HANDLE_ERRORS(urProgramCompile(UrContext, UrProgram, Options)); + + return PI_SUCCESS; +} + +inline pi_result +piProgramBuild(pi_program Program, pi_uint32 NumDevices, + const pi_device *DeviceList, const char *Options, + void (*PFnNotify)(pi_program Program, void *UserData), + void *UserData) { + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) { + return PI_ERROR_INVALID_VALUE; + } + + // We only support build to one device with Level Zero now. 
+ // TODO: we should eventually build to the possibly multiple root + // devices in the context. + if (NumDevices != 1) { + die("piProgramBuild: level_zero supports only one device."); + return PI_ERROR_INVALID_VALUE; + } + + // These aren't supported. + PI_ASSERT(!PFnNotify && !UserData, PI_ERROR_INVALID_VALUE); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + ur_program_info_t PropName = UR_PROGRAM_INFO_CONTEXT; + ur_context_handle_t UrContext{}; + HANDLE_ERRORS(urProgramGetInfo(UrProgram, PropName, sizeof(&UrContext), + &UrContext, nullptr)); + + HANDLE_ERRORS(urProgramBuild(UrContext, UrProgram, Options)); + + return PI_SUCCESS; +} + +inline pi_result piextProgramSetSpecializationConstant(pi_program Program, + pi_uint32 SpecID, + size_t Size, + const void *SpecValue) { + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + uint32_t Count = 1; + ur_specialization_constant_info_t SpecConstant{}; + SpecConstant.id = SpecID; + SpecConstant.size = Size; + SpecConstant.pValue = SpecValue; + HANDLE_ERRORS( + urProgramSetSpecializationConstants(UrProgram, Count, &SpecConstant)); + + return PI_SUCCESS; +} + +inline pi_result piKernelCreate(pi_program Program, const char *KernelName, + pi_kernel *RetKernel) { + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + PI_ASSERT(RetKernel, PI_ERROR_INVALID_VALUE); + PI_ASSERT(KernelName, PI_ERROR_INVALID_VALUE); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + ur_kernel_handle_t *UrKernel = + reinterpret_cast(RetKernel); + + HANDLE_ERRORS(urKernelCreate(UrProgram, KernelName, UrKernel)); + + return PI_SUCCESS; +} + +inline pi_result +piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, const void *FillColor, + const size_t *Origin, const size_t *Region, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *Event) { + + std::ignore = Image; + std::ignore = FillColor; + std::ignore = Origin; + std::ignore = Region; + std::ignore = NumEventsInWaitList; + std::ignore = 
EventsWaitList; + std::ignore = Event; + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + die("piEnqueueMemImageFill: not implemented"); + return PI_SUCCESS; +} + +inline pi_result +piEnqueueNativeKernel(pi_queue Queue, void (*UserFunc)(void *), void *Args, + size_t CbArgs, pi_uint32 NumMemObjects, + const pi_mem *MemList, const void **ArgsMemLoc, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *Event) { + std::ignore = UserFunc; + std::ignore = Args; + std::ignore = CbArgs; + std::ignore = NumMemObjects; + std::ignore = MemList; + std::ignore = ArgsMemLoc; + std::ignore = NumEventsInWaitList; + std::ignore = EventsWaitList; + std::ignore = Event; + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + die("piEnqueueNativeKernel: not implemented"); + return PI_SUCCESS; +} + +inline pi_result piextGetDeviceFunctionPointer(pi_device Device, + pi_program Program, + const char *FunctionName, + pi_uint64 *FunctionPointerRet) { + + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + auto UrDevice = reinterpret_cast(Device); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + + void **FunctionPointer = reinterpret_cast(FunctionPointerRet); + + HANDLE_ERRORS(urProgramGetFunctionPointer(UrDevice, UrProgram, FunctionName, + FunctionPointer)); + return PI_SUCCESS; +} + +// Special version of piKernelSetArg to accept pi_mem. +inline pi_result +piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_mem_obj_property *ArgProperties, + const pi_mem *ArgValue) { + + // TODO: the better way would probably be to add a new PI API for + // extracting native PI object from PI handle, and have SYCL + // RT pass that directly to the regular piKernelSetArg (and + // then remove this piextKernelSetArgMemObj). + + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + + ur_mem_handle_t UrMemory{}; + if (ArgValue) + UrMemory = reinterpret_cast(*ArgValue); + + // We don't yet know the device where this kernel will next be run on. 
+ // Thus we can't know the actual memory allocation that needs to be used. + // Remember the memory object being used as an argument for this kernel + // to process it later when the device is known (at the kernel enqueue). + // + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + // the only applicable type, just ignore anything else + if (ArgProperties && ArgProperties->type == PI_KERNEL_ARG_MEM_OBJ_ACCESS) { + // following structure layout checks to be replaced with + // std::is_layout_compatible after move to C++20 + static_assert(sizeof(pi_mem_obj_property) == + sizeof(ur_kernel_arg_mem_obj_properties_t)); + static_assert(sizeof(pi_mem_obj_property::type) == + sizeof(ur_kernel_arg_mem_obj_properties_t::stype)); + static_assert(sizeof(pi_mem_obj_property::pNext) == + sizeof(ur_kernel_arg_mem_obj_properties_t::pNext)); + static_assert(sizeof(pi_mem_obj_property::mem_access) == + sizeof(ur_kernel_arg_mem_obj_properties_t::memoryAccess)); + + static_assert(uint32_t(PI_ACCESS_READ_WRITE) == + uint32_t(UR_MEM_FLAG_READ_WRITE)); + static_assert(uint32_t(PI_ACCESS_READ_ONLY) == + uint32_t(UR_MEM_FLAG_READ_ONLY)); + static_assert(uint32_t(PI_ACCESS_WRITE_ONLY) == + uint32_t(UR_MEM_FLAG_WRITE_ONLY)); + static_assert(uint32_t(PI_KERNEL_ARG_MEM_OBJ_ACCESS) == + uint32_t(UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES)); + + const ur_kernel_arg_mem_obj_properties_t *UrMemProperties = + reinterpret_cast( + ArgProperties); + HANDLE_ERRORS( + urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemProperties, UrMemory)); + } else { + HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, nullptr, UrMemory)); + } + + return PI_SUCCESS; +} + +inline pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, + size_t ArgSize, const void *ArgValue) { + + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + + HANDLE_ERRORS( + urKernelSetArgValue(UrKernel, ArgIndex, ArgSize, nullptr, ArgValue)); + return PI_SUCCESS; +} + 
+inline pi_result piKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, + size_t ArgSize, const void *ArgValue) { + std::ignore = ArgSize; + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + HANDLE_ERRORS(urKernelSetArgPointer(UrKernel, ArgIndex, nullptr, ArgValue)); + + return PI_SUCCESS; +} + +inline pi_result +piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, pi_program Program, + bool OwnNativeHandle, pi_kernel *Kernel) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + + ur_native_handle_t UrNativeKernel = + reinterpret_cast(NativeHandle); + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + ur_kernel_handle_t *UrKernel = reinterpret_cast(Kernel); + ur_kernel_native_properties_t Properties{}; + Properties.isNativeHandleOwned = OwnNativeHandle; + HANDLE_ERRORS(urKernelCreateWithNativeHandle( + UrNativeKernel, UrContext, UrProgram, &Properties, UrKernel)); + + return PI_SUCCESS; +} + +inline pi_result piProgramRetain(pi_program Program) { + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + HANDLE_ERRORS( + urProgramRetain(reinterpret_cast(UrProgram))); + + return PI_SUCCESS; +} + +inline pi_result piKernelSetExecInfo(pi_kernel Kernel, + pi_kernel_exec_info ParamName, + size_t ParamValueSize, + const void *ParamValue) { + + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + PI_ASSERT(ParamValue, PI_ERROR_INVALID_VALUE); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + ur_kernel_exec_info_t PropName{}; + uint64_t PropValue{}; + switch (ParamName) { + case PI_USM_INDIRECT_ACCESS: { + PropName = UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS; + PropValue = *(static_cast(const_cast(ParamValue))); + break; + } + case PI_USM_PTRS: { + 
PropName = UR_KERNEL_EXEC_INFO_USM_PTRS; + break; + } + case PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG: { + PropName = UR_KERNEL_EXEC_INFO_CACHE_CONFIG; + auto Param = (*(static_cast(ParamValue))); + if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM) { + PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_LARGE_SLM); + } else if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA) { + PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_LARGE_DATA); + break; + } else if (Param == PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT) { + PropValue = static_cast(UR_KERNEL_CACHE_CONFIG_DEFAULT); + } else { + die("piKernelSetExecInfo: unsupported ParamValue\n"); + } + break; + } + default: + die("piKernelSetExecInfo: unsupported ParamName\n"); + } + HANDLE_ERRORS(urKernelSetExecInfo(UrKernel, PropName, ParamValueSize, nullptr, + &PropValue)); + + return PI_SUCCESS; +} + +inline pi_result piextProgramGetNativeHandle(pi_program Program, + pi_native_handle *NativeHandle) { + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + ur_native_handle_t NativeProgram{}; + HANDLE_ERRORS(urProgramGetNativeHandle(UrProgram, &NativeProgram)); + + *NativeHandle = reinterpret_cast(NativeProgram); + + return PI_SUCCESS; +} + +inline pi_result +piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, bool OwnNativeHandle, + pi_program *Program) { + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + + ur_native_handle_t NativeProgram = + reinterpret_cast(NativeHandle); + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_program_handle_t *UrProgram = + reinterpret_cast(Program); + ur_program_native_properties_t UrProperties{}; + UrProperties.isNativeHandleOwned = OwnNativeHandle; + HANDLE_ERRORS(urProgramCreateWithNativeHandle(NativeProgram, UrContext, + &UrProperties, 
UrProgram)); + return PI_SUCCESS; +} + +inline pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + ur_kernel_info_t UrParamName{}; + switch (ParamName) { + case PI_KERNEL_INFO_FUNCTION_NAME: { + UrParamName = UR_KERNEL_INFO_FUNCTION_NAME; + break; + } + case PI_KERNEL_INFO_NUM_ARGS: { + UrParamName = UR_KERNEL_INFO_NUM_ARGS; + break; + } + case PI_KERNEL_INFO_REFERENCE_COUNT: { + UrParamName = UR_KERNEL_INFO_REFERENCE_COUNT; + break; + } + case PI_KERNEL_INFO_CONTEXT: { + UrParamName = UR_KERNEL_INFO_CONTEXT; + break; + } + case PI_KERNEL_INFO_PROGRAM: { + UrParamName = UR_KERNEL_INFO_PROGRAM; + break; + } + case PI_KERNEL_INFO_ATTRIBUTES: { + UrParamName = UR_KERNEL_INFO_ATTRIBUTES; + break; + } + default: + return PI_ERROR_INVALID_PROPERTY; + } + + HANDLE_ERRORS(urKernelGetInfo(UrKernel, UrParamName, ParamValueSize, + ParamValue, ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, + pi_kernel_group_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + auto UrDevice = reinterpret_cast(Device); + + ur_kernel_group_info_t UrParamName{}; + switch (ParamName) { + case PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { + UrParamName = UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE; + break; + } + case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { + UrParamName = UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE; + break; + } + case PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { + UrParamName = UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE; + break; + } + case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { + UrParamName = UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE; + 
break; + } + case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { + UrParamName = UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; + break; + } + case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { + UrParamName = UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE; + break; + } + // The number of registers used by the compiled kernel (device specific) + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + HANDLE_ERRORS(urKernelGetInfo(UrKernel, UR_KERNEL_INFO_NUM_REGS, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + return PI_SUCCESS; + } + default: { + die("Unknown ParamName in piKernelGetGroupInfo"); + return PI_ERROR_INVALID_VALUE; + } + } + + HANDLE_ERRORS(urKernelGetGroupInfo(UrKernel, UrDevice, UrParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piKernelRetain(pi_kernel Kernel) { + + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + + HANDLE_ERRORS(urKernelRetain(UrKernel)); + + return PI_SUCCESS; +} + +inline pi_result piKernelRelease(pi_kernel Kernel) { + + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + + HANDLE_ERRORS(urKernelRelease(UrKernel)); + + return PI_SUCCESS; +} + +inline pi_result piProgramRelease(pi_program Program) { + + PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + + HANDLE_ERRORS(urProgramRelease(UrProgram)); + + return PI_SUCCESS; +} + +inline pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, + size_t ArgSize, + const void *ArgValue) { + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + + HANDLE_ERRORS( + urKernelSetArgValue(UrKernel, ArgIndex, ArgSize, nullptr, ArgValue)); + + return PI_SUCCESS; +} + +inline pi_result piKernelGetSubGroupInfo( + pi_kernel Kernel, pi_device Device, pi_kernel_sub_group_info ParamName, + size_t InputValueSize, const void *InputValue, size_t 
ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + + std::ignore = InputValueSize; + std::ignore = InputValue; + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + auto UrDevice = reinterpret_cast(Device); + + ur_kernel_sub_group_info_t PropName{}; + switch (ParamName) { + case PI_KERNEL_MAX_SUB_GROUP_SIZE: { + PropName = UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE; + break; + } + case PI_KERNEL_MAX_NUM_SUB_GROUPS: { + PropName = UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS; + break; + } + case PI_KERNEL_COMPILE_NUM_SUB_GROUPS: { + PropName = UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS; + break; + } + case PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: { + PropName = UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL; + break; + } + } + HANDLE_ERRORS(urKernelGetSubGroupInfo(UrKernel, UrDevice, PropName, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, + pi_program_build_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + auto UrDevice = reinterpret_cast(Device); + + ur_program_build_info_t PropName{}; + switch (ParamName) { + case PI_PROGRAM_BUILD_INFO_STATUS: { + PropName = UR_PROGRAM_BUILD_INFO_STATUS; + break; + } + case PI_PROGRAM_BUILD_INFO_OPTIONS: { + PropName = UR_PROGRAM_BUILD_INFO_OPTIONS; + break; + } + case PI_PROGRAM_BUILD_INFO_LOG: { + PropName = UR_PROGRAM_BUILD_INFO_LOG; + break; + } + case PI_PROGRAM_BUILD_INFO_BINARY_TYPE: { + PropName = UR_PROGRAM_BUILD_INFO_BINARY_TYPE; + break; + } + default: { + die("piProgramGetBuildInfo: not implemented"); + } + } + HANDLE_ERRORS(urProgramGetBuildInfo(UrProgram, UrDevice, PropName, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piextKernelGetNativeHandle(pi_kernel Kernel, + pi_native_handle *NativeHandle) { + PI_ASSERT(Kernel, 
PI_ERROR_INVALID_KERNEL); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + ur_native_handle_t NativeKernel{}; + HANDLE_ERRORS(urKernelGetNativeHandle(UrKernel, &NativeKernel)); + + *NativeHandle = reinterpret_cast(NativeKernel); + + return PI_SUCCESS; +} + +/// API for writing data from host to a device global variable. +/// +/// \param Queue is the queue +/// \param Program is the program containing the device global variable +/// \param Name is the unique identifier for the device global variable +/// \param BlockingWrite is true if the write should block +/// \param Count is the number of bytes to copy +/// \param Offset is the byte offset into the device global variable to start +/// copying +/// \param Src is a pointer to where the data must be copied from +/// \param NumEventsInWaitList is a number of events in the wait list +/// \param EventWaitList is the wait list +/// \param Event is the resulting event +inline pi_result piextEnqueueDeviceGlobalVariableWrite( + pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, + size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + HANDLE_ERRORS(urEnqueueDeviceGlobalVariableWrite( + UrQueue, UrProgram, Name, BlockingWrite, Count, Offset, Src, + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +/// API reading data from a device global variable to host. 
+/// +/// \param Queue is the queue +/// \param Program is the program containing the device global variable +/// \param Name is the unique identifier for the device global variable +/// \param BlockingRead is true if the read should block +/// \param Count is the number of bytes to copy +/// \param Offset is the byte offset into the device global variable to start +/// copying +/// \param Dst is a pointer to where the data must be copied to +/// \param NumEventsInWaitList is a number of events in the wait list +/// \param EventWaitList is the wait list +/// \param Event is the resulting event +inline pi_result piextEnqueueDeviceGlobalVariableRead( + pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, + size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_program_handle_t UrProgram = + reinterpret_cast(Program); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueDeviceGlobalVariableRead( + UrQueue, UrProgram, Name, BlockingRead, Count, Offset, Dst, + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +// Program +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Memory +inline pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, + size_t Size, void *HostPtr, pi_mem *RetMem, + const pi_mem_properties *properties) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetMem, PI_ERROR_INVALID_VALUE); + + if (properties != nullptr) { + die("piMemBufferCreate: no mem properties goes to Level-Zero RT yet"); + } + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + 
ur_mem_flags_t UrBufferFlags{}; + if (Flags & PI_MEM_FLAGS_ACCESS_RW) { + UrBufferFlags |= UR_MEM_FLAG_READ_WRITE; + } + if (Flags & PI_MEM_ACCESS_READ_ONLY) { + UrBufferFlags |= UR_MEM_FLAG_READ_ONLY; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { + UrBufferFlags |= UR_MEM_FLAG_USE_HOST_POINTER; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { + UrBufferFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { + UrBufferFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; + } + + ur_buffer_properties_t UrProps{}; + UrProps.stype = UR_STRUCTURE_TYPE_BUFFER_PROPERTIES; + UrProps.pHost = HostPtr; + ur_mem_handle_t *UrBuffer = reinterpret_cast(RetMem); + HANDLE_ERRORS( + urMemBufferCreate(UrContext, UrBufferFlags, Size, &UrProps, UrBuffer)); + + return PI_SUCCESS; +} + +inline pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, + pi_usm_mem_properties *Properties, + size_t Size, pi_uint32 Alignment) { + + std::ignore = Properties; + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_usm_desc_t USMDesc{}; + USMDesc.align = Alignment; + ur_usm_pool_handle_t Pool{}; + HANDLE_ERRORS(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, ResultPtr)); + return PI_SUCCESS; +} + +inline pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Mem, PI_ERROR_INVALID_VALUE); + // piMemImageGetInfo must be used for images + + ur_mem_handle_t UrMemory = reinterpret_cast(Mem); + ur_mem_info_t MemInfoType{}; + switch (ParamName) { + case PI_MEM_CONTEXT: { + MemInfoType = UR_MEM_INFO_CONTEXT; + break; + } + case PI_MEM_SIZE: { + MemInfoType = UR_MEM_INFO_SIZE; + break; + } + default: { + die("piMemGetInfo: unsuppported ParamName."); + } + } + HANDLE_ERRORS(urMemGetInfo(UrMemory, MemInfoType, ParamValueSize, ParamValue, + ParamValueSizeRet)); + return PI_SUCCESS; +} + +static void pi2urImageDesc(const pi_image_format *ImageFormat, + const pi_image_desc 
*ImageDesc, + ur_image_format_t *UrFormat, + ur_image_desc_t *UrDesc) { + + switch (ImageFormat->image_channel_data_type) { + case PI_IMAGE_CHANNEL_TYPE_SNORM_INT8: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SNORM_INT8; + break; + } + case PI_IMAGE_CHANNEL_TYPE_SNORM_INT16: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SNORM_INT16; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNORM_INT8: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_INT8; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNORM_INT16: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_INT16; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_INT_101010; + break; + } + case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8; + break; + } + case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16; + break; + } + case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; + break; + } + case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; + break; + } + case PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT; + break; + } + case PI_IMAGE_CHANNEL_TYPE_FLOAT: { + UrFormat->channelType = UR_IMAGE_CHANNEL_TYPE_FLOAT; + break; + } + default: { + die("piMemImageCreate: unsuppported image_channel_data_type."); + } 
+ } + switch (ImageFormat->image_channel_order) { + case PI_IMAGE_CHANNEL_ORDER_A: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_A; + break; + } + case PI_IMAGE_CHANNEL_ORDER_R: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_R; + break; + } + case PI_IMAGE_CHANNEL_ORDER_RG: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RG; + break; + } + case PI_IMAGE_CHANNEL_ORDER_RA: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RA; + break; + } + case PI_IMAGE_CHANNEL_ORDER_RGB: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGB; + break; + } + case PI_IMAGE_CHANNEL_ORDER_RGBA: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGBA; + break; + } + case PI_IMAGE_CHANNEL_ORDER_BGRA: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_BGRA; + break; + } + case PI_IMAGE_CHANNEL_ORDER_ARGB: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_ARGB; + break; + } + case PI_IMAGE_CHANNEL_ORDER_ABGR: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_ABGR; + break; + } + case PI_IMAGE_CHANNEL_ORDER_INTENSITY: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_INTENSITY; + break; + } + case PI_IMAGE_CHANNEL_ORDER_LUMINANCE: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_LUMINANCE; + break; + } + case PI_IMAGE_CHANNEL_ORDER_Rx: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RX; + break; + } + case PI_IMAGE_CHANNEL_ORDER_RGx: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGX; + break; + } + case PI_IMAGE_CHANNEL_ORDER_RGBx: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_RGBX; + break; + } + case PI_IMAGE_CHANNEL_ORDER_sRGBA: { + UrFormat->channelOrder = UR_IMAGE_CHANNEL_ORDER_SRGBA; + break; + } + default: { + die("piMemImageCreate: unsuppported image_channel_data_type."); + } + } + + UrDesc->stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc->arraySize = ImageDesc->image_array_size; + UrDesc->depth = ImageDesc->image_depth; + UrDesc->height = ImageDesc->image_height; + UrDesc->numMipLevel = ImageDesc->num_mip_levels; + UrDesc->numSamples = 
ImageDesc->num_samples; + UrDesc->rowPitch = ImageDesc->image_row_pitch; + UrDesc->slicePitch = ImageDesc->image_slice_pitch; + switch (ImageDesc->image_type) { + case PI_MEM_TYPE_BUFFER: { + UrDesc->type = UR_MEM_TYPE_BUFFER; + break; + } + case PI_MEM_TYPE_IMAGE2D: { + UrDesc->type = UR_MEM_TYPE_IMAGE2D; + break; + } + case PI_MEM_TYPE_IMAGE3D: { + UrDesc->type = UR_MEM_TYPE_IMAGE3D; + break; + } + case PI_MEM_TYPE_IMAGE2D_ARRAY: { + UrDesc->type = UR_MEM_TYPE_IMAGE2D_ARRAY; + break; + } + case PI_MEM_TYPE_IMAGE1D: { + UrDesc->type = UR_MEM_TYPE_IMAGE1D; + break; + } + case PI_MEM_TYPE_IMAGE1D_ARRAY: { + UrDesc->type = UR_MEM_TYPE_IMAGE1D_ARRAY; + break; + } + case PI_MEM_TYPE_IMAGE1D_BUFFER: { + UrDesc->type = UR_MEM_TYPE_IMAGE1D_BUFFER; + break; + } + default: { + die("piMemImageCreate: unsuppported image_type."); + } + } + UrDesc->width = ImageDesc->image_width; + UrDesc->arraySize = ImageDesc->image_array_size; + UrDesc->arraySize = ImageDesc->image_array_size; +} + +inline pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, + const pi_image_format *ImageFormat, + const pi_image_desc *ImageDesc, void *HostPtr, + pi_mem *RetImage) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetImage, PI_ERROR_INVALID_VALUE); + PI_ASSERT(ImageFormat, PI_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_mem_flags_t UrFlags{}; + if (Flags & PI_MEM_FLAGS_ACCESS_RW) { + UrFlags |= UR_MEM_FLAG_READ_WRITE; + } + if (Flags & PI_MEM_ACCESS_READ_ONLY) { + UrFlags |= UR_MEM_FLAG_READ_ONLY; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { + UrFlags |= UR_MEM_FLAG_USE_HOST_POINTER; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { + UrFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { + UrFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; + } + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); 
+ + // TODO: UrDesc doesn't have something for ImageDesc->buffer + + ur_mem_handle_t *UrMem = reinterpret_cast(RetImage); + HANDLE_ERRORS( + urMemImageCreate(UrContext, UrFlags, &UrFormat, &UrDesc, HostPtr, UrMem)); + + return PI_SUCCESS; +} + +inline pi_result piextMemImageCreateWithNativeHandle( + pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, + const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, + pi_mem *RetImage) { + + PI_ASSERT(RetImage, PI_ERROR_INVALID_VALUE); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + + ur_native_handle_t UrNativeMem = + reinterpret_cast(NativeHandle); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_mem_handle_t *UrMem = reinterpret_cast(RetImage); + ur_mem_native_properties_t Properties{}; + Properties.isNativeHandleOwned = OwnNativeHandle; + + ur_image_format_t UrFormat{}; + ur_image_desc_t UrDesc{}; + pi2urImageDesc(ImageFormat, ImageDesc, &UrFormat, &UrDesc); + + HANDLE_ERRORS(urMemImageCreateWithNativeHandle( + UrNativeMem, UrContext, &UrFormat, &UrDesc, &Properties, UrMem)); + + return PI_SUCCESS; +} + +inline pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, + pi_buffer_create_type BufferCreateType, + void *BufferCreateInfo, pi_mem *RetMem) { + + PI_ASSERT(BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION && + BufferCreateInfo && RetMem, + PI_ERROR_INVALID_VALUE); + + auto Region = (pi_buffer_region)BufferCreateInfo; + PI_ASSERT(Region->size != 0u, PI_ERROR_INVALID_BUFFER_SIZE); + PI_ASSERT(Region->origin <= (Region->origin + Region->size), + PI_ERROR_INVALID_VALUE); + + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + + ur_mem_flags_t UrFlags{}; + if (Flags & PI_MEM_FLAGS_ACCESS_RW) { + UrFlags |= UR_MEM_FLAG_READ_WRITE; + } + if (Flags & PI_MEM_ACCESS_READ_ONLY) { + UrFlags |= UR_MEM_FLAG_READ_ONLY; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_USE) { + UrFlags |= UR_MEM_FLAG_USE_HOST_POINTER; 
+ } + if (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) { + UrFlags |= UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + } + if (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) { + UrFlags |= UR_MEM_FLAG_ALLOC_HOST_POINTER; + } + + ur_buffer_create_type_t UrBufferCreateType{}; + if (BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION) { + UrBufferCreateType = UR_BUFFER_CREATE_TYPE_REGION; + } + + ur_buffer_region_t UrBufferCreateInfo{}; + UrBufferCreateInfo.origin = Region->origin; + UrBufferCreateInfo.size = Region->size; + ur_mem_handle_t *UrMem = reinterpret_cast(RetMem); + HANDLE_ERRORS(urMemBufferPartition(UrBuffer, UrFlags, UrBufferCreateType, + &UrBufferCreateInfo, UrMem)); + + return PI_SUCCESS; +} + +inline pi_result piextMemGetNativeHandle(pi_mem Mem, + pi_native_handle *NativeHandle) { + PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); + + ur_mem_handle_t UrMem = reinterpret_cast(Mem); + ur_native_handle_t NativeMem{}; + HANDLE_ERRORS(urMemGetNativeHandle(UrMem, &NativeMem)); + + *NativeHandle = reinterpret_cast(NativeMem); + + return PI_SUCCESS; +} + +inline pi_result +piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, + pi_image_offset SrcOrigin, pi_image_offset DstOrigin, + pi_image_region Region, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + ur_mem_handle_t UrImageSrc = reinterpret_cast(SrcImage); + ur_mem_handle_t UrImageDst = reinterpret_cast(DstImage); + + ur_rect_offset_t UrSrcOrigin{SrcOrigin->x, SrcOrigin->y, SrcOrigin->z}; + ur_rect_offset_t UrDstOrigin{DstOrigin->x, DstOrigin->y, DstOrigin->z}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth; + UrRegion.height = Region->height; + UrRegion.width = Region->width; + + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemImageCopy( + 
UrQueue, UrImageSrc, UrImageDst, UrSrcOrigin, UrDstOrigin, UrRegion, + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool OwnNativeHandle, + pi_mem *Mem) { + PI_ASSERT(Mem, PI_ERROR_INVALID_VALUE); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + + ur_native_handle_t UrNativeMem = + reinterpret_cast(NativeHandle); + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_mem_handle_t *UrMem = reinterpret_cast(Mem); + ur_mem_native_properties_t Properties{}; + Properties.isNativeHandleOwned = OwnNativeHandle; + HANDLE_ERRORS(urMemBufferCreateWithNativeHandle(UrNativeMem, UrContext, + &Properties, UrMem)); + + return PI_SUCCESS; +} + +inline pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, + pi_device Device, + pi_usm_mem_properties *Properties, + size_t Size, pi_uint32 Alignment) { + + std::ignore = Properties; + ur_context_handle_t UrContext = + reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + + ur_usm_desc_t USMDesc{}; + USMDesc.align = Alignment; + ur_usm_pool_handle_t Pool{}; + HANDLE_ERRORS( + urUSMDeviceAlloc(UrContext, UrDevice, &USMDesc, Pool, Size, ResultPtr)); + + return PI_SUCCESS; +} + +inline pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, + pi_device Device, + pi_usm_mem_properties *Properties, + size_t Size, pi_uint32 Alignment) { + + std::ignore = Properties; + if (Properties && *Properties != 0) { + PI_ASSERT(*(Properties) == PI_MEM_ALLOC_FLAGS && *(Properties + 2) == 0, + PI_ERROR_INVALID_VALUE); + } + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + auto UrDevice = reinterpret_cast(Device); + + ur_usm_desc_t USMDesc{}; + ur_usm_device_desc_t UsmDeviceDesc{}; + UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; + ur_usm_host_desc_t UsmHostDesc{}; + UsmHostDesc.stype = 
UR_STRUCTURE_TYPE_USM_HOST_DESC; + if (Properties) { + if (Properties[0] == PI_MEM_ALLOC_FLAGS) { + if (Properties[1] == PI_MEM_ALLOC_WRTITE_COMBINED) { + UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED; + } + if (Properties[1] == PI_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE) { + UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT; + } + if (Properties[1] == PI_MEM_ALLOC_INITIAL_PLACEMENT_HOST) { + UsmHostDesc.flags |= UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT; + } + if (Properties[1] == PI_MEM_ALLOC_DEVICE_READ_ONLY) { + UsmDeviceDesc.flags |= UR_USM_DEVICE_MEM_FLAG_DEVICE_READ_ONLY; + } + } + } + UsmDeviceDesc.pNext = &UsmHostDesc; + USMDesc.pNext = &UsmDeviceDesc; + + USMDesc.align = Alignment; + + ur_usm_pool_handle_t Pool{}; + HANDLE_ERRORS( + urUSMSharedAlloc(UrContext, UrDevice, &USMDesc, Pool, Size, ResultPtr)); + + return PI_SUCCESS; +} + +inline pi_result piextUSMFree(pi_context Context, void *Ptr) { + ur_context_handle_t UrContext = + reinterpret_cast(Context); + HANDLE_ERRORS(urUSMFree(UrContext, Ptr)); + return PI_SUCCESS; +} + +inline pi_result piMemRetain(pi_mem Mem) { + PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); + + ur_mem_handle_t UrMem = reinterpret_cast(Mem); + + HANDLE_ERRORS(urMemRetain(UrMem)); + + return PI_SUCCESS; +} + +inline pi_result piMemRelease(pi_mem Mem) { + PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); + + ur_mem_handle_t UrMem = reinterpret_cast(Mem); + + HANDLE_ERRORS(urMemRelease(UrMem)); + + return PI_SUCCESS; +} + +/// Hint to migrate memory to the device +/// +/// @param Queue is the queue to submit to +/// @param Ptr points to the memory to migrate +/// @param Size is the number of bytes to migrate +/// @param Flags is a bitfield used to specify memory migration options +/// @param NumEventsInWaitList is the number of events to wait on +/// @param EventsWaitList is an array of events to wait on +/// @param Event is the event that represents this operation +inline pi_result piextUSMEnqueuePrefetch(pi_queue 
Queue, const void *Ptr, + size_t Size, + pi_usm_migration_flags Flags, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + // flags is currently unused so fail if set + PI_ASSERT(Flags == 0, PI_ERROR_INVALID_VALUE); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + // TODO: to map from pi_usm_migration_flags to + // ur_usm_migration_flags_t + // once we have those defined + ur_usm_migration_flags_t UrFlags{}; + HANDLE_ERRORS(urEnqueueUSMPrefetch(UrQueue, Ptr, Size, UrFlags, + NumEventsInWaitList, UrEventsWaitList, + UrEvent)); + + return PI_SUCCESS; +} + +/// USM memadvise API to govern behavior of automatic migration mechanisms +/// +/// @param Queue is the queue to submit to +/// @param Ptr is the data to be advised +/// @param Length is the size in bytes of the meory to advise +/// @param Advice is device specific advice +/// @param Event is the event that represents this operation +/// +inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, + size_t Length, pi_mem_advice Advice, + pi_event *OutEvent) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + ur_usm_advice_flags_t UrAdvice{}; + if (Advice & PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY) { + UrAdvice |= UR_USM_ADVICE_FLAG_SET_READ_MOSTLY; + } + if (Advice & PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY) { + UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY; + } + if (Advice & PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION) { + UrAdvice |= UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION; + } + if (Advice & PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION) { + UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION; + } + if (Advice & PI_MEM_ADVICE_RESET) { + UrAdvice 
|= UR_USM_ADVICE_FLAG_DEFAULT; + } + + HANDLE_ERRORS(urEnqueueUSMAdvise(UrQueue, Ptr, Length, UrAdvice, UrEvent)); + + return PI_SUCCESS; +} + +/// USM 2D Fill API +/// +/// \param queue is the queue to submit to +/// \param ptr is the ptr to fill +/// \param pitch is the total width of the destination memory including padding +/// \param pattern is a pointer with the bytes of the pattern to set +/// \param pattern_size is the size in bytes of the pattern +/// \param width is width in bytes of each row to fill +/// \param height is height the columns to fill +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +inline pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, size_t Pitch, + size_t PatternSize, const void *Pattern, + size_t Width, size_t Height, + pi_uint32 NumEventsWaitList, + const pi_event *EventsWaitList, + pi_event *Event) { + + auto hQueue = reinterpret_cast(Queue); + auto phEventWaitList = + reinterpret_cast(EventsWaitList); + auto phEvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urEnqueueUSMFill2D(hQueue, Ptr, Pitch, PatternSize, Pattern, + Width, Height, NumEventsWaitList, + phEventWaitList, phEvent)); + + return PI_SUCCESS; +} + +inline pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, + size_t Pitch, int Value, size_t Width, + size_t Height, + pi_uint32 NumEventsWaitList, + const pi_event *EventsWaitList, + pi_event *Event) { + std::ignore = Queue; + std::ignore = Ptr; + std::ignore = Pitch; + std::ignore = Value; + std::ignore = Width; + std::ignore = Height; + std::ignore = NumEventsWaitList; + std::ignore = EventsWaitList; + std::ignore = Event; + die("piextUSMEnqueueMemset2D: not implemented"); + return PI_SUCCESS; +} + +inline pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, + pi_mem_alloc_info ParamName, + size_t ParamValueSize, + void *ParamValue, + size_t 
*ParamValueSizeRet) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_usm_alloc_info_t UrParamName{}; + switch (ParamName) { + case PI_MEM_ALLOC_TYPE: { + UrParamName = UR_USM_ALLOC_INFO_TYPE; + break; + } + case PI_MEM_ALLOC_BASE_PTR: { + UrParamName = UR_USM_ALLOC_INFO_BASE_PTR; + break; + } + case PI_MEM_ALLOC_SIZE: { + UrParamName = UR_USM_ALLOC_INFO_SIZE; + break; + } + case PI_MEM_ALLOC_DEVICE: { + UrParamName = UR_USM_ALLOC_INFO_DEVICE; + break; + } + default: { + die("piextUSMGetMemAllocInfo: unsuppported ParamName."); + } + } + + size_t SizeInOut = ParamValueSize; + HANDLE_ERRORS(urUSMGetMemAllocInfo(UrContext, Ptr, UrParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet)) + ur2piUSMAllocInfoValue(UrParamName, ParamValueSize, &SizeInOut, ParamValue); + return PI_SUCCESS; +} + +inline pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + auto hMem = reinterpret_cast(Image); + + ur_image_info_t UrParamName{}; + switch (ParamName) { + case PI_IMAGE_INFO_FORMAT: { + UrParamName = UR_IMAGE_INFO_FORMAT; + break; + } + case PI_IMAGE_INFO_ELEMENT_SIZE: { + UrParamName = UR_IMAGE_INFO_ELEMENT_SIZE; + break; + } + case PI_IMAGE_INFO_ROW_PITCH: { + UrParamName = UR_IMAGE_INFO_ROW_PITCH; + break; + } + case PI_IMAGE_INFO_SLICE_PITCH: { + UrParamName = UR_IMAGE_INFO_SLICE_PITCH; + break; + } + case PI_IMAGE_INFO_WIDTH: { + UrParamName = UR_IMAGE_INFO_WIDTH; + break; + } + case PI_IMAGE_INFO_HEIGHT: { + UrParamName = UR_IMAGE_INFO_HEIGHT; + break; + } + case PI_IMAGE_INFO_DEPTH: { + UrParamName = UR_IMAGE_INFO_DEPTH; + break; + } + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urMemImageGetInfo(hMem, UrParamName, ParamValueSize, ParamValue, + ParamValueSizeRet)); + return PI_SUCCESS; +} + +/// USM 2D Memcpy API +/// +/// \param queue is the queue to submit to +/// \param blocking is 
whether this operation should block the host +/// \param dst_ptr is the location the data will be copied +/// \param dst_pitch is the total width of the destination memory including +/// padding +/// \param src_ptr is the data to be copied +/// \param dst_pitch is the total width of the source memory including padding +/// \param width is width in bytes of each row to be copied +/// \param height is height the columns to be copied +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +inline pi_result piextUSMEnqueueMemcpy2D(pi_queue Queue, pi_bool Blocking, + void *DstPtr, size_t DstPitch, + const void *SrcPtr, size_t SrcPitch, + size_t Width, size_t Height, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + if (!DstPtr || !SrcPtr) + return PI_ERROR_INVALID_VALUE; + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueUSMMemcpy2D( + UrQueue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +// Memory +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Enqueue + +inline pi_result +piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, + const size_t *GlobalWorkOffset, + const size_t *GlobalWorkSize, const size_t *LocalWorkSize, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + + PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + PI_ASSERT((WorkDim > 0) && (WorkDim < 4), 
PI_ERROR_INVALID_WORK_DIMENSION); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueKernelLaunch( + UrQueue, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, + LocalWorkSize, NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result +piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, pi_bool BlockingWrite, + pi_image_offset Origin, pi_image_region Region, + size_t InputRowPitch, size_t InputSlicePitch, + const void *Ptr, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrImage = reinterpret_cast(Image); + ur_rect_offset_t UrOrigin{Origin->x, Origin->y, Origin->z}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth; + UrRegion.height = Region->height; + UrRegion.width = Region->width; + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemImageWrite( + UrQueue, UrImage, BlockingWrite, UrOrigin, UrRegion, InputRowPitch, + InputSlicePitch, const_cast(Ptr), NumEventsInWaitList, + UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result +piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, pi_bool BlockingRead, + pi_image_offset Origin, pi_image_region Region, + size_t RowPitch, size_t SlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrImage = reinterpret_cast(Image); + ur_rect_offset_t UrOrigin{Origin->x, Origin->y, 
Origin->z}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth; + UrRegion.height = Region->height; + UrRegion.width = Region->width; + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemImageRead( + UrQueue, UrImage, BlockingRead, UrOrigin, UrRegion, RowPitch, SlicePitch, + Ptr, NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferMap( + pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, pi_map_flags MapFlags, + size_t Offset, size_t Size, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent, void **RetMap) { + // TODO: we don't implement read-only or write-only, always read-write. + // assert((map_flags & PI_MAP_READ) != 0); + // assert((map_flags & PI_MAP_WRITE) != 0); + PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrMem = reinterpret_cast(Mem); + + ur_map_flags_t UrMapFlags{}; + if (MapFlags & PI_MAP_READ) + UrMapFlags |= UR_MAP_FLAG_READ; + if (MapFlags & PI_MAP_WRITE) + UrMapFlags |= UR_MAP_FLAG_WRITE; + if (MapFlags & PI_MAP_WRITE_INVALIDATE_REGION) + UrMapFlags |= UR_MAP_FLAG_WRITE_INVALIDATE_REGION; + + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferMap(UrQueue, UrMem, BlockingMap, UrMapFlags, + Offset, Size, NumEventsInWaitList, + UrEventsWaitList, UrEvent, RetMap)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = 
reinterpret_cast(Queue); + ur_mem_handle_t UrMem = reinterpret_cast(Mem); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemUnmap(UrQueue, UrMem, MappedPtr, + NumEventsInWaitList, UrEventsWaitList, + UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, + const void *Pattern, size_t PatternSize, + size_t Offset, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferFill(UrQueue, UrBuffer, Pattern, PatternSize, + Offset, Size, NumEventsInWaitList, + UrEventsWaitList, UrEvent)); + return PI_SUCCESS; +} + +inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, + pi_int32 Value, size_t Count, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + if (!Ptr) { + return PI_ERROR_INVALID_VALUE; + } + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + size_t PatternSize = 1; + HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, &Value, Count, + NumEventsInWaitList, UrEventsWaitList, + UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferCopyRect( + pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, + pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, + size_t 
SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, + size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *OutEvent) { + + PI_ASSERT(SrcMem && DstMem, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBufferSrc = reinterpret_cast(SrcMem); + ur_mem_handle_t UrBufferDst = reinterpret_cast(DstMem); + ur_rect_offset_t UrSrcOrigin{SrcOrigin->x_bytes, SrcOrigin->y_scalar, + SrcOrigin->z_scalar}; + ur_rect_offset_t UrDstOrigin{DstOrigin->x_bytes, DstOrigin->y_scalar, + DstOrigin->z_scalar}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth_scalar; + UrRegion.height = Region->height_scalar; + UrRegion.width = Region->width_bytes; + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferCopyRect( + UrQueue, UrBufferSrc, UrBufferDst, UrSrcOrigin, UrDstOrigin, UrRegion, + SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, + pi_mem DstMem, size_t SrcOffset, + size_t DstOffset, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(SrcMem && DstMem, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBufferSrc = reinterpret_cast(SrcMem); + ur_mem_handle_t UrBufferDst = reinterpret_cast(DstMem); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferCopy( + UrQueue, UrBufferSrc, UrBufferDst, SrcOffset, DstOffset, Size, + NumEventsInWaitList, UrEventsWaitList, 
UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, + void *DstPtr, const void *SrcPtr, + size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueUSMMemcpy(UrQueue, Blocking, DstPtr, SrcPtr, Size, + NumEventsInWaitList, UrEventsWaitList, + UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferWriteRect( + pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, + BufferOffset->z_scalar}; + ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, + HostOffset->z_scalar}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth_scalar; + UrRegion.height = Region->height_scalar; + UrRegion.width = Region->width_bytes; + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferWriteRect( + UrQueue, UrBuffer, BlockingWrite, UrBufferOffset, UrHostOffset, UrRegion, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, + const_cast(Ptr), NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return 
PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, + pi_bool BlockingWrite, size_t Offset, + size_t Size, const void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferWrite( + UrQueue, UrBuffer, BlockingWrite, Offset, Size, const_cast(Ptr), + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferReadRect( + pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, + BufferOffset->z_scalar}; + ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, + HostOffset->z_scalar}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth_scalar; + UrRegion.height = Region->height_scalar; + UrRegion.width = Region->width_bytes; + + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferReadRect( + UrQueue, UrBuffer, BlockingRead, UrBufferOffset, 
UrHostOffset, UrRegion, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumEventsInWaitList, UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, + pi_bool BlockingRead, size_t Offset, + size_t Size, void *Dst, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + PI_ASSERT(Src, PI_ERROR_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + ur_mem_handle_t UrBuffer = reinterpret_cast(Src); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueMemBufferRead(UrQueue, UrBuffer, BlockingRead, Offset, + Size, Dst, NumEventsInWaitList, + UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueEventsWaitWithBarrier(UrQueue, NumEventsInWaitList, + UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEnqueueEventsWait(pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { + + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + if (EventsWaitList) { + PI_ASSERT(NumEventsInWaitList > 0, PI_ERROR_INVALID_VALUE); + } + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + ur_event_handle_t *UrEvent = reinterpret_cast(OutEvent); + + HANDLE_ERRORS(urEnqueueEventsWait(UrQueue, 
NumEventsInWaitList, + UrEventsWaitList, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result +piextEnqueueReadHostPipe(pi_queue queue, pi_program program, + const char *pipe_symbol, pi_bool blocking, void *ptr, + size_t size, pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, pi_event *event) { + auto hQueue = reinterpret_cast(queue); + auto hProgram = reinterpret_cast(program); + auto phEventWaitList = + reinterpret_cast(events_waitlist); + auto phEvent = reinterpret_cast(event); + + HANDLE_ERRORS(urEnqueueReadHostPipe(hQueue, hProgram, pipe_symbol, blocking, + ptr, size, num_events_in_waitlist, + phEventWaitList, phEvent)); + + return PI_SUCCESS; +} + +inline pi_result +piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, + const char *pipe_symbol, pi_bool blocking, void *ptr, + size_t size, pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, pi_event *event) { + auto hQueue = reinterpret_cast(queue); + auto hProgram = reinterpret_cast(program); + auto phEventWaitList = + reinterpret_cast(events_waitlist); + auto phEvent = reinterpret_cast(event); + + HANDLE_ERRORS(urEnqueueWriteHostPipe(hQueue, hProgram, pipe_symbol, blocking, + ptr, size, num_events_in_waitlist, + phEventWaitList, phEvent)); + + return PI_SUCCESS; +} +// Enqueue +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Events +inline pi_result piEventsWait(pi_uint32 NumEvents, + const pi_event *EventsWaitList) { + if (NumEvents && !EventsWaitList) { + return PI_ERROR_INVALID_EVENT; + } + + const ur_event_handle_t *UrEventsWaitList = + reinterpret_cast(EventsWaitList); + + HANDLE_ERRORS(urEventWait(NumEvents, UrEventsWaitList)); + + return PI_SUCCESS; +} + +inline pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + + PI_ASSERT(Event, 
PI_ERROR_INVALID_EVENT); + + ur_event_handle_t UrEvent = reinterpret_cast(Event); + + ur_event_info_t PropName{}; + if (ParamName == PI_EVENT_INFO_COMMAND_QUEUE) { + PropName = UR_EVENT_INFO_COMMAND_QUEUE; + } else if (ParamName == PI_EVENT_INFO_CONTEXT) { + PropName = UR_EVENT_INFO_CONTEXT; + } else if (ParamName == PI_EVENT_INFO_COMMAND_TYPE) { + PropName = UR_EVENT_INFO_COMMAND_TYPE; + } else if (ParamName == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + PropName = UR_EVENT_INFO_COMMAND_EXECUTION_STATUS; + } else if (ParamName == PI_EVENT_INFO_REFERENCE_COUNT) { + PropName = UR_EVENT_INFO_REFERENCE_COUNT; + } else { + return PI_ERROR_INVALID_VALUE; + } + + HANDLE_ERRORS(urEventGetInfo(UrEvent, PropName, ParamValueSize, ParamValue, + ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piextEventGetNativeHandle(pi_event Event, + pi_native_handle *NativeHandle) { + + PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_event_handle_t UrEvent = reinterpret_cast(Event); + + ur_native_handle_t *UrNativeEvent = + reinterpret_cast(NativeHandle); + HANDLE_ERRORS(urEventGetNativeHandle(UrEvent, UrNativeEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEventGetProfilingInfo(pi_event Event, + pi_profiling_info ParamName, + size_t ParamValueSize, + void *ParamValue, + size_t *ParamValueSizeRet) { + + PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); + + ur_event_handle_t UrEvent = reinterpret_cast(Event); + + ur_profiling_info_t PropName{}; + switch (ParamName) { + case PI_PROFILING_INFO_COMMAND_QUEUED: { + PropName = UR_PROFILING_INFO_COMMAND_QUEUED; + break; + } + case PI_PROFILING_INFO_COMMAND_SUBMIT: { + PropName = UR_PROFILING_INFO_COMMAND_SUBMIT; + break; + } + case PI_PROFILING_INFO_COMMAND_START: { + PropName = UR_PROFILING_INFO_COMMAND_START; + break; + } + case PI_PROFILING_INFO_COMMAND_END: { + PropName = UR_PROFILING_INFO_COMMAND_END; + break; + } + default: + return PI_ERROR_INVALID_PROPERTY; + } + + 
HANDLE_ERRORS(urEventGetProfilingInfo(UrEvent, PropName, ParamValueSize, + ParamValue, ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_event_handle_t *UrEvent = reinterpret_cast(RetEvent); + // pass null for the hNativeHandle to use urEventCreateWithNativeHandle + // as urEventCreate + ur_event_native_properties_t Properties{}; + HANDLE_ERRORS( + urEventCreateWithNativeHandle(nullptr, UrContext, &Properties, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool OwnNativeHandle, + pi_event *Event) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); + PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); + + ur_native_handle_t UrNativeKernel = + reinterpret_cast(NativeHandle); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_event_handle_t *UrEvent = reinterpret_cast(Event); + ur_event_native_properties_t Properties{}; + Properties.isNativeHandleOwned = OwnNativeHandle; + HANDLE_ERRORS(urEventCreateWithNativeHandle(UrNativeKernel, UrContext, + &Properties, UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEventSetCallback( + pi_event Event, pi_int32 CommandExecCallbackType, + void (*PFnNotify)(pi_event Event, pi_int32 EventCommandStatus, + void *UserData), + void *UserData) { + std::ignore = Event; + std::ignore = CommandExecCallbackType; + std::ignore = PFnNotify; + std::ignore = UserData; + die("piEventSetCallback: deprecated, to be removed"); + return PI_SUCCESS; +} + +inline pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { + std::ignore = Event; + std::ignore = ExecutionStatus; + die("piEventSetStatus: deprecated, to be removed"); + return PI_SUCCESS; +} + +inline pi_result piEventRetain(pi_event Event) { + PI_ASSERT(Event, 
PI_ERROR_INVALID_EVENT); + + ur_event_handle_t UrEvent = reinterpret_cast(Event); + HANDLE_ERRORS(urEventRetain(UrEvent)); + + return PI_SUCCESS; +} + +inline pi_result piEventRelease(pi_event Event) { + PI_ASSERT(Event, PI_ERROR_INVALID_EVENT); + + ur_event_handle_t UrEvent = reinterpret_cast(Event); + HANDLE_ERRORS(urEventRelease(UrEvent)); + + return PI_SUCCESS; +} + +// Events +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Sampler +inline pi_result piSamplerCreate(pi_context Context, + const pi_sampler_properties *SamplerProperties, + pi_sampler *RetSampler) { + + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetSampler, PI_ERROR_INVALID_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_sampler_desc_t UrProps{}; + UrProps.stype = UR_STRUCTURE_TYPE_SAMPLER_DESC; + const pi_sampler_properties *CurProperty = SamplerProperties; + while (*CurProperty != 0) { + switch (*CurProperty) { + case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS: { + UrProps.normalizedCoords = ur_cast(*(++CurProperty)); + } break; + + case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE: { + pi_sampler_addressing_mode CurValueAddressingMode = + ur_cast( + ur_cast(*(++CurProperty))); + + if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_REPEAT) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_REPEAT; + else if (CurValueAddressingMode == + PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_CLAMP) + UrProps.addressingMode = UR_SAMPLER_ADDRESSING_MODE_CLAMP; + else if (CurValueAddressingMode == PI_SAMPLER_ADDRESSING_MODE_NONE) + UrProps.addressingMode = 
UR_SAMPLER_ADDRESSING_MODE_NONE; + } break; + + case PI_SAMPLER_PROPERTIES_FILTER_MODE: { + pi_sampler_filter_mode CurValueFilterMode = + ur_cast(ur_cast(*(++CurProperty))); + + if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_NEAREST) + UrProps.filterMode = UR_SAMPLER_FILTER_MODE_NEAREST; + else if (CurValueFilterMode == PI_SAMPLER_FILTER_MODE_LINEAR) + UrProps.filterMode = UR_SAMPLER_FILTER_MODE_LINEAR; + } break; + + default: + break; + } + CurProperty++; + } + + ur_sampler_handle_t *UrSampler = + reinterpret_cast(RetSampler); + + HANDLE_ERRORS(urSamplerCreate(UrContext, &UrProps, UrSampler)); + + return PI_SUCCESS; +} + +inline pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + ur_sampler_info_t InfoType{}; + switch (ParamName) { + case PI_SAMPLER_INFO_REFERENCE_COUNT: + InfoType = UR_SAMPLER_INFO_REFERENCE_COUNT; + break; + case PI_SAMPLER_INFO_CONTEXT: + InfoType = UR_SAMPLER_INFO_CONTEXT; + break; + case PI_SAMPLER_INFO_NORMALIZED_COORDS: + InfoType = UR_SAMPLER_INFO_NORMALIZED_COORDS; + break; + case PI_SAMPLER_INFO_ADDRESSING_MODE: + InfoType = UR_SAMPLER_INFO_ADDRESSING_MODE; + break; + case PI_SAMPLER_INFO_FILTER_MODE: + InfoType = UR_SAMPLER_INFO_FILTER_MODE; + break; + default: + return PI_ERROR_UNKNOWN; + } + + size_t UrParamValueSizeRet; + auto hSampler = reinterpret_cast(Sampler); + HANDLE_ERRORS(urSamplerGetInfo(hSampler, InfoType, ParamValueSize, ParamValue, + &UrParamValueSizeRet)); + if (ParamValueSizeRet) { + *ParamValueSizeRet = UrParamValueSizeRet; + } + ur2piSamplerInfoValue(InfoType, ParamValueSize, &ParamValueSize, ParamValue); + fixupInfoValueTypes(UrParamValueSizeRet, ParamValueSizeRet, ParamValueSize, + ParamValue); + return PI_SUCCESS; +} + +// Special version of piKernelSetArg to accept pi_sampler. 
+inline pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_sampler *ArgValue) { + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + ur_sampler_handle_t UrSampler = + reinterpret_cast(*ArgValue); + + HANDLE_ERRORS(urKernelSetArgSampler(UrKernel, ArgIndex, nullptr, UrSampler)); + + return PI_SUCCESS; +} + +inline pi_result piSamplerRetain(pi_sampler Sampler) { + PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); + + ur_sampler_handle_t UrSampler = + reinterpret_cast(Sampler); + + HANDLE_ERRORS(urSamplerRetain(UrSampler)); + + return PI_SUCCESS; +} + +inline pi_result piSamplerRelease(pi_sampler Sampler) { + PI_ASSERT(Sampler, PI_ERROR_INVALID_SAMPLER); + + ur_sampler_handle_t UrSampler = + reinterpret_cast(Sampler); + + HANDLE_ERRORS(urSamplerRelease(UrSampler)); + + return PI_SUCCESS; +} + +// Sampler +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Command-buffer extension + +inline pi_result +piextCommandBufferCreate(pi_context Context, pi_device Device, + const pi_ext_command_buffer_desc *Desc, + pi_ext_command_buffer *RetCommandBuffer) { + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + const ur_exp_command_buffer_desc_t *UrDesc = + reinterpret_cast(Desc); + ur_exp_command_buffer_handle_t *UrCommandBuffer = + reinterpret_cast(RetCommandBuffer); + + HANDLE_ERRORS( + urCommandBufferCreateExp(UrContext, UrDevice, UrDesc, UrCommandBuffer)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + HANDLE_ERRORS(urCommandBufferRetainExp(UrCommandBuffer)); + + return PI_SUCCESS; +} + +inline pi_result +piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { + ur_exp_command_buffer_handle_t 
UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + HANDLE_ERRORS(urCommandBufferReleaseExp(UrCommandBuffer)); + + return PI_SUCCESS; +} + +inline pi_result +piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + HANDLE_ERRORS(urCommandBufferFinalizeExp(UrCommandBuffer)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferNDRangeKernel( + pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, + const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, + const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); + + HANDLE_ERRORS(urCommandBufferAppendKernelLaunchExp( + UrCommandBuffer, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, + LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemcpyUSM( + pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, + size_t Size, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + HANDLE_ERRORS(urCommandBufferAppendMemcpyUSMExp( + UrCommandBuffer, DstPtr, SrcPtr, Size, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemBufferCopy( + pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, + size_t SrcOffset, size_t DstOffset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + ur_exp_command_buffer_handle_t UrCommandBuffer = + 
reinterpret_cast(CommandBuffer); + + ur_mem_handle_t UrSrcMem = reinterpret_cast(SrcMem); + ur_mem_handle_t UrDstMem = reinterpret_cast(DstMem); + + HANDLE_ERRORS(urCommandBufferAppendMembufferCopyExp( + UrCommandBuffer, UrSrcMem, UrDstMem, SrcOffset, DstOffset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemBufferCopyRect( + pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, + pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, + pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, + size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + ur_mem_handle_t UrSrcMem = reinterpret_cast(SrcMem); + ur_mem_handle_t UrDstMem = reinterpret_cast(DstMem); + + ur_rect_offset_t UrSrcOrigin{SrcOrigin->x_bytes, SrcOrigin->y_scalar, + SrcOrigin->z_scalar}; + ur_rect_offset_t UrDstOrigin{DstOrigin->x_bytes, DstOrigin->y_scalar, + DstOrigin->z_scalar}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth_scalar; + UrRegion.height = Region->height_scalar; + UrRegion.width = Region->width_bytes; + + HANDLE_ERRORS(urCommandBufferAppendMembufferCopyRectExp( + UrCommandBuffer, UrSrcMem, UrDstMem, UrSrcOrigin, UrDstOrigin, UrRegion, + SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemBufferReadRect( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, 
pi_ext_sync_point *SyncPoint) { + + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, + BufferOffset->z_scalar}; + ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, + HostOffset->z_scalar}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth_scalar; + UrRegion.height = Region->height_scalar; + UrRegion.width = Region->width_bytes; + + HANDLE_ERRORS(urCommandBufferAppendMembufferReadRectExp( + UrCommandBuffer, UrBuffer, UrBufferOffset, UrHostOffset, UrRegion, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemBufferRead( + pi_ext_command_buffer CommandBuffer, pi_mem Src, size_t Offset, size_t Size, + void *Dst, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + PI_ASSERT(Src, PI_ERROR_INVALID_MEM_OBJECT); + + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + ur_mem_handle_t UrBuffer = reinterpret_cast(Src); + + HANDLE_ERRORS(urCommandBufferAppendMembufferReadExp( + UrCommandBuffer, UrBuffer, Offset, Size, Dst, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemBufferWriteRect( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); 
+ + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + ur_rect_offset_t UrBufferOffset{BufferOffset->x_bytes, BufferOffset->y_scalar, + BufferOffset->z_scalar}; + ur_rect_offset_t UrHostOffset{HostOffset->x_bytes, HostOffset->y_scalar, + HostOffset->z_scalar}; + ur_rect_region_t UrRegion{}; + UrRegion.depth = Region->depth_scalar; + UrRegion.height = Region->height_scalar; + UrRegion.width = Region->width_bytes; + + HANDLE_ERRORS(urCommandBufferAppendMembufferWriteRectExp( + UrCommandBuffer, UrBuffer, UrBufferOffset, UrHostOffset, UrRegion, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, + const_cast(Ptr), NumSyncPointsInWaitList, SyncPointWaitList, + SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextCommandBufferMemBufferWrite( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, + size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + + PI_ASSERT(Buffer, PI_ERROR_INVALID_MEM_OBJECT); + + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + ur_mem_handle_t UrBuffer = reinterpret_cast(Buffer); + + HANDLE_ERRORS(urCommandBufferAppendMembufferWriteExp( + UrCommandBuffer, UrBuffer, Offset, Size, const_cast(Ptr), + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint)); + + return PI_SUCCESS; +} + +inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, + pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + + ur_exp_command_buffer_handle_t UrCommandBuffer = + reinterpret_cast(CommandBuffer); + + ur_queue_handle_t UrQueue = reinterpret_cast(Queue); + const ur_event_handle_t *UrEventWaitList = + reinterpret_cast(EventWaitList); + ur_event_handle_t *UrEvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urCommandBufferEnqueueExp( + 
UrCommandBuffer, UrQueue, NumEventsInWaitList, UrEventWaitList, UrEvent)); + + return PI_SUCCESS; +} + +// Command-buffer extension +/////////////////////////////////////////////////////////////////////////////// + +} // namespace pi2ur From 6ed3213b4581b8e5cbc550f6f5d181306dc1d9c5 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Jul 2023 05:22:59 -0700 Subject: [PATCH 64/75] Added back peer functions to pi2ur. Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 55 ++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 79b6d44a4d0d8..d1f77bf029c69 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4322,4 +4322,59 @@ inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, // Command-buffer extension /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// usm-p2p + +pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + auto commandDevice = reinterpret_cast(command_device); + auto peerDevice = reinterpret_cast(peer_device); + + HANDLE_ERRORS(urUsmP2PEnablePeerAccessExp(commandDevice, peerDevice)); + + return PI_SUCCESS; +} + +pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + auto commandDevice = reinterpret_cast(command_device); + auto peerDevice = reinterpret_cast(peer_device); + + HANDLE_ERRORS(urUsmP2PDisablePeerAccessExp(commandDevice, peerDevice)); + + return PI_SUCCESS; +} + +pi_result piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + auto commandDevice = reinterpret_cast(command_device); + auto peerDevice = reinterpret_cast(peer_device); + + ur_exp_peer_info_t 
propName; + switch (attr) { + case PI_PEER_ACCESS_SUPPORTED: { + propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; + break; + } + case PI_PEER_ATOMICS_SUPPORTED: { + propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; + break; + } + default: { + return PI_ERROR_INVALID_VALUE; + } + } + + HANDLE_ERRORS(urUsmP2PPeerAccessGetInfoExp( + commandDevice, peerDevice, propName, param_value_size, param_value, + param_value_size_ret)); + + return PI_SUCCESS; +} + +// usm-p2p +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur From 512d2a8f0ae17bfef06a5470ae34e71751d0b670 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Jul 2023 05:56:34 -0700 Subject: [PATCH 65/75] Removed unnecessary usm_p2p.hpp headers. Signed-off-by: JackAKirk --- sycl/plugins/cuda/CMakeLists.txt | 1 - sycl/plugins/level_zero/CMakeLists.txt | 1 - sycl/plugins/unified_runtime/CMakeLists.txt | 2 -- .../unified_runtime/ur/adapters/cuda/usm_p2p.cpp | 1 - .../unified_runtime/ur/adapters/cuda/usm_p2p.hpp | 10 ---------- .../ur/adapters/level_zero/usm_p2p.cpp | 1 - .../ur/adapters/level_zero/usm_p2p.hpp | 13 ------------- 7 files changed, 29 deletions(-) delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.hpp diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt index 33e853fc178e6..99f6f601ee46a 100644 --- a/sycl/plugins/cuda/CMakeLists.txt +++ b/sycl/plugins/cuda/CMakeLists.txt @@ -81,7 +81,6 @@ add_sycl_plugin(cuda "../unified_runtime/ur/adapters/cuda/usm.cpp" "../unified_runtime/ur/adapters/cuda/command_buffer.hpp" "../unified_runtime/ur/adapters/cuda/command_buffer.cpp" - "../unified_runtime/ur/adapters/cuda/usm_p2p.hpp" "../unified_runtime/ur/adapters/cuda/usm_p2p.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt index 
91ed549bead69..8c5a0d4f92c43 100755 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ b/sycl/plugins/level_zero/CMakeLists.txt @@ -114,7 +114,6 @@ add_sycl_plugin(level_zero "../unified_runtime/ur/adapters/level_zero/queue.hpp" "../unified_runtime/ur/adapters/level_zero/sampler.hpp" "../unified_runtime/ur/adapters/level_zero/usm.hpp" - "../unified_runtime/ur/adapters/level_zero/usm_p2p.hpp" "../unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp" "../unified_runtime/ur/adapters/level_zero/command_buffer.cpp" "../unified_runtime/ur/adapters/level_zero/common.cpp" diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 1a85c21c57e5d..88c631c3b6795 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -98,7 +98,6 @@ add_sycl_library("ur_adapter_level_zero" SHARED "ur/adapters/level_zero/queue.hpp" "ur/adapters/level_zero/sampler.hpp" "ur/adapters/level_zero/usm.hpp" - "ur/adapters/level_zero/usm_p2p.hpp" "ur/adapters/level_zero/command_buffer.cpp" "ur/adapters/level_zero/common.cpp" "ur/adapters/level_zero/context.cpp" @@ -160,7 +159,6 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) "ur/adapters/cuda/usm.cpp" "ur/adapters/cuda/command_buffer.hpp" "ur/adapters/cuda/command_buffer.cpp" - "ur/adapters/cuda/usm_p2p.hpp" "ur/adapters/cuda/usm_p2p.cpp" INCLUDE_DIRS ${sycl_inc_dir} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp index eb98111b1429d..b1b0255a94d1e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp @@ -6,7 +6,6 @@ // //===---------------------------------------------------------------===// -#include "usm_p2p.hpp" #include "common.hpp" #include "context.hpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp 
deleted file mode 100644 index f927860839860..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.hpp +++ /dev/null @@ -1,10 +0,0 @@ -//===--------- usm_p2p.hpp - CUDA Adapter---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===---------------------------------------------------------------===// - -#pragma once -#include diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp index eebcfb9fe3fd0..0b60554f028ec 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp @@ -6,7 +6,6 @@ // //===-----------------------------------------------------------------===// -#include "usm_p2p.hpp" #include "ur_level_zero.hpp" UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.hpp deleted file mode 100644 index a2f8a7015f32f..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.hpp +++ /dev/null @@ -1,13 +0,0 @@ -//===--------- ur_level_zero_usm_p2p.hpp - L0 Adapter-----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===-----------------------------------------------------------------===// - -#pragma once - -#include - -#include "common.hpp" From 1e088a1f014501f9e61dea2ff8804753b1f5a295 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Jul 2023 06:59:05 -0700 Subject: [PATCH 66/75] Removed level_zero pi::die impl. 
Signed-off-by: JackAKirk --- .../ur/adapters/level_zero/common.cpp | 4 ---- .../ur/adapters/level_zero/common.hpp | 17 ----------------- .../ur/adapters/level_zero/usm_p2p.cpp | 4 ---- 3 files changed, 25 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp index 0a821b4b6b680..14b8942911ee4 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp @@ -286,7 +286,3 @@ ur_result_t zerPluginGetLastError(char **message) { return ErrorMessageCode; } -void sycl::detail::ur::die(const char *Message) { - std::cerr << "ur_die: " << Message << std::endl; - std::terminate(); -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp index 1040c45b7b6b5..3e8d4512adcc3 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp @@ -21,7 +21,6 @@ #include #include -#include #include struct _ur_platform_handle_t; @@ -468,19 +467,3 @@ extern thread_local char ErrorMessage[MaxMessageSize]; [[maybe_unused]] void setErrorMessage(const char *message, ur_result_t error_code); -/// ------ Error handling, matching OpenCL plugin semantics. -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace detail { -namespace ur { - -// Report error and no return (keeps compiler from printing warnings). -// TODO: Probably change that to throw a catchable exception, -// but for now it is useful to see every failure. 
-// -[[noreturn]] void die(const char *Message); - -} // namespace ur -} // namespace detail -} // __SYCL_INLINE_VER_NAMESPACE(_V1) -} // namespace sycl diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp index 0b60554f028ec..a42d845f15c4f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp @@ -14,8 +14,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( std::ignore = commandDevice; std::ignore = peerDevice; - sycl::detail::ur::die("Experimental USM-P2P feature is not " - "implemented in the L0 adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -25,8 +23,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( std::ignore = commandDevice; std::ignore = peerDevice; - sycl::detail::ur::die("Experimental USM-P2P feature is not " - "implemented in the L0 adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 80dd26ad3298dee5334fefb4b2ba825eed1bb2ed Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Jul 2023 07:25:08 -0700 Subject: [PATCH 67/75] Format. 
Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp | 1 - sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp | 1 - 2 files changed, 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp index 14b8942911ee4..3d9da756faf2a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.cpp @@ -285,4 +285,3 @@ ur_result_t zerPluginGetLastError(char **message) { *message = &ErrorMessage[0]; return ErrorMessageCode; } - diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp index 3e8d4512adcc3..c11de1ca1f335 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/common.hpp @@ -466,4 +466,3 @@ extern thread_local char ErrorMessage[MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, ur_result_t error_code); - From 9dc099c1dcd6789309dfd059f18939bce7d75268 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Jul 2023 03:09:02 -0700 Subject: [PATCH 68/75] Added missing inline and urPrint. 
Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 6 +++--- .../unified_runtime/ur/adapters/level_zero/usm_p2p.cpp | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index d1f77bf029c69..09bc41294534d 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4325,7 +4325,7 @@ inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, /////////////////////////////////////////////////////////////////////////////// // usm-p2p -pi_result piextEnablePeerAccess(pi_device command_device, +inline pi_result piextEnablePeerAccess(pi_device command_device, pi_device peer_device) { auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); @@ -4335,7 +4335,7 @@ pi_result piextEnablePeerAccess(pi_device command_device, return PI_SUCCESS; } -pi_result piextDisablePeerAccess(pi_device command_device, +inline pi_result piextDisablePeerAccess(pi_device command_device, pi_device peer_device) { auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); @@ -4345,7 +4345,7 @@ pi_result piextDisablePeerAccess(pi_device command_device, return PI_SUCCESS; } -pi_result piextPeerAccessGetInfo(pi_device command_device, +inline pi_result piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, pi_peer_attr attr, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp index a42d845f15c4f..9ec958e2d3c9c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/usm_p2p.cpp @@ -14,6 +14,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( std::ignore = 
commandDevice; std::ignore = peerDevice; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -23,6 +24,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( std::ignore = commandDevice; std::ignore = peerDevice; + urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From b191914e3bc882f129d0e20d0069ab68cad8023e Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Jul 2023 03:18:19 -0700 Subject: [PATCH 69/75] Format. Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 09bc41294534d..be1cefdc4bf0c 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4326,7 +4326,8 @@ inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, // usm-p2p inline pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) { + pi_device peer_device) +{ auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); @@ -4336,7 +4337,8 @@ inline pi_result piextEnablePeerAccess(pi_device command_device, } inline pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) { + pi_device peer_device) +{ auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); @@ -4346,23 +4348,28 @@ inline pi_result piextDisablePeerAccess(pi_device command_device, } inline pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { + pi_device peer_device, pi_peer_attr attr, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) +{ auto 
commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); ur_exp_peer_info_t propName; - switch (attr) { - case PI_PEER_ACCESS_SUPPORTED: { + switch (attr) + { + case PI_PEER_ACCESS_SUPPORTED: + { propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; break; } - case PI_PEER_ATOMICS_SUPPORTED: { + case PI_PEER_ATOMICS_SUPPORTED: + { propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; break; } - default: { + default: + { return PI_ERROR_INVALID_VALUE; } } From 5d83aeb2a6285ac541498b8425001ba31da3ff5f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Jul 2023 03:40:03 -0700 Subject: [PATCH 70/75] Format. Signed-off-by: JackAKirk --- sycl/plugins/unified_runtime/pi2ur.hpp | 27 ++++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index be1cefdc4bf0c..1c86b1b075f5a 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -4326,8 +4326,7 @@ inline pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, // usm-p2p inline pi_result piextEnablePeerAccess(pi_device command_device, - pi_device peer_device) -{ + pi_device peer_device) { auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); @@ -4337,8 +4336,7 @@ inline pi_result piextEnablePeerAccess(pi_device command_device, } inline pi_result piextDisablePeerAccess(pi_device command_device, - pi_device peer_device) -{ + pi_device peer_device) { auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); @@ -4347,29 +4345,24 @@ inline pi_result piextDisablePeerAccess(pi_device command_device, return PI_SUCCESS; } -inline pi_result piextPeerAccessGetInfo(pi_device command_device, - pi_device peer_device, pi_peer_attr attr, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) -{ +inline 
pi_result +piextPeerAccessGetInfo(pi_device command_device, pi_device peer_device, + pi_peer_attr attr, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) { auto commandDevice = reinterpret_cast(command_device); auto peerDevice = reinterpret_cast(peer_device); ur_exp_peer_info_t propName; - switch (attr) - { - case PI_PEER_ACCESS_SUPPORTED: - { + switch (attr) { + case PI_PEER_ACCESS_SUPPORTED: { propName = UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED; break; } - case PI_PEER_ATOMICS_SUPPORTED: - { + case PI_PEER_ATOMICS_SUPPORTED: { propName = UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED; break; } - default: - { + default: { return PI_ERROR_INVALID_VALUE; } } From c389980ca53827144db7e8f3e6e3f7ac328a794a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jul 2023 01:54:56 -0700 Subject: [PATCH 71/75] Moved p2p ext doc to supported. Signed-off-by: JackAKirk --- .../{proposed => supported}/sycl_ext_oneapi_peer_access.asciidoc | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sycl/doc/extensions/{proposed => supported}/sycl_ext_oneapi_peer_access.asciidoc (100%) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_peer_access.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc similarity index 100% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_peer_access.asciidoc rename to sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc From 8bd6b6022c6535de1b9f825c275037facf143283 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jul 2023 13:53:43 +0100 Subject: [PATCH 72/75] Added Backend support status, updated status. 
Signed-off-by: JackAKirk --- .../sycl_ext_oneapi_peer_access.asciidoc | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc index e31f974e38bb4..0929df20a389a 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc @@ -42,12 +42,17 @@ SYCL specification refer to that revision. == Status -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. *Shipping software products should -not rely on APIs defined in this specification.* +This extension is implemented and fully supported by DPC++. +== Backend support status + +This extension is currently implemented in {dpcpp} only for GPU devices. +This extension is only currently fully supported for the cuda backend. +Attempting to use this extension in kernels that run on other +backends may result in undefined behavior. Be aware that the compiler +may not able to issue a diagnostic to warn you if this happens. +When the extension is not supported for a particular gpu backend any queries +made using `ext_oneapi_can_access_peer` will return false. == Overview From 5e7d82137512334eeebb90597fb7dc31b4ab5998 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jul 2023 14:11:13 +0100 Subject: [PATCH 73/75] Updated sycl 2020 revision version. 
Signed-off-by: JackAKirk --- .../extensions/supported/sycl_ext_oneapi_peer_access.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc index 0929df20a389a..2a9ed85600035 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc @@ -36,7 +36,7 @@ https://github.com/intel/llvm/issues == Dependencies -This extension is written against the SYCL 2020 revision 6 specification. All +This extension is written against the SYCL 2020 revision 7 specification. All references below to the "core SYCL specification" or to section numbers in the SYCL specification refer to that revision. @@ -51,7 +51,7 @@ This extension is only currently fully supported for the cuda backend. Attempting to use this extension in kernels that run on other backends may result in undefined behavior. Be aware that the compiler may not able to issue a diagnostic to warn you if this happens. -When the extension is not supported for a particular gpu backend any queries +When the extension is not supported for a particular GPU backend any queries made using `ext_oneapi_can_access_peer` will return false. == Overview From ab3ac25ffd3961d304c7649122baf74c47d8584e Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jul 2023 14:14:15 +0100 Subject: [PATCH 74/75] Switch to Greg's suggested wording. 
Signed-off-by: JackAKirk --- .../supported/sycl_ext_oneapi_peer_access.asciidoc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc index 2a9ed85600035..fcc5fe0fb7fb0 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc @@ -46,13 +46,9 @@ This extension is implemented and fully supported by DPC++. == Backend support status -This extension is currently implemented in {dpcpp} only for GPU devices. -This extension is only currently fully supported for the cuda backend. -Attempting to use this extension in kernels that run on other -backends may result in undefined behavior. Be aware that the compiler -may not able to issue a diagnostic to warn you if this happens. -When the extension is not supported for a particular GPU backend any queries -made using `ext_oneapi_can_access_peer` will return false. +This extension is currently implemented in DPC++ for all GPU devices and +backends, however, only the CUDA backend allows peer to peer memory access. +Other backends report false from the ext_oneapi_can_access_peer query. == Overview From 47acd23715e0627af0c25b46da6bdf4922137e22 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 7 Jul 2023 15:26:23 +0100 Subject: [PATCH 75/75] Use code font for function name. 
Signed-off-by: JackAKirk --- .../extensions/supported/sycl_ext_oneapi_peer_access.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc index fcc5fe0fb7fb0..b089d9b853cbb 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc @@ -48,7 +48,7 @@ This extension is implemented and fully supported by DPC++. This extension is currently implemented in DPC++ for all GPU devices and backends, however, only the CUDA backend allows peer to peer memory access. -Other backends report false from the ext_oneapi_can_access_peer query. +Other backends report false from the `ext_oneapi_can_access_peer` query. == Overview