From 2b2be846d39a82e460333ce94ffe3f79901d180a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 May 2025 13:58:32 +0100 Subject: [PATCH 1/3] Add opportunistic queue serialize prop Signed-off-by: JackAKirk --- unified-runtime/include/ur_api.h | 6 ++++++ unified-runtime/include/ur_print.hpp | 10 ++++++++++ unified-runtime/scripts/core/exp-launch-properties.yml | 6 ++++++ unified-runtime/source/adapters/cuda/enqueue.cpp | 7 +++++++ .../exp_launch_properties/launch_properties.cpp | 7 +++++++ 5 files changed, 36 insertions(+) diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 0d0396cc1837..e85f0d4b2eee 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -12298,6 +12298,9 @@ typedef enum ur_exp_launch_property_id_t { UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION = 2, /// Implicit work group memory allocation UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY = 3, + /// Whether to opportunistically execute kernel launches serially on a + /// native queue + UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE = 4, /// @cond UR_EXP_LAUNCH_PROPERTY_ID_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -12320,6 +12323,9 @@ typedef union ur_exp_launch_property_value_t { /// [in] non-zero value indicates the amount of work group memory to /// allocate in bytes size_t workgroup_mem_size; + /// [in] non-zero value indicates an opportunistic native queue serialized + /// kernel + int opportunistic_queue_serialize; } ur_exp_launch_property_value_t; diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index f5e5b89dbb8a..46731903c2f2 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -12006,6 +12006,9 @@ inline std::ostream &operator<<(std::ostream &os, case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY: os << "UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY"; break; + case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: + os << 
"UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE"; + break; default: os << "unknown enumerator"; break; @@ -12048,6 +12051,13 @@ inline ur_result_t printUnion(std::ostream &os, os << (params.workgroup_mem_size); + break; + case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: + + os << ".opportunistic_queue_serialize = "; + + os << (params.opportunistic_queue_serialize); + break; default: os << ""; diff --git a/unified-runtime/scripts/core/exp-launch-properties.yml b/unified-runtime/scripts/core/exp-launch-properties.yml index 558dd46cc8e6..9463629171da 100644 --- a/unified-runtime/scripts/core/exp-launch-properties.yml +++ b/unified-runtime/scripts/core/exp-launch-properties.yml @@ -36,6 +36,8 @@ etors: desc: "work-group cluster dimensions" - name: WORK_GROUP_MEMORY desc: "Implicit work group memory allocation" + - name: OPPORTUNISTIC_QUEUE_SERIALIZE + desc: "Whether to opportunistically execute kernel launches serially on a native queue" --- #-------------------------------------------------------------------------- type: union desc: "Specifies a launch property value" @@ -56,6 +58,10 @@ members: name: workgroup_mem_size desc: "[in] non-zero value indicates the amount of work group memory to allocate in bytes" tag: $X_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY + - type: int + name: opportunistic_queue_serialize + desc: "[in] non-zero value indicates an opportunistic native queue serialized kernel" + tag: $X_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE --- #-------------------------------------------------------------------------- type: struct desc: "Kernel launch property" diff --git a/unified-runtime/source/adapters/cuda/enqueue.cpp b/unified-runtime/source/adapters/cuda/enqueue.cpp index bc8d81ae4431..2e2c36bf6701 100644 --- a/unified-runtime/source/adapters/cuda/enqueue.cpp +++ b/unified-runtime/source/adapters/cuda/enqueue.cpp @@ -599,6 +599,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( attr.value.cooperative = 
launchPropList[i].value.cooperative; break; } + case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: { + auto &attr = launch_attribute.emplace_back(); + attr.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION; + attr.value.programmaticStreamSerializationAllowed = + launchPropList[i].value.opportunistic_queue_serialize; + break; + } case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY: { break; } diff --git a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp index 43be352da7fc..ed726745b899 100644 --- a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp +++ b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp @@ -66,6 +66,13 @@ TEST_P(urEnqueueKernelLaunchCustomTest, Success) { props.push_back(coop_prop); } + if (compute_capability >= 9.0) { + ur_exp_launch_property_t opportunistic_queue_serialize_prop; + coop_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE; + coop_prop.value.opportunistic_queue_serialize = 1; + props.push_back(coop_prop); + } + ur_bool_t cluster_launch_supported = false; ASSERT_SUCCESS( urDeviceGetInfo(device, UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP, From aa45ff14fa78ea2f62eb80dabef30ba3ee8ffcbe Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 May 2025 14:25:21 +0100 Subject: [PATCH 2/3] Fix copy paste error Signed-off-by: JackAKirk --- .../conformance/exp_launch_properties/launch_properties.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp index ed726745b899..f03c7f3c57aa 100644 --- a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp +++ b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp @@ -68,9 +68,9 @@ 
TEST_P(urEnqueueKernelLaunchCustomTest, Success) { if (compute_capability >= 9.0) { ur_exp_launch_property_t opportunistic_queue_serialize_prop; - coop_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE; - coop_prop.value.opportunistic_queue_serialize = 1; - props.push_back(coop_prop); + opportunistic_queue_serialize_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE; + opportunistic_queue_serialize_prop.value.opportunistic_queue_serialize = 1; + props.push_back(opportunistic_queue_serialize_prop); } ur_bool_t cluster_launch_supported = false; From 116d0de47f7e65de0bf90dc41524b2bda7c31a69 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 May 2025 14:30:09 +0100 Subject: [PATCH 3/3] Fix format Signed-off-by: JackAKirk --- .../conformance/exp_launch_properties/launch_properties.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp index f03c7f3c57aa..042478a489b2 100644 --- a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp +++ b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp @@ -68,8 +68,10 @@ TEST_P(urEnqueueKernelLaunchCustomTest, Success) { if (compute_capability >= 9.0) { ur_exp_launch_property_t opportunistic_queue_serialize_prop; - opportunistic_queue_serialize_prop.id = UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE; - opportunistic_queue_serialize_prop.value.opportunistic_queue_serialize = 1; + opportunistic_queue_serialize_prop.id = + UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE; + opportunistic_queue_serialize_prop.value.opportunistic_queue_serialize = + 1; props.push_back(opportunistic_queue_serialize_prop); }