diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 0d0396cc18374..e85f0d4b2eee3 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -12298,6 +12298,9 @@ typedef enum ur_exp_launch_property_id_t { UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION = 2, /// Implicit work group memory allocation UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY = 3, + /// Whether to opportunistically execute kernel launches serially on a + /// native queue + UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE = 4, /// @cond UR_EXP_LAUNCH_PROPERTY_ID_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -12320,6 +12323,9 @@ typedef union ur_exp_launch_property_value_t { /// [in] non-zero value indicates the amount of work group memory to /// allocate in bytes size_t workgroup_mem_size; + /// [in] non-zero value indicates an opportunistic native queue serialized + /// kernel + int opportunistic_queue_serialize; } ur_exp_launch_property_value_t; diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index f5e5b89dbb8ae..46731903c2f2e 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -12006,6 +12006,9 @@ inline std::ostream &operator<<(std::ostream &os, case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY: os << "UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY"; break; + case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: + os << "UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE"; + break; default: os << "unknown enumerator"; break; @@ -12048,6 +12051,13 @@ inline ur_result_t printUnion(std::ostream &os, os << (params.workgroup_mem_size); + break; + case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: + + os << ".opportunistic_queue_serialize = "; + + os << (params.opportunistic_queue_serialize); + break; default: os << ""; diff --git a/unified-runtime/scripts/core/exp-launch-properties.yml 
b/unified-runtime/scripts/core/exp-launch-properties.yml index 558dd46cc8e6d..9463629171dab 100644 --- a/unified-runtime/scripts/core/exp-launch-properties.yml +++ b/unified-runtime/scripts/core/exp-launch-properties.yml @@ -36,6 +36,8 @@ etors: desc: "work-group cluster dimensions" - name: WORK_GROUP_MEMORY desc: "Implicit work group memory allocation" + - name: OPPORTUNISTIC_QUEUE_SERIALIZE + desc: "Whether to opportunistically execute kernel launches serially on a native queue" --- #-------------------------------------------------------------------------- type: union desc: "Specifies a launch property value" @@ -56,6 +58,10 @@ members: name: workgroup_mem_size desc: "[in] non-zero value indicates the amount of work group memory to allocate in bytes" tag: $X_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY + - type: int + name: opportunistic_queue_serialize + desc: "[in] non-zero value indicates an opportunistic native queue serialized kernel" + tag: $X_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE --- #-------------------------------------------------------------------------- type: struct desc: "Kernel launch property" diff --git a/unified-runtime/source/adapters/cuda/enqueue.cpp b/unified-runtime/source/adapters/cuda/enqueue.cpp index bc8d81ae44312..2e2c36bf67012 100644 --- a/unified-runtime/source/adapters/cuda/enqueue.cpp +++ b/unified-runtime/source/adapters/cuda/enqueue.cpp @@ -599,6 +599,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( attr.value.cooperative = launchPropList[i].value.cooperative; break; } + case UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE: { + auto &attr = launch_attribute.emplace_back(); + attr.id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION; + attr.value.programmaticStreamSerializationAllowed = + launchPropList[i].value.opportunistic_queue_serialize; + break; + } case UR_EXP_LAUNCH_PROPERTY_ID_WORK_GROUP_MEMORY: { break; } diff --git 
a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp index 43be352da7fcd..042478a489b20 100644 --- a/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp +++ b/unified-runtime/test/conformance/exp_launch_properties/launch_properties.cpp @@ -66,6 +66,15 @@ TEST_P(urEnqueueKernelLaunchCustomTest, Success) { props.push_back(coop_prop); } + if (compute_capability >= 9.0) { + ur_exp_launch_property_t opportunistic_queue_serialize_prop; + opportunistic_queue_serialize_prop.id = + UR_EXP_LAUNCH_PROPERTY_ID_OPPORTUNISTIC_QUEUE_SERIALIZE; + opportunistic_queue_serialize_prop.value.opportunistic_queue_serialize = + 1; + props.push_back(opportunistic_queue_serialize_prop); + } + ur_bool_t cluster_launch_supported = false; ASSERT_SUCCESS( urDeviceGetInfo(device, UR_DEVICE_INFO_CLUSTER_LAUNCH_SUPPORT_EXP,