diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index c71523cdd24c3..d058bdc4a3cc6 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -10756,8 +10756,8 @@ typedef struct ur_exp_command_buffer_desc_t { const void *pNext; /// [in] Commands in a finalized command-buffer can be updated. ur_bool_t isUpdatable; - /// [in] Commands in a command-buffer may be executed in-order without - /// explicit dependencies. + /// [in] Commands in a command-buffer will execute in-order, explicit + /// sync-point dependencies are ignored. ur_bool_t isInOrder; /// [in] Command-buffer profiling is enabled. ur_bool_t enableProfiling; @@ -11048,8 +11048,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11111,8 +11111,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -11177,8 +11177,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11239,8 +11239,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11300,8 +11300,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11361,8 +11361,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. 
uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11431,8 +11431,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11505,8 +11505,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11578,8 +11578,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. 
+ /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11643,8 +11643,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11711,8 +11711,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11779,8 +11779,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. 
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -11840,8 +11840,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. ur_exp_command_buffer_sync_point_t *pSyncPoint); diff --git a/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst b/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst index 3ef48bbd7f0cf..136c7024632ec 100644 --- a/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst @@ -53,14 +53,14 @@ Command-Buffer Creation -------------------------------------------------------------------------------- Command-Buffers are tied to a specific ${x}_context_handle_t and -${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor +${x}_device_handle_t. ${x}CommandBufferCreateExp takes a descriptor to provide additional properties for how the command-buffer should be constructed. The members defined in ${x}_exp_command_buffer_desc_t are: * ``isUpdatable``, which should be set to ``true`` to support :ref:`updating command-buffer commands`. -* ``isInOrder``, which should be set to ``true`` to enable commands enqueued to - a command-buffer to be executed in an in-order fashion where possible. +* ``isInOrder``, which should be set to ``true`` to force commands appended + to a command-buffer to be executed in an in-order fashion. 
* ``enableProfiling``, which should be set to ``true`` to enable profiling of the command-buffer. @@ -108,8 +108,9 @@ Sync-Points A sync-point is a value which represents a command inside of a command-buffer which is returned from command-buffer append function calls. These can be optionally passed to these functions to define execution dependencies on other -commands within the command-buffer. Sync-points passed to functions may be -ignored if the command-buffer was created in-order. +commands within the command-buffer. Both wait-list and return sync-point +parameters to append functions are ignored if the command-buffer was created +with the in-order property. Sync-points are unique and valid for use only within the command-buffer they were obtained from. @@ -550,6 +551,9 @@ Changelog +-----------+-------------------------------------------------------+ | 1.11 | Support native commands. | +-----------+-------------------------------------------------------+ +| 1.12 | Strengthen in-order property such that sync-point | +| | parameters to append APIs are ignored. | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/unified-runtime/scripts/core/exp-command-buffer.yml b/unified-runtime/scripts/core/exp-command-buffer.yml index 9e213fbc7bd2d..05e61985cd4b8 100644 --- a/unified-runtime/scripts/core/exp-command-buffer.yml +++ b/unified-runtime/scripts/core/exp-command-buffer.yml @@ -143,7 +143,7 @@ members: desc: "[in] Commands in a finalized command-buffer can be updated." - type: $x_bool_t name: isInOrder - desc: "[in] Commands in a command-buffer may be executed in-order without explicit dependencies." + desc: "[in] Commands in a command-buffer will execute in-order, explicit sync-point dependencies are ignored." - type: $x_bool_t name: enableProfiling desc: "[in] Command-buffer profiling is enabled." 
@@ -375,7 +375,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -440,7 +440,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -502,7 +502,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -569,7 +569,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -628,7 +628,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -687,7 +687,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. 
- May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -761,7 +761,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -835,7 +835,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -909,7 +909,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -971,7 +971,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -1032,7 +1032,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." 
- type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -1095,7 +1095,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: uint32_t name: numEventsInWaitList desc: "[in] Size of the event wait list." @@ -1166,7 +1166,7 @@ params: - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. - May be ignored if command-buffer is in-order." + Will be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." diff --git a/unified-runtime/source/adapters/cuda/command_buffer.cpp b/unified-runtime/source/adapters/cuda/command_buffer.cpp index a0b708e1d3e39..8ac039160fa34 100644 --- a/unified-runtime/source/adapters/cuda/command_buffer.cpp +++ b/unified-runtime/source/adapters/cuda/command_buffer.cpp @@ -56,10 +56,11 @@ ur_result_t commandHandleDestroy( } // end anonymous namespace ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( - ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable) + ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable, + bool IsInOrder) : handle_base(), Context(Context), Device(Device), IsUpdatable(IsUpdatable), - CudaGraph{nullptr}, CudaGraphExec{nullptr}, RefCount{1}, - NextSyncPoint{0} { + IsInOrder(IsInOrder), CudaGraph{nullptr}, CudaGraphExec{nullptr}, + RefCount{1}, NextSyncPoint{0} { urContextRetain(Context); } @@ -151,11 +152,24 @@ static ur_result_t getNodesFromSyncPoints( // the event associated with each sync-point auto SyncPoints = CommandBuffer->SyncPoints; + // If command-buffer is in-order use last node in ordered map, and return + // early 
as other user passed sync-points will be redundant for scheduling. + if (CommandBuffer->IsInOrder && !SyncPoints.empty()) { + auto LastNode = std::prev(SyncPoints.end()); + CuNodesList.push_back(LastNode->second); + return UR_RESULT_SUCCESS; + } + // For each sync-point add associated CUDA graph node to the return list. for (size_t i = 0; i < NumSyncPointsInWaitList; i++) { if (auto NodeHandle = SyncPoints.find(SyncPointWaitList[i]); NodeHandle != SyncPoints.end()) { - CuNodesList.push_back(NodeHandle->second); + auto DepNode = NodeHandle->second; + // Cuda driver API won't let you add duplicates to the dependency list + if (std::find(CuNodesList.begin(), CuNodesList.end(), DepNode) == + CuNodesList.end()) { + CuNodesList.push_back(DepNode); + } } else { return UR_RESULT_ERROR_INVALID_VALUE; } @@ -346,9 +360,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { const bool IsUpdatable = pCommandBufferDesc->isUpdatable; + const bool IsInOrder = pCommandBufferDesc->isInOrder; try { - *phCommandBuffer = - new ur_exp_command_buffer_handle_t_(hContext, hDevice, IsUpdatable); + *phCommandBuffer = new ur_exp_command_buffer_handle_t_( + hContext, hDevice, IsUpdatable, IsInOrder); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) 
{ diff --git a/unified-runtime/source/adapters/cuda/command_buffer.hpp b/unified-runtime/source/adapters/cuda/command_buffer.hpp index efc96d7df9bb2..e11b9ab74969a 100644 --- a/unified-runtime/source/adapters/cuda/command_buffer.hpp +++ b/unified-runtime/source/adapters/cuda/command_buffer.hpp @@ -131,7 +131,8 @@ struct ur_exp_command_buffer_command_handle_t_ : ur::cuda::handle_base { struct ur_exp_command_buffer_handle_t_ : ur::cuda::handle_base { ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, - ur_device_handle_t Device, bool IsUpdatable); + ur_device_handle_t Device, bool IsUpdatable, + bool IsInOrder); ~ur_exp_command_buffer_handle_t_(); @@ -182,6 +183,8 @@ struct ur_exp_command_buffer_handle_t_ : ur::cuda::handle_base { ur_device_handle_t Device; // Whether commands in the command-buffer can be updated bool IsUpdatable; + // Whether commands in the command-buffer are in-order. + bool IsInOrder; // Cuda Graph handle CUgraph CudaGraph; // Cuda Graph Exec handle @@ -190,9 +193,9 @@ struct ur_exp_command_buffer_handle_t_ : ur::cuda::handle_base { // using std::atomic prevents data race when incrementing/decrementing. std::atomic_uint32_t RefCount; - // Map of sync_points to ur_events - std::unordered_map - SyncPoints; + // Ordered map of sync_points to ur_events, so that we can find the last + // node added to an in-order command-buffer. 
+ std::map SyncPoints; // Next sync_point value (may need to consider ways to reuse values if 32-bits // is not enough) ur_exp_command_buffer_sync_point_t NextSyncPoint; diff --git a/unified-runtime/source/adapters/hip/command_buffer.cpp b/unified-runtime/source/adapters/hip/command_buffer.cpp index 2330e84ef8eed..788bc51b2e468 100644 --- a/unified-runtime/source/adapters/hip/command_buffer.cpp +++ b/unified-runtime/source/adapters/hip/command_buffer.cpp @@ -22,10 +22,11 @@ #include ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( - ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) + ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable, + bool IsInOrder) : handle_base(), Context(hContext), Device(hDevice), - IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, - RefCount{1}, NextSyncPoint{0} { + IsUpdatable(IsUpdatable), IsInOrder(IsInOrder), HIPGraph{nullptr}, + HIPGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} { urContextRetain(hContext); } @@ -97,11 +98,24 @@ static ur_result_t getNodesFromSyncPoints( // the event associated with each sync-point auto SyncPoints = CommandBuffer->SyncPoints; + // If command-buffer is in-order use last node in ordered map, and return + // early as other user passed sync-points will be redundant for scheduling. + if (CommandBuffer->IsInOrder && !SyncPoints.empty()) { + auto LastNode = std::prev(SyncPoints.end()); + HIPNodesList.push_back(LastNode->second); + return UR_RESULT_SUCCESS; + } + // For each sync-point add associated HIP graph node to the return list. 
for (size_t i = 0; i < NumSyncPointsInWaitList; i++) { if (auto NodeHandle = SyncPoints.find(SyncPointWaitList[i]); NodeHandle != SyncPoints.end()) { - HIPNodesList.push_back(NodeHandle->second); + auto DepNode = NodeHandle->second; + // HIP driver API won't let you add duplicates to the dependency list + if (std::find(HIPNodesList.begin(), HIPNodesList.end(), DepNode) == + HIPNodesList.end()) { + HIPNodesList.push_back(DepNode); + } } else { return UR_RESULT_ERROR_INVALID_VALUE; } @@ -231,9 +245,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { const bool IsUpdatable = pCommandBufferDesc->isUpdatable; + const bool IsInOrder = pCommandBufferDesc->isInOrder; try { - *phCommandBuffer = - new ur_exp_command_buffer_handle_t_(hContext, hDevice, IsUpdatable); + *phCommandBuffer = new ur_exp_command_buffer_handle_t_( + hContext, hDevice, IsUpdatable, IsInOrder); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { diff --git a/unified-runtime/source/adapters/hip/command_buffer.hpp b/unified-runtime/source/adapters/hip/command_buffer.hpp index ba2c797111aee..3d0047adee013 100644 --- a/unified-runtime/source/adapters/hip/command_buffer.hpp +++ b/unified-runtime/source/adapters/hip/command_buffer.hpp @@ -86,7 +86,8 @@ struct ur_exp_command_buffer_command_handle_t_ : ur::hip::handle_base { struct ur_exp_command_buffer_handle_t_ : ur::hip::handle_base { ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, - ur_device_handle_t hDevice, bool IsUpdatable); + ur_device_handle_t hDevice, bool IsUpdatable, + bool IsInOrder); ~ur_exp_command_buffer_handle_t_(); @@ -118,6 +119,8 @@ struct ur_exp_command_buffer_handle_t_ : ur::hip::handle_base { ur_device_handle_t Device; // Whether commands in the command-buffer can be updated bool IsUpdatable; + // Whether commands in the command-buffer are in-order. 
+ bool IsInOrder; // HIP Graph handle hipGraph_t HIPGraph; // HIP Graph Exec handle @@ -126,9 +129,8 @@ struct ur_exp_command_buffer_handle_t_ : ur::hip::handle_base { // using std::atomic prevents data race when incrementing/decrementing. std::atomic_uint32_t RefCount; - // Map of sync_points to ur_events - std::unordered_map - SyncPoints; + // Ordered map of sync_points to ur_events + std::map SyncPoints; // Next sync_point value (may need to consider ways to reuse values if 32-bits // is not enough) ur_exp_command_buffer_sync_point_t NextSyncPoint; diff --git a/unified-runtime/source/adapters/level_zero/command_buffer.cpp b/unified-runtime/source/adapters/level_zero/command_buffer.cpp index b4cd7592ddb78..5cb85d2059e65 100644 --- a/unified-runtime/source/adapters/level_zero/command_buffer.cpp +++ b/unified-runtime/source/adapters/level_zero/command_buffer.cpp @@ -318,8 +318,16 @@ ur_result_t createSyncPointAndGetZeEvents( return UR_RESULT_SUCCESS; } - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); + if (CommandBuffer->InOrderRequested && !CommandBuffer->ZeEventsList.empty()) { + // If a user requested an in-order UR command-buffer, but driver L0 + // command-lists couldn't be used, then we need to emulate the behavior by + // giving the command an event dependency on the last command. 
+ ze_event_handle_t LastEvent = CommandBuffer->ZeEventsList.back(); + ZeEventList.push_back(LastEvent); + } else { + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + } ur_event_handle_t LaunchEvent; UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/, false /*IsMultiDevice*/, HostVisible, &LaunchEvent, @@ -477,9 +485,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( AllResetEvent(AllResetEvent), CopyFinishedEvent(CopyFinishedEvent), ComputeFinishedEvent(ComputeFinishedEvent), ZeFencesMap(), ZeActiveFence(nullptr), SyncPoints(), NextSyncPoint(0), - IsUpdatable(Desc ? Desc->isUpdatable : false), - IsProfilingEnabled(Desc ? Desc->enableProfiling : false), - IsInOrderCmdList(IsInOrderCmdList), + IsUpdatable(Desc->isUpdatable), IsProfilingEnabled(Desc->enableProfiling), + InOrderRequested(Desc->isInOrder), IsInOrderCmdList(IsInOrderCmdList), UseImmediateAppendPath(UseImmediateAppendPath) { ur::level_zero::urContextRetain(Context); ur::level_zero::urDeviceRetain(Device); diff --git a/unified-runtime/source/adapters/level_zero/command_buffer.hpp b/unified-runtime/source/adapters/level_zero/command_buffer.hpp index 0298c48c6f1e2..f7b62a9c8dd1e 100644 --- a/unified-runtime/source/adapters/level_zero/command_buffer.hpp +++ b/unified-runtime/source/adapters/level_zero/command_buffer.hpp @@ -136,7 +136,9 @@ struct ur_exp_command_buffer_handle_t_ : public ur_object { bool IsFinalized = false; // Command-buffer profiling is enabled. bool IsProfilingEnabled = false; - // Command-buffer can be submitted to an in-order command-list. + // User requested an in-order UR command-buffer + bool InOrderRequested = false; + // Command-buffer will be created from an in-order command-list. bool IsInOrderCmdList = false; // Whether this command-buffer should use the code path that uses // zeCommandListImmediateAppendCommandListsExp during enqueue. 
diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index a7977e775a38d..4340ca8a235a1 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -9572,8 +9572,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9667,8 +9667,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9759,8 +9759,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. 
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9854,8 +9854,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9948,8 +9948,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -10041,8 +10041,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -10144,8 +10144,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -10255,8 +10255,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -10365,8 +10365,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -10465,8 +10465,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -10557,8 +10557,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -10647,8 +10647,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -10742,8 +10742,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. ur_exp_command_buffer_sync_point_t *pSyncPoint) try { diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index c7f7f26f1ece2..ab264c7494661 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -8063,8 +8063,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8145,8 +8145,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. 
+ /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8223,8 +8223,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8304,8 +8304,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8385,8 +8385,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. 
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8465,8 +8465,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8555,8 +8555,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8654,8 +8654,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -8753,8 +8753,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8841,8 +8841,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8920,8 +8920,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -8996,8 +8996,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9077,8 +9077,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. ur_exp_command_buffer_sync_point_t *pSyncPoint) { diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index d0c29b0987d77..fc0107413421d 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -8823,8 +8823,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. 
+ /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8915,8 +8915,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8996,8 +8996,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9085,8 +9085,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -9173,8 +9173,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9256,8 +9256,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9349,8 +9349,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -9451,8 +9451,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9547,8 +9547,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9633,8 +9633,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -9714,8 +9714,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9793,8 +9793,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9877,8 +9877,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. 
ur_exp_command_buffer_sync_point_t *pSyncPoint) { diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 3cf10f747db6b..08c388eabce4a 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -4583,8 +4583,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4630,8 +4630,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4677,8 +4677,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. 
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4725,8 +4725,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4772,8 +4772,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4819,8 +4819,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -4876,8 +4876,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4937,8 +4937,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -4997,8 +4997,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -5047,8 +5047,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -5092,8 +5092,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -5137,8 +5137,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -5187,8 +5187,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. ur_exp_command_buffer_sync_point_t *pSyncPoint) { diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 3fcd7462206bd..d904af3797127 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -8562,8 +8562,8 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8639,8 +8639,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. 
+ /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8717,8 +8717,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8791,8 +8791,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8865,8 +8865,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -8939,8 +8939,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9022,8 +9022,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9110,8 +9110,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9197,8 +9197,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. 
uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9276,8 +9276,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9357,8 +9357,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9437,8 +9437,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. 
+ /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -9503,8 +9503,8 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. ur_exp_command_buffer_sync_point_t *pSyncPoint) try { diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index f11d9c4b6220c..4f2654b40726f 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -7459,8 +7459,8 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t *phKernelAlternatives, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7525,8 +7525,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( size_t size, /// [in] The number of sync points in the provided dependency list. 
uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7594,8 +7594,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7659,8 +7659,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7723,8 +7723,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. 
+ /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7787,8 +7787,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7860,8 +7860,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -7937,8 +7937,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( void *pSrc, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. 
uint32_t numEventsInWaitList, @@ -8013,8 +8013,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( void *pDst, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8081,8 +8081,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( size_t size, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8152,8 +8152,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8223,8 +8223,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t advice, /// [in] The number of sync points in the provided dependency list. 
uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [in] Size of the event wait list. uint32_t numEventsInWaitList, @@ -8280,8 +8280,8 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( ur_exp_command_buffer_handle_t hChildCommandBuffer, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, - /// [in][optional] A list of sync points that this command depends on. May - /// be ignored if command-buffer is in-order. + /// [in][optional] A list of sync points that this command depends on. + /// Will be ignored if command-buffer is in-order. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. 
ur_exp_command_buffer_sync_point_t *pSyncPoint) { diff --git a/unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt b/unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt index 1c79cb79857ae..2865d28c0a8b5 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt +++ b/unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt @@ -17,6 +17,7 @@ add_conformance_test_with_kernels_environment(exp_command_buffer rect_read.cpp rect_write.cpp enqueue.cpp + in-order.cpp update/buffer_fill_kernel_update.cpp update/invalid_update.cpp update/kernel_handle_update.cpp diff --git a/unified-runtime/test/conformance/exp_command_buffer/commands.cpp b/unified-runtime/test/conformance/exp_command_buffer/commands.cpp index 74db0f4fe42d2..22ac628c2726f 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/commands.cpp +++ b/unified-runtime/test/conformance/exp_command_buffer/commands.cpp @@ -233,3 +233,29 @@ TEST_P(urCommandBufferAppendKernelLaunchExpTest, FinalizeTwice) { EXPECT_EQ_RESULT(urCommandBufferFinalizeExp(cmd_buf_handle), UR_RESULT_ERROR_INVALID_OPERATION); } + +TEST_P(urCommandBufferAppendKernelLaunchExpTest, DuplicateSyncPoint) { + ur_exp_command_buffer_sync_point_t sync_point; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, + &local_size, 0, nullptr, 0, nullptr, 0, nullptr, &sync_point, nullptr, + nullptr)); + + // Test passing redundant sync-points + ur_exp_command_buffer_sync_point_t sync_points[2] = {sync_point, sync_point}; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, + &local_size, 0, nullptr, 2, sync_points, 0, nullptr, nullptr, nullptr, + nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + + ASSERT_SUCCESS( + urEnqueueCommandBufferExp(queue, cmd_buf_handle, 0, nullptr, nullptr)); + 
ASSERT_SUCCESS(urQueueFinish(queue)); + int32_t *ptrZ = static_cast<int32_t *>(shared_ptrs[0]); + for (size_t i = 0; i < global_size; i++) { + uint32_t result = (A * i) + (i * 2); + ASSERT_EQ(result, ptrZ[i]); + } +} diff --git a/unified-runtime/test/conformance/exp_command_buffer/in-order.cpp b/unified-runtime/test/conformance/exp_command_buffer/in-order.cpp new file mode 100644 index 0000000000000..fd6335197cdf0 --- /dev/null +++ b/unified-runtime/test/conformance/exp_command_buffer/in-order.cpp @@ -0,0 +1,352 @@ +// Copyright (C) 2025 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" +#include + +// Virtual base class for tests verifying that if the `isInOrder` field is +// set on command-buffer creation, then the sync-point parameters to command +// append entry-points can be omitted. +struct urInOrderCommandBufferExpTest + : uur::command_buffer::urCommandBufferExpExecutionTest { + + virtual void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp()); + + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype + nullptr, // pnext + false, // isUpdatable + true, // isInOrder + false, // enableProfiling + }; + ASSERT_SUCCESS( + urCommandBufferCreateExp(context, device, &desc, &in_order_cb)); + ASSERT_NE(in_order_cb, nullptr); + + // Each element of Y will be initialized to its index + std::iota(std::begin(y_data), std::end(y_data), 0); + } + + virtual void TearDown() override { + if (in_order_cb) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cb)); + } + + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::TearDown()); + } + ur_exp_command_buffer_handle_t in_order_cb = nullptr; + static constexpr size_t global_size = 64; + static constexpr size_t global_offset = 0; + static constexpr size_t allocation_size =
sizeof(uint32_t) * global_size; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + static constexpr uint32_t x_pattern = 2; + static constexpr uint32_t zero_pattern = 0; + std::array<uint32_t, global_size> y_data; + + void Verify(std::array<uint32_t, global_size> &output) { + for (uint32_t i = 0; i < global_size; i++) { + const uint32_t ref = x_pattern * A + i; + ASSERT_EQ(ref, output[i]) << "Result mismatch at index: " << i; + } + } +}; + +struct urInOrderUSMCommandBufferExpTest : urInOrderCommandBufferExpTest { + virtual void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urInOrderCommandBufferExpTest::SetUp()); + + for (auto &device_ptr : device_ptrs) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + allocation_size, &device_ptr)); + ASSERT_NE(device_ptr, nullptr); + } + + // Index 0 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 2, nullptr, device_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[2])); + } + + // Appends commands to in-order command-buffer without sync-points + // @param[in] hints Append USM advise/prefetch hints between functional + // commands. + // @param[out] output Host memory to copy result back to from device pointer.
+ void AppendCommands(bool hints, std::array<uint32_t, global_size> &output) { + const uint32_t zero_pattern = 0; // Zero init the output + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + in_order_cb, device_ptrs[0], &zero_pattern, sizeof(uint32_t), + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + if (hints) { + ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( + in_order_cb, device_ptrs[1], allocation_size, + UR_USM_ADVICE_FLAG_DEFAULT, 0, nullptr, 0, nullptr, nullptr, nullptr, + nullptr)); + } + + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + in_order_cb, device_ptrs[1], &x_pattern, sizeof(uint32_t), + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + if (hints) { + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + in_order_cb, device_ptrs[0], allocation_size, + UR_USM_MIGRATION_FLAG_DEFAULT, 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); + } + + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + in_order_cb, device_ptrs[2], y_data.data(), allocation_size, 0, nullptr, + 0, nullptr, nullptr, nullptr, nullptr)); + + if (hints) { + ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( + in_order_cb, device_ptrs[0], allocation_size, + UR_USM_ADVICE_FLAG_DEFAULT, 0, nullptr, 0, nullptr, nullptr, nullptr, + nullptr)); + } + + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + in_order_cb, kernel, n_dimensions, &global_offset, &global_size, + nullptr, 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr, + nullptr)); + + if (hints) { + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + in_order_cb, device_ptrs[0], allocation_size, + UR_USM_MIGRATION_FLAG_DEFAULT, 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); + } + + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + in_order_cb, output.data(), device_ptrs[0], allocation_size, 0, nullptr, + 0, nullptr, nullptr, nullptr, nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb)); + } + + virtual void TearDown() override { + for (auto &device_ptr :
device_ptrs) { + if (device_ptr) { + EXPECT_SUCCESS(urUSMFree(context, device_ptr)); + } + } + UUR_RETURN_ON_FATAL_FAILURE(urInOrderCommandBufferExpTest::TearDown()); + } + std::array<void *, 3> device_ptrs = {nullptr, nullptr, nullptr}; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE(urInOrderUSMCommandBufferExpTest); + +// Tests USM Fill, Copy, and Kernel commands to a command-buffer +TEST_P(urInOrderUSMCommandBufferExpTest, WithoutHints) { + std::array<uint32_t, global_size> output; + AppendCommands(false, output); + + ASSERT_SUCCESS( + urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + Verify(output); +} + +// Tests USM prefetch and advise, which are hints and can be implemented by +// adapters as empty nodes, by interleaving between fill, copy, and kernel +// commands from the above test +TEST_P(urInOrderUSMCommandBufferExpTest, WithHints) { + // No prefetch or advise in cl_khr_command_buffer + UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); + + std::array<uint32_t, global_size> output; + AppendCommands(true, output); + + ASSERT_SUCCESS( + urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + Verify(output); +} + +struct urInOrderBufferCommandBufferExpTest : urInOrderCommandBufferExpTest { + virtual void SetUp() override { + program_name = "saxpy"; + UUR_RETURN_ON_FATAL_FAILURE(urInOrderCommandBufferExpTest::SetUp()); + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, &buffer)); + + ASSERT_NE(buffer, nullptr); + } + + // Variable that is incremented as arguments are added to the kernel + size_t current_arg_index = 0; + // Index 0 is output buffer for HIP/Non-HIP + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, current_arg_index++, nullptr, buffers[0])); + + // Lambda to add accessor arguments depending on backend.
+ // HIP has 3 offset parameters and other backends only have 1. + auto addAccessorArgs = [&]() { + if (backend == UR_BACKEND_HIP) { + size_t val = 0; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, + sizeof(size_t), nullptr, &val)); + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, + sizeof(size_t), nullptr, &val)); + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, + sizeof(size_t), nullptr, &val)); + } else { + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue( + kernel, current_arg_index++, sizeof(accessor), nullptr, &accessor)); + } + }; + + // Index 3 on HIP and 1 on non-HIP are accessors + addAccessorArgs(); + + // Index 4 on HIP and 2 on non-HIP is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, sizeof(A), + nullptr, &A)); + + // Index 5 on HIP and 3 on non-HIP is X buffer + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, current_arg_index++, nullptr, buffers[1])); + + // Index 8 on HIP and 4 on non-HIP is X buffer accessor + addAccessorArgs(); + + // Index 9 on HIP and 5 on non-HIP is Y buffer + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, current_arg_index++, nullptr, buffers[2])); + + // Index 12 on HIP and 6 on non-HIP is Y buffer accessor + addAccessorArgs(); + } + + virtual void TearDown() override { + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE(urInOrderCommandBufferExpTest::TearDown()); + } + + ur_backend_t backend{}; + std::array<ur_mem_handle_t, 3> buffers = {nullptr, nullptr, nullptr}; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE(urInOrderBufferCommandBufferExpTest); + +// Tests Buffer Fill, Write, Read, and Kernel commands to a command-buffer +TEST_P(urInOrderBufferCommandBufferExpTest, 1D) { + // No buffer read/write command in cl_khr_command_buffer + // See https://github.com/KhronosGroup/OpenCL-Docs/issues/1281 + UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); + + // Zero init the output Z
+ ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + in_order_cb, buffers[0], &zero_pattern, sizeof(zero_pattern), 0, + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + // Initialize the X input + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + in_order_cb, buffers[1], &x_pattern, sizeof(x_pattern), 0, + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + // Initialize the Y input + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( + in_order_cb, buffers[2], 0, allocation_size, y_data.data(), 0, nullptr, 0, + nullptr, nullptr, nullptr, nullptr)); + + // Run kernel + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + in_order_cb, kernel, n_dimensions, &global_offset, &global_size, nullptr, + 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + // Copy Z -> X + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( + in_order_cb, buffers[0], buffers[1], 0, 0, allocation_size, 0, nullptr, 0, + nullptr, nullptr, nullptr, nullptr)); + + // Read X back to host + std::array<uint32_t, global_size> host_data{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( + in_order_cb, buffers[1], 0, allocation_size, host_data.data(), 0, nullptr, + 0, nullptr, nullptr, nullptr, nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb)); + ASSERT_SUCCESS( + urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + Verify(host_data); +} + +TEST_P(urInOrderBufferCommandBufferExpTest, Rect) { + // No buffer read/write command in cl_khr_command_buffer + // See https://github.com/KhronosGroup/OpenCL-Docs/issues/1281 + UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); + + // Zero init the output Z + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + in_order_cb, buffers[0], &zero_pattern, sizeof(zero_pattern), 0, + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + // Initialize the X input + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp(
+ in_order_cb, buffers[1], &x_pattern, sizeof(x_pattern), 0, + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + // Initialize the Y input + ur_rect_offset_t origin{0, 0, 0}; + ur_rect_region_t region{16, 16, 1}; + size_t row_pitch = 16; + size_t slice_pitch = allocation_size; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( + in_order_cb, buffers[2], origin, origin, region, row_pitch, slice_pitch, + row_pitch, slice_pitch, y_data.data(), 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); + + // Run kernel + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + in_order_cb, kernel, n_dimensions, &global_offset, &global_size, nullptr, + 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + // Copy Z -> X + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( + in_order_cb, buffers[0], buffers[1], origin, origin, region, row_pitch, + slice_pitch, row_pitch, slice_pitch, 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); + + // Read X back to host + std::array<uint32_t, global_size> host_data{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( + in_order_cb, buffers[1], origin, origin, region, row_pitch, slice_pitch, + row_pitch, slice_pitch, host_data.data(), 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb)); + ASSERT_SUCCESS( + urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + Verify(host_data); +} diff --git a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_L0.cpp b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_L0.cpp index 9805b7972a51d..a9430244fb4a7 100644 --- a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_L0.cpp +++ b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_L0.cpp @@ -111,3 +111,59 @@ TEST_P(urL0CommandBufferNativeAppendTest, Dependencies) {
ASSERT_EQ(i, val); } } + +// Test using an in-order command-buffer +struct urL0InOrderCommandBufferNativeAppendTest + : urL0CommandBufferNativeAppendTest { + virtual void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urL0CommandBufferNativeAppendTest::SetUp()); + + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype + nullptr, // pnext + false, // isUpdatable + true, // isInOrder + false, // enableProfiling + }; + ASSERT_SUCCESS( + urCommandBufferCreateExp(context, device, &desc, &in_order_cb)); + ASSERT_NE(in_order_cb, nullptr); + } + + virtual void TearDown() override { + if (in_order_cb) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cb)); + } + + UUR_RETURN_ON_FATAL_FAILURE(urL0CommandBufferNativeAppendTest::TearDown()); + } + ur_exp_command_buffer_handle_t in_order_cb = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE(urL0InOrderCommandBufferNativeAppendTest); + +// Test command-buffer native command with other command-buffer commands as +// predecessors and successors, defined using in-order property rather than +// sync-points +TEST_P(urL0InOrderCommandBufferNativeAppendTest, Success) { + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + in_order_cb, src_device_ptr, &val, sizeof(val), allocation_size, 0, + nullptr, 0, nullptr, nullptr, nullptr, nullptr)); + + InteropData data{in_order_cb, src_device_ptr, dst_device_ptr}; + ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp( + in_order_cb, &interop_func, &data, nullptr, 0, nullptr, nullptr)); + + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + in_order_cb, host_vec.data(), dst_device_ptr, allocation_size, 0, nullptr, + 0, nullptr, nullptr, nullptr, nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb)); + ASSERT_SUCCESS( + urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr)); + + urQueueFinish(queue); + for (auto &i : host_vec) { + ASSERT_EQ(i, val); + } +} diff --git 
a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_cuda.cpp b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_cuda.cpp
index 245a27acc1233..a1fce8c80ad6c 100644
--- a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_cuda.cpp
+++ b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_cuda.cpp
@@ -139,3 +139,61 @@ TEST_P(urCudaCommandBufferNativeAppendTest, Dependencies) {
     ASSERT_EQ(i, val);
   }
 }
+
+// Fixture that also owns an in-order command-buffer (isInOrder = true)
+struct urCudaInOrderCommandBufferNativeAppendTest
+    : urCudaCommandBufferNativeAppendTest {
+  virtual void SetUp() override {
+    UUR_RETURN_ON_FATAL_FAILURE(urCudaCommandBufferNativeAppendTest::SetUp());
+
+    ur_exp_command_buffer_desc_t desc{
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype
+        nullptr,                                   // pnext
+        false,                                     // isUpdatable
+        true,                                      // isInOrder
+        false,                                     // enableProfiling
+    };
+    ASSERT_SUCCESS(
+        urCommandBufferCreateExp(context, device, &desc, &in_order_cb));
+    ASSERT_NE(in_order_cb, nullptr);
+  }
+
+  virtual void TearDown() override {
+    if (in_order_cb) { // released before the base fixture is torn down
+      EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cb));
+    }
+
+    UUR_RETURN_ON_FATAL_FAILURE(
+        urCudaCommandBufferNativeAppendTest::TearDown());
+  }
+  ur_exp_command_buffer_handle_t in_order_cb = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCudaInOrderCommandBufferNativeAppendTest);
+
+// Appends fill -> native command -> copy with no sync-points, so ordering
+// relies solely on the in-order property; the data copied back to host_vec
+// must equal the fill pattern `val`
+TEST_P(urCudaInOrderCommandBufferNativeAppendTest, Success) {
+  ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp(
+      in_order_cb, src_device_ptr, &val, sizeof(val), allocation_size, 0,
+      nullptr, 0, nullptr, nullptr, nullptr, nullptr));
+
+  InteropData data{child_cmd_buf, context, src_device_ptr, dst_device_ptr};
+  ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
+      in_order_cb, &interop_func, &data, child_cmd_buf, 0, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
+      in_order_cb, host_vec.data(), dst_device_ptr, allocation_size, 0, nullptr,
+      0, nullptr, nullptr, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb));
+
+  ASSERT_SUCCESS(
+      urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urQueueFinish(queue)); // must succeed before host_vec is read
+  for (auto &i : host_vec) {
+    ASSERT_EQ(i, val);
+  }
+}
diff --git a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_hip.cpp b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_hip.cpp
index 501265e8246b9..e41c0915a1f45 100644
--- a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_hip.cpp
+++ b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_hip.cpp
@@ -125,3 +125,60 @@ TEST_P(urHipCommandBufferNativeAppendTest, Dependencies) {
     ASSERT_EQ(i, val);
   }
 }
+
+// Fixture that also owns an in-order command-buffer (isInOrder = true)
+struct urHipInOrderCommandBufferNativeAppendTest
+    : urHipCommandBufferNativeAppendTest {
+  virtual void SetUp() override {
+    UUR_RETURN_ON_FATAL_FAILURE(urHipCommandBufferNativeAppendTest::SetUp());
+
+    ur_exp_command_buffer_desc_t desc{
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype
+        nullptr,                                   // pnext
+        false,                                     // isUpdatable
+        true,                                      // isInOrder
+        false,                                     // enableProfiling
+    };
+    ASSERT_SUCCESS(
+        urCommandBufferCreateExp(context, device, &desc, &in_order_cb));
+    ASSERT_NE(in_order_cb, nullptr);
+  }
+
+  virtual void TearDown() override {
+    if (in_order_cb) { // released before the base fixture is torn down
+      EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cb));
+    }
+
+    UUR_RETURN_ON_FATAL_FAILURE(urHipCommandBufferNativeAppendTest::TearDown());
+  }
+  ur_exp_command_buffer_handle_t in_order_cb = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE(urHipInOrderCommandBufferNativeAppendTest);
+
+// Appends fill -> native command -> copy with no sync-points, so ordering
+// relies solely on the in-order property; the data copied back to host_vec
+// must equal the fill pattern `val`
+TEST_P(urHipInOrderCommandBufferNativeAppendTest, Success) {
+  ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp(
+      in_order_cb, src_device_ptr, &val, sizeof(val), allocation_size, 0,
+      nullptr, 0, nullptr, nullptr, nullptr, nullptr));
+
+  InteropData data{child_cmd_buf, src_device_ptr, dst_device_ptr};
+  ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
+      in_order_cb, &interop_func, &data, child_cmd_buf, 0, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
+      in_order_cb, host_vec.data(), dst_device_ptr, allocation_size, 0, nullptr,
+      0, nullptr, nullptr, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb));
+
+  ASSERT_SUCCESS(
+      urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urQueueFinish(queue)); // must succeed before host_vec is read
+  for (auto &i : host_vec) {
+    ASSERT_EQ(i, val);
+  }
+}
diff --git a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_opencl.cpp b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_opencl.cpp
index 07e1ab0472cc4..2823199db838a 100644
--- a/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_opencl.cpp
+++ b/unified-runtime/test/conformance/exp_command_buffer/native-command/append_native_opencl.cpp
@@ -162,3 +162,66 @@ TEST_P(urOpenCLCommandBufferNativeAppendTest, Dependencies) {
     ASSERT_EQ(i, val);
   }
 }
+
+// Fixture that also owns an in-order command-buffer (isInOrder = true)
+struct urOpenCLInOrderCommandBufferNativeAppendTest
+    : urOpenCLCommandBufferNativeAppendTest {
+  virtual void SetUp() override {
+    UUR_RETURN_ON_FATAL_FAILURE(urOpenCLCommandBufferNativeAppendTest::SetUp());
+
+    ur_exp_command_buffer_desc_t desc{
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype
+        nullptr,                                   // pnext
+        false,                                     // isUpdatable
+        true,                                      // isInOrder
+        false,                                     // enableProfiling
+    };
+    ASSERT_SUCCESS(
+        urCommandBufferCreateExp(context, device, &desc, &in_order_cb));
+    ASSERT_NE(in_order_cb, nullptr);
+  }
+
+  virtual void TearDown() override {
+    if (in_order_cb) { // released before the base fixture is torn down
+      EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cb));
+    }
+
+    UUR_RETURN_ON_FATAL_FAILURE(
+        urOpenCLCommandBufferNativeAppendTest::TearDown());
+  }
+  ur_exp_command_buffer_handle_t in_order_cb = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE(urOpenCLInOrderCommandBufferNativeAppendTest);
+
+// Appends fill -> native command -> buffer copy with no sync-points, so
+// ordering relies solely on the in-order property; the data read back from
+// copy_buffer must equal the fill pattern `val`
+TEST_P(urOpenCLInOrderCommandBufferNativeAppendTest, Success) {
+  auto &src_buffer = buffers[0];
+  auto &dst_buffer = buffers[1];
+  ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp(
+      in_order_cb, src_buffer, &val, sizeof(val), 0, allocation_size, 0,
+      nullptr, 0, nullptr, nullptr, nullptr, nullptr));
+
+  InteropData data{in_order_cb, device, src_buffer, dst_buffer,
+                   clCommandCopyBufferKHR};
+  ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
+      in_order_cb, &interop_func, &data, nullptr, 0, nullptr, nullptr));
+
+  auto &copy_buffer = buffers[2];
+  ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp(
+      in_order_cb, dst_buffer, copy_buffer, 0, 0, allocation_size, 0, nullptr,
+      0, nullptr, nullptr, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cb));
+  ASSERT_SUCCESS(
+      urEnqueueCommandBufferExp(queue, in_order_cb, 0, nullptr, nullptr));
+
+  ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, copy_buffer, true, 0,
+                                        allocation_size, host_vec.data(), 0,
+                                        nullptr, nullptr));
+  for (auto &i : host_vec) {
+    ASSERT_EQ(i, val);
+  }
+}