Skip to content

Commit eb590b2

Browse files
add tests for batched queue submissions
1 parent 65f35aa commit eb590b2

File tree

6 files changed: 578 additions and 39 deletions.

unified-runtime/source/adapters/level_zero/v2/queue_batched.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include "kernel.hpp"
1919
#include "lockable.hpp"
2020
#include "memory.hpp"
21-
#include "ur.hpp"
2221

2322
#include "../common/latency_tracker.hpp"
2423
#include "../helpers/kernel_helpers.hpp"
@@ -123,10 +122,10 @@ ur_result_t batch_manager::renewRegularUnlocked(
123122
ur_result_t
124123
ur_queue_batched_t::renewBatchUnlocked(locked<batch_manager> &batchLocked) {
125124
if (batchLocked->isLimitOfUsedCommandListsReached()) {
126-
UR_CALL(queueFinishUnlocked(batchLocked));
125+
return queueFinishUnlocked(batchLocked);
126+
} else {
127+
return batchLocked->renewRegularUnlocked(getNewRegularCmdList());
127128
}
128-
129-
return batchLocked->renewRegularUnlocked(getNewRegularCmdList());
130129
}
131130

132131
ur_result_t batch_manager::enqueueCurrentBatchUnlocked() {
@@ -214,20 +213,26 @@ ur_result_t batch_manager::batchFinish() {
214213

215214
UR_CALL(activeBatch.releaseSubmittedKernels());
216215

217-
{
216+
if (!isActiveBatchEmpty()) {
217+
// Should have been enqueued as part of queueFinishUnlocked
218218
TRACK_SCOPE_LATENCY("ur_queue_batched_t::resetRegCmdlist");
219219
ZE2UR_CALL(zeCommandListReset, (activeBatch.getZeCommandList()));
220+
221+
setBatchEmpty();
222+
regularGenerationNumber++;
220223
}
221224

222225
runBatches.clear();
223-
setBatchEmpty();
224226

225227
return UR_RESULT_SUCCESS;
226228
}
227229

228230
ur_result_t
229231
ur_queue_batched_t::queueFinishUnlocked(locked<batch_manager> &batchLocked) {
230-
UR_CALL(batchLocked->enqueueCurrentBatchUnlocked());
232+
if (!batchLocked->isActiveBatchEmpty()) {
233+
UR_CALL(batchLocked->enqueueCurrentBatchUnlocked());
234+
}
235+
231236
UR_CALL(batchLocked->hostSynchronize());
232237

233238
UR_CALL(queueFinishPoolsUnlocked());
@@ -1070,7 +1075,12 @@ ur_queue_batched_t::queueFlushUnlocked(locked<batch_manager> &batchLocked) {
10701075

10711076
ur_result_t ur_queue_batched_t::queueFlush() {
10721077
auto batchLocked = currentCmdLists.lock();
1073-
return queueFlushUnlocked(batchLocked);
1078+
1079+
if (batchLocked->isActiveBatchEmpty()) {
1080+
return UR_RESULT_SUCCESS;
1081+
} else {
1082+
return queueFlushUnlocked(batchLocked);
1083+
}
10741084
}
10751085

10761086
} // namespace v2

unified-runtime/source/adapters/level_zero/v2/queue_batched.hpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@
4747

4848
namespace v2 {
4949

50+
// The limit of regular command lists stored for execution; if exceeded, the
51+
// vector is cleared as part of queueFinish and slots are renewed.
52+
inline constexpr uint64_t initialSlotsForBatches = 10;
53+
54+
// For the explanation of the purpose of generation numbers, see the comment for
55+
// regularGenerationNumber below
56+
inline constexpr ur_event_generation_t initialGenerationNumber = 0;
57+
5058
struct batch_manager {
5159
private:
5260
// The currently active regular command list, which may be replaced in the
@@ -75,9 +83,6 @@ struct batch_manager {
7583
// associated with the event has already been submitted for execution and
7684
// additional submission of the current batch is not needed.
7785
ur_event_generation_t regularGenerationNumber;
78-
// The limit of regular command lists stored for execution; if exceeded, the
79-
// vector is cleared as part of queueFinish and slots are renewed.
80-
static constexpr uint64_t initialSlotsForBatches = 10;
8186
// Whether any operation has been enqueued on the current batch
8287
bool isEmpty = true;
8388

@@ -91,7 +96,7 @@ struct batch_manager {
9196
immediateList(context, device,
9297
std::forward<v2::raii::command_list_unique_handle>(
9398
commandListImmediate)),
94-
regularGenerationNumber(0) {
99+
regularGenerationNumber(initialGenerationNumber) {
95100
runBatches.reserve(initialSlotsForBatches);
96101
}
97102

unified-runtime/source/adapters/level_zero/v2/queue_create.cpp

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "queue_batched.hpp"
1616
#include "queue_handle.hpp"
1717
#include "queue_immediate_in_order.hpp"
18+
#include "ur_api.h"
1819

1920
static const bool ForceBatched = getenv_tobool("UR_L0_V2_FORCE_BATCHED");
2021

@@ -67,56 +68,55 @@ ur_result_t urQueueCreate(ur_context_handle_t hContext,
6768

6869
TRACK_SCOPE_LATENCY("queueCreate");
6970

71+
bool isImmediate = true;
72+
bool isOutOfOrder = false;
73+
bool isBatched = false;
74+
7075
ur_queue_flags_t flags = 0;
7176
if (pProperties) {
7277
flags = pProperties->flags;
78+
79+
isImmediate = flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE;
80+
isOutOfOrder = flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
81+
isBatched = flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED;
7382
}
7483

7584
if (ForceBatched) {
7685
flags |= UR_QUEUE_FLAG_SUBMISSION_BATCHED;
77-
}
78-
79-
auto zeIndex = v2::getZeIndex(pProperties);
80-
81-
bool immediate = true;
82-
bool outOfOrder = false;
86+
isBatched = true;
8387

84-
if (flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED) {
85-
immediate = false;
86-
}
87-
88-
if (flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
89-
outOfOrder = true;
90-
}
91-
92-
if (flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) {
93-
if (!immediate) {
94-
UR_LOG(WARN,
95-
"urQueueCreate called with both UR_QUEUE_FLAG_SUBMISSION_BATCHED "
96-
"and UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE in ur_queue_flags_t. "
97-
"Defaulting to the immediate submission mode.");
88+
if (isImmediate) {
89+
flags &= ~UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE;
90+
isImmediate = false;
9891
}
92+
} else {
9993

100-
immediate = true;
94+
if (pProperties && isImmediate && isBatched) {
95+
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
96+
}
10197
}
10298

103-
if (immediate) {
104-
if (outOfOrder) {
99+
auto zeIndex = v2::getZeIndex(pProperties);
100+
101+
if (isBatched) {
102+
// out of order not supported
103+
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_batched_t>(
104+
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
105+
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
106+
} else {
107+
if (isOutOfOrder) {
105108
*phQueue =
106109
ur_queue_handle_t_::create<v2::ur_queue_immediate_out_of_order_t>(
107110
hContext, hDevice, v2::getZeOrdinal(hDevice),
108111
v2::getZePriority(flags), zeIndex,
109112
v2::eventFlagsFromQueueFlags(flags), flags);
110113
} else {
114+
// immediate
111115
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_immediate_in_order_t>(
112116
hContext, hDevice, v2::getZeOrdinal(hDevice),
113117
v2::getZePriority(flags), zeIndex,
114118
v2::eventFlagsFromQueueFlags(flags), flags);
115119
}
116-
} else {
117-
*phQueue = ur_queue_handle_t_::create<v2::ur_queue_batched_t>(
118-
hContext, hDevice, v2::getZeOrdinal(hDevice), v2::getZePriority(flags),
119-
zeIndex, v2::eventFlagsFromQueueFlags(flags), flags);
120120
}
121121

122122
return UR_RESULT_SUCCESS;

unified-runtime/test/adapters/level_zero/enqueue_alloc.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <thread>
1111

1212
#include "ur_api.h"
13+
#include "uur/utils.h"
1314
#include <uur/fixtures.h>
1415

1516
struct EnqueueAllocTestParam {
@@ -81,6 +82,7 @@ struct urL0EnqueueAllocMultiQueueSameDeviceTest
8182
for (size_t i = 0; i < param.numQueues; i++) {
8283
ur_queue_handle_t queue = nullptr;
8384
ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue));
85+
SKIP_IF_BATCHED_QUEUE(queue);
8486
queues.push_back(queue);
8587
}
8688
}
@@ -343,6 +345,10 @@ TEST_P(urL0EnqueueAllocMultiQueueSameDeviceTest, SuccessMt) {
343345
const auto checkUSMSupportFunc =
344346
std::get<1>(this->GetParam()).funcParams.checkUSMSupportFunc;
345347

348+
if (numQueues > 0) {
349+
SKIP_IF_BATCHED_QUEUE(queues[0]);
350+
}
351+
346352
ur_device_usm_access_capability_flags_t USMSupport = 0;
347353
ASSERT_SUCCESS(checkUSMSupportFunc(device, USMSupport));
348354
if (!(USMSupport & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {

unified-runtime/test/adapters/level_zero/v2/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,22 @@ add_l0_v2_devices_test(memory_residency
6767
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp
6868
)
6969

70+
add_l0_v2_devices_test(batched_queue
71+
batched_queue_test.cpp
72+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/adapter.cpp
73+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp
74+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/device.cpp
75+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/platform.cpp
76+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp
77+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool_cache.cpp
78+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool.cpp
79+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_counter.cpp
80+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp
81+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event.cpp
82+
${PROJECT_SOURCE_DIR}/source/ur/ur.cpp
83+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/command_list_cache.cpp
84+
)
85+
7086
if(NOT UR_FOUND_DPCXX)
7187
# Tests that require kernels can't be used if we aren't generating
7288
# device binaries

0 commit comments

Comments
 (0)