Skip to content

Commit 8131c85

Browse files
MaciejPlewka authored and ArturHarasimiuk committed
Change task count type to uint64_t
Signed-off-by: Maciej Plewka <[email protected]>
1 parent 07b037d commit 8131c85

File tree

89 files changed: +423 -400 lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

89 files changed: +423 -400 lines changed

runtime/command_queue/command_queue.cpp

+11-11
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2019 Intel Corporation
2+
* Copyright (C) 2018-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -117,22 +117,22 @@ CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver() const {
117117
return nullptr;
118118
}
119119

120-
uint32_t CommandQueue::getHwTag() const {
121-
uint32_t tag = *getHwTagAddress();
120+
TagAddressType CommandQueue::getHwTag() const {
121+
TagAddressType tag = *getHwTagAddress();
122122
return tag;
123123
}
124124

125-
volatile uint32_t *CommandQueue::getHwTagAddress() const {
125+
volatile TagAddressType *CommandQueue::getHwTagAddress() const {
126126
return getGpgpuCommandStreamReceiver().getTagAddress();
127127
}
128128

129-
bool CommandQueue::isCompleted(uint32_t taskCount) const {
130-
uint32_t tag = getHwTag();
129+
bool CommandQueue::isCompleted(TaskCountType taskCount) const {
130+
TagAddressType tag = getHwTag();
131131
DEBUG_BREAK_IF(tag == Event::eventNotReady);
132132
return tag >= taskCount;
133133
}
134134

135-
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
135+
void CommandQueue::waitUntilComplete(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
136136
WAIT_ENTER()
137137

138138
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
@@ -198,12 +198,12 @@ cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName,
198198
return getQueueInfo<CommandQueue>(this, paramName, paramValueSize, paramValue, paramValueSizeRet);
199199
}
200200

201-
uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel,
202-
cl_uint numEventsInWaitList,
203-
const cl_event *eventWaitList) {
201+
TaskCountType CommandQueue::getTaskLevelFromWaitList(TaskCountType taskLevel,
202+
cl_uint numEventsInWaitList,
203+
const cl_event *eventWaitList) {
204204
for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) {
205205
auto pEvent = (Event *)(eventWaitList[iEvent]);
206-
uint32_t eventTaskLevel = pEvent->taskLevel;
206+
TaskCountType eventTaskLevel = pEvent->taskLevel;
207207
taskLevel = std::max(taskLevel, eventTaskLevel);
208208
}
209209
return taskLevel;

runtime/command_queue/command_queue.h

+13-12
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
66
*/
77

88
#pragma once
9+
#include "runtime/command_stream/task_count_helper.h"
910
#include "runtime/event/event.h"
1011
#include "runtime/helpers/base_object.h"
1112
#include "runtime/helpers/dispatch_info.h"
@@ -320,19 +321,19 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
320321
size_t paramValueSize, void *paramValue,
321322
size_t *paramValueSizeRet);
322323

323-
uint32_t getHwTag() const;
324+
TagAddressType getHwTag() const;
324325

325-
volatile uint32_t *getHwTagAddress() const;
326+
volatile TagAddressType *getHwTagAddress() const;
326327

327-
bool isCompleted(uint32_t taskCount) const;
328+
bool isCompleted(TaskCountType taskCount) const;
328329

329330
MOCKABLE_VIRTUAL bool isQueueBlocked();
330331

331-
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
332+
MOCKABLE_VIRTUAL void waitUntilComplete(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
332333

333-
static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,
334-
cl_uint numEventsInWaitList,
335-
const cl_event *eventWaitList);
334+
static TaskCountType getTaskLevelFromWaitList(TaskCountType taskLevel,
335+
cl_uint numEventsInWaitList,
336+
const cl_event *eventWaitList);
336337

337338
CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
338339
CommandStreamReceiver *getBcsCommandStreamReceiver() const;
@@ -407,14 +408,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
407408
bool isMultiEngineQueue() const { return this->multiEngineQueue; }
408409

409410
// taskCount of last task
410-
uint32_t taskCount = 0;
411+
TaskCountType taskCount = 0;
411412

412413
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
413-
uint32_t taskLevel = 0;
414+
TaskCountType taskLevel = 0;
414415

415416
std::unique_ptr<FlushStampTracker> flushStamp;
416417

417-
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
418+
std::atomic<TaskCountType> latestTaskCountWaited{std::numeric_limits<TaskCountType>::max()};
418419

419420
// virtual event that holds last Enqueue information
420421
Event *virtualEvent = nullptr;
@@ -428,7 +429,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
428429
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
429430
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
430431

431-
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
432+
virtual void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
432433
bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue) const;
433434

434435
MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies);

runtime/command_queue/command_queue_hw.h

+5-5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -327,7 +327,7 @@ class CommandQueueHw : public CommandQueue {
327327
TimestampPacketContainer *previousTimestampPacketNodes,
328328
EventsRequest &eventsRequest,
329329
EventBuilder &eventBuilder,
330-
uint32_t taskLevel,
330+
TaskCountType taskLevel,
331331
PrintfHandler *printfHandler);
332332

333333
template <uint32_t commandType>
@@ -350,7 +350,7 @@ class CommandQueueHw : public CommandQueue {
350350
TimestampPacketContainer *previousTimestampPacketNodes,
351351
EventsRequest &eventsRequest,
352352
EventBuilder &eventBuilder,
353-
uint32_t taskLevel);
353+
TaskCountType taskLevel);
354354
void processDispatchForCacheFlush(Surface **surfaces,
355355
size_t numSurfaces,
356356
LinearStream *commandStream,
@@ -410,8 +410,8 @@ class CommandQueueHw : public CommandQueue {
410410
}
411411

412412
private:
413-
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
414-
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
413+
bool isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
414+
void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
415415
void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo);
416416
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
417417
size_t *hostOffset,

runtime/command_queue/cpu_data_transfer_handler.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -55,7 +55,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
5555
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
5656

5757
auto blockQueue = false;
58-
auto taskLevel = 0u;
58+
TaskCountType taskLevel = 0u;
5959
obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType);
6060

6161
DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel);

runtime/command_queue/enqueue_common.h

+7-7
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -168,7 +168,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
168168
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
169169

170170
auto blockQueue = false;
171-
auto taskLevel = 0u;
171+
TaskCountType taskLevel = 0u;
172172
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
173173
bool blitEnqueue = blitEnqueueAllowed(blockQueue, commandType);
174174

@@ -483,7 +483,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
483483
auto parentKernel = multiDispatchInfo.peekParentKernel();
484484
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
485485

486-
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
486+
TaskCountType taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
487487
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
488488
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
489489
parentKernel,
@@ -523,7 +523,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
523523
}
524524

525525
template <typename GfxFamily>
526-
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
526+
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
527527
auto isQueueBlockedStatus = isQueueBlocked();
528528
taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList);
529529
blockQueueStatus = (taskLevel == Event::eventNotReady) || isQueueBlockedStatus;
@@ -536,7 +536,7 @@ void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &ta
536536
}
537537

538538
template <typename GfxFamily>
539-
bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
539+
bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const TaskCountType &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
540540
bool updateTaskLevel = true;
541541
//if we are blocked by user event then no update
542542
if (taskLevel == Event::eventNotReady) {
@@ -577,7 +577,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
577577
TimestampPacketContainer *previousTimestampPacketNodes,
578578
EventsRequest &eventsRequest,
579579
EventBuilder &eventBuilder,
580-
uint32_t taskLevel,
580+
TaskCountType taskLevel,
581581
PrintfHandler *printfHandler) {
582582

583583
UNRECOVERABLE_IF(multiDispatchInfo.empty());
@@ -806,7 +806,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
806806
TimestampPacketContainer *previousTimestampPacketNodes,
807807
EventsRequest &eventsRequest,
808808
EventBuilder &eventBuilder,
809-
uint32_t taskLevel) {
809+
TaskCountType taskLevel) {
810810

811811
if (timestampPacketContainer) {
812812
timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver());

runtime/command_stream/CMakeLists.txt

+3-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) 2018-2019 Intel Corporation
2+
# Copyright (C) 2018-2023 Intel Corporation
33
#
44
# SPDX-License-Identifier: MIT
55
#
@@ -43,6 +43,7 @@ set(RUNTIME_SRCS_COMMAND_STREAM
4343
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.h
4444
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp
4545
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h
46+
${CMAKE_CURRENT_SOURCE_DIR}/task_count_helper.h
4647
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp
4748
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h
4849
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.h
@@ -56,4 +57,4 @@ list(APPEND RUNTIME_SRCS_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM})
5657

5758
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_STREAM})
5859
set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_STREAM ${RUNTIME_SRCS_COMMAND_STREAM})
59-
add_subdirectories()
60+
add_subdirectories()

runtime/command_stream/aub_command_stream_receiver_hw.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -62,7 +62,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
6262
MOCKABLE_VIRTUAL void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
6363
void pollForCompletion() override;
6464
void pollForCompletionImpl() override;
65-
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
65+
void waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
6666

6767
uint32_t getDumpHandle();
6868
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
@@ -109,7 +109,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
109109
bool dumpAubNonWritable = false;
110110
ExternalAllocationsContainer externalAllocations;
111111

112-
uint32_t pollForCompletionTaskCount = 0u;
112+
TaskCountType pollForCompletionTaskCount = 0;
113113
SpinLock pollForCompletionLock;
114114
};
115115
} // namespace NEO

runtime/command_stream/aub_command_stream_receiver_hw_base.inl

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019 Intel Corporation
2+
* Copyright (C) 2019-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -584,7 +584,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
584584
}
585585

586586
template <typename GfxFamily>
587-
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
587+
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
588588
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
589589
pollForCompletion();
590590
}

runtime/command_stream/command_stream_receiver.cpp

+6-5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2019 Intel Corporation
2+
* Copyright (C) 2018-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -13,6 +13,7 @@
1313
#include "runtime/command_stream/experimental_command_buffer.h"
1414
#include "runtime/command_stream/preemption.h"
1515
#include "runtime/command_stream/scratch_space_controller.h"
16+
#include "runtime/command_stream/task_count_helper.h"
1617
#include "runtime/context/context.h"
1718
#include "runtime/device/device.h"
1819
#include "runtime/event/event.h"
@@ -109,7 +110,7 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf
109110
}
110111
}
111112

112-
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {
113+
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage) {
113114
auto address = getTagAddress();
114115
if (address) {
115116
while (*address < requiredTaskCount)
@@ -186,11 +187,11 @@ void CommandStreamReceiver::cleanupResources() {
186187
}
187188
}
188189

189-
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
190+
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) {
190191
std::chrono::high_resolution_clock::time_point time1, time2;
191192
int64_t timeDiff = 0;
192193

193-
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
194+
TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
194195
if (latestSentTaskCount < taskCountToWait) {
195196
this->flushBatchedSubmissions();
196197
}
@@ -217,7 +218,7 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int
217218
void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
218219
this->tagAllocation = allocation;
219220
UNRECOVERABLE_IF(allocation == nullptr);
220-
this->tagAddress = reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer());
221+
this->tagAddress = reinterpret_cast<TagAddressType *>(allocation->getUnderlyingBuffer());
221222
}
222223

223224
FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const {

0 commit comments

Comments
 (0)