Skip to content

Commit 29eee35

Browse files
pass event host scope flag to walker when single packet events are used
Related-To: NEO-7434
Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 946dd27 commit 29eee35

File tree

4 files changed

+181
-8
lines changed

4 files changed

+181
-8
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct CmdListKernelLaunchParams {
3333
bool isKernelSplitOperation = false;
3434
bool isBuiltInKernel = false;
3535
bool isDestinationAllocationInSystemMemory = false;
36+
bool isHostSignalScopeEvent = false;
3637
};
3738

3839
struct CmdListReturnPoint {

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -303,13 +303,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
303303
return ret;
304304
}
305305

306+
CmdListKernelLaunchParams launchParams = {};
306307
Event *event = nullptr;
307308
if (hEvent) {
308309
event = Event::fromHandle(hEvent);
310+
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
309311
}
310312

311313
appendEventForProfiling(event, true, false);
312-
CmdListKernelLaunchParams launchParams = {};
313314
launchParams.isIndirect = true;
314315
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
315316
nullptr, launchParams);
@@ -332,9 +333,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
332333
return ret;
333334
}
334335

336+
CmdListKernelLaunchParams launchParams = {};
337+
launchParams.isIndirect = true;
338+
launchParams.isPredicate = true;
339+
335340
Event *event = nullptr;
336341
if (hEvent) {
337342
event = Event::fromHandle(hEvent);
343+
launchParams.isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
338344
}
339345

340346
appendEventForProfiling(event, true, false);
@@ -346,9 +352,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
346352
for (uint32_t i = 0; i < numKernels; i++) {
347353
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i);
348354

349-
CmdListKernelLaunchParams launchParams = {};
350-
launchParams.isIndirect = true;
351-
launchParams.isPredicate = true;
352355
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
353356
haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr,
354357
nullptr, launchParams);
@@ -1183,17 +1186,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
11831186
return ret;
11841187
}
11851188

1189+
CmdListKernelLaunchParams launchParams = {};
1190+
11861191
Event *signalEvent = nullptr;
11871192
if (hSignalEvent) {
11881193
signalEvent = Event::fromHandle(hSignalEvent);
1194+
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
11891195
}
11901196

11911197
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
11921198
kernelCounter += middleSizeBytes > 0 ? 1 : 0;
11931199
kernelCounter += rightSize > 0 ? 1 : 0;
11941200

1195-
CmdListKernelLaunchParams launchParams = {};
1196-
11971201
launchParams.isKernelSplitOperation = kernelCounter > 1;
11981202
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
11991203

@@ -1544,9 +1548,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15441548
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
15451549
}
15461550

1551+
CmdListKernelLaunchParams launchParams = {};
1552+
15471553
Event *signalEvent = nullptr;
15481554
if (hSignalEvent) {
15491555
signalEvent = Event::fromHandle(hSignalEvent);
1556+
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
15501557
}
15511558

15521559
if (isCopyOnly()) {
@@ -1596,7 +1603,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15961603
}
15971604
}
15981605

1599-
CmdListKernelLaunchParams launchParams = {};
16001606
launchParams.isBuiltInKernel = true;
16011607
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;
16021608

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
167167
uint64_t eventAddress = 0;
168168
bool isTimestampEvent = false;
169169
bool l3FlushEnable = false;
170-
bool isHostSignalScopeEvent = false;
170+
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
171171
if (event) {
172172
eventAlloc = &event->getAllocation(this->device);
173173
commandContainer.addToResidencyContainer(eventAlloc);

level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,5 +1364,171 @@ HWTEST2_F(CreateCommandListXeHpcTest, whenFlagDisabledAndCreateImmediateCommandL
13641364
EXPECT_TRUE(static_cast<DeviceImp *>(device)->allocationsForReuse->peekIsEmpty());
13651365
}
13661366

1367+
struct AppendKernelXeHpcTestInput {
1368+
DriverHandle *driver = nullptr;
1369+
L0::Context *context = nullptr;
1370+
L0::Device *device = nullptr;
1371+
};
1372+
1373+
template <int32_t usePipeControlMultiPacketEventSync>
1374+
struct CommandListAppendLaunchMultiKernelEventFixture : public LocalMemoryModuleFixture {
1375+
void setUp() {
1376+
DebugManager.flags.UsePipeControlMultiKernelEventSync.set(usePipeControlMultiPacketEventSync);
1377+
LocalMemoryModuleFixture::setUp();
1378+
1379+
input.driver = driverHandle.get();
1380+
input.device = device;
1381+
input.context = context;
1382+
}
1383+
1384+
template <GFXCORE_FAMILY gfxCoreFamily>
1385+
void testHostSignalScopeDeviceMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) {
1386+
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
1387+
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
1388+
1389+
ze_result_t result = ZE_RESULT_SUCCESS;
1390+
1391+
auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
1392+
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
1393+
1394+
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
1395+
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
1396+
1397+
constexpr size_t size = 4096u;
1398+
constexpr size_t alignment = 4096u;
1399+
void *ptr = nullptr;
1400+
const void *srcPtr = reinterpret_cast<void *>(0x1234);
1401+
1402+
ze_device_mem_alloc_desc_t deviceDesc = {};
1403+
result = input.context->allocDeviceMem(input.device->toHandle(),
1404+
&deviceDesc,
1405+
size, alignment, &ptr);
1406+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
1407+
EXPECT_NE(nullptr, ptr);
1408+
1409+
ze_event_pool_desc_t eventPoolDesc = {};
1410+
eventPoolDesc.count = 1;
1411+
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
1412+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
1413+
ze_event_desc_t eventDesc = {};
1414+
eventDesc.index = 0;
1415+
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
1416+
eventDesc.wait = 0;
1417+
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
1418+
1419+
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
1420+
result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
1421+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
1422+
1423+
constexpr size_t offset = 32;
1424+
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
1425+
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
1426+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
1427+
1428+
GenCmdList commands;
1429+
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
1430+
commands,
1431+
commandList->commandContainer.getCommandStream()->getCpuBase(),
1432+
commandList->commandContainer.getCommandStream()->getUsed()));
1433+
1434+
auto itorWalkers = findAll<WALKER_TYPE *>(commands.begin(), commands.end());
1435+
EXPECT_NE(0u, itorWalkers.size());
1436+
for (const auto &it : itorWalkers) {
1437+
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*it);
1438+
auto &postSyncData = walkerCmd->getPostSync();
1439+
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
1440+
}
1441+
1442+
result = input.context->freeMem(ptr);
1443+
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
1444+
}
1445+
1446+
template <GFXCORE_FAMILY gfxCoreFamily>
1447+
void testHostSignalScopeHostMemoryAppendMultiKernelCopy(AppendKernelXeHpcTestInput &input) {
1448+
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
1449+
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
1450+
1451+
ze_result_t result = ZE_RESULT_SUCCESS;
1452+
1453+
auto &hwInfo = *input.device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
1454+
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
1455+
1456+
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
1457+
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
1458+
1459+
constexpr size_t size = 4096u;
1460+
constexpr size_t alignment = 4096u;
1461+
void *ptr = nullptr;
1462+
const void *srcPtr = reinterpret_cast<void *>(0x1234);
1463+
1464+
ze_host_mem_alloc_desc_t hostDesc = {};
1465+
result = input.context->allocHostMem(&hostDesc, size, alignment, &ptr);
1466+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
1467+
EXPECT_NE(nullptr, ptr);
1468+
1469+
ze_event_pool_desc_t eventPoolDesc = {};
1470+
eventPoolDesc.count = 1;
1471+
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(input.driver, input.context, 0, nullptr, &eventPoolDesc, result));
1472+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
1473+
ze_event_desc_t eventDesc = {};
1474+
eventDesc.index = 0;
1475+
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
1476+
eventDesc.wait = 0;
1477+
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, input.device));
1478+
1479+
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
1480+
result = commandList->initialize(input.device, NEO::EngineGroupType::Compute, 0u);
1481+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
1482+
1483+
constexpr size_t offset = 32;
1484+
void *copyPtr = reinterpret_cast<uint8_t *>(ptr) + offset;
1485+
result = commandList->appendMemoryCopy(copyPtr, srcPtr, size - offset, event.get(), 0, nullptr);
1486+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
1487+
1488+
GenCmdList commands;
1489+
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
1490+
commands,
1491+
commandList->commandContainer.getCommandStream()->getCpuBase(),
1492+
commandList->commandContainer.getCommandStream()->getUsed()));
1493+
1494+
auto itorWalkers = findAll<WALKER_TYPE *>(commands.begin(), commands.end());
1495+
EXPECT_NE(0u, itorWalkers.size());
1496+
for (const auto &it : itorWalkers) {
1497+
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*it);
1498+
auto &postSyncData = walkerCmd->getPostSync();
1499+
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
1500+
}
1501+
1502+
result = input.context->freeMem(ptr);
1503+
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
1504+
}
1505+
1506+
AppendKernelXeHpcTestInput input;
1507+
};
1508+
1509+
using CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore = Test<CommandListAppendLaunchMultiKernelEventFixture<0>>;
1510+
1511+
HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore,
1512+
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) {
1513+
testHostSignalScopeDeviceMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
1514+
}
1515+
1516+
HWTEST2_F(CommandListAppendLaunchMultiKernelEventDisabledSinglePacketXeHpcCore,
1517+
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
1518+
testHostSignalScopeHostMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
1519+
}
1520+
1521+
using CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore = Test<CommandListAppendLaunchMultiKernelEventFixture<1>>;
1522+
1523+
HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore,
1524+
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceNotUsed, IsXeHpcCore) {
1525+
testHostSignalScopeDeviceMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
1526+
}
1527+
1528+
HWTEST2_F(CommandListAppendLaunchMultiKernelEventEnabledSinglePacketXeHpcCore,
1529+
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
1530+
testHostSignalScopeHostMemoryAppendMultiKernelCopy<gfxCoreFamily>(input);
1531+
}
1532+
13671533
} // namespace ult
13681534
} // namespace L0

0 commit comments

Comments
 (0)