Skip to content

Commit 73731d3

Browse files
Revert "fix: correct suggested number of work groups for concurrent kernels o...
This reverts commit 6fc673b.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
1 parent d5441cc commit 73731d3

File tree

8 files changed

+34
-31
lines changed

8 files changed

+34
-31
lines changed
 

level_zero/core/test/unit_tests/xe_hpc_core/pvc/test_kernel_pvc.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@ using KernelImpSuggestMaxCooperativeGroupCountTestsPvc = KernelImpSuggestMaxCoop
2222

2323
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
2424
auto workGroupSize = lws[0] * lws[1] * lws[2];
25-
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
25+
auto expected = (availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd)) / PVC::numberOfpartsInTileForConcurrentKernels;
2626
EXPECT_EQ(expected, getMaxWorkGroupCount());
2727
}
2828

2929
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
3030
usesBarriers = 1;
31-
auto expected = dssCount * (maxBarrierCount / usesBarriers);
31+
auto expected = (dssCount * (maxBarrierCount / usesBarriers)) / PVC::numberOfpartsInTileForConcurrentKernels;
3232
EXPECT_EQ(expected, getMaxWorkGroupCount());
3333
}
3434

3535
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
3636
usedSlm = 64 * KB;
37-
auto expected = availableSlm / usedSlm;
37+
auto expected = (availableSlm / usedSlm) / PVC::numberOfpartsInTileForConcurrentKernels;
3838
EXPECT_EQ(expected, getMaxWorkGroupCount());
3939
}
4040

shared/source/helpers/gfx_core_helper_pvc_and_later.inl

+3-3
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,10 @@ uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupC
6363
bool requiresLimitation = productHelper.isCooperativeEngineSupported(hwInfo) &&
6464
(engineGroupType != EngineGroupType::CooperativeCompute) &&
6565
(!isEngineInstanced);
66-
67-
auto ccsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
68-
auto numberOfpartsInTileForConcurrentKernels = productHelper.getNumberOfPartsInTileForConcurrentKernel(ccsCount);
66+
auto numberOfpartsInTileForConcurrentKernels = productHelper.getNumberOfPartsInTileForConcurrentKernel();
6967
if (requiresLimitation) {
68+
69+
auto ccsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
7070
UNRECOVERABLE_IF(ccsCount == 0);
7171
numberOfpartsInTileForConcurrentKernels = std::max(numberOfpartsInTileForConcurrentKernels, ccsCount);
7272
}

shared/source/os_interface/product_helper.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ class ProductHelper {
201201

202202
virtual bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount, const HardwareInfo &hwInfo) const = 0;
203203
virtual bool isCalculationForDisablingEuFusionWithDpasNeeded(const HardwareInfo &hwInfo) const = 0;
204-
virtual uint32_t getNumberOfPartsInTileForConcurrentKernel(uint32_t ccsCount) const = 0;
204+
virtual uint32_t getNumberOfPartsInTileForConcurrentKernel() const = 0;
205205
virtual bool is48bResourceNeededForRayTracing() const = 0;
206206
virtual bool disableL3CacheForDebug(const HardwareInfo &hwInfo) const = 0;
207207
virtual bool isSkippingStatefulInformationRequired(const KernelDescriptor &kernelDescriptor) const = 0;

shared/source/os_interface/product_helper.inl

+1-1
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ bool ProductHelperHw<gfxProduct>::isStatefulAddressingModeSupported() const {
562562
return true;
563563
}
564564
template <PRODUCT_FAMILY gfxProduct>
565-
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel(uint32_t ccsCount) const {
565+
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel() const {
566566
return 1u;
567567
}
568568

shared/source/os_interface/product_helper_hw.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ class ProductHelperHw : public ProductHelper {
110110
void adjustNumberOfCcs(HardwareInfo &hwInfo) const override;
111111
bool isPrefetcherDisablingInDirectSubmissionRequired() const override;
112112
bool isStatefulAddressingModeSupported() const override;
113-
uint32_t getNumberOfPartsInTileForConcurrentKernel(uint32_t ccsCount) const override;
113+
uint32_t getNumberOfPartsInTileForConcurrentKernel() const override;
114114
bool isPlatformQuerySupported() const override;
115115
bool isNonBlockingGpuSubmissionSupported() const override;
116116
bool isResolveDependenciesByPipeControlsSupported(const HardwareInfo &hwInfo, bool isOOQ, TaskCountType queueTaskCount, const CommandStreamReceiver &queueCsr) const override;

shared/source/xe_hpc_core/hw_info_pvc.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ void PvcHwConfig::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTable
200200
gtSysInfo->L3BankCount = 1;
201201

202202
gtSysInfo->CCSInfo.IsValid = true;
203-
gtSysInfo->CCSInfo.NumberOfCCSEnabled = 1;
204-
gtSysInfo->CCSInfo.Instances.CCSEnableMask = 0b1;
203+
gtSysInfo->CCSInfo.NumberOfCCSEnabled = 2;
204+
gtSysInfo->CCSInfo.Instances.CCSEnableMask = 0b11;
205205

206206
hwInfo->featureTable.ftrBcsInfo = 1;
207207
gtSysInfo->IsDynamicallyPopulated = true;

shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl

+1-6
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,7 @@ std::optional<aub_stream::ProductFamily> ProductHelperHw<gfxProduct>::getAubStre
209209
};
210210

211211
template <>
212-
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel(uint32_t ccsCount) const {
213-
if (ccsCount == 1) {
214-
return 1;
215-
} else if (ccsCount == 2) {
216-
return 4;
217-
}
212+
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel() const {
218213
return PVC::numberOfpartsInTileForConcurrentKernels;
219214
}
220215

shared/test/unit_test/xe_hpc_core/pvc/gfx_core_helper_tests_pvc.cpp

+22-14
Original file line numberDiff line numberDiff line change
@@ -172,27 +172,35 @@ PVCTEST_F(GfxCoreHelperTestsPvc, GivenCooperativeEngineSupportedAndNotUsedWhenAd
172172
auto tilePartsForConcurrentKernels = PVC::numberOfpartsInTileForConcurrentKernels;
173173
auto passedMaxWorkGroupCount = 1024;
174174

175-
uint32_t revisions[] = {REVISION_A0, REVISION_B, REVISION_C};
175+
uint32_t revisions[] = {REVISION_A0, REVISION_B};
176176
for (auto &revision : revisions) {
177177
auto hwRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
178+
if (hwRevId == CommonConstants::invalidStepping) {
179+
continue;
180+
}
178181
hwInfo.platform.usRevId = hwRevId;
179-
for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) {
182+
183+
for (auto isEngineInstanced : ::testing::Bool()) {
180184
for (auto isRcsEnabled : ::testing::Bool()) {
181185
hwInfo.featureTable.flags.ftrRcsNode = isRcsEnabled;
182-
bool disallowDispatch = (engineGroupType == EngineGroupType::RenderCompute ||
183-
(engineGroupType == EngineGroupType::Compute && isRcsEnabled)) &&
184-
productHelper.isCooperativeEngineSupported(hwInfo);
185-
for (auto isEngineInstanced : ::testing::Bool()) {
186-
if (disallowDispatch) {
187-
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
188-
} else {
189-
for (uint32_t ccsCount : {1, 2, 4}) {
190-
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = ccsCount;
191-
tilePartsForConcurrentKernels = ccsCount == 1 ? 1
192-
: ccsCount == 2 ? 4
193-
: 8;
186+
for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) {
187+
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
188+
bool disallowDispatch = (engineGroupType == EngineGroupType::RenderCompute) ||
189+
((engineGroupType == EngineGroupType::Compute) && isRcsEnabled);
190+
bool applyLimitation = !isEngineInstanced &&
191+
(engineGroupType != EngineGroupType::CooperativeCompute);
192+
if (disallowDispatch) {
193+
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
194+
} else if (applyLimitation) {
195+
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4;
196+
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
197+
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 16;
198+
EXPECT_EQ(passedMaxWorkGroupCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
199+
} else {
194200
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
195201
}
202+
} else {
203+
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
196204
}
197205
}
198206
}

0 commit comments

Comments
 (0)
Please sign in to comment.