Skip to content

Commit 2d0a1de

Browse files
Review comment v1.0
1 parent 0e479c5 commit 2d0a1de

File tree

2 files changed

+26
-36
lines changed

2 files changed

+26
-36
lines changed

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,21 @@ FailureOr<ParameterSetting> ParameterSetting::create(
303303
TileSize maxL0Size = selectL2TileSizes(tileParams, m0Pack, n0Pack);
304304
M0 = maxL0Size.M;
305305
N0 = maxL0Size.N;
306+
307+
// TODO(avarma): This is currently a workaround for 1x1 AIE array to make
308+
// those 2D matmul shapes work for which all of the operands get pulled in
309+
// to L2 buffer. Once reprogramming of DMA ops is supported, we can get rid
310+
// of this workaround. We need to add this only for pack-peel-4-level-tiling
311+
// NOT pack-peel. The workaround just ensures that the tile size of first
312+
// level is NOT equal to M, N by halving the N0 tile.
313+
if (numRows == 1 && numCols == 1) {
314+
// Check if the tile size generated is exactly the same as the operand size. If
315+
// yes, halve N0 tile.
316+
if (M0 == M && N0 == N) {
317+
N0 /= 2;
318+
if (N0 < n0Pack) n0Pack /= 2;
319+
}
320+
}
306321
}
307322

308323
// Currently there is only one level of tiling for K dimension, and the packed
@@ -431,15 +446,6 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
431446
SmallVector<SmallVector<int64_t>> outerPerm;
432447
SmallVector<PackingConfigPackingLevelAttr> packingConfigLevelsVal;
433448

434-
int64_t m0Tile = packPeelTiling.M0;
435-
int64_t n0Tile = packPeelTiling.N0;
436-
// For 4D matmul-like ops, only tile the outer dims.
437-
// outer_tile_size = total_tile_size / inner_dim_size
438-
if (is4DMatmulLikeOp(linalgOp)) {
439-
m0Tile /= maybeInputDimsAndSizes.value().mSizes.back();
440-
n0Tile /= maybeInputDimsAndSizes.value().nSizes.back();
441-
}
442-
443449
// Pack level => 1.
444450
// For 2D matmul-like ops, the first level is to pack operands from 2D to 4D.
445451
// If the input is a 4D matmul-like op, this level of packing is not needed.
@@ -450,30 +456,6 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
450456
packedSizesL0[nDims.back()] = packPeelTiling.n0Pack;
451457
packedSizesL0[kDims.back()] = packPeelTiling.k0Pack;
452458

453-
// TODO(avarma): This is currently a workaround for 1x1 AIE array to make
454-
// those 2D matmul shapes work for which all of the operands get pulled in
455-
// to L2 buffer. Once reprogramming of DMA ops is supported, we can get rid
456-
// of this workaround. We need to add this only for pack-peel-4-level-tiling
457-
// NOT pack-peel. The workaround just ensures that the tile size of first
458-
// level is NOT equal to M,N by halving the n0Tile.
459-
if (numRows == 1 && numCols == 1) {
460-
auto getTotalSize = [](ArrayRef<int64_t> sizes) {
461-
return std::accumulate(sizes.begin(), sizes.end(), 1,
462-
std::multiplies<int64_t>());
463-
};
464-
465-
// Get the shape (M, N) of the full Matmul operation.
466-
auto maybeInputDimsAndSizes = getInputDimsAndSizes(linalgOp);
467-
int64_t M = getTotalSize(maybeInputDimsAndSizes.value().mSizes);
468-
int64_t N = getTotalSize(maybeInputDimsAndSizes.value().nSizes);
469-
// Check if the tile size generated is exactly same as operand size. If
470-
// yes, halve n0Tile.
471-
if (m0Tile == M && n0Tile == N) {
472-
n0Tile /= 2;
473-
if (n0Tile < packedSizesL0[nDims.back()])
474-
packedSizesL0[nDims.back()] /= 2;
475-
}
476-
}
477459
transposePackIndices = {0, 1, 2};
478460
// There is no corresponding unpack for the specified pack operation
479461
// 0 is used when unpack is empty
@@ -540,6 +522,14 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
540522
assert(!batchDims.empty() && "expected batch dims not empty");
541523
tileSizeLevel0[batchDims[0]] = 1;
542524
}
525+
int64_t m0Tile = packPeelTiling.M0;
526+
int64_t n0Tile = packPeelTiling.N0;
527+
// For 4D matmul-like ops, only tile the outer dims.
528+
// outer_tile_size = total_tile_size / inner_dim_size
529+
if (is4DMatmulLikeOp(linalgOp)) {
530+
m0Tile /= maybeInputDimsAndSizes.value().mSizes.back();
531+
n0Tile /= maybeInputDimsAndSizes.value().nSizes.back();
532+
}
543533
tileSizeLevel0[mDims[0]] = m0Tile;
544534
tileSizeLevel0[nDims[0]] = n0Tile;
545535

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu4.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ module {
4545
// -----
4646

4747
// Tests a matmul shape for pack-peel-4-level-tiling in which the tile size generated
48-
// should ideally be equal to the M,N,K dimensions of the matmul - but it won't work
48+
// should ideally be equal to the M, N, K dimension sizes of the matmul - but it won't work
4949
// until support for DMA ops' reconfiguration is added. The workaround therefore halves
5050
// the tile size for N to N/2.
5151

52-
// Pack-peel-4-level tiling on 4x8 cores : the tile size remains maximum in this case.
52+
// Pack-peel-4-level tiling on 4x4 cores : the tile size remains maximum in this case.
5353
// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config<tile_sizes = [[32, 512, 0], [4, 4, 0], [0, 0, 1], [1, 1, 0]]>
5454
// PACK-PEEL-4-LEVEL{LITERAL}: #packingConfig = #amdaie.packing_config<packing_config = [{packedSizes = [8, 32, 64], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1], [1, 0], [1, 0]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
5555

@@ -74,7 +74,7 @@ func.func @matmul_32x512x64_i32() {
7474
// -----
7575

7676
// Based on the above workaround, this test shows the packing size of N also being halved
77-
// in case the tile size for N dimension becomes lesser than the corresponding packing size.
77+
// in case the tile size for the N dimension becomes less than the corresponding packing size.
7878

7979
// PACK-PEEL-4-LEVEL-1-CORE{LITERAL}: #config = #iree_codegen.lowering_config<tile_sizes = [[32, 16, 0], [1, 1, 0], [0, 0, 1], [1, 1, 0]]>
8080
// PACK-PEEL-4-LEVEL-1-CORE{LITERAL}: #packingConfig = #amdaie.packing_config<packing_config = [{packedSizes = [32, 16, 64], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1], [1, 0], [1, 0]]}, {packedSizes = [0, 0, 0, 4, 4, 8], transposePackIndices = [0, 1, 2], unpackEmpty = [false, false, true], innerPerm = [[0, 1], [1, 0], [0, 1]], outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>

0 commit comments

Comments
 (0)