@@ -303,6 +303,21 @@ FailureOr<ParameterSetting> ParameterSetting::create(
303
303
TileSize maxL0Size = selectL2TileSizes (tileParams, m0Pack, n0Pack);
304
304
M0 = maxL0Size.M ;
305
305
N0 = maxL0Size.N ;
306
+
307
+ // TODO(avarma): This is currently a workaround for 1x1 AIE array to make
308
+ // those 2D matmul shapes work for which all of the operands get pulled in
309
+ // to L2 buffer. Once reprogramming of DMA ops is supported, we can get rid
310
+ // of this workaround. We need to add this only for pack-peel-4-level-tiling
311
+ // NOT pack-peel. The workaround just ensures that the tile size of first
312
+ // level is NOT equal to M, N by halving the N0 tile.
313
+ if (numRows == 1 && numCols == 1 ) {
314
+ // Check if the generated tile size is exactly the same as the operand
315
+ // size. If yes, halve the N0 tile.
316
+ if (M0 == M && N0 == N) {
317
+ N0 /= 2 ;
318
+ if (N0 < n0Pack) n0Pack /= 2 ;
319
+ }
320
+ }
306
321
}
307
322
308
323
// Currently there is only one level of tiling for K dimension, and the packed
@@ -431,15 +446,6 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
431
446
SmallVector<SmallVector<int64_t >> outerPerm;
432
447
SmallVector<PackingConfigPackingLevelAttr> packingConfigLevelsVal;
433
448
434
- int64_t m0Tile = packPeelTiling.M0 ;
435
- int64_t n0Tile = packPeelTiling.N0 ;
436
- // For 4D matmul-like ops, only tile the outer dims.
437
- // outer_tile_size = total_tile_size / inner_dim_size
438
- if (is4DMatmulLikeOp (linalgOp)) {
439
- m0Tile /= maybeInputDimsAndSizes.value ().mSizes .back ();
440
- n0Tile /= maybeInputDimsAndSizes.value ().nSizes .back ();
441
- }
442
-
443
449
// Pack level => 1.
444
450
// For 2D matmul-like ops, the first level is to pack operands from 2D to 4D.
445
451
// If the input is a 4D matmul-like op, this level of packing is not needed.
@@ -450,30 +456,6 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
450
456
packedSizesL0[nDims.back ()] = packPeelTiling.n0Pack ;
451
457
packedSizesL0[kDims .back ()] = packPeelTiling.k0Pack ;
452
458
453
- // TODO(avarma): This is currently a workaround for 1x1 AIE array to make
454
- // those 2D matmul shapes work for which all of the operands get pulled in
455
- // to L2 buffer. Once reprogramming of DMA ops is supported, we can get rid
456
- // of this workaround. We need to add this only for pack-peel-4-level-tiling
457
- // NOT pack-peel. The workaround just ensures that the tile size of first
458
- // level is NOT equal to M,N by halving the n0Tile.
459
- if (numRows == 1 && numCols == 1 ) {
460
- auto getTotalSize = [](ArrayRef<int64_t > sizes) {
461
- return std::accumulate (sizes.begin (), sizes.end (), 1 ,
462
- std::multiplies<int64_t >());
463
- };
464
-
465
- // Get the shape (M, N) of the full Matmul operation.
466
- auto maybeInputDimsAndSizes = getInputDimsAndSizes (linalgOp);
467
- int64_t M = getTotalSize (maybeInputDimsAndSizes.value ().mSizes );
468
- int64_t N = getTotalSize (maybeInputDimsAndSizes.value ().nSizes );
469
- // Check if the tile size generated is exactly same as operand size. If
470
- // yes, halve n0Tile.
471
- if (m0Tile == M && n0Tile == N) {
472
- n0Tile /= 2 ;
473
- if (n0Tile < packedSizesL0[nDims.back ()])
474
- packedSizesL0[nDims.back ()] /= 2 ;
475
- }
476
- }
477
459
transposePackIndices = {0 , 1 , 2 };
478
460
// There is no corresponding unpack for the specified pack operation
479
461
// 0 is used when unpack is empty
@@ -540,6 +522,14 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
540
522
assert (!batchDims.empty () && " expected batch dims not empty" );
541
523
tileSizeLevel0[batchDims[0 ]] = 1 ;
542
524
}
525
+ int64_t m0Tile = packPeelTiling.M0 ;
526
+ int64_t n0Tile = packPeelTiling.N0 ;
527
+ // For 4D matmul-like ops, only tile the outer dims.
528
+ // outer_tile_size = total_tile_size / inner_dim_size
529
+ if (is4DMatmulLikeOp (linalgOp)) {
530
+ m0Tile /= maybeInputDimsAndSizes.value ().mSizes .back ();
531
+ n0Tile /= maybeInputDimsAndSizes.value ().nSizes .back ();
532
+ }
543
533
tileSizeLevel0[mDims [0 ]] = m0Tile;
544
534
tileSizeLevel0[nDims[0 ]] = n0Tile;
545
535
0 commit comments