@@ -303,6 +303,21 @@ FailureOr<ParameterSetting> ParameterSetting::create(
303303 TileSize maxL0Size = selectL2TileSizes (tileParams, m0Pack, n0Pack);
304304 M0 = maxL0Size.M ;
305305 N0 = maxL0Size.N ;
306+
307+ // TODO(avarma): This is currently a workaround for 1x1 AIE array to make
308+ // those 2D matmul shapes work for which all of the operands get pulled in
309+ // to L2 buffer. Once reprogramming of DMA ops is supported, we can get rid
310+ // of this workaround. We need to add this only for pack-peel-4-level-tiling
311+ // NOT pack-peel. The workaround just ensures that the tile size of first
312+ // level is NOT equal to M, N by halving the N0 tile.
313+ if (numRows == 1 && numCols == 1 ) {
314+ // Check if the tile size generated is exactly same as operand size. If
315+ // yes, halve N0 tile.
316+ if (M0 == M && N0 == N) {
317+ N0 /= 2 ;
318+ if (N0 < n0Pack) n0Pack /= 2 ;
319+ }
320+ }
306321 }
307322
308323 // Currently there is only one level of tiling for K dimension, and the packed
@@ -431,15 +446,6 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
431446 SmallVector<SmallVector<int64_t >> outerPerm;
432447 SmallVector<PackingConfigPackingLevelAttr> packingConfigLevelsVal;
433448
434- int64_t m0Tile = packPeelTiling.M0 ;
435- int64_t n0Tile = packPeelTiling.N0 ;
436- // For 4D matmul-like ops, only tile the outer dims.
437- // outer_tile_size = total_tile_size / inner_dim_size
438- if (is4DMatmulLikeOp (linalgOp)) {
439- m0Tile /= maybeInputDimsAndSizes.value ().mSizes .back ();
440- n0Tile /= maybeInputDimsAndSizes.value ().nSizes .back ();
441- }
442-
443449 // Pack level => 1.
444450 // For 2D matmul-like ops, the first level is to pack operands from 2D to 4D.
445451 // If the input is a 4D matmul-like op, this level of packing is not needed.
@@ -450,30 +456,6 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
450456 packedSizesL0[nDims.back ()] = packPeelTiling.n0Pack ;
451457 packedSizesL0[kDims .back ()] = packPeelTiling.k0Pack ;
452458
453- // TODO(avarma): This is currently a workaround for 1x1 AIE array to make
454- // those 2D matmul shapes work for which all of the operands get pulled in
455- // to L2 buffer. Once reprogramming of DMA ops is supported, we can get rid
456- // of this workaround. We need to add this only for pack-peel-4-level-tiling
457- // NOT pack-peel. The workaround just ensures that the tile size of first
458- // level is NOT equal to M,N by halving the n0Tile.
459- if (numRows == 1 && numCols == 1 ) {
460- auto getTotalSize = [](ArrayRef<int64_t > sizes) {
461- return std::accumulate (sizes.begin (), sizes.end (), 1 ,
462- std::multiplies<int64_t >());
463- };
464-
465- // Get the shape (M, N) of the full Matmul operation.
466- auto maybeInputDimsAndSizes = getInputDimsAndSizes (linalgOp);
467- int64_t M = getTotalSize (maybeInputDimsAndSizes.value ().mSizes );
468- int64_t N = getTotalSize (maybeInputDimsAndSizes.value ().nSizes );
469- // Check if the tile size generated is exactly same as operand size. If
470- // yes, halve n0Tile.
471- if (m0Tile == M && n0Tile == N) {
472- n0Tile /= 2 ;
473- if (n0Tile < packedSizesL0[nDims.back ()])
474- packedSizesL0[nDims.back ()] /= 2 ;
475- }
476- }
477459 transposePackIndices = {0 , 1 , 2 };
478460 // There is no corresponding unpack for the specified pack operation
479461 // 0 is used when unpack is empty
@@ -540,6 +522,14 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
540522 assert (!batchDims.empty () && " expected batch dims not empty" );
541523 tileSizeLevel0[batchDims[0 ]] = 1 ;
542524 }
525+ int64_t m0Tile = packPeelTiling.M0 ;
526+ int64_t n0Tile = packPeelTiling.N0 ;
527+ // For 4D matmul-like ops, only tile the outer dims.
528+ // outer_tile_size = total_tile_size / inner_dim_size
529+ if (is4DMatmulLikeOp (linalgOp)) {
530+ m0Tile /= maybeInputDimsAndSizes.value ().mSizes .back ();
531+ n0Tile /= maybeInputDimsAndSizes.value ().nSizes .back ();
532+ }
543533 tileSizeLevel0[mDims [0 ]] = m0Tile;
544534 tileSizeLevel0[nDims[0 ]] = n0Tile;
545535
0 commit comments