Skip to content

Commit c041358

Browse files
[ObjectFifo][NFC] Refactor DmaUtils + SplitLogicalObjectFifoForReuse (#759)
-- This commit refactors a few utilities in DmaUtils, as well as a few used by the SplitLogicalObjectFifosForReuse pass. -- This is required for the follow-up PR that adds a new pass `--iree-amdaie-combine-logical-objectfifos-for-connection-reuse`. Signed-off-by: Abhishek Varma <abhvarma@amd.com>
1 parent 09576c8 commit c041358

File tree

6 files changed

+154
-121
lines changed

6 files changed

+154
-121
lines changed

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEDmaLoopSubsumption.cpp

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -52,60 +52,6 @@ int64_t calculateNbIterations(int64_t lowerBound, int64_t upperBound,
5252

5353
namespace {
5454

55-
/// Utility affine expression visitor to retrieve the scale and optional bias
56-
/// from the expression.
57-
struct RetrieveScaleAndBias
58-
: public AffineExprVisitor<RetrieveScaleAndBias, LogicalResult> {
59-
std::optional<int64_t> scale;
60-
std::optional<int64_t> bias;
61-
LogicalResult visitAffineBinaryOpExpr(AffineBinaryOpExpr /*expr*/) {
62-
return failure();
63-
}
64-
LogicalResult visitConstantExpr(AffineConstantExpr /*expr*/) {
65-
return failure();
66-
}
67-
LogicalResult visitDimExpr(AffineDimExpr /*expr*/) { return failure(); }
68-
LogicalResult visitSymbolExpr(AffineSymbolExpr /*expr*/) { return failure(); }
69-
LogicalResult visitMulExpr(AffineBinaryOpExpr expr) {
70-
if (auto rhsSize = dyn_cast<AffineConstantExpr>(expr.getRHS());
71-
isa<AffineDimExpr>(expr.getLHS())) {
72-
scale = rhsSize.getValue();
73-
} else if (auto lhsSize = dyn_cast<AffineConstantExpr>(expr.getLHS());
74-
isa<AffineDimExpr>(expr.getRHS())) {
75-
scale = lhsSize.getValue();
76-
}
77-
return success();
78-
}
79-
LogicalResult visitAddExpr(AffineBinaryOpExpr expr) {
80-
if (bias) return failure();
81-
if (auto rhsSize = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
82-
bias = rhsSize.getValue();
83-
if (bias.value() < 0) return failure();
84-
if (isa<AffineBinaryOpExpr>(expr.getLHS())) {
85-
return visit(expr.getLHS());
86-
} else if (isa<AffineDimExpr>(expr.getLHS())) {
87-
scale = 1;
88-
return success();
89-
} else {
90-
return failure();
91-
}
92-
} else if (auto lhsSize = dyn_cast<AffineConstantExpr>(expr.getLHS())) {
93-
bias = lhsSize.getValue();
94-
if (bias.value() < 0) return failure();
95-
if (isa<AffineBinaryOpExpr>(expr.getRHS())) {
96-
return visit(expr.getRHS());
97-
} else if (isa<AffineDimExpr>(expr.getRHS())) {
98-
scale = 1;
99-
return success();
100-
} else {
101-
return failure();
102-
}
103-
} else {
104-
return failure();
105-
}
106-
}
107-
};
108-
10955
struct SubsumeLoopIntoDMA
11056
: public OpInterfaceRewritePattern<AMDAIE::DoublyStridedOpInterface> {
11157
using OpInterfaceRewritePattern::OpInterfaceRewritePattern;

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEDmaUtils.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,71 @@
1212
#include "iree-amd-aie/IR/AMDAIEOps.h"
1313
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
1414
#include "llvm/ADT/SmallVector.h"
15+
#include "mlir/Dialect/Affine/IR/AffineOps.h"
16+
#include "mlir/IR/AffineExprVisitor.h"
1517
#include "mlir/IR/MLIRContext.h"
1618
#include "mlir/IR/OpDefinition.h"
1719
#include "mlir/IR/PatternMatch.h"
1820

1921
namespace mlir::iree_compiler::AMDAIE {
2022

23+
/// Utility to retrieve a constant index from an OpFoldResult.
24+
int64_t getConstantIndexOrAssert(OpFoldResult dim);
25+
26+
/// Utility affine expression visitor to retrieve the scale and optional bias
27+
/// from the expression.
28+
struct RetrieveScaleAndBias
29+
: public AffineExprVisitor<RetrieveScaleAndBias, LogicalResult> {
30+
std::optional<int64_t> scale;
31+
std::optional<int64_t> bias;
32+
LogicalResult visitAffineBinaryOpExpr(AffineBinaryOpExpr /*expr*/) {
33+
return failure();
34+
}
35+
LogicalResult visitConstantExpr(AffineConstantExpr /*expr*/) {
36+
return failure();
37+
}
38+
LogicalResult visitDimExpr(AffineDimExpr /*expr*/) { return failure(); }
39+
LogicalResult visitSymbolExpr(AffineSymbolExpr /*expr*/) { return failure(); }
40+
LogicalResult visitMulExpr(AffineBinaryOpExpr expr) {
41+
if (auto rhsSize = dyn_cast<AffineConstantExpr>(expr.getRHS());
42+
isa<AffineDimExpr>(expr.getLHS())) {
43+
scale = rhsSize.getValue();
44+
} else if (auto lhsSize = dyn_cast<AffineConstantExpr>(expr.getLHS());
45+
isa<AffineDimExpr>(expr.getRHS())) {
46+
scale = lhsSize.getValue();
47+
}
48+
return success();
49+
}
50+
LogicalResult visitAddExpr(AffineBinaryOpExpr expr) {
51+
if (bias) return failure();
52+
if (auto rhsSize = dyn_cast<AffineConstantExpr>(expr.getRHS())) {
53+
bias = rhsSize.getValue();
54+
if (bias.value() < 0) return failure();
55+
if (isa<AffineBinaryOpExpr>(expr.getLHS())) {
56+
return visit(expr.getLHS());
57+
} else if (isa<AffineDimExpr>(expr.getLHS())) {
58+
scale = 1;
59+
return success();
60+
} else {
61+
return failure();
62+
}
63+
} else if (auto lhsSize = dyn_cast<AffineConstantExpr>(expr.getLHS())) {
64+
bias = lhsSize.getValue();
65+
if (bias.value() < 0) return failure();
66+
if (isa<AffineBinaryOpExpr>(expr.getRHS())) {
67+
return visit(expr.getRHS());
68+
} else if (isa<AffineDimExpr>(expr.getRHS())) {
69+
scale = 1;
70+
return success();
71+
} else {
72+
return failure();
73+
}
74+
} else {
75+
return failure();
76+
}
77+
}
78+
};
79+
2180
// Constant specifying the number of inter-iteration dimension for DMA
2281
// operations.
2382
//

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELogicalObjFifoSplittingUtils.cpp

Lines changed: 88 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <numeric>
1010

11+
#include "iree-amd-aie/Transforms/AMDAIEDmaUtils.h"
1112
#include "llvm/ADT/DenseMap.h"
1213
#include "llvm/Support/Debug.h"
1314
#include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -21,6 +22,56 @@
2122

2223
namespace mlir::iree_compiler::AMDAIE {
2324

25+
/// Utility to create a new logical objectfifo based on shape defined by
26+
/// `newSizesOpFoldResultArr`.
27+
static AMDAIE::LogicalObjectFifoFromMemrefOp createNewLogicalObjectFifo(
28+
IRRewriter &rewriter,
29+
AMDAIE::LogicalObjectFifoFromMemrefOp &oldLogicalObjectFifo,
30+
SmallVectorImpl<OpFoldResult> &newSizesOpFoldResultArr) {
31+
OpBuilder::InsertionGuard guard(rewriter);
32+
SmallVector<int64_t> newSizes = llvm::map_to_vector(
33+
newSizesOpFoldResultArr,
34+
[](OpFoldResult sizeVal) { return getConstantIndexOrAssert(sizeVal); });
35+
Value oldAllocOp = oldLogicalObjectFifo.getMemref();
36+
auto oldMemRefType = cast<MemRefType>(oldAllocOp.getType());
37+
MemRefType newAllocType = MemRefType::get(
38+
newSizes, oldMemRefType.getElementType(), MemRefLayoutAttrInterface{},
39+
oldMemRefType.getMemorySpace());
40+
assert(oldAllocOp.getDefiningOp() && "expected a defining op for the value");
41+
rewriter.setInsertionPoint(oldAllocOp.getDefiningOp());
42+
auto newAllocOp =
43+
rewriter.create<memref::AllocOp>(rewriter.getUnknownLoc(), newAllocType);
44+
auto newDeallocOp =
45+
rewriter.create<memref::DeallocOp>(rewriter.getUnknownLoc(), newAllocOp);
46+
newDeallocOp->moveBefore(&newAllocOp->getBlock()->back());
47+
auto type = cast<MemRefType>(newAllocOp.getType());
48+
// Create new logical objectfifo.
49+
rewriter.setInsertionPoint(oldLogicalObjectFifo);
50+
auto newLogicalObjectFifo =
51+
rewriter.create<AMDAIE::LogicalObjectFifoFromMemrefOp>(
52+
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(type),
53+
newAllocOp.getResult(), oldLogicalObjectFifo.getTiles());
54+
return newLogicalObjectFifo;
55+
}
56+
57+
/// Utility to help fetch those input DmaCpyNd Ops which needs to be split.
58+
SmallVector<AMDAIE::DmaCpyNdOp> fetchDmaCpyNdOpsToSplitOrCombine(
59+
Operation *op) {
60+
SmallVector<AMDAIE::DmaCpyNdOp> l2ToL1DmaOps;
61+
// We are currently walking through CoreOps gathering 3rd Input DmaOp (if
62+
// applicable) from them.
63+
// TODO(avarma): We will generalize this later.
64+
op->walk([&](AMDAIE::CoreOp coreOp) {
65+
SmallVector<Value> inputDmas = coreOp.getInputDmas();
66+
if (inputDmas.size() != 3) return WalkResult::skip();
67+
auto dmaCpyNdOp = inputDmas[2].getDefiningOp<AMDAIE::DmaCpyNdOp>();
68+
assert(dmaCpyNdOp && "expected an amdaie.dma_cpy_nd op");
69+
l2ToL1DmaOps.push_back(dmaCpyNdOp);
70+
return WalkResult::advance();
71+
});
72+
return l2ToL1DmaOps;
73+
}
74+
2475
/// Utility to verify that the split dimensions for L2 are contiguous.
2576
static LogicalResult checkIsRangeFromZero(
2677
SmallVector<size_t> &splitDimsSetForL2) {
@@ -124,6 +175,33 @@ static FailureOr<OpFoldResult> updateL3SourceOffset(IRRewriter &rewriter,
124175
return newL3AsSourceOffset;
125176
}
126177

178+
/// Given a L2->L1 DmaCpyNd op, find the unique L3->L2 DmaCpyNd op.
179+
static FailureOr<AMDAIE::DmaCpyNdOp> fetchL3ToL2DmaCpyNdOp(
180+
AMDAIE::DmaCpyNdOp l2ToL1DmaOp) {
181+
LogicalObjectFifoFromMemrefOp sourceObjectFifo =
182+
l2ToL1DmaOp.getSourceObjectFifo();
183+
SmallVector<AMDAIE::DmaCpyNdOp> l3ToL2DmaOps;
184+
AMDAIE::DmaCpyNdOp l3ToL2DmaOp;
185+
for (Operation *objFifoUserOp : sourceObjectFifo->getUsers()) {
186+
if (auto dmaOp = dyn_cast<AMDAIE::DmaCpyNdOp>(objFifoUserOp);
187+
dmaOp.getTargetObjectFifo() == sourceObjectFifo) {
188+
l3ToL2DmaOps.push_back(dmaOp);
189+
}
190+
}
191+
if (l3ToL2DmaOps.size() == 0) {
192+
LLVM_DEBUG(llvm::dbgs() << "no corresponding L3->L2 dma op found for "
193+
<< sourceObjectFifo << "\n");
194+
return failure();
195+
}
196+
if (l3ToL2DmaOps.size() > 1) {
197+
LLVM_DEBUG(llvm::dbgs() << "found more than one L3->L2 dma ops for "
198+
<< sourceObjectFifo << "\n");
199+
return failure();
200+
}
201+
l3ToL2DmaOp = l3ToL2DmaOps[0];
202+
return l3ToL2DmaOp;
203+
}
204+
127205
/// A struct utility to encapsulate all the data required to perform splitting
128206
/// of logicalobjectfifos.
129207
struct SplittingLogicalObjectFifoData {
@@ -186,25 +264,10 @@ static LogicalResult checkWhetherSplitIsPossible(
186264
}
187265

188266
// Fetch the L3 -> L2 Dma Op corresponding to the L2 buffer as target.
189-
SmallVector<AMDAIE::DmaCpyNdOp> l3ToL2DmaOps;
190-
AMDAIE::DmaCpyNdOp l3ToL2DmaOp;
191-
for (Operation *objFifoUserOp : sourceObjectFifo->getUsers()) {
192-
if (auto dmaOp = dyn_cast<AMDAIE::DmaCpyNdOp>(objFifoUserOp);
193-
dmaOp.getTargetObjectFifo() == sourceObjectFifo) {
194-
l3ToL2DmaOps.push_back(dmaOp);
195-
}
196-
}
197-
if (l3ToL2DmaOps.size() == 0) {
198-
LLVM_DEBUG(llvm::dbgs() << "no corresponding L3->L2 dma op found for "
199-
<< sourceObjectFifo << "\n");
200-
return failure();
201-
}
202-
if (l3ToL2DmaOps.size() > 1) {
203-
LLVM_DEBUG(llvm::dbgs() << "found more than one L3->L2 dma ops for "
204-
<< sourceObjectFifo << "\n");
205-
return failure();
206-
}
207-
l3ToL2DmaOp = l3ToL2DmaOps[0];
267+
FailureOr<AMDAIE::DmaCpyNdOp> maybeL3ToL2DmaOp =
268+
fetchL3ToL2DmaCpyNdOp(l2ToL1DmaOps[0]);
269+
if (failed(maybeL3ToL2DmaOp)) return failure();
270+
AMDAIE::DmaCpyNdOp l3ToL2DmaOp = maybeL3ToL2DmaOp.value();
208271
if ((l3ToL2DmaOp.getTargetMixedOffsets().size() !=
209272
l3ToL2DmaOp.getSourceMixedOffsets().size()) ||
210273
(l3ToL2DmaOp.getTargetMixedSizes().size() !=
@@ -293,9 +356,6 @@ LogicalResult splitLogicalObjectFifos(
293356
l3ToL2DmaOp.getTargetMixedOffsets();
294357
SmallVector<OpFoldResult, 4> staticL2AsTargetSizes =
295358
l3ToL2DmaOp.getTargetMixedSizes();
296-
SmallVector<int64_t, 4> l2ShapeAsTarget = llvm::to_vector(
297-
cast<MemRefType>(l3ToL2DmaOp.getTargetObjectFifo().getMemref().getType())
298-
.getShape());
299359
SmallVector<OpFoldResult, 4> staticL3AsSourceOffsets =
300360
l3ToL2DmaOp.getSourceMixedOffsets();
301361
SmallVector<OpFoldResult, 4> staticL3AsSourceSizes =
@@ -310,7 +370,6 @@ LogicalResult splitLogicalObjectFifos(
310370
staticL2AsTargetSizes[dim] = oneVal;
311371
staticL3AsSourceOffsets[dim] = zeroVal;
312372
staticL3AsSourceSizes[dim] = oneVal;
313-
l2ShapeAsTarget[dim] = 1;
314373
}
315374

316375
// Traverse each L2->L1 DmaCpyNd op and split them.
@@ -321,34 +380,18 @@ LogicalResult splitLogicalObjectFifos(
321380
l2ToL1DmaOp.getSourceMixedSizes();
322381

323382
// Now we'll create a new L2 buffer based on the new shape inferred earlier
324-
// via `l2ShapeAsTarget`.
325-
rewriter.setInsertionPoint(sourceAllocOp);
326-
LogicalObjectFifoFromMemrefOp targetObjectFifo =
327-
l2ToL1DmaOp.getTargetObjectFifo();
328-
Value targetAllocOp = targetObjectFifo.getMemref();
329-
auto oldSourceMemRefType = cast<MemRefType>(sourceAllocOp.getType());
330-
auto targetMemRefType = cast<MemRefType>(targetAllocOp.getType());
331-
MemRefType newAllocType = MemRefType::get(
332-
l2ShapeAsTarget, targetMemRefType.getElementType(),
333-
MemRefLayoutAttrInterface{}, oldSourceMemRefType.getMemorySpace());
334-
auto newAllocOp = rewriter.create<memref::AllocOp>(rewriter.getUnknownLoc(),
335-
newAllocType);
336-
auto newDeallocOp = rewriter.create<memref::DeallocOp>(
337-
rewriter.getUnknownLoc(), newAllocOp);
338-
newDeallocOp->moveBefore(&newAllocOp->getBlock()->back());
339-
auto type = cast<MemRefType>(newAllocOp.getType());
340-
// Create new logicalobjectfifo.from_memref for the newly created L2 buffer.
341-
rewriter.setInsertionPoint(l2ToL1DmaOp.getSourceObjectFifo());
342-
auto source = rewriter.create<AMDAIE::LogicalObjectFifoFromMemrefOp>(
343-
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(type),
344-
newAllocOp.getResult(), sourceObjectFifo.getTiles());
383+
// via `staticL2AsTargetSizes`.
384+
LogicalObjectFifoFromMemrefOp oldL2ObjectFifo =
385+
l2ToL1DmaOp.getSourceObjectFifo();
386+
AMDAIE::LogicalObjectFifoFromMemrefOp source = createNewLogicalObjectFifo(
387+
rewriter, oldL2ObjectFifo, staticL2AsTargetSizes);
345388

346389
// --------------------------------------------
347390
// ---------- L3 -> L2 splitting --------------
348391
// --------------------------------------------
349392
// Update L3 source offsets for non-split dimensions. Refer doc comment of
350393
// `updateL3SourceOffset` for the computation rationale involved.
351-
SmallVector<OpFoldResult, 4> staticL3AsSourceOffsets =
394+
SmallVector<OpFoldResult> staticL3AsSourceOffsets =
352395
l3ToL2DmaOp.getSourceMixedOffsets();
353396
for (auto &&[splitDim, nonSplitdim] :
354397
llvm::zip_equal(splitDimsForL2, nonSplitDimsForL2)) {

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELogicalObjFifoSplittingUtils.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111

1212
namespace mlir::iree_compiler::AMDAIE {
1313

14-
/// Utility to split logicalobjectfifos given a struct
15-
/// `SplittingLogicalObjectFifoData` which contains all the required data to
16-
/// perform the splitting.
14+
/// Utility to fetch the input DmaCpyNd ops that need to be split (or combined).
15+
SmallVector<AMDAIE::DmaCpyNdOp> fetchDmaCpyNdOpsToSplitOrCombine(Operation *op);
16+
17+
/// Utility to split logicalobjectfifos given a vector of L2->L1 dma ops.
1718
LogicalResult splitLogicalObjectFifos(
1819
IRRewriter &rewriter, SmallVector<AMDAIE::DmaCpyNdOp> &l2ToL1DmaOps,
1920
MLIRContext *context);

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIESplitLogicalObjFifosForConnectionReuse.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,6 @@ namespace mlir::iree_compiler::AMDAIE {
1616

1717
namespace {
1818

19-
/// Utility to help fetch those input DmaCpyNd Ops which needs to be split.
20-
static SmallVector<AMDAIE::DmaCpyNdOp> fetchDmaCpyNdOpsToSplit(
21-
ModuleOp moduleOp) {
22-
SmallVector<AMDAIE::DmaCpyNdOp> l2ToL1DmaOps;
23-
// We are currently walking through CoreOps gathering 3rd Input DmaOp (if
24-
// applicable) from them.
25-
// TODO(avarma): We will generalize this later.
26-
moduleOp.walk([&](AMDAIE::CoreOp coreOp) {
27-
SmallVector<Value> inputDmas = coreOp.getInputDmas();
28-
if (inputDmas.size() != 3) return WalkResult::skip();
29-
auto dmaCpyNdOp = inputDmas[2].getDefiningOp<AMDAIE::DmaCpyNdOp>();
30-
assert(dmaCpyNdOp && "expected an amdaie.dma_cpy_nd op");
31-
l2ToL1DmaOps.push_back(dmaCpyNdOp);
32-
return WalkResult::advance();
33-
});
34-
return l2ToL1DmaOps;
35-
}
36-
3719
class AMDAIESplitLogicalObjFifosForConnectionReusePass
3820
: public impl::AMDAIESplitLogicalObjFifosForConnectionReuseBase<
3921
AMDAIESplitLogicalObjFifosForConnectionReusePass> {
@@ -53,7 +35,7 @@ void AMDAIESplitLogicalObjFifosForConnectionReusePass::runOnOperation() {
5335
IRRewriter rewriter(context);
5436

5537
SmallVector<AMDAIE::DmaCpyNdOp> l2ToL1DmaOps =
56-
fetchDmaCpyNdOpsToSplit(moduleOp);
38+
fetchDmaCpyNdOpsToSplitOrCombine(moduleOp);
5739

5840
if (failed(splitLogicalObjectFifos(rewriter, l2ToL1DmaOps, context))) {
5941
LLVM_DEBUG(llvm::dbgs()

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,8 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager) {
595595
passManager.addPass(createCSEPass());
596596
passManager.addPass(createCanonicalizerPass());
597597
passManager.addPass(createAMDAIESplitLogicalObjFifosForConnectionReusePass());
598+
passManager.addPass(createCSEPass());
599+
passManager.addPass(createCanonicalizerPass());
598600

599601
passManager.addPass(createAMDAIEDmaToCircularDmaPass());
600602
passManager.addNestedPass<func::FuncOp>(createAMDAIECreateAIEWorkgroupPass());

0 commit comments

Comments
 (0)