-
Notifications
You must be signed in to change notification settings - Fork 37
[ObjectFifo][NFC] Refactor DmaUtils + SplitLogicalObjectFifoForReuse #759
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
|
||
#include <numeric> | ||
|
||
#include "iree-amd-aie/Transforms/AMDAIEDmaUtils.h" | ||
#include "llvm/ADT/DenseMap.h" | ||
#include "llvm/Support/Debug.h" | ||
#include "mlir/Dialect/Affine/IR/AffineOps.h" | ||
|
@@ -21,6 +22,57 @@ | |
|
||
namespace mlir::iree_compiler::AMDAIE { | ||
|
||
/// Builds a new logical objectfifo backed by a freshly allocated memref whose
/// static shape is taken from `newSizesOpFoldResultArr`.
///
/// The new buffer reuses the element type and memory space of the memref
/// behind `oldLogicalObjectFifo`. Its `memref.alloc` is created right before
/// the op defining the old memref, and the matching `memref.dealloc` is moved
/// in front of the last op of that block. The new objectfifo is inserted
/// right before `oldLogicalObjectFifo` and reuses its tiles.
///
/// Every entry of `newSizesOpFoldResultArr` is converted with
/// `getConstantIndexOrAssert`. The rewriter's insertion point is restored on
/// return.
static AMDAIE::LogicalObjectFifoFromMemrefOp createNewLogicalObjectFifo(
    IRRewriter &rewriter,
    AMDAIE::LogicalObjectFifoFromMemrefOp &oldLogicalObjectFifo,
    SmallVector<OpFoldResult> &newSizesOpFoldResultArr) {
  OpBuilder::InsertionGuard guard(rewriter);

  // Turn the requested sizes into a static shape.
  SmallVector<int64_t> staticShape;
  for (OpFoldResult size : newSizesOpFoldResultArr)
    staticShape.push_back(getConstantIndexOrAssert(size));

  // Derive the new buffer type from the old one; only the shape changes.
  Value oldBuffer = oldLogicalObjectFifo.getMemref();
  auto oldBufferType = cast<MemRefType>(oldBuffer.getType());
  MemRefType newBufferType = MemRefType::get(
      staticShape, oldBufferType.getElementType(), MemRefLayoutAttrInterface{},
      oldBufferType.getMemorySpace());

  // Allocate the new buffer next to the old allocation.
  Operation *oldBufferDef = oldBuffer.getDefiningOp();
  assert(oldBufferDef && "expected a defining op for the value");
  rewriter.setInsertionPoint(oldBufferDef);
  Location loc = rewriter.getUnknownLoc();
  auto newAllocOp = rewriter.create<memref::AllocOp>(loc, newBufferType);
  auto newDeallocOp =
      rewriter.create<memref::DeallocOp>(rewriter.getUnknownLoc(), newAllocOp);
  // Place the dealloc just before the last op of the enclosing block.
  newDeallocOp->moveBefore(&newAllocOp->getBlock()->back());

  // Wrap the new buffer in a logical objectfifo on the same tiles.
  auto allocatedType = cast<MemRefType>(newAllocOp.getType());
  rewriter.setInsertionPoint(oldLogicalObjectFifo);
  return rewriter.create<AMDAIE::LogicalObjectFifoFromMemrefOp>(
      rewriter.getUnknownLoc(), LogicalObjectFifoType::get(allocatedType),
      newAllocOp.getResult(), oldLogicalObjectFifo.getTiles());
}
|
||
/// Collects the input DmaCpyNd ops that are candidates for splitting or
/// combining.
///
/// Walks every `AMDAIE::CoreOp` nested in `moduleOp`. For each core that has
/// exactly three input DMAs, the op defining the third one is recorded; it is
/// asserted to be an `AMDAIE::DmaCpyNdOp`. Cores with any other number of
/// input DMAs contribute nothing.
SmallVector<AMDAIE::DmaCpyNdOp> fetchDmaCpyNdOpsToSplitOrCombine(
    ModuleOp moduleOp) {
  SmallVector<AMDAIE::DmaCpyNdOp> l2ToL1DmaOps;
  // Only the 3rd input DMA of each core is gathered for now.
  // TODO(avarma): We will generalize this later.
  moduleOp.walk([&](AMDAIE::CoreOp coreOp) -> WalkResult {
    SmallVector<Value> coreInputDmas = coreOp.getInputDmas();
    if (coreInputDmas.size() == 3) {
      auto thirdInputDma =
          coreInputDmas[2].getDefiningOp<AMDAIE::DmaCpyNdOp>();
      assert(thirdInputDma && "expected an amdaie.dma_cpy_nd op");
      l2ToL1DmaOps.push_back(thirdInputDma);
      return WalkResult::advance();
    }
    return WalkResult::skip();
  });
  return l2ToL1DmaOps;
}
|
||
/// Utility to verify that the split dimensions for L2 are contiguous. | ||
static LogicalResult checkIsRangeFromZero( | ||
SmallVector<size_t> &splitDimsSetForL2) { | ||
|
@@ -124,6 +176,44 @@ static FailureOr<OpFoldResult> updateL3SourceOffset(IRRewriter &rewriter, | |
return newL3AsSourceOffset; | ||
} | ||
|
||
/// Given a L2->L1 DmaCpyNd op, find the unique L3->L2 DmaCpyNd op, i.e. the
/// single DmaCpyNd user of the L2->L1 op's source objectfifo that has that
/// objectfifo as its target.
///
/// Returns failure (after emitting a debug message) when:
///   - no such DmaCpyNd op exists,
///   - more than one such DmaCpyNd op exists, or
///   - the found op's source and target differ in the number of
///     offsets, sizes or strides.
static FailureOr<AMDAIE::DmaCpyNdOp> fetchL3ToL2DmaCpyNdOp(
    AMDAIE::DmaCpyNdOp l2ToL1DmaOp) {
  LogicalObjectFifoFromMemrefOp sourceObjectFifo =
      l2ToL1DmaOp.getSourceObjectFifo();
  SmallVector<AMDAIE::DmaCpyNdOp> l3ToL2DmaOps;
  for (Operation *objFifoUserOp : sourceObjectFifo->getUsers()) {
    // `dyn_cast` yields a null op for users that are not DmaCpyNd ops, so the
    // result must be checked before it is queried. In an `if (init; cond)`
    // statement only `cond` is tested, hence the explicit `dmaOp &&`.
    auto dmaOp = dyn_cast<AMDAIE::DmaCpyNdOp>(objFifoUserOp);
    if (dmaOp && dmaOp.getTargetObjectFifo() == sourceObjectFifo) {
      l3ToL2DmaOps.push_back(dmaOp);
    }
  }
  if (l3ToL2DmaOps.empty()) {
    LLVM_DEBUG(llvm::dbgs() << "no corresponding L3->L2 dma op found for "
                            << sourceObjectFifo << "\n");
    return failure();
  }
  if (l3ToL2DmaOps.size() > 1) {
    LLVM_DEBUG(llvm::dbgs() << "found more than one L3->L2 dma ops for "
                            << sourceObjectFifo << "\n");
    return failure();
  }
  AMDAIE::DmaCpyNdOp l3ToL2DmaOp = l3ToL2DmaOps[0];
  // Source and target access patterns must have matching dimensionality.
  if ((l3ToL2DmaOp.getTargetMixedOffsets().size() !=
       l3ToL2DmaOp.getSourceMixedOffsets().size()) ||
      (l3ToL2DmaOp.getTargetMixedSizes().size() !=
       l3ToL2DmaOp.getSourceMixedSizes().size()) ||
      (l3ToL2DmaOp.getTargetMixedStrides().size() !=
       l3ToL2DmaOp.getSourceMixedStrides().size())) {
    LLVM_DEBUG(llvm::dbgs() << "dimensionality of source and target's "
                               "offset/size/stride found different for "
                            << l3ToL2DmaOp << "\n");
    return failure();
  }
  return l3ToL2DmaOp;
}
|
||
/// A struct utility to encapsulate all the data required to perform splitting | ||
/// of logicalobjectfifos. | ||
struct SplittingLogicalObjectFifoData { | ||
|
@@ -186,36 +276,10 @@ static LogicalResult checkWhetherSplitIsPossible( | |
} | ||
|
||
// Fetch the L3 -> L2 Dma Op corresponding to the L2 buffer as target. | ||
SmallVector<AMDAIE::DmaCpyNdOp> l3ToL2DmaOps; | ||
AMDAIE::DmaCpyNdOp l3ToL2DmaOp; | ||
for (Operation *objFifoUserOp : sourceObjectFifo->getUsers()) { | ||
if (auto dmaOp = dyn_cast<AMDAIE::DmaCpyNdOp>(objFifoUserOp); | ||
dmaOp.getTargetObjectFifo() == sourceObjectFifo) { | ||
l3ToL2DmaOps.push_back(dmaOp); | ||
} | ||
} | ||
if (l3ToL2DmaOps.size() == 0) { | ||
LLVM_DEBUG(llvm::dbgs() << "no corresponding L3->L2 dma op found for " | ||
<< sourceObjectFifo << "\n"); | ||
return failure(); | ||
} | ||
if (l3ToL2DmaOps.size() > 1) { | ||
LLVM_DEBUG(llvm::dbgs() << "found more than one L3->L2 dma ops for " | ||
<< sourceObjectFifo << "\n"); | ||
return failure(); | ||
} | ||
l3ToL2DmaOp = l3ToL2DmaOps[0]; | ||
if ((l3ToL2DmaOp.getTargetMixedOffsets().size() != | ||
l3ToL2DmaOp.getSourceMixedOffsets().size()) || | ||
(l3ToL2DmaOp.getTargetMixedSizes().size() != | ||
l3ToL2DmaOp.getSourceMixedSizes().size()) || | ||
(l3ToL2DmaOp.getTargetMixedStrides().size() != | ||
l3ToL2DmaOp.getSourceMixedStrides().size())) { | ||
LLVM_DEBUG(llvm::dbgs() << "dimensionality of source and target's " | ||
"offset/size/stride found different for " | ||
<< l3ToL2DmaOp << "\n"); | ||
return failure(); | ||
} | ||
FailureOr<AMDAIE::DmaCpyNdOp> maybeL3ToL2DmaOp = | ||
fetchL3ToL2DmaCpyNdOp(l2ToL1DmaOps[0]); | ||
if (failed(maybeL3ToL2DmaOp)) return failure(); | ||
AMDAIE::DmaCpyNdOp l3ToL2DmaOp = maybeL3ToL2DmaOp.value(); | ||
|
||
SmallVector<OpFoldResult, 4> staticL2AsTargetSizes = | ||
l3ToL2DmaOp.getTargetMixedSizes(); | ||
|
@@ -289,16 +353,13 @@ LogicalResult splitLogicalObjectFifos( | |
toBeErased.insert(sourceAllocOp); | ||
toBeErased.insert(sourceObjectFifo); | ||
|
||
SmallVector<OpFoldResult, 4> staticL2AsTargetOffsets = | ||
SmallVector<OpFoldResult> staticL2AsTargetOffsets = | ||
l3ToL2DmaOp.getTargetMixedOffsets(); | ||
SmallVector<OpFoldResult, 4> staticL2AsTargetSizes = | ||
SmallVector<OpFoldResult> staticL2AsTargetSizes = | ||
l3ToL2DmaOp.getTargetMixedSizes(); | ||
SmallVector<int64_t, 4> l2ShapeAsTarget = llvm::to_vector( | ||
cast<MemRefType>(l3ToL2DmaOp.getTargetObjectFifo().getMemref().getType()) | ||
.getShape()); | ||
SmallVector<OpFoldResult, 4> staticL3AsSourceOffsets = | ||
SmallVector<OpFoldResult> staticL3AsSourceOffsets = | ||
l3ToL2DmaOp.getSourceMixedOffsets(); | ||
SmallVector<OpFoldResult, 4> staticL3AsSourceSizes = | ||
SmallVector<OpFoldResult> staticL3AsSourceSizes = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Relaying here the review comment : #755 (comment) (@jtuyls ) Two reasons :-
Therefore the only way to circumvent point 2, that I knew of, was to remove this, which aligned well with point 1. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's still useful to specify the number of inlined elements in the vector as we know it will usually be less than 4 and not much more: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, I've addressed it in the latest push. Please take a look. |
||
l3ToL2DmaOp.getSourceMixedSizes(); | ||
OpFoldResult zeroVal = getAsIndexOpFoldResult(context, 0); | ||
OpFoldResult oneVal = getAsIndexOpFoldResult(context, 1); | ||
|
@@ -310,45 +371,28 @@ LogicalResult splitLogicalObjectFifos( | |
staticL2AsTargetSizes[dim] = oneVal; | ||
staticL3AsSourceOffsets[dim] = zeroVal; | ||
staticL3AsSourceSizes[dim] = oneVal; | ||
l2ShapeAsTarget[dim] = 1; | ||
} | ||
|
||
// Traverse each L2->L1 DmaCpyNd op and split them. | ||
for (AMDAIE::DmaCpyNdOp l2ToL1DmaOp : l2ToL1DmaOps) { | ||
SmallVector<OpFoldResult, 6> staticL2AsSourceOffsets = | ||
SmallVector<OpFoldResult> staticL2AsSourceOffsets = | ||
l2ToL1DmaOp.getSourceMixedOffsets(); | ||
SmallVector<OpFoldResult, 6> staticL2AsSourceSizes = | ||
SmallVector<OpFoldResult> staticL2AsSourceSizes = | ||
l2ToL1DmaOp.getSourceMixedSizes(); | ||
|
||
// Now we'll create a new L2 buffer based on the new shape inferred earlier | ||
// via `l2ShapeAsTarget`. | ||
rewriter.setInsertionPoint(sourceAllocOp); | ||
LogicalObjectFifoFromMemrefOp targetObjectFifo = | ||
l2ToL1DmaOp.getTargetObjectFifo(); | ||
Value targetAllocOp = targetObjectFifo.getMemref(); | ||
auto oldSourceMemRefType = cast<MemRefType>(sourceAllocOp.getType()); | ||
auto targetMemRefType = cast<MemRefType>(targetAllocOp.getType()); | ||
MemRefType newAllocType = MemRefType::get( | ||
l2ShapeAsTarget, targetMemRefType.getElementType(), | ||
MemRefLayoutAttrInterface{}, oldSourceMemRefType.getMemorySpace()); | ||
auto newAllocOp = rewriter.create<memref::AllocOp>(rewriter.getUnknownLoc(), | ||
newAllocType); | ||
auto newDeallocOp = rewriter.create<memref::DeallocOp>( | ||
rewriter.getUnknownLoc(), newAllocOp); | ||
newDeallocOp->moveBefore(&newAllocOp->getBlock()->back()); | ||
auto type = cast<MemRefType>(newAllocOp.getType()); | ||
// Create new logicalobjectfifo.from_memref for the newly created L2 buffer. | ||
rewriter.setInsertionPoint(l2ToL1DmaOp.getSourceObjectFifo()); | ||
auto source = rewriter.create<AMDAIE::LogicalObjectFifoFromMemrefOp>( | ||
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(type), | ||
newAllocOp.getResult(), sourceObjectFifo.getTiles()); | ||
// via `staticL2AsTargetSizes`. | ||
LogicalObjectFifoFromMemrefOp oldL2ObjectFifo = | ||
l2ToL1DmaOp.getSourceObjectFifo(); | ||
AMDAIE::LogicalObjectFifoFromMemrefOp source = createNewLogicalObjectFifo( | ||
rewriter, oldL2ObjectFifo, staticL2AsTargetSizes); | ||
|
||
// -------------------------------------------- | ||
// ---------- L3 -> L2 splitting -------------- | ||
// -------------------------------------------- | ||
// Update L3 source offsets for non-split dimensions. Refer doc comment of | ||
// `updateL3SourceOffset` for the computation rationale involved. | ||
SmallVector<OpFoldResult, 4> staticL3AsSourceOffsets = | ||
SmallVector<OpFoldResult> staticL3AsSourceOffsets = | ||
l3ToL2DmaOp.getSourceMixedOffsets(); | ||
for (auto &&[splitDim, nonSplitdim] : | ||
llvm::zip_equal(splitDimsForL2, nonSplitDimsForL2)) { | ||
|
Uh oh!
There was an error while loading. Please reload this page.