From d4115b73232e9d635b6ca665585e166860380ece Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Thu, 14 Nov 2024 11:10:37 +0000 Subject: [PATCH] [AIEX] Simplify AIEClusterBaseAddress pass * Including a more generic chaining algorithm. --- llvm/lib/Target/AIE/AIE2TargetMachine.cpp | 7 +- llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp | 346 +++++++----------- .../GlobalISel/cluster-base-address-scl.mir | 63 ++-- .../GlobalISel/cluster-base-address-vec.mir | 63 ++-- .../GlobalISel/postinc-with-clustering.mir | 37 +- .../CodeGen/AIE/aie2/end-to-end/Memops.ll | 31 +- 6 files changed, 222 insertions(+), 325 deletions(-) diff --git a/llvm/lib/Target/AIE/AIE2TargetMachine.cpp b/llvm/lib/Target/AIE/AIE2TargetMachine.cpp index 11d458be65e1..e41641d5d267 100644 --- a/llvm/lib/Target/AIE/AIE2TargetMachine.cpp +++ b/llvm/lib/Target/AIE/AIE2TargetMachine.cpp @@ -71,6 +71,10 @@ static cl::opt StackAddrSpace( cl::desc("Specify the addrspace where the stack is allocated " "(5: Bank A, 6: Bank B, 7: Bank C, 8: Bank D)")); +static cl::opt EnableAddressChaining("aie-address-chaining", cl::Hidden, + cl::init(true), + cl::desc("Enable ptradd chaining.")); + extern bool AIEDumpArtifacts; void AIE2TargetMachine::anchor() {} @@ -138,7 +142,8 @@ void AIE2PassConfig::addPreLegalizeMachineIR() { void AIE2PassConfig::addPreRegBankSelect() { if (getOptLevel() != CodeGenOptLevel::None) { addPass(createAIE2PostLegalizerGenericCombiner()); - addPass(createAIEClusterBaseAddress()); + if (EnableAddressChaining) + addPass(createAIEClusterBaseAddress()); addPass(createAIE2PostLegalizerCustomCombiner()); } } diff --git a/llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp b/llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp index 94ca9b320299..4389f1e4a534 100644 --- a/llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp +++ b/llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp @@ -44,9 +44,7 @@ // are sure the clustering won't create any copies. //===----------------------------------------------------------------------===// -#include "AIEBaseSubtarget.h" -#include -#include +#include "AIE.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" @@ -57,6 +55,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" +#include +#include #define DEBUG_TYPE "aie-cluster-base-address" @@ -65,10 +65,6 @@ using namespace llvm; static const char AIE_CLUSTER_BASE_ADDRESS[] = "AIE Base Address Clustering Optimization"; -static cl::opt EnableAddressChaining("aie-address-chaining", cl::Hidden, - cl::init(true), - cl::desc("Enable ptradd chaining.")); - static cl::opt EnableChainsForScalarLdSt( "aie-chain-addr-scl-ldst", cl::Hidden, cl::init(true), cl::desc("Enable ptradd chaining for scalar loads and stores.")); @@ -78,31 +74,13 @@ static cl::opt EnableChainsForVectorLdSt( cl::desc("Enable ptradd chaining for vector loads and stores.")); namespace { -/** - * @brief Struct PtrAdd - * - * @param PtrAddMI The next ptr add to a load/store MI that has the potential to - * be chained. - * @param BaseReg The base ptr of the load/store that is found by traversing the - * ptr adds backwards. This is also the new operand of the next ptr add. - * @param NewOffset This is the new offset for the next ptr add to be chained, - * calculated using the offset information of the previous ptr adds. - */ -struct PtrAdd { - MachineInstr *PtrAddMI; - Register BaseReg; - int64_t NewOffset; -}; - class AIEClusterBaseAddress : public MachineFunctionPass { public: static char ID; AIEClusterBaseAddress() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { - if (!EnableAddressChaining) - return false; - MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI = &MF.getRegInfo(); TargetPassConfig &TPC = getAnalysis(); // Enable CSE. GISelCSEAnalysisWrapper &Wrapper = @@ -122,7 +100,7 @@ class AIEClusterBaseAddress : public MachineFunctionPass { bool Changed = false; for (MachineBasicBlock &MBB : MF) { - Changed |= processBasicBlock(MBB, MRI, MIB, Observer); + Changed |= processBasicBlock(MBB, MIB, Observer); } return Changed; } @@ -136,107 +114,141 @@ class AIEClusterBaseAddress : public MachineFunctionPass { StringRef getPassName() const override { return AIE_CLUSTER_BASE_ADDRESS; } + using RegUseMap = std::map>; + private: - bool processBasicBlock(MachineBasicBlock &MBB, MachineRegisterInfo &MRI, - MachineIRBuilder &MIB, + const MachineRegisterInfo *MRI = nullptr; + + bool processBasicBlock(MachineBasicBlock &MBB, MachineIRBuilder &MIB, GISelObserverWrapper &Observer) { - /* Pass 1 : - Traverse MBB to try and connect G_LOAD or G_STORE to following PTR_ADDs. - This also checks if any MI uses the base ptr and if it does, we simply - remove the entire entry from the ChainedPtrAdds map. We avoid combining if - the base register of the to-be-chained ptradd is used by another instr. - This would otherwise generate a COPY and increase reg pressure. - Use a map to store the Load/Store MI and the next ptr add with the base - ptr of the Ld/St MI and the new updated offset. - */ - std::map ChainedPtrAdds = - findChainablePtrAdds(MBB, MRI); - - // Return false if ChainedPtrAdds is empty since we have no ptradds to - // update. - if (ChainedPtrAdds.empty()) - return false; - - /* Pass 2 : - Simply update the chainable ptradds in the MBB using the information in - the ChainedPtrAdds map. - */ - updatePtrAddsInMBB(MBB, MRI, MIB, Observer, ChainedPtrAdds); - - return true; + + bool Changed = false; + + // Get all G_PTR_ADDs that use the same pointer. + RegUseMap RegAndUses = collectPtrUses(MBB); + + // Create chains, when profitable. + for (auto RegAndUse : RegAndUses) { + + SmallVector &Instrs = RegAndUse.second; + // Chaining acceptance criteria. + if (shouldSkipChaining(RegAndUse.first, Instrs, MBB)) + continue; + + // Build chain, breaking it (or restarting it) when necessary + buildChain(Instrs, MBB, MIB, Observer); + Changed = true; + } + return Changed; } - std::map - findChainablePtrAdds(MachineBasicBlock &MBB, MachineRegisterInfo &MRI) { - std::map ChainedPtrAdds; + // Get all candidates, i.e. groups of G_PTR_ADDs in the same + // basic block that shares the same input pointer. + RegUseMap collectPtrUses(MachineBasicBlock &MBB) { + RegUseMap RegAndUses; for (MachineInstr &MI : MBB) { - if (dyn_cast(&MI)) { - processLoadOrStore(MI, MRI, ChainedPtrAdds); + if (MI.getOpcode() == TargetOpcode::G_PTR_ADD) + RegAndUses[MI.getOperand(1).getReg()].push_back(&MI); + } + return RegAndUses; + } + + // Evaluate if we consider a group of G_PTR_ADDs as a candidate to + // create a chain. + bool shouldSkipChaining(Register PtrReg, + const SmallVector &Instrs, + MachineBasicBlock &MBB) { + + // No chain possibility at all. + if (Instrs.size() <= 1) + return true; + + // If the base reg is used in any of the successive MBBs, then we don't + // want to chain the corresponding ptr adds, since this would introduce a + // COPY and increase reg pressure. + return isRegUsedInSuccessiveMBBs(&MBB, PtrReg); + } + + // Build a chain (or set of chains) of G_PTR_ADDs. We consider as + // chain a linear sequence of linked G_PTR_ADDs, tied to output and + // input pointers. + void buildChain(SmallVector &Instrs, + MachineBasicBlock &MBB, MachineIRBuilder &MIB, + GISelObserverWrapper &Observer) { + int64_t AccumulatedOffset = 0; + for (unsigned I = 0; I < Instrs.size() - 1; I++) { + MachineInstr *MI = Instrs[I]; + MachineInstr *MINext = Instrs[I + 1]; + auto OffsetMI = + getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), *MRI); + auto OffsetMINext = getIConstantVRegValWithLookThrough( + MINext->getOperand(2).getReg(), *MRI); + + // Evaluate if we should restart the chain from the base + // pointer. This is necessary when we deal with unknown offsets + // (not constants) and desirable when we share pointers between + // loads and stores (avoiding dependencies). + if (shouldBreakChain(MI, MINext, OffsetMI, OffsetMINext)) { + AccumulatedOffset = 0; continue; } - // Check if any base pointer in `ChainedPtrAdds` is used by MI. In this - // case, we do not want to chain the addresses, because this would - // introduce a COPY that increases the pressure on PTR registers. + AccumulatedOffset += OffsetMI->Value.getSExtValue(); + const int64_t NewNextOffset = + OffsetMINext->Value.getSExtValue() - AccumulatedOffset; + MIB.setInsertPt(MBB, MINext->getIterator()); - // If MI is a ptradd already chained with a previous load or store, - // this is a safe use. - if (any_of(ChainedPtrAdds, - [&](std::pair &LdStAndPtrAdd) { - return LdStAndPtrAdd.second.PtrAddMI == &MI; - })) - continue; + Register NewOffsetReg = + MIB.buildConstant(LLT::scalar(20), NewNextOffset).getReg(0); - // Otherwise, remove all chained load/stores that use one of our operands - // as base pointer. - std::set ToBeErased; - for (const MachineOperand &Op : MI.operands()) { - if (Op.isReg() && Op.isUse()) - removeChainedInstrsWithBasePtr(ChainedPtrAdds, Op.getReg()); - } + Observer.changingInstr(*MINext); + MINext->getOperand(1).setReg(MI->getOperand(0).getReg()); + MINext->getOperand(2).setReg(NewOffsetReg); + Observer.changedInstr(*MINext); } - return ChainedPtrAdds; } - // Process all the encountered load and stores in the basic block. - // We find the base register of the Ld/St MI and using that base register, we - // find following ptr adds which have the same base register. If we find such - // a ptr add, we create an entry in the ChainedPtrAdds map. - void processLoadOrStore(MachineInstr &MI, MachineRegisterInfo &MRI, - std::map &ChainedPtrAdds) { - Register LdOrStPtrReg = MI.getOperand(1).getReg(); - // Find the base defining reg of the given MI and the ptr offset. - auto BaseRegAndOffset = findBaseReg(MRI, LdOrStPtrReg); - if (!BaseRegAndOffset.has_value()) - return; - if ((!EnableChainsForScalarLdSt && getLoadStoreSize(MI) <= 32) || - (!EnableChainsForVectorLdSt && getLoadStoreSize(MI) >= 256)) - return; - Register BaseReg = BaseRegAndOffset->first; - // If the base reg is used in any of the successive MBBs, then we don't want - // to chain the corresponding ptr adds. Since that would introduce a COPY - // and increase reg pressure. - if (isRegUsedInSuccessiveMBBs(MI.getParent(), BaseReg, MRI)) - return; - int64_t BasePtrOffset = BaseRegAndOffset->second; - // Find the next G_PTR_ADD MachineInstr that comes after the given - // MachineInstr and has the same base register. - MachineInstr *NextPtrAddMI = findNextPtrAddForReg(MI, MRI, BaseReg); - if (!NextPtrAddMI) - return; - auto [OffsetReg, ConstVal] = getPtrAddOffsetInfo(*NextPtrAddMI, MRI); - if (!ConstVal) - return; - int64_t NextPtrOffset = ConstVal->Value.getSExtValue(); - PtrAdd PA; - PA.PtrAddMI = NextPtrAddMI; - PA.BaseReg = BaseReg; - PA.NewOffset = NextPtrOffset - BasePtrOffset; - ChainedPtrAdds[&MI] = PA; + // Evaluate if we should break the chain construction. + // Criteria: + // * Unknown offsets. + // * Pointer shared between load(s) and store(s). + bool shouldBreakChain(MachineInstr *MIA, MachineInstr *MIB, + std::optional OffsetA, + std::optional OffsetB) { + + // If one of the offsets is not constant, it is better to break the chain. + if (!OffsetA || !OffsetB) + return true; + + return hasMixedLoadStoreUse({MIA, MIB}); + } + + // Return true if the instructions are used by both loads and stores. + bool hasMixedLoadStoreUse(SmallVector Instrs) { + unsigned LoadCount = 0; + unsigned StoreCount = 0; + for (MachineInstr *MI : Instrs) { + const Register PtrReg = MI->getOperand(0).getReg(); + for (const MachineInstr &UseMI : MRI->use_instructions(PtrReg)) { + if (!UseMI.mayLoadOrStore()) + continue; + if (UseMI.mayLoad()) + LoadCount++; + else + StoreCount++; + const LLT MemType = getLoadStoreType(UseMI); + // If desired, we also can break the chain between pairs of + // pointers that are used to load/store vectors and/or scalars. + if ((!EnableChainsForScalarLdSt && MemType.isScalar()) || + (!EnableChainsForVectorLdSt && MemType.isVector())) + return true; + } + } + return (LoadCount > 0 && StoreCount > 0); } - unsigned getLoadStoreSize(const MachineInstr &MI) { - return (*MI.memoperands_begin())->getSizeInBits().getValue(); + LLT getLoadStoreType(const MachineInstr &MI) { + return (*MI.memoperands_begin())->getMemoryType(); } // Get a set of all reachable MBBs from a given MBB. @@ -261,118 +273,14 @@ class AIEClusterBaseAddress : public MachineFunctionPass { } // Find if a register is used in reachable MBBs. - bool isRegUsedInSuccessiveMBBs(MachineBasicBlock *MBB, Register Reg, - MachineRegisterInfo &MRI) { + bool isRegUsedInSuccessiveMBBs(MachineBasicBlock *MBB, Register Reg) { std::set ReachableMBBs = findReachableMBBs(MBB); - for (MachineInstr &Use : MRI.use_nodbg_instructions(Reg)) { + for (MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { if (ReachableMBBs.count(Use.getParent())) return true; } return false; } - - void removeChainedInstrsWithBasePtr( - std::map &ChainedPtrAdds, unsigned BaseReg) { - std::set ToBeErased; - for (auto &[LdStMI, ChainedPtrAdd] : ChainedPtrAdds) { - if (ChainedPtrAdd.BaseReg == BaseReg) - ToBeErased.insert(LdStMI); - } - for (auto &LdStMI : ToBeErased) { - ChainedPtrAdds.erase(LdStMI); - } - } - - void - updatePtrAddsInMBB(MachineBasicBlock &MBB, MachineRegisterInfo &MRI, - MachineIRBuilder &MIB, GISelObserverWrapper &Observer, - const std::map &ChainedPtrAdds) { - for (auto &MI : MBB) { - auto Entry = ChainedPtrAdds.find(&MI); - if (Entry == ChainedPtrAdds.end()) - continue; - MachineInstr *LdOrStMI = Entry->first; - Register LdOrStPtrReg = LdOrStMI->getOperand(1).getReg(); - const PtrAdd &ChainedPtrAdd = Entry->second; - MachineInstr *PtrAddMI = ChainedPtrAdd.PtrAddMI; - int64_t ChainedOffset = ChainedPtrAdd.NewOffset; - - MIB.setInsertPt(*PtrAddMI->getParent(), PtrAddMI->getIterator()); - // Change the ptr register operand of PtrAddMI to be the ptr reg operand - // of the load/store - Observer.changingInstr(*PtrAddMI); - PtrAddMI->getOperand(1).setReg(LdOrStPtrReg); - Observer.changedInstr(*PtrAddMI); - // Build a new G_CONSTANT MachineInstr with NewOffset as its value - // If there is a G_CONSTANT present which don't have any further uses - // other than a given ptr add, then it would just be eliminated as dead - // code. - Register NewOffsetReg = - MIB.buildConstant(LLT::scalar(20), ChainedOffset).getReg(0); - // Change the offset register operand of PtrAddMI to be NewOffsetReg - Observer.changingInstr(*PtrAddMI); - PtrAddMI->getOperand(2).setReg(NewOffsetReg); - Observer.changedInstr(*PtrAddMI); - } - } - - std::optional> - findBaseReg(MachineRegisterInfo &MRI, const Register Reg) { - Register BaseReg = Reg; - int64_t Offset = 0; - while (true) { - // Get the defining instruction for the register - MachineInstr *DefMI = MRI.getVRegDef(BaseReg); - if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_PTR_ADD) - break; - auto [OffsetReg, ConstVal] = getPtrAddOffsetInfo(*DefMI, MRI); - // TODO: Handle ptr adds with indirect constant offsets as needed. - if (!ConstVal) - break; - Offset += ConstVal->Value.getSExtValue(); - BaseReg = DefMI->getOperand(1).getReg(); - } - return std::make_optional(std::make_pair(BaseReg, Offset)); - } - - // Find next ptr add having the same base register. - MachineInstr *findNextPtrAddForReg(MachineInstr &Start, - MachineRegisterInfo &MRI, - const Register BaseReg) { - MachineBasicBlock *MBB = Start.getParent(); - MachineBasicBlock::iterator It = std::next(Start.getIterator()), - End = MBB->end(); - auto FoundIt = std::find_if(It, End, [&](MachineInstr &MI) { - if (MI.getOpcode() == TargetOpcode::G_PTR_ADD && - MI.getOperand(1).getReg() == BaseReg) - return true; - // We search for GLoadStore because we always want to stick to the - // immediately preceding GLoadStore to chain the ptr add. - if (dyn_cast(&MI)) { - Register LdOrStPtrReg = MI.getOperand(1).getReg(); - auto BaseRegAndOffset = findBaseReg(MRI, LdOrStPtrReg); - if (!BaseRegAndOffset.has_value()) - return true; - Register BasePtr = BaseRegAndOffset->first; - if (BasePtr == BaseReg) - return true; - } - return false; - }); - if (FoundIt != End && FoundIt->getOpcode() == TargetOpcode::G_PTR_ADD) - return &*FoundIt; - return nullptr; - } - - std::pair> - getPtrAddOffsetInfo(const MachineInstr &MI, MachineRegisterInfo &MRI) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && - "Expected a ptr add MI"); - Register OffsetReg = MI.getOperand(2).getReg(); - std::optional ConstVal = - getIConstantVRegValWithLookThrough(OffsetReg, MRI); - return {OffsetReg, ConstVal}; - } }; } // namespace diff --git a/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-scl.mir b/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-scl.mir index 426fec8894fb..0a1f43b16291 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-scl.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-scl.mir @@ -7,6 +7,9 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie2 -run-pass=aie-cluster-base-address --aie-chain-addr-scl-ldst --aie-chain-addr-vec-ldst=false %s -verify-machineinstrs -o - | FileCheck %s +# In this test, we chain the ptradds until we reach a store, then break the chain +# to not create data dependencies later on. + --- name: loads_and_stores_with_same_base_ptr_single_bb body: | @@ -24,13 +27,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C2]](s20) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 -24 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 -56 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C4]](s20) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s20) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p0) :: (store (s32)) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 152 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C6]](s20) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s20) %0:_(p0) = COPY $p0 %1:_(s20) = G_CONSTANT i20 4 %2:_(p0) = G_PTR_ADD %0:_, %1:_(s20) @@ -63,19 +64,18 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[COPY1]](p0) :: (store (s32)) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s20) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 -28 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 -28 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s20) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s20) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 -28 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 -32 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C3]](s20) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32)) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 -36 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 -44 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s20) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C8]](s20) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 -36 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 -44 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s20) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 52 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C7]](s20) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 %2:_(s20) = G_CONSTANT i20 4 @@ -86,9 +86,7 @@ body: | %6:_(p0) = G_PTR_ADD %1:_, %5:_(s20) %7:_(s20) = G_CONSTANT i20 -28 %8:_(p0) = G_PTR_ADD %0:_, %7:_(s20) - ; G_LOAD has base ptr %0 %9:_(s32) = G_LOAD %3:_(p0) :: (load (s32), align 4) - ; G_STORE has base ptr %1 G_STORE %9:_(s32), %6:_(p0) :: (store (s32), align 4) %10:_(s20) = G_CONSTANT i20 -36 %11:_(p0) = G_PTR_ADD %1:_, %10:_(s20) @@ -125,10 +123,8 @@ body: | %2:_(p0) = G_PTR_ADD %0:_, %1:_(s20) %3:_(s32) = G_LOAD %2:_(p0) :: (load (s32), align 4) %4:_(s20) = G_CONSTANT i20 12 - ; This is the first use of the %4 %5:_(p0) = G_PTR_ADD %0:_, %4:_(s20) %6:_(s20) = G_CONSTANT i20 8 - ; This is the second use of the %4 %7:_(s20) = G_ADD %4:_(s20), %6:_(s20) %8:_(s32) = G_LOAD %5:_(p0) :: (load (s32), align 4) %9:_(s20) = G_CONSTANT i20 32 @@ -176,7 +172,6 @@ body: | %9:_(<32 x s16>) = G_LOAD %2:_(p0) :: (load (<32 x s16>)) %10:_(s32) = G_LOAD %4:_(p0) :: (load (s32)) %11:_(s20) = G_CONSTANT i20 232 - ; This ptradd will change according to %10 %12:_(p0) = G_PTR_ADD %0:_, %11:_(s20) %13:_(s32) = G_LOAD %12:_(p0) :: (load (s32)) %14:_(s20) = G_CONSTANT i20 116 @@ -185,8 +180,8 @@ body: | # Here we have multiple ptradds between 2 loads or stores. # In this case the immediately succeeding ptr add to the load -# or store will change, if there is no existing use of the base ptr after that. -# Rest will remain unaffected. +# or store will change. The remaining ptr adds will be also +# chained. --- name: multiple_ptradds_between_loads_or_stores_single_bb body: | @@ -204,17 +199,19 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s20) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s20) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C3]](s20) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32)) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s20) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s20) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 28 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s20) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s20) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C7]](s20) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C9]](s20) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 %2:_(s20) = G_CONSTANT i20 4 @@ -228,11 +225,8 @@ body: | %9:_(s32) = G_LOAD %3:_(p0) :: (load (s32), align 4) G_STORE %9:_(s32), %6:_(p0) :: (store (s32), align 4) %10:_(s20) = G_CONSTANT i20 16 - ; This ptradd will not change. - ; There is a use of %1 later. %11:_(p0) = G_PTR_ADD %1:_, %10:_(s20) %12:_(s20) = G_CONSTANT i20 24 - ; This ptradd will not change. %13:_(p0) = G_PTR_ADD %1:_, %12:_(s20) %14:_(s20) = G_CONSTANT i20 32 %15:_(p0) = G_PTR_ADD %0:_, %14:_(s20) @@ -292,7 +286,6 @@ body: | %6:_(s32) = G_ADD %5:_, %1:_ G_STORE %6:_(s32), %4:_(p0) :: (store (s32)) %7:_(s20) = G_CONSTANT i20 4 - ; This ptr add will not change since %0 has use in bb.1 %8:_(p0) = G_PTR_ADD %0:_, %7:_(s20) %9:_(s32) = G_LOAD %8:_(p0) :: (dereferenceable load (s32)) %10:_(s32) = G_CONSTANT i32 255 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-vec.mir b/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-vec.mir index 1d08ff73c648..80f200d0173f 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-vec.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/cluster-base-address-vec.mir @@ -7,6 +7,9 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie2 -run-pass=aie-cluster-base-address --aie-chain-addr-vec-ldst --aie-chain-addr-scl-ldst=false %s -verify-machineinstrs -o - | FileCheck %s +# In this test, we chain the ptradds until we reach a store, then break the chain +# to not create data dependencies later on. + --- name: vec_loads_and_stores_with_same_base_ptr_single_bb body: | @@ -24,13 +27,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C2]](s20) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<16 x s32>), align 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 -24 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 -56 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C4]](s20) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s20) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[PTR_ADD2]](p0) :: (load (<8 x s64>), align 4) ; CHECK-NEXT: G_STORE [[LOAD2]](<8 x s64>), [[PTR_ADD2]](p0) :: (store (<8 x s64>), align 4) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 152 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C6]](s20) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s20) %0:_(p0) = COPY $p0 %1:_(s20) = G_CONSTANT i20 4 %2:_(p0) = G_PTR_ADD %0:_, %1:_(s20) @@ -62,19 +63,18 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load (<16 x s32>), align 4) ; CHECK-NEXT: G_STORE [[LOAD]](<16 x s32>), [[COPY1]](p0) :: (store (<16 x s32>), align 4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s20) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 -28 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 -28 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s20) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s20) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 -28 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 -32 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C3]](s20) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<8 x s32>), align 4) ; CHECK-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[PTR_ADD1]](p0) :: (store (<8 x s32>), align 4) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 -36 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 -44 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s20) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C8]](s20) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 -36 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 -44 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s20) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 52 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C7]](s20) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 %2:_(s20) = G_CONSTANT i20 4 @@ -85,9 +85,7 @@ body: | %6:_(p0) = G_PTR_ADD %1:_, %5:_(s20) %7:_(s20) = G_CONSTANT i20 -28 %8:_(p0) = G_PTR_ADD %0:_, %7:_(s20) - ; G_LOAD has base ptr %0 %9:_(<8 x s32>) = G_LOAD %3:_(p0) :: (load (<8 x s32>), align 4) - ; G_STORE has base ptr %1 G_STORE %9:_(<8 x s32>), %6:_(p0) :: (store (<8 x s32>), align 4) %10:_(s20) = G_CONSTANT i20 -36 %11:_(p0) = G_PTR_ADD %1:_, %10:_(s20) @@ -124,10 +122,8 @@ body: | %2:_(p0) = G_PTR_ADD %0:_, %1:_(s20) %3:_(<16 x s32>) = G_LOAD %2:_(p0) :: (load (<16 x s32>), align 4) %4:_(s20) = G_CONSTANT i20 12 - ; This is the first use of the %4 %5:_(p0) = G_PTR_ADD %0:_, %4:_(s20) %6:_(s20) = G_CONSTANT i20 8 - ; This is the second use of the %4 %7:_(s20) = G_ADD %4:_(s20), %6:_(s20) %8:_(<16 x s32>) = G_LOAD %5:_(p0) :: (load (<16 x s32>), align 4) %9:_(s20) = G_CONSTANT i20 32 @@ -175,7 +171,6 @@ body: | %9:_(s32) = G_LOAD %2:_(p0) :: (load (s32)) %10:_(<16 x s32>) = G_LOAD %4:_(p0) :: (load (<16 x s32>)) %11:_(s20) = G_CONSTANT i20 232 - ; This ptradd will change according to %10 %12:_(p0) = G_PTR_ADD %0:_, %11:_(s20) %13:_(<16 x s32>) = G_LOAD %12:_(p0) :: (load (<16 x s32>)) %14:_(s20) = G_CONSTANT i20 116 @@ -184,8 +179,8 @@ body: | # Here we have multiple ptradds between 2 loads or stores. # In this case the immediately succeeding ptr add to the load -# or store will change, if there is no existing use of the base ptr after that. -# Rest will remain unaffected. +# or store will change. The remaining ptr adds will be also +# chained. --- name: multiple_ptradds_between_loads_or_stores_single_bb body: | @@ -203,17 +198,19 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s20) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s20) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C3]](s20) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<16 x s32>), align 4) ; CHECK-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[PTR_ADD1]](p0) :: (store (<16 x s32>), align 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s20) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s20) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 28 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s20) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C5]](s20) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C7]](s20) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s20) = G_CONSTANT i20 20 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C9]](s20) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 %2:_(s20) = G_CONSTANT i20 4 @@ -227,11 +224,8 @@ body: | %9:_(<16 x s32>) = G_LOAD %3:_(p0) :: (load (<16 x s32>), align 4) G_STORE %9:_(<16 x s32>), %6:_(p0) :: (store (<16 x s32>), align 4) %10:_(s20) = G_CONSTANT i20 16 - ; This ptradd will not change. - ; There is a use of %1 later. %11:_(p0) = G_PTR_ADD %1:_, %10:_(s20) %12:_(s20) = G_CONSTANT i20 24 - ; This ptradd will not change. %13:_(p0) = G_PTR_ADD %1:_, %12:_(s20) %14:_(s20) = G_CONSTANT i20 32 %15:_(p0) = G_PTR_ADD %0:_, %14:_(s20) @@ -292,7 +286,6 @@ body: | %6:_(s32) = G_ADD %5:_, %1:_ G_STORE %6:_(s32), %4:_(p0) :: (store (s32)) %7:_(s20) = G_CONSTANT i20 4 - ; This ptr add will not change since %0 has use in bb.1 %8:_(p0) = G_PTR_ADD %0:_, %7:_(s20) %9:_(s32) = G_LOAD %8:_(p0) :: (load (s32)) %10:_(s32) = G_CONSTANT i32 255 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/postinc-with-clustering.mir b/llvm/test/CodeGen/AIE/GlobalISel/postinc-with-clustering.mir index bed9ef5d97d5..067000971b95 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/postinc-with-clustering.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/postinc-with-clustering.mir @@ -18,14 +18,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C1]](s20) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C2]](s20) :: (store (s32)) - ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD2:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD3:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[AIE_POSTINC_LOAD1]], [[C2]](s20) :: (load (s32)) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_OFFSET_LOAD]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32)) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD1]](p0) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD2]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32)) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD3]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[AIE_POSTINC_STORE1]](p0) :: (store (s32)) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 @@ -58,14 +58,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C1]](s20) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C2]](s20) :: (store (s32)) - ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD2:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD3:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[AIE_POSTINC_LOAD1]], [[C2]](s20) :: (load (s32)) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_OFFSET_LOAD]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32)) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD1]](p0) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD2]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32)) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD3]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[AIE_POSTINC_STORE1]](p0) :: (store (s32)) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 @@ -82,7 +82,6 @@ body: | %10:_(s20) = G_CONSTANT i20 16 %11:_(p0) = G_PTR_ADD %1:_, %10:_(s20) %12:_(s20) = G_CONSTANT i20 24 - ; This ptradd will not change. %13:_(p0) = G_PTR_ADD %1:_, %12:_(s20) %14:_(s20) = G_CONSTANT i20 32 %15:_(p0) = G_PTR_ADD %0:_, %14:_(s20) @@ -119,14 +118,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 12 - ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C1]](s20) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32)) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C2]](s20) :: (store (s32)) - ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD2:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD3:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[AIE_POSTINC_LOAD1]], [[C2]](s20) :: (load (s32)) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8 - ; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_OFFSET_LOAD]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32)) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD1]](p0) :: (load (s32)) + ; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD2]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32)) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD3]](p0) :: (load (s32)) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[AIE_POSTINC_STORE1]](p0) :: (store (s32)) %0:_(p0) = COPY $p0 %1:_(p0) = COPY $p1 diff --git a/llvm/test/CodeGen/AIE/aie2/end-to-end/Memops.ll b/llvm/test/CodeGen/AIE/aie2/end-to-end/Memops.ll index b0570790ddb4..e56e82b2b070 100644 --- a/llvm/test/CodeGen/AIE/aie2/end-to-end/Memops.ll +++ b/llvm/test/CodeGen/AIE/aie2/end-to-end/Memops.ll @@ -100,18 +100,9 @@ define dso_local void @lowerMemcpyUsingWordHalfByte() local_unnamed_addr #0 { ; CHECK-LABEL: lowerMemcpyUsingWordHalfByte: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; nopb ; movxm p1, #(buffer2+8) -; CHECK-NEXT: movxm p2, #(buffer1+8) -; CHECK-NEXT: lda.s16 r0, [p1], #2; mov p0, p1 -; CHECK-NEXT: st.s16 r0, [p2], #2; mov p3, p2 -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: lda.s8 r0, [p1, #0] -; CHECK-NEXT: st.s8 r0, [p2, #0] +; CHECK-NEXT: nopa ; nopb ; movxm p0, #(buffer2+8); nops +; CHECK-NEXT: lda.s16 r0, [p0, #0]; movxm p1, #(buffer1+8) +; CHECK-NEXT: st.s16 r0, [p1, #0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -119,14 +110,22 @@ define dso_local void @lowerMemcpyUsingWordHalfByte() local_unnamed_addr #0 { ; CHECK-NEXT: nop ; CHECK-NEXT: paddb [p0], #-8 ; CHECK-NEXT: lda r0, [p0], #4 -; CHECK-NEXT: lda r1, [p0, #0] +; CHECK-NEXT: mova m0, #6 +; CHECK-NEXT: lda r1, [p0], m0 +; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop +; CHECK-NEXT: paddb [p1], #-8 +; CHECK-NEXT: st r0, [p1], #4 +; CHECK-NEXT: lda.s8 r0, [p0, #0] +; CHECK-NEXT: st r1, [p1], m0 +; CHECK-NEXT: st.s8 r0, [p1, #0] +; CHECK-NEXT: nop ; CHECK-NEXT: ret lr ; CHECK-NEXT: nop // Delay Slot 5 -; CHECK-NEXT: paddb [p3], #-8 // Delay Slot 4 -; CHECK-NEXT: st r0, [p3], #4 // Delay Slot 3 -; CHECK-NEXT: st r1, [p3, #0] // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 4 dereferenceable(11) @buffer1, ptr noundef nonnull align 4 dereferenceable(11) @buffer2, i32 11, i1 false)