Skip to content

Commit 8d6b4f7

Browse files
committed
[AIE] Block widening register coalescing inside innerloop
1 parent c5b8fae commit 8d6b4f7

File tree

4 files changed

+48
-9
lines changed

4 files changed

+48
-9
lines changed

llvm/lib/Target/AIE/AIEBaseRegisterInfo.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,8 @@
1818
#include "Utils/AIELoopUtils.h"
1919
#include "llvm/CodeGen/LiveIntervals.h"
2020

21-
namespace llvm {
22-
static cl::opt<bool> EnableCoalescingForWideCopy(
23-
"aie-enable-widen-copy-coalescing",
24-
cl::desc("Enable register coalescing for widening Copy"), cl::init(false),
25-
cl::Hidden);
21+
using namespace llvm;
22+
extern cl::opt<bool> EnableCoalescingForWideCopy;
2623

2724
bool AIEBaseRegisterInfo::shouldCoalesce(
2825
MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
@@ -57,5 +54,3 @@ bool AIEBaseRegisterInfo::shouldCoalesce(
5754
return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg,
5855
NewRC, LIS);
5956
}
60-
61-
} // namespace llvm

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include "AIE2PRegisterBankInfo.h"
1818
#include "AIE2PSubtarget.h"
1919
#include "MCTargetDesc/aie2p/AIE2PMCTargetDesc.h"
20+
#include "Utils/AIELoopUtils.h"
21+
#include "llvm/CodeGen/LiveIntervals.h"
2022
#include "llvm/CodeGen/MachineFrameInfo.h"
2123
#include "llvm/CodeGen/MachineFunction.h"
2224
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -34,6 +36,11 @@ using namespace llvm;
3436

3537
extern cl::opt<bool> SimplifyCRSRRegs;
3638

39+
// Hidden boolean command-line option "aie-enable-widen-copy-coalescing",
// initialized to false. It has external linkage here (no `static`), which is
// what lets an `extern cl::opt<bool>` declaration in another file refer to it.
cl::opt<bool> EnableCoalescingForWideCopy(
40+
"aie-enable-widen-copy-coalescing",
41+
cl::desc("Enable register coalescing for widening Copy"), cl::init(false),
42+
cl::Hidden);
43+
3744
extern llvm::cl::opt<unsigned> ReservedGPRs;
3845

3946
AIE2PRegisterInfo::AIE2PRegisterInfo(unsigned HwMode)
@@ -603,3 +610,34 @@ bool AIE2PRegisterInfo::isFifoPhysReg(const Register Reg) const {
603610
return Reg.isPhysical() && (AIE2P::FIFO512RegClass.contains(Reg) ||
604611
AIE2P::FIFO1024RegClass.contains(Reg));
605612
}
613+
614+
/// Decide whether the copy-like instruction \p MI may be coalesced.
///
/// Returns false for a widening copy (\p SrcRC narrower than \p DstRC) where
/// either size is at least the basic vector register size, unless
/// EnableCoalescingForWideCopy is set or the live interval of MI's operand 1
/// both begins and ends at instructions located in MI's own basic block.
/// Every other case is delegated to TargetRegisterInfo::shouldCoalesce.
bool AIE2PRegisterInfo::shouldCoalesce(
615+
MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
616+
const TargetRegisterClass *DstRC, unsigned DstSubReg,
617+
const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
618+
619+
const unsigned SrcSize = getRegSizeInBits(*SrcRC);
620+
const unsigned DstSize = getRegSizeInBits(*DstRC);
621+
MachineFunction *MF = MI->getMF();
622+
const AIEBaseInstrInfo *TII =
623+
static_cast<const AIEBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
624+
const unsigned BasicVectorSize = TII->getBasicVecRegSize();
625+
// Restrict coalescing of widening copies, i.e. copies whose source is
// narrower than the destination (SrcSize < DstSize), when either side is at
// least as wide as the basic vector register.
626+
if (!EnableCoalescingForWideCopy && SrcSize < DstSize &&
627+
(SrcSize >= BasicVectorSize || DstSize >= BasicVectorSize)) {
628+
MachineBasicBlock *MBB = MI->getParent();
629+
// NOTE(review): operand 1 is taken to be the source register, which
// presumes MI is a COPY -- confirm no other copy-like opcode reaches here.
LiveInterval &LI = LIS.getInterval(MI->getOperand(1).getReg());
630+
const MachineInstr *FirstMI =
631+
LI.empty() ? nullptr : LIS.getInstructionFromIndex(LI.beginIndex());
632+
const MachineInstr *LastMI =
633+
LI.empty() ? nullptr : LIS.getInstructionFromIndex(LI.endIndex());
634+
// Coalescing within a single basic block was found beneficial, so refuse
635+
// only when the live interval is not entirely local to MBB. An empty interval or an index with no instruction is also refused.
636+
if (!FirstMI || FirstMI->getParent() != MBB || !LastMI ||
637+
LastMI->getParent() != MBB)
638+
return false;
639+
}
640+
641+
return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg,
642+
NewRC, LIS);
643+
}

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ struct AIE2PRegisterInfo : public AIE2PGenRegisterInfo {
103103
bool isFifoPhysReg(const Register Reg) const override;
104104

105105
bool isSimplifiableReservedReg(MCRegister PhysReg) const override;
106+
107+
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
108+
unsigned SubReg, const TargetRegisterClass *DstRC,
109+
unsigned DstSubReg, const TargetRegisterClass *NewRC,
110+
LiveIntervals &LIS) const override;
106111
};
107112
} // namespace llvm
108113

llvm/test/CodeGen/AIE/aie2p/ra/coalesce-widen-copy.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,14 @@ body: |
8181
; CHECK-NEXT: {{ $}}
8282
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 7
8383
; CHECK-NEXT: [[VBCST_16_:%[0-9]+]]:vec512 = VBCST_16 [[MOV_RLC_imm11_pseudo]]
84-
; CHECK-NEXT: undef [[VCONV_fp32_bf16_mv_ups_xbf:%[0-9]+]].sub_1024_acc_lo:acc2048 = VCONV_fp32_bf16_mv_ups_xbf [[VBCST_16_]]
84+
; CHECK-NEXT: [[VCONV_fp32_bf16_mv_ups_xbf:%[0-9]+]]:ecml = VCONV_fp32_bf16_mv_ups_xbf [[VBCST_16_]]
8585
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0
8686
; CHECK-NEXT: {{ $}}
8787
; CHECK-NEXT: bb.1:
8888
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
8989
; CHECK-NEXT: {{ $}}
90-
; CHECK-NEXT: dead [[VADD_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_vmac_cm2_add_reg [[VCONV_fp32_bf16_mv_ups_xbf]], [[VCONV_fp32_bf16_mv_ups_xbf]], [[MOV_RLC_imm11_pseudo1]]
90+
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_1024_acc_lo:acc2048 = COPY [[VCONV_fp32_bf16_mv_ups_xbf]]
91+
; CHECK-NEXT: dead [[VADD_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_vmac_cm2_add_reg [[COPY]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
9192
; CHECK-NEXT: PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
9293
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
9394
; CHECK-NEXT: {{ $}}

0 commit comments

Comments
 (0)