Skip to content

Commit 40ec791

Browse files
authored
[RegAllocFast] Refactor dominates algorithm for large basic block (llvm#72250)
The original brute-force dominates algorithm has O(n) complexity, so it is very slow for very large machine basic blocks, which are very common at O0. This patch adds InstrPosIndexes to assign an index to each instruction and uses it to determine dominance. The complexity is now O(1).
1 parent 5cb7534 commit 40ec791

File tree

1 file changed

+114
-14
lines changed

1 file changed

+114
-14
lines changed

llvm/lib/CodeGen/RegAllocFast.cpp

+114-14
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,107 @@ static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator",
6262

6363
namespace {
6464

65+
/// Assign ascending index for instructions in machine basic block. The index
66+
/// can be used to determine dominance between instructions in same MBB.
67+
class InstrPosIndexes {
68+
public:
69+
void init(const MachineBasicBlock &MBB) {
70+
CurMBB = &MBB;
71+
Instr2PosIndex.clear();
72+
uint64_t LastIndex = 0;
73+
for (const MachineInstr &MI : MBB) {
74+
LastIndex += InstrDist;
75+
Instr2PosIndex[&MI] = LastIndex;
76+
}
77+
}
78+
79+
/// Set \p Index to index of \p MI. If \p MI is new inserted, it try to assign
80+
/// index without affecting existing instruction's index. Return true if all
81+
/// instructions index has been reassigned.
82+
bool getIndex(const MachineInstr &MI, uint64_t &Index) {
83+
assert(MI.getParent() == CurMBB && "MI is not in CurMBB");
84+
if (Instr2PosIndex.count(&MI)) {
85+
Index = Instr2PosIndex[&MI];
86+
return false;
87+
}
88+
89+
// Distance is the number of consecutive unassigned instructions including
90+
// MI. Start is the first instruction of them. End is the next of last
91+
// instruction of them.
92+
// e.g.
93+
// |Instruction| A | B | C | MI | D | E |
94+
// | Index | 1024 | | | | | 2048 |
95+
//
96+
// In this case, B, C, MI, D are unassigned. Distance is 4, Start is B, End
97+
// is E.
98+
unsigned Distance = 1;
99+
MachineBasicBlock::const_iterator Start = MI.getIterator(),
100+
End = std::next(Start);
101+
while (Start != CurMBB->begin() &&
102+
!Instr2PosIndex.count(&*std::prev(Start))) {
103+
--Start;
104+
++Distance;
105+
}
106+
while (End != CurMBB->end() && !Instr2PosIndex.count(&*(End))) {
107+
++End;
108+
++Distance;
109+
}
110+
111+
// LastIndex is initialized to last used index prior to MI or zero.
112+
// In previous example, LastIndex is 1024, EndIndex is 2048;
113+
uint64_t LastIndex =
114+
Start == CurMBB->begin() ? 0 : Instr2PosIndex.at(&*std::prev(Start));
115+
uint64_t Step;
116+
if (End == CurMBB->end())
117+
Step = static_cast<uint64_t>(InstrDist);
118+
else {
119+
// No instruction uses index zero.
120+
uint64_t EndIndex = Instr2PosIndex.at(&*End);
121+
assert(EndIndex > LastIndex && "Index must be ascending order");
122+
unsigned NumAvailableIndexes = EndIndex - LastIndex - 1;
123+
// We want index gap between two adjacent MI is as same as possible. Given
124+
// total A available indexes, D is number of consecutive unassigned
125+
// instructions, S is the step.
126+
// |<- S-1 -> MI <- S-1 -> MI <- A-S*D ->|
127+
// There're S-1 available indexes between unassigned instruction and its
128+
// predecessor. There're A-S*D available indexes between the last
129+
// unassigned instruction and its successor.
130+
// Ideally, we want
131+
// S-1 = A-S*D
132+
// then
133+
// S = (A+1)/(D+1)
134+
// An valid S must be integer greater than zero, so
135+
// S <= (A+1)/(D+1)
136+
// =>
137+
// A-S*D >= 0
138+
// That means we can safely use (A+1)/(D+1) as step.
139+
// In previous example, Step is 204, Index of B, C, MI, D is 1228, 1432,
140+
// 1636, 1840.
141+
Step = (NumAvailableIndexes + 1) / (Distance + 1);
142+
}
143+
144+
// Reassign index for all instructions if number of new inserted
145+
// instructions exceed slot or all instructions are new.
146+
if (LLVM_UNLIKELY(!Step || (!LastIndex && Step == InstrDist))) {
147+
init(*CurMBB);
148+
Index = Instr2PosIndex.at(&MI);
149+
return true;
150+
}
151+
152+
for (auto I = Start; I != End; ++I) {
153+
LastIndex += Step;
154+
Instr2PosIndex[&*I] = LastIndex;
155+
}
156+
Index = Instr2PosIndex.at(&MI);
157+
return false;
158+
}
159+
160+
private:
161+
enum { InstrDist = 1024 };
162+
const MachineBasicBlock *CurMBB = nullptr;
163+
DenseMap<const MachineInstr *, uint64_t> Instr2PosIndex;
164+
};
165+
65166
class RegAllocFast : public MachineFunctionPass {
66167
public:
67168
static char ID;
@@ -153,6 +254,9 @@ class RegAllocFast : public MachineFunctionPass {
153254
// Register masks attached to the current instruction.
154255
SmallVector<const uint32_t *> RegMasks;
155256

257+
// Assign index for each instruction to quickly determine dominance.
258+
InstrPosIndexes PosIndexes;
259+
156260
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
157261
bool isPhysRegFree(MCPhysReg PhysReg) const;
158262

@@ -339,18 +443,13 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
339443
return FrameIdx;
340444
}
341445

342-
static bool dominates(MachineBasicBlock &MBB,
343-
MachineBasicBlock::const_iterator A,
344-
MachineBasicBlock::const_iterator B) {
345-
auto MBBEnd = MBB.end();
346-
if (B == MBBEnd)
347-
return true;
348-
349-
MachineBasicBlock::const_iterator I = MBB.begin();
350-
for (; &*I != A && &*I != B; ++I)
351-
;
352-
353-
return &*I == A;
446+
/// Return true if the position index of \p A is smaller than that of \p B,
/// i.e. \p A comes before \p B in the block tracked by \p PosIndexes.
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A,
                      const MachineInstr &B) {
  uint64_t PosA = 0;
  uint64_t PosB = 0;
  PosIndexes.getIndex(A, PosA);
  const bool Renumbered = PosIndexes.getIndex(B, PosB);
  // Looking up B may have reassigned every index in the block, which makes
  // the value already fetched for A stale; fetch it again in that case.
  if (LLVM_UNLIKELY(Renumbered))
    PosIndexes.getIndex(A, PosA);
  return PosA < PosB;
}
355454

356455
/// Returns false if \p VirtReg is known to not live out of the current block.
@@ -371,7 +470,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
371470
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
372471
return true;
373472
} else {
374-
if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
473+
if (!SelfLoopDef || dominates(PosIndexes, DefInst, *SelfLoopDef))
375474
SelfLoopDef = &DefInst;
376475
}
377476
}
@@ -396,7 +495,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
396495
// Try to handle some simple cases to avoid spilling and reloading every
397496
// value inside a self looping block.
398497
if (SelfLoopDef == &UseInst ||
399-
!dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
498+
!dominates(PosIndexes, *SelfLoopDef, UseInst)) {
400499
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
401500
return true;
402501
}
@@ -1565,6 +1664,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
15651664
this->MBB = &MBB;
15661665
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
15671666

1667+
PosIndexes.init(MBB);
15681668
RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
15691669
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
15701670

0 commit comments

Comments
 (0)