Skip to content

Commit

Permalink
[AIE] more heuristics and an option to select a specific one
Browse files Browse the repository at this point in the history
rework interfaces to take SUnits as parameters; supply DAG and Info
in constructors
  • Loading branch information
Martien de Jong committed Nov 18, 2024
1 parent 8f03d3e commit 9f3e514
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 45 deletions.
120 changes: 93 additions & 27 deletions llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@

namespace llvm::AIE {

static cl::opt<int>
Heuristic("aie-postpipeliner-heuristic",
cl::desc("Select one specific post-pipeliner heuristic"),
cl::init(-1), cl::Hidden);

PipelineScheduleVisitor::~PipelineScheduleVisitor() {}

class PostPipelineDumper : public PipelineScheduleVisitor {
Expand Down Expand Up @@ -301,7 +306,7 @@ bool PostPipeliner::computeBackward() {
return Changed;
}

void PostPipeliner::computeLoopCarriedParameters() {
bool PostPipeliner::computeLoopCarriedParameters() {

// Forward properties like Earliest and Ancestors.
computeForward();
Expand Down Expand Up @@ -336,7 +341,7 @@ void PostPipeliner::computeLoopCarriedParameters() {
}

// Loop carried dependences will have pushed away Earliest of the second
// iteration, which should stay in lock step with the first
// iteration, which should stay in lock step with the first.
for (int K = 0; K < NInstr; K++) {
const int K2 = K + NInstr;
const int Earliest = Info[K2].Earliest - II;
Expand Down Expand Up @@ -368,6 +373,7 @@ void PostPipeliner::computeLoopCarriedParameters() {
N.StaticEarliest = N.Earliest;
N.StaticLatest = N.Latest;
}
return true;
}

void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info,
Expand All @@ -392,11 +398,14 @@ void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info,
if (S >= NInstr) {
dbgs() << "_" << S % NInstr;
}

dbgs() << " # L=" << Dep.getSignedLatency();
if (Dep.getKind() == SDep::Output) {
dbgs() << " WAW";
dbgs() << " [color=blue]";
} else if (Dep.getKind() == SDep::Anti) {
dbgs() << " [color=black]";
} else {
dbgs() << " [color=red]";
}
dbgs() << " # L=" << Dep.getSignedLatency();
dbgs() << "\n";
}
}
Expand Down Expand Up @@ -432,7 +441,7 @@ int PostPipeliner::mostUrgent(PostPipelinerStrategy &Strategy) {
continue;
}
LLVM_DEBUG(dbgs() << " SU" << K);
if (Best == -1 || Strategy.better(Info[K], Info[Best])) {
if (Best == -1 || Strategy.better(SU, DAG->SUnits[Best])) {
Best = K;
LLVM_DEBUG(dbgs() << "*");
}
Expand Down Expand Up @@ -468,8 +477,8 @@ bool PostPipeliner::scheduleFirstIteration(PostPipelinerStrategy &Strategy) {
LLVM_DEBUG(dbgs() << " Trying " << N << "\n");
SUnit &SU = DAG->SUnits[N];
MachineInstr *MI = SU.getInstr();
const int Earliest = Strategy.earliest(Info[N]);
const int Latest = Strategy.latest(Info[N]);
const int Earliest = Strategy.earliest(SU);
const int Latest = Strategy.latest(SU);
// Find the first cycle that fits. We try every position modulo II
const int Actual = Strategy.fromTop() ? fit(MI, Earliest, Latest, II)
: fit(MI, Latest, Earliest, II);
Expand All @@ -478,7 +487,7 @@ bool PostPipeliner::scheduleFirstIteration(PostPipelinerStrategy &Strategy) {
LLVM_DEBUG(dbgs() << "Out of resources\n");
return false;
}
Strategy.selected(Info[N]);
Strategy.selected(SU);
const int LocalCycle = Actual % II;
const MemoryBankBits MemoryBanks = HR.getMemoryBanks(MI);
int Cycle = -Depth + LocalCycle;
Expand Down Expand Up @@ -533,14 +542,17 @@ PostPipelinerStrategy::~PostPipelinerStrategy() {}

class ConfigStrategy : public PostPipelinerStrategy {
public:
enum class Modulation { OneWay, Alternate, RandomSwing };
enum class Modulation { OneWay, Alternate, Split, RandomSwing };

private:
Modulation Mode = Modulation::OneWay;
bool FromTop = true;

uint64_t Param = 0;
uint64_t RandomState;
int Count = 0;

std::set<int> SuccSiblingScheduled;

// Most trivial seeds lead to boring initial sequences,
// so spice them up a bit. The multiplication spreads the bits across the word
Expand All @@ -556,23 +568,51 @@ class ConfigStrategy : public PostPipelinerStrategy {
return RandomState & 1;
}

bool better(const NodeInfo &A, const NodeInfo &B) override {
bool better(const SUnit &A, const SUnit &B) override {
auto &IA = Info[A.NodeNum];
auto &IB = Info[B.NodeNum];
if (FromTop && SuccSiblingScheduled.count(A.NodeNum) >
SuccSiblingScheduled.count(B.NodeNum)) {
return true;
}
if (FromTop) {
return A.LCDLatest < B.LCDLatest;
return IA.LCDLatest < IB.LCDLatest;
}

return A.Earliest > B.Earliest;
return IA.Earliest > IB.Earliest;
}
bool fromTop() override { return FromTop; }
void selected(NodeInfo &N) override {
void selected(const SUnit &N) override {
switch (Mode) {
case Modulation::Alternate:
FromTop = !FromTop;
break;
case Modulation::Split:
if (Count) {
Count--;
if (!Count) {
FromTop = !FromTop;
}
}
break;
case Modulation::RandomSwing:
FromTop = randomBit();
break;
case Modulation::OneWay:
// Promote my siblings
if (FromTop) {
for (auto &SDep : N.Succs) {
if (SDep.getKind() != SDep::Data) {
continue;
}
for (auto &PDep : SDep.getSUnit()->Preds) {
if (PDep.getKind() != SDep::Data) {
continue;
}
SuccSiblingScheduled.insert(PDep.getSUnit()->NodeNum);
}
}
}
break;
}
}
Expand All @@ -583,38 +623,64 @@ class ConfigStrategy : public PostPipelinerStrategy {
std::to_string(int(Mode)) + "_" + std::to_string(FromTop) + "_" +
std::to_string(Param);
}
ConfigStrategy(int Length, enum Modulation Mode, bool FromTop, uint64_t Param)
: PostPipelinerStrategy(Length), Mode(Mode), FromTop(FromTop),
RandomState(spice(Param)) {}
ConfigStrategy(ScheduleDAGInstrs &DAG, std::vector<NodeInfo> &Info,
int Length, enum Modulation Mode, bool FromTop, uint64_t Param)
: PostPipelinerStrategy(DAG, Info, Length), Mode(Mode), FromTop(FromTop),
Param(Param), RandomState(spice(Param)) {
if (Mode == Modulation::Split) {
Count = Length * Param / 1000;
}
}
};

bool PostPipeliner::tryHeuristics() {
// The minimum length makes sure that every node has a range in which it
// can be scheduled
int MinLength = II;
for (auto &Node : Info) {
while (Node.Latest + MinLength < Node.Earliest) {
for (int K = 0; K < NInstr; K++) {
auto &Node = Info[K];
while (Node.Earliest > Node.Latest + MinLength) {
MinLength += II;
}
}
int64_t SearchVolume = 1;
const int64_t Limit = int64_t(1) << 50; // 2^50, roughly a quadrillion (~1.1e15)
for (int K = 0; K < NInstr && SearchVolume < Limit; K++) {
auto &Node = Info[K];
SearchVolume *= Node.Latest + MinLength - Node.Earliest + 1;
}

LLVM_SUMMARY(dbgs() << "-- MinLength=" << MinLength
<< " SearchVolume=" << SearchVolume << "\n");

constexpr auto OneWay = ConfigStrategy::Modulation::OneWay;
constexpr auto Alternate = ConfigStrategy::Modulation::Alternate;
constexpr auto RandomSwing = ConfigStrategy::Modulation::RandomSwing;
static const std::tuple<bool, ConfigStrategy::Modulation, bool, uint64_t>
constexpr auto Split = ConfigStrategy::Modulation::Split;
static const std::tuple<int, ConfigStrategy::Modulation, bool, uint64_t>
Configs[] = {
{false, OneWay, true, 0},
{false, OneWay, false, 0},
{false, Alternate, false, 0},
{false, RandomSwing, false, 1},
{0, OneWay, true, 0}, {0, OneWay, false, 0},
{1, OneWay, true, 0}, {1, OneWay, false, 0},
{0, Split, true, 300}, {0, Split, true, 600},
{0, Split, false, 300}, {0, Split, false, 600},
{1, Alternate, false, 0}, {1, OneWay, true, 0},
{1, OneWay, false, 0}, {1, Alternate, false, 0},

};
for (auto &[UseMin, Mode, FromTop, Param] : Configs) {
ConfigStrategy S(UseMin ? MinLength : NCopies * II, Mode, FromTop, Param);
int H = 0;
for (auto &[Extra, Mode, FromTop, Param] : Configs) {
if (Heuristic >= 0 && Heuristic != H) {
H++;
continue;
}
ConfigStrategy S(*DAG, Info, MinLength + Extra * II, Mode, FromTop, Param);
resetSchedule();
LLVM_SUMMARY(dbgs() << "--- Strategy " << S.name());
if (scheduleFirstIteration(S) && scheduleOtherIterations()) {
LLVM_SUMMARY(dbgs() << " found II=" << II << "\n");
return true;
}
LLVM_SUMMARY(dbgs() << " failed\n");
H++;
}
LLVM_SUMMARY(dbgs() << "=== II=" << II << " Failed ===\n");
return false;
Expand Down
20 changes: 13 additions & 7 deletions llvm/lib/Target/AIE/AIEPostPipeliner.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,27 @@ class NodeInfo {

class PostPipelinerStrategy {
protected:
ScheduleDAGInstrs &DAG;
std::vector<NodeInfo> &Info;
int LatestBias = 0;

public:
PostPipelinerStrategy(int LatestBias) : LatestBias(LatestBias){};
PostPipelinerStrategy(ScheduleDAGInstrs &DAG, std::vector<NodeInfo> &Info,
int LatestBias)
: DAG(DAG), Info(Info), LatestBias(LatestBias){};
virtual ~PostPipelinerStrategy();
virtual std::string name() { return "PostPipelinerStrategy"; }
// Choose among available alternatives
virtual bool better(const NodeInfo &A, const NodeInfo &B) { return false; }
virtual bool better(const SUnit &A, const SUnit &B) { return false; }
// Tweak the effective earliest
virtual int earliest(const NodeInfo &N) { return N.Earliest; }
virtual int earliest(const SUnit &N) { return Info[N.NodeNum].Earliest; }
// Select from top or from bottom.
virtual int latest(const NodeInfo &N) { return N.Latest + LatestBias; }
virtual int latest(const SUnit &N) {
return Info[N.NodeNum].Latest + LatestBias;
}
// Report a final selection. This marks the start of selecting a new node.
// fromTop() should be invariant between calls to selected()
virtual void selected(NodeInfo &N){};
virtual void selected(const SUnit &N){};
virtual bool fromTop() { return true; }
};

Expand Down Expand Up @@ -179,8 +185,8 @@ class PostPipeliner {
int fit(MachineInstr *MI, int Earliest, int NTries, int II);

/// Provide some look ahead by seeing the effect of the first iteration
/// on the second iteration.
void computeLoopCarriedParameters();
/// on the second iteration. May return false if the II isn't feasible.
bool computeLoopCarriedParameters();

/// Helpers of computeLoopCarriedParameters()
void computeForward();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldb wl8, [p0], #32; vlda wl11, [p1], m5; nops ; nopx ; vshuffle x9, x1, x3, r16; vmac.f bmh5, bmh5, x2, x5, r2
; CHECK-NEXT: vlda wh11, [p1], m6; vldb wh8, [p0], #32; nopx ; vshuffle x3, x11, x11, r6; vmac.f bmh6, bmh6, x10, x5, r2
; CHECK-NEXT: vlda wl5, [p1], m5; vldb wl1, [p0], #32; vshuffle x0, x0, x0, r6; vmac.f bmh7, bmh7, x9, x5, r2
; CHECK-NEXT: vlda wh5, [p1], m6; vldb wh1, [p0], #32; vshuffle x7, x7, x7, r6; vmac.f bmh0, bmh0, x6, x3, r2
; CHECK-NEXT: paddb [p0], m4; vmac.f bmh1, bmh1, x2, x3, r2
; CHECK-NEXT: vldb wl0, [p0], #32; vmac.f bmh2, bmh2, x10, x3, r2
; CHECK-NEXT: vldb wh0, [p0], #32; vmac.f bmh3, bmh3, x9, x3, r2
; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh8, bmh8, x6, x0, r2
; CHECK-NEXT: vlda wh11, [p1], m6; vldb wh8, [p0], #32; nopx ; vshuffle x0, x0, x0, r6; vmac.f bmh6, bmh6, x10, x5, r2
; CHECK-NEXT: vlda wl5, [p1], m5; vldb wl1, [p0], #32; vshuffle x3, x11, x11, r6; vmac.f bmh7, bmh7, x9, x5, r2
; CHECK-NEXT: vlda wh5, [p1], m6; vldb wh1, [p0], #32; vshuffle x7, x7, x7, r6; vmac.f bmh8, bmh8, x6, x0, r2
; CHECK-NEXT: paddb [p0], m4; vmac.f bmh0, bmh0, x6, x3, r2
; CHECK-NEXT: vldb wl0, [p0], #32; vmac.f bmh1, bmh1, x2, x3, r2
; CHECK-NEXT: vldb wh0, [p0], #32; vmac.f bmh2, bmh2, x10, x3, r2
; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh3, bmh3, x9, x3, r2
; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bml0, x2, x0, r2
; CHECK-NEXT: vldb wl0, [p1], m5; vmac.f bml1, bml1, x10, x0, r2
; CHECK-NEXT: vldb wh0, [p1], m6; vshuffle x5, x5, x5, r6; vmac.f bml2, bml2, x9, x0, r2
Expand All @@ -66,13 +66,13 @@
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x10, x1, x3, r3; vmac.f bmh4, bmh4, x6, x5, r2
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
; CHECK-NEXT: nopx ; vshuffle x9, x1, x3, r16; vmac.f bmh5, bmh5, x2, x5, r2
; CHECK-NEXT: vshuffle x3, x11, x11, r6; vmac.f bmh6, bmh6, x10, x5, r2
; CHECK-NEXT: vshuffle x0, x0, x0, r6; vmac.f bmh7, bmh7, x9, x5, r2
; CHECK-NEXT: vshuffle x7, x7, x7, r6; vmac.f bmh0, bmh0, x6, x3, r2
; CHECK-NEXT: vshuffle x0, x0, x0, r6; vmac.f bmh6, bmh6, x10, x5, r2
; CHECK-NEXT: vshuffle x3, x11, x11, r6; vmac.f bmh7, bmh7, x9, x5, r2
; CHECK-NEXT: vshuffle x7, x7, x7, r6; vmac.f bmh8, bmh8, x6, x0, r2
; CHECK-NEXT: vmac.f bmh0, bmh0, x6, x3, r2
; CHECK-NEXT: vmac.f bmh1, bmh1, x2, x3, r2
; CHECK-NEXT: vmac.f bmh2, bmh2, x10, x3, r2
; CHECK-NEXT: vmac.f bmh3, bmh3, x9, x3, r2
; CHECK-NEXT: vmac.f bmh8, bmh8, x6, x0, r2
; CHECK-NEXT: vmac.f bml0, bml0, x2, x0, r2
; CHECK-NEXT: vmac.f bml1, bml1, x10, x0, r2
; CHECK-NEXT: vmac.f bml2, bml2, x9, x0, r2
Expand Down

0 comments on commit 9f3e514

Please sign in to comment.