Skip to content

Commit

Permalink
[AIE] Implement bottom up scheduling with symmetric priority components
Browse files Browse the repository at this point in the history
  • Loading branch information
Martien de Jong committed Dec 9, 2024
1 parent e3ac9a9 commit 5024c32
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 36 deletions.
56 changes: 42 additions & 14 deletions llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@ class DefaultStrategy : public PostPipelinerStrategy {
};

class ConfigStrategy : public PostPipelinerStrategy {
bool TopDown = true;

public:
enum PriorityComponent {
NodeNum,
Expand Down Expand Up @@ -552,22 +554,27 @@ class ConfigStrategy : public PostPipelinerStrategy {
private:
std::string Name;
std::set<int> SuccSiblingScheduled;
std::set<int> PredSiblingScheduled;
std::function<bool(const SUnit &A, const SUnit &B)>
Discriminators[PriorityComponent::Size] = {
[&](const SUnit &A, const SUnit &B) { return A.NodeNum < B.NodeNum; },
[&](const SUnit &A, const SUnit &B) {
return TopDown ? A.NodeNum < B.NodeNum : A.NodeNum > B.NodeNum;
},
[&](const SUnit &A, const SUnit &B) {
auto &IA = Info[A.NodeNum];
auto &IB = Info[B.NodeNum];
return IA.Latest < IB.Latest;
return TopDown ? IA.Latest < IB.Latest : IA.Earliest > IB.Earliest;
},
[&](const SUnit &A, const SUnit &B) {
auto &IA = Info[A.NodeNum];
auto &IB = Info[B.NodeNum];
return IA.NumPushedEarliest > IB.NumPushedEarliest;
return TopDown ? IA.NumPushedEarliest > IB.NumPushedEarliest
: IA.NumPushedLatest > IB.NumPushedLatest;
},
[&](const SUnit &A, const SUnit &B) {
return SuccSiblingScheduled.count(A.NodeNum) >
SuccSiblingScheduled.count(B.NodeNum);
std::set<int> &Sibling =
TopDown ? SuccSiblingScheduled : PredSiblingScheduled;
return Sibling.count(A.NodeNum) > Sibling.count(B.NodeNum);
},
[&](const SUnit &A, const SUnit &B) {
auto &IA = Info[A.NodeNum];
Expand All @@ -577,6 +584,8 @@ class ConfigStrategy : public PostPipelinerStrategy {
};
std::vector<PriorityComponent> Priority;

// Scheduling direction of this strategy: true for top down, false for bottom
// up.
bool fromTop() override {
  return TopDown;
}

bool better(const SUnit &A, const SUnit &B) override {
for (auto P : Priority) {
if (Discriminators[P](A, B)) {
Expand Down Expand Up @@ -606,14 +615,26 @@ class ConfigStrategy : public PostPipelinerStrategy {
SuccSiblingScheduled.insert(PDep.getSUnit()->NodeNum);
}
}
// Record the nodes that share a data predecessor with N: for every data
// predecessor of N, insert each of that predecessor's data successors into
// PredSiblingScheduled. The set is consulted by the sibling discriminator
// when scheduling bottom up (TopDown == false).
for (auto &PDep : N.Preds) {
  if (PDep.getKind() != SDep::Data) {
    continue;
  }
  // The loop variable is deliberately not called SDep, to avoid shadowing the
  // SDep type that is used for the dependence-kind comparison.
  for (auto &SiblingDep : PDep.getSUnit()->Succs) {
    if (SiblingDep.getKind() != SDep::Data) {
      continue;
    }
    // Insert the successor (the sibling), not the shared predecessor itself.
    PredSiblingScheduled.insert(SiblingDep.getSUnit()->NodeNum);
  }
}
}

public:
// Name of this strategy, as composed by the constructor.
std::string name() override {
  return Name;
}
ConfigStrategy(ScheduleDAGInstrs &DAG, std::vector<NodeInfo> &Info,
int Length, ArrayRef<PriorityComponent> Components)
: PostPipelinerStrategy(DAG, Info, Length) {
Name = "Config_" + std::to_string(Length);
int Length, bool TopDown,
ArrayRef<PriorityComponent> Components)
: PostPipelinerStrategy(DAG, Info, Length), TopDown(TopDown) {
Name = "Config_" + std::to_string(Length) + std::to_string(TopDown);
for (auto Comp : Components) {
Name += "_" + getPriorityName(Comp);
Priority.emplace_back(Comp);
Expand All @@ -623,15 +644,21 @@ class ConfigStrategy : public PostPipelinerStrategy {

static const struct {
int ExtraStages;
bool TopDown;
bool Rerun;
ConfigStrategy::PriorityComponent Components[3];
} Strategies[] = {
// Loosely speaking, a lower value of the first parameter targets
// a lower stage count, which benefits code size.
{1, false, {ConfigStrategy::NodeNum}},
{1, false, {ConfigStrategy::Latest}},
{1, true, {ConfigStrategy::Critical}},
{1, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
// Rerun is only useful for heuristics that use it, e.g. Critical
{1, true, false, {ConfigStrategy::NodeNum}},
{1, true, false, {ConfigStrategy::Latest}},
{1, true, true, {ConfigStrategy::Critical}},
{1, true, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
{0, false, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
{1, false, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},
// This is pure bottom up
{1, false, false, {ConfigStrategy::NodeNum}},
};

bool PostPipeliner::tryHeuristics() {
Expand All @@ -640,11 +667,12 @@ bool PostPipeliner::tryHeuristics() {
DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n");

int HeuristicIndex = 0;
for (auto &[ExtraStages, Rerun, Components] : Strategies) {
for (auto &[ExtraStages, TopDown, Rerun, Components] : Strategies) {
if (Heuristic >= 0 && Heuristic != HeuristicIndex++) {
continue;
}
ConfigStrategy S(*DAG, Info, MinLength + ExtraStages * II, Components);
ConfigStrategy S(*DAG, Info, MinLength + ExtraStages * II, TopDown,
Components);
resetSchedule(/*FullReset=*/true);
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name());
if (scheduleFirstIteration(S) && scheduleOtherIterations()) {
Expand Down
45 changes: 23 additions & 22 deletions llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/round.mir
Original file line number Diff line number Diff line change
Expand Up @@ -34,46 +34,47 @@
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
; CHECK-NEXT: // %bb.1: // %for.body.preheader
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; nops ; nopxm ; nopv
; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32
; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0], #32; nopxm
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32
; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0], #32
; CHECK-NEXT: nop
; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0], #32; add.nc lc, r0, #-3
; CHECK-NEXT: add.nc lc, r0, #-4
; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32; movxm ls, #.LBB0_2
; CHECK-NEXT: movxm le, #.L_LEnd0
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0], #32; movxm le, #.L_LEnd0
; CHECK-NEXT: nopb ; nopa ; vsrs.s8.s32 wh0, cm0, s1; nopxm ; nopv
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1; nopxm ; nopv
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm1, s0, [p0], #32; nops ; nopxm ; nopv
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1; nopxm ; nopv
; CHECK-NEXT: nopb ; nopa ; vsrs.s8.s32 wh0, cm0, s1; nopxm ; nopv
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vups.s32.s8 cm2, wh0, s1; nopv
; CHECK-NEXT: nopb ; nopa ; vsrs.s8.s32 wh0, cm0, s1; nopx ; vups.s32.s8 cm3, wh2, s1; nopv
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm1, s0, [p0], #32; nops ; nopx ; vups.s32.s8 cm2, wh0, s1; nopv
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1; nopxm ; nopv
; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32; nopb ; vst.srs.s8.s32 cm2, s0, [p1], #32
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32
; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1; nopx ; vups.s32.s8 cm3, wh2, s1; nopv
; CHECK-NEXT: nopa ; nopb ; nopx ; vsrs.s8.s32 wh0, cm0, s1
; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: nopb ; nopa ; vsrs.s8.s32 wh0, cm0, s1; nopx ; vups.s32.s8 cm3, wh2, s1; nopv
; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm1, s0, [p0], #32; vst.srs.s8.s32 cm3, s0, [p1], #32; nopx ; vups.s32.s8 cm2, wh0, s1; nopv
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
; CHECK-NEXT: nopa ; vsrs.s8.s32 wh2, cm1, s1; nopx
; CHECK-NEXT: nopb ; nopa ; vsrs.s8.s32 wh2, cm1, s1; nopx ; vups.s32.s8 cm3, wh2, s1; nopv
; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1; nopb ; nopx
; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32
; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1
; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1; vups.s32.s8 cm3, wh2, s1
; CHECK-NEXT: vsrs.s8.s32 wh2, cm1, s1
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32; vups.s32.s8 cm2, wh0, s1
; CHECK-NEXT: vsrs.s8.s32 wh2, cm1, s1; vups.s32.s8 cm3, wh2, s1
; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1
; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32
; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32; vups.s32.s8 cm2, wh0, s1
; CHECK-NEXT: vsrs.s8.s32 wh2, cm1, s1; vups.s32.s8 cm3, wh2, s1
; CHECK-NEXT: nop
; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32; vups.s32.s8 cm2, wh0, s1
; CHECK-NEXT: vups.s32.s8 cm3, wh2, s1
; CHECK-NEXT: nop
; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32
; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup
; CHECK-NEXT: nopa ; ret lr
Expand Down

0 comments on commit 5024c32

Please sign in to comment.