Skip to content

Commit

Permalink
[AIE] Include 'Latest' in postpipeliner heuristic
Browse files Browse the repository at this point in the history
Compute Latest relative to last cycle of last Stage (using negative values)
  • Loading branch information
Martien de Jong committed Nov 18, 2024
1 parent 1a01a80 commit 6659147
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 118 deletions.
58 changes: 38 additions & 20 deletions llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,29 +190,35 @@ void PostPipeliner::computeLoopCarriedParameters() {
}

// Propagate Earliest upstream, initialize Latest
// Unrestricted: last cycle of last stage
const int Latest = NCopies * II - 1;
for (int K = 0; K < NInstr; K++) {
const int K2 = K + NInstr;
const int Earliest = Info[K2].Earliest - II;
Info[K].Earliest = std::max(Info[K].Earliest, Earliest);
// Unrestricted: Beyond the last stage.
Info[K].Latest = NCopies * II;
Info[K].Latest = Latest;
Info[K2].Latest = Latest;
}
// Propagate Latest upstream. Latest is the latest
// that is admissible for Earliest to be achievable within II
for (int K = 0; K < NInstr; K++) {
const int K2 = K + NInstr;
const int Earliest = Info[K2].Earliest;
const auto &SU = DAG->SUnits[K2];
for (auto &Dep : SU.Preds) {
const auto *Pred = Dep.getSUnit();
// Any predecessor in the first iteration
int K1 = Pred->NodeNum;
if (K1 < NInstr) {
const int Latest = Earliest - Dep.getSignedLatency();
Info[K1].Latest = std::min(Info[K1].Latest, Latest);

// Compute Latest. Use a fixpoint loop, because plain reversed
// order may not be topological for predecessors
bool Changed = true;
while (Changed) {
Changed = false;
for (int K = NInstr - 1; K >= 0; K--) {
SUnit &SU = DAG->SUnits[K];
const int Latest = Info[K].Latest;
for (auto &Dep : SU.Preds) {
int P = Dep.getSUnit()->NodeNum;
int NewLatest = Latest - Dep.getSignedLatency();
if (NewLatest < Info[P].Latest) {
Info[P].Latest = NewLatest;
Changed = true;
}
}
}
}

LLVM_DEBUG(for (int K = 0; K < NInstr; K++) {
dbgs() << "SU" << K << " : " << Info[K].Earliest << " - " << Info[K].Latest
<< "\n";
Expand All @@ -238,7 +244,12 @@ void dumpGraph(int NInstr, const std::vector<NodeInfo> &Info,
if (S >= NInstr) {
dbgs() << "_" << S % NInstr;
}
dbgs() << "# L=" << Dep.getSignedLatency() << "\n";

dbgs() << " # L=" << Dep.getSignedLatency();
if (Dep.getKind() == SDep::Output) {
dbgs() << " WAW";
}
dbgs() << "\n";
}
}
dbgs() << "}\n";
Expand All @@ -251,20 +262,27 @@ int PostPipeliner::mostUrgent() {
}
assert(FirstUnscheduled < NInstr);

// 'Latest' accounts for the critical path of the linear schedule
auto Better = [this](int N, int Ref) {
if (Info[N].Latest < Info[Ref].Latest) {
return true;
}

return false;
};

int Best = -1;
LLVM_DEBUG(dbgs() << "Available:");
for (int K = FirstUnscheduled; K < NInstr; K++) {
const auto &SU = DAG->SUnits[K];
// Check whether it is available
if (any_of(SU.Preds, [&](const auto &Dep) {
if (Info[K].Scheduled || any_of(SU.Preds, [&](const auto &Dep) {
return !Info[Dep.getSUnit()->NodeNum].Scheduled;
})) {
continue;
}
LLVM_DEBUG(dbgs() << " SU" << K);
// Yeah, I know. This is a difficult way to schedule in the original
// node order. Have patience, my friend.
if (Best == -1) {
if (Best == -1 || Better(K, Best)) {
Best = K;
LLVM_DEBUG(dbgs() << "*");
}
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d.mir
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,10 @@
; CHECK-NEXT: vldb wh6, [p0], m6
; CHECK-NEXT: vldb wl8, [p0], m6; add r0, r5, #33; vshift.align x2, x2, s1, x8, r0
; CHECK-NEXT: vldb.3d wh8, [p0], d0; vshuffle x9, x4, x2, r2
; CHECK-NEXT: vldb wl10, [p1], #32; vshuffle x1, x9, x0, r8
; CHECK-NEXT: vldb wh10, [p1], #32; vshuffle x3, x4, x2, r3; vmac cm1, cm1, x9, x10, r4
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm2, cm2, x1, x10, r4
; CHECK-NEXT: vldb wh7, [p1], #32; mov r6, p0; vmac cm3, cm3, x3, x10, r4
; CHECK-NEXT: vldb wl10, [p1], #32; vshuffle x3, x4, x2, r3
; CHECK-NEXT: vldb wh10, [p1], #32; vshuffle x1, x9, x0, r8; vmac cm1, cm1, x9, x10, r4
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm3, cm3, x3, x10, r4
; CHECK-NEXT: vldb wh7, [p1], #32; mov r6, p0; vmac cm2, cm2, x1, x10, r4
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %inner.loop
; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
Expand All @@ -115,21 +115,21 @@
; CHECK-NEXT: nopa ; vldb wh6, [p0], m6; nopx ; vmac cm5, cm5, x9, x7, r4
; CHECK-NEXT: vldb wl8, [p0], m6; add r0, r5, #33; vshift.align x2, x2, s1, x8, r0; vmac cm6, cm6, x1, x7, r4
; CHECK-NEXT: vldb.3d wh8, [p0], d0; vshuffle x9, x4, x2, r2; vmac cm7, cm7, x3, x7, r4
; CHECK-NEXT: vldb wl10, [p1], #32; vshuffle x1, x9, x0, r8; vmac cm0, cm0, x5, x7, r4
; CHECK-NEXT: vldb wh10, [p1], #32; vshuffle x3, x4, x2, r3; vmac cm1, cm1, x9, x10, r4
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm2, cm2, x1, x10, r4
; CHECK-NEXT: vldb wl10, [p1], #32; vshuffle x3, x4, x2, r3; vmac cm0, cm0, x5, x7, r4
; CHECK-NEXT: vldb wh10, [p1], #32; vshuffle x1, x9, x0, r8; vmac cm1, cm1, x9, x10, r4
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm3, cm3, x3, x10, r4
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: vldb wh7, [p1], #32; nopa ; nops ; nopx ; mov r6, p0; vmac cm3, cm3, x3, x10, r4
; CHECK-NEXT: vldb wh7, [p1], #32; nopa ; nops ; nopx ; mov r6, p0; vmac cm2, cm2, x1, x10, r4
; CHECK-NEXT: // %bb.3: // %outer.loop.latch
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: nopa ; and r5, r6, r9; vshift.align x4, x4, s1, x6, r0; vmac cm4, cm4, x5, x10, r4
; CHECK-NEXT: vmac cm5, cm5, x9, x7, r4
; CHECK-NEXT: add r0, r5, #33; vshift.align x2, x2, s1, x8, r0; vmac cm6, cm6, x1, x7, r4
; CHECK-NEXT: vshuffle x9, x4, x2, r2; vmac cm7, cm7, x3, x7, r4
; CHECK-NEXT: vshuffle x1, x9, x0, r8; vmac cm0, cm0, x5, x7, r4
; CHECK-NEXT: vshuffle x3, x4, x2, r3; vmac cm1, cm1, x9, x10, r4
; CHECK-NEXT: vshuffle x5, x3, x0, r8; vmac cm2, cm2, x1, x10, r4
; CHECK-NEXT: vmac cm3, cm3, x3, x10, r4
; CHECK-NEXT: vshuffle x3, x4, x2, r3; vmac cm0, cm0, x5, x7, r4
; CHECK-NEXT: vshuffle x1, x9, x0, r8; vmac cm1, cm1, x9, x10, r4
; CHECK-NEXT: vshuffle x5, x3, x0, r8; vmac cm3, cm3, x3, x10, r4
; CHECK-NEXT: vmac cm2, cm2, x1, x10, r4
; CHECK-NEXT: vmac cm4, cm4, x5, x10, r4
; CHECK-NEXT: vmac cm5, cm5, x9, x7, r4
; CHECK-NEXT: vmac cm6, cm6, x1, x7, r4
Expand Down
45 changes: 21 additions & 24 deletions llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,20 @@
; CHECK-NEXT: vldb wl1, [p0], m4; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: nopa ; vldb wh3, [p0, #32]; nopx
; CHECK-NEXT: vldb.3d wl3, [p0], d1
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vlda wl7, [p4, #320]
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vlda wl9, [p4, #384]
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vshuffle x10, x4, x6, r25
; CHECK-NEXT: vshuffle x1, x3, x5, r13
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: mov r3, p0
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vshuffle x10, x4, x6, r25
; CHECK-NEXT: mov r3, p0; vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29
Expand All @@ -66,28 +65,27 @@
; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh6, bmh6, x8, x11, r29
; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29
; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml1, bml1, x3, x11, r29
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vlda wl7, [p4, #320]
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vlda wl9, [p4, #384]
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vshuffle x10, x4, x6, r25
; CHECK-NEXT: vshuffle x1, x3, x5, r13
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: mov r3, p0
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vshuffle x10, x4, x6, r25
; CHECK-NEXT: mov r3, p0; vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh3, bmh3, x3, x9, r29
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
; CHECK-NEXT: nopa ; nopb ; nopx ; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: nopa ; nopb ; nopxm ; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: vmac.f bml4, bml4, x8, x7, r29
; CHECK-NEXT: vmac.f bml3, bml3, x1, x7, r29
; CHECK-NEXT: vmac.f bml6, bml6, x3, x7, r29
Expand All @@ -113,7 +111,6 @@
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup
; CHECK-NEXT: nopa ; ret lr
Expand Down
37 changes: 17 additions & 20 deletions llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16.mir
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,15 @@
; CHECK-NEXT: vldb wh7, [p7, #32]; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vlda wl7, [p7], #256; paddb [p4], #320; mov r1, p0
; CHECK-NEXT: and r2, r1, r0; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh5, [p2, #352]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vldb wh9, [p4, #32]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x10, x4, x6, r25; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vldb wh9, [p4, #32]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wh5, [p2, #352]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: mov p2, p5; vmac.f bmh5, bmh5, x1, x7, r29
; CHECK-NEXT: vmac.f bml2, bml2, x3, x7, r29
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x7, r29
; CHECK-NEXT: mov p2, p5; vmac.f bml2, bml2, x3, x7, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x7, r29
; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29
Expand All @@ -67,16 +66,15 @@
; CHECK-NEXT: vldb wh7, [p7, #32]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vlda wl7, [p7], #256; paddb [p4], #320; mov r1, p0
; CHECK-NEXT: and r2, r1, r0; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh5, [p2, #352]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vldb wh9, [p4, #32]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x10, x4, x6, r25; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vldb wh9, [p4, #32]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wh5, [p2, #352]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: mov p2, p5; vmac.f bmh5, bmh5, x1, x7, r29
; CHECK-NEXT: vmac.f bml2, bml2, x3, x7, r29
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x7, r29
; CHECK-NEXT: mov p2, p5; vmac.f bml2, bml2, x3, x7, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x7, r29
; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29
Expand All @@ -92,8 +90,7 @@
; CHECK-NEXT: vmac.f bmh4, bmh4, x1, x11, r29
; CHECK-NEXT: vmac.f bml1, bml1, x3, x11, r29
; CHECK-NEXT: vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nopx
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
Expand Down
Loading

0 comments on commit 6659147

Please sign in to comment.