Skip to content

Commit

Permalink
[AIE] postprocess DAG and add sticky-waw eliminator
Browse files Browse the repository at this point in the history
  • Loading branch information
Martien de Jong committed Nov 18, 2024
1 parent 6659147 commit b4ddb6e
Show file tree
Hide file tree
Showing 10 changed files with 228 additions and 207 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,8 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) {
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
Mutations.emplace_back(std::make_unique<LockDelays>());
if (!TT.isAIE1()) {
if (EnableWAWStickyRegisters)
Mutations.emplace_back(std::make_unique<WAWStickyRegistersEdges>());
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>());
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIEMachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,8 @@ void AIEScheduleDAGMI::schedule() {
// If it succeeds, we need to implement it, if we fail we fall back on the
// normal loop schedule
SchedImpl->buildGraph(*this, AA);
postProcessDAG();

auto &PostSWP = BS.getPostSWP();
if (PostSWP.schedule(*this, BS.FixPoint.II)) {
BS.setPipelined();
Expand Down
76 changes: 35 additions & 41 deletions llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir
Original file line number Diff line number Diff line change
Expand Up @@ -36,67 +36,61 @@
; CHECK-NEXT: vldb wl1, [p0], m4; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: nopa ; vldb wh3, [p0, #32]; nopx
; CHECK-NEXT: vldb.3d wl3, [p0], d1
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vlda wl9, [p4, #384]
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vlda wl7, [p4, #320]
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29
; CHECK-NEXT: vshuffle x1, x3, x5, r13
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: vshuffle x10, x4, x6, r25
; CHECK-NEXT: mov r3, p0; vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29
; CHECK-NEXT: vmac.f bmh3, bmh3, x3, x9, r29
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh3, bmh3, x3, x9, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: nopa ; vldb wl8, [p0], m4; nopx ; vmac.f bml4, bml4, x8, x7, r29
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: nopa ; vldb wl8, [p0], m4; nopx ; vmac.f bml0, bml0, x10, x5, r29
; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bml3, bml3, x1, x7, r29
; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml6, bml6, x3, x7, r29
; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml5, bml5, x10, x7, r29
; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh6, bmh6, x8, x11, r29
; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29
; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml1, bml1, x3, x11, r29
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vlda wl9, [p4, #384]
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vlda wl7, [p4, #320]
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29
; CHECK-NEXT: vshuffle x1, x3, x5, r13
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: vshuffle x10, x4, x6, r25
; CHECK-NEXT: mov r3, p0; vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh1, bmh1, x8, x9, r29
; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh3, bmh3, x3, x9, r29
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh3, bmh3, x3, x9, r29
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh5, bmh5, x1, x5, r29
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
; CHECK-NEXT: nopa ; nopb ; nopxm ; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: vmac.f bml4, bml4, x8, x7, r29
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bml2, bml2, x3, x5, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
; CHECK-NEXT: vmac.f bml3, bml3, x1, x7, r29
; CHECK-NEXT: vmac.f bml6, bml6, x3, x7, r29
; CHECK-NEXT: vmac.f bml5, bml5, x10, x7, r29
; CHECK-NEXT: vmac.f bmh6, bmh6, x8, x11, r29
; CHECK-NEXT: vmac.f bmh4, bmh4, x1, x11, r29
; CHECK-NEXT: vmac.f bml1, bml1, x3, x11, r29
; CHECK-NEXT: vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nopx
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
Expand Down
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16.mir
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,24 @@
; CHECK-NEXT: add.nc lc, r0, #-1
; CHECK-NEXT: movxm ls, #.LBB0_2
; CHECK-NEXT: movxm le, #.L_LEnd0
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p5, p7; nopv
; CHECK-NEXT: vldb wl8, [p0], m4; nopa ; nops ; nopx ; mov p4, p2; nopv
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p4, p2; nopv
; CHECK-NEXT: vldb wl8, [p0], m4; nopa ; nops ; nopx ; mov p5, p7; nopv
; CHECK-NEXT: vldb wh10, [p0, #32]; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: vldb wl10, [p0], m4; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: vldb wh1, [p0, #32]; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: vldb wl1, [p0], m4; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: vldb wh3, [p0, #32]
; CHECK-NEXT: vldb wh3, [p0, #32]; nopx
; CHECK-NEXT: vldb.3d wl3, [p0], d1
; CHECK-NEXT: vldb wh7, [p7, #32]; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vlda wl7, [p7], #256; paddb [p4], #320; mov r1, p0
; CHECK-NEXT: and r2, r1, r0; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x8, x0, x2, r9
; CHECK-NEXT: paddb [p4], #320; vshift.align x0, x0, s0, x8, r3
; CHECK-NEXT: vldb wh7, [p7, #32]; mov r1, p0
; CHECK-NEXT: vlda wl7, [p7], #256; and r2, r1, r0; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vldb wh9, [p4, #32]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh5, [p2, #352]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x7, r29
; CHECK-NEXT: mov p2, p5; vmac.f bml2, bml2, x3, x7, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x7, r29
Expand All @@ -55,24 +55,24 @@
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopa ; vldb wh8, [p0, #32]; nopx ; mov p5, p7; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: vldb wl8, [p0], m4; mov p4, p2; vmac.f bml4, bml4, x8, x5, r29
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p4, p2; vmac.f bmh2, bmh2, x10, x9, r29
; CHECK-NEXT: vldb wl8, [p0], m4; mov p5, p7; vmac.f bml4, bml4, x8, x5, r29
; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bml3, bml3, x1, x5, r29
; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml6, bml6, x3, x5, r29
; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml5, bml5, x10, x5, r29
; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh6, bmh6, x8, x11, r29
; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29
; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml1, bml1, x3, x11, r29
; CHECK-NEXT: vldb wh7, [p7, #32]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vlda wl7, [p7], #256; paddb [p4], #320; mov r1, p0
; CHECK-NEXT: and r2, r1, r0; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x8, x0, x2, r9
; CHECK-NEXT: paddb [p4], #320; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
; CHECK-NEXT: vldb wh7, [p7, #32]; mov r1, p0
; CHECK-NEXT: vlda wl7, [p7], #256; and r2, r1, r0; vshift.align x2, x2, s0, x10, r3
; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vldb wh9, [p4, #32]; vshift.align x4, x4, s0, x1, r3
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x5, x0, x2, r25
; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x8, x0, x2, r9
; CHECK-NEXT: vlda wh5, [p2, #352]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3
; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x3, x4, x6, r9
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vshuffle x3, x3, x5, r24
; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh7, bmh7, x8, x7, r29
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x7, r29
; CHECK-NEXT: mov p2, p5; vmac.f bml2, bml2, x3, x7, r29
; CHECK-NEXT: vmac.f bml0, bml0, x10, x7, r29
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,26 @@
; CHECK-NEXT: vldb wl8, [p1], m5; nopa ; padds [p0], #128; nopxm ; nopv
; CHECK-NEXT: vldb wl2, [p0], #32; nopa ; nops ; nopxm ; nopv
; CHECK-NEXT: vldb wh2, [p0], #32; nopx
; CHECK-NEXT: vldb wl3, [p0], #32
; CHECK-NEXT: vldb wh8, [p1], m6
; CHECK-NEXT: vldb.3d wh3, [p0], d0
; CHECK-NEXT: vldb wl3, [p0], #32
; CHECK-NEXT: vldb wl9, [p1], m5
; CHECK-NEXT: vldb.3d wh3, [p0], d0
; CHECK-NEXT: vldb wh9, [p1], m6
; CHECK-NEXT: vldb wl10, [p1], m5
; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3
; CHECK-NEXT: vldb wl11, [p1], m5; vshuffle x5, x0, x2, r16
; CHECK-NEXT: vldb.3d wh11, [p1], d1; vshuffle x8, x8, x8, r6
; CHECK-NEXT: vldb wl0, [p0, #0]; vshuffle x6, x1, x3, r3
; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2
; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2
; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vmac.f bmh2, bmh2, x5, x8, r2
; CHECK-NEXT: vldb wl0, [p0, #0]
; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2
; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2
; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2
; CHECK-NEXT: padds [p0], #128; vldb wl8, [p1], m5; vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2
; CHECK-NEXT: vldb wl2, [p0], #32; vmac.f bmh4, bmh0, x4, x9, r2
; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2
; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh6, bmh2, x5, x9, r2
; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh7, bmh3, x7, x9, r2
; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh8, bmh0, x4, x10, r2
; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bml0, bmh1, x6, x10, r2
; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2
; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh5, bmh1, x6, x9, r2
; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh7, bmh3, x7, x9, r2
; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bmh8, bmh0, x4, x10, r2
; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bmh1, x6, x10, r2
; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml1, bmh2, x5, x10, r2
; CHECK-NEXT: vldb wl10, [p1], m5; vmac.f bml2, bmh3, x7, x10, r2
; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3; vmac.f bml3, bmh0, x4, x11, r2
Expand All @@ -63,32 +63,32 @@
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldb wl0, [p0, #0]; nopa ; nops ; nopx ; vshuffle x6, x1, x3, r3; vmac.f bml6, bmh3, x7, x11, r2
; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2
; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2
; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vmac.f bmh2, bmh2, x5, x8, r2
; CHECK-NEXT: nopa ; vldb wl0, [p0, #0]; nopxm ; vmac.f bml6, bmh3, x7, x11, r2
; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2
; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2
; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2
; CHECK-NEXT: padds [p0], #128; vldb wl8, [p1], m5; vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2
; CHECK-NEXT: vldb wl2, [p0], #32; vmac.f bmh4, bmh0, x4, x9, r2
; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2
; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh6, bmh2, x5, x9, r2
; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh7, bmh3, x7, x9, r2
; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh8, bmh0, x4, x10, r2
; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bml0, bmh1, x6, x10, r2
; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2
; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh5, bmh1, x6, x9, r2
; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh7, bmh3, x7, x9, r2
; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bmh8, bmh0, x4, x10, r2
; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bmh1, x6, x10, r2
; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml1, bmh2, x5, x10, r2
; CHECK-NEXT: vldb wl10, [p1], m5; vmac.f bml2, bmh3, x7, x10, r2
; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3; vmac.f bml3, bmh0, x4, x11, r2
; CHECK-NEXT: vldb wl11, [p1], m5; vshuffle x5, x0, x2, r16; vmac.f bml4, bmh1, x6, x11, r2
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: vldb.3d wh11, [p1], d1; nopa ; nops ; nopx ; vshuffle x8, x8, x8, r6; vmac.f bml5, bmh2, x5, x11, r2
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x6, x1, x3, r3; vmac.f bml6, bmh3, x7, x11, r2
; CHECK-NEXT: nopx ; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2
; CHECK-NEXT: nopa ; nopb ; nopxm ; vmac.f bml6, bmh3, x7, x11, r2
; CHECK-NEXT: vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2
; CHECK-NEXT: vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2
; CHECK-NEXT: vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2
; CHECK-NEXT: vmac.f bmh2, bmh2, x5, x8, r2
; CHECK-NEXT: vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2
; CHECK-NEXT: vmac.f bmh4, bmh0, x4, x9, r2
; CHECK-NEXT: vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2
; CHECK-NEXT: vmac.f bmh6, bmh2, x5, x9, r2
; CHECK-NEXT: vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2
; CHECK-NEXT: vmac.f bmh5, bmh1, x6, x9, r2
; CHECK-NEXT: vmac.f bmh7, bmh3, x7, x9, r2
; CHECK-NEXT: vmac.f bmh8, bmh0, x4, x10, r2
; CHECK-NEXT: vmac.f bml0, bmh1, x6, x10, r2
Expand Down
Loading

0 comments on commit b4ddb6e

Please sign in to comment.