diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp index a22d3de87ddc..84bf9ae44ebb 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp @@ -661,6 +661,8 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) { std::vector> Mutations; Mutations.emplace_back(std::make_unique()); if (!TT.isAIE1()) { + if (EnableWAWStickyRegisters) + Mutations.emplace_back(std::make_unique()); Mutations.emplace_back(std::make_unique()); Mutations.emplace_back(std::make_unique()); Mutations.emplace_back(std::make_unique()); diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp index 113ebcb1531c..2d17070ed23b 100644 --- a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp +++ b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp @@ -1278,6 +1278,8 @@ void AIEScheduleDAGMI::schedule() { // If it succeeds, we need to implement it, if we fail we fall back on the // normal loop schedule SchedImpl->buildGraph(*this, AA); + postProcessDAG(); + auto &PostSWP = BS.getPostSWP(); if (PostSWP.schedule(*this, BS.FixPoint.II)) { BS.setPipelined(); diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir index c1b8eeded62f..ec4ca1aafc7c 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir @@ -36,57 +36,53 @@ ; CHECK-NEXT: vldb wl1, [p0], m4; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: nopa ; vldb wh3, [p0, #32]; nopx ; CHECK-NEXT: vldb.3d wl3, [p0], d1 - ; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x0, x0, s0, x8, r3 - ; CHECK-NEXT: vlda wl9, [p4, #384] - ; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x2, x2, s0, x10, r3 - ; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x5, x0, x2, r25 - ; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3 - ; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x8, x0, x2, r9 - ; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3 - ; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9 + ; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3 + ; CHECK-NEXT: vlda wl7, [p4, #320] + ; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3 + ; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25 + ; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x4, x4, s0, x1, r3 + ; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x8, x0, x2, r9 + ; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x6, x6, s0, x3, r3 + ; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x3, x4, x6, r9 + ; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29 ; CHECK-NEXT: vshuffle x1, x3, x5, r13 - ; CHECK-NEXT: vshuffle x3, x3, x5, r24 - ; CHECK-NEXT: vshuffle x10, x4, x6, r25 - ; CHECK-NEXT: mov r3, p0; vmac.f bmh7, bmh7, x8, x5, r29 - ; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh5, bmh5, x1, x5, r29 - ; CHECK-NEXT: add r3, r3, #34; vmac.f bml2, bml2, x3, x5, r29 - ; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29 - ; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29 - ; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29 - ; CHECK-NEXT: vmac.f bmh3, bmh3, x3, x9, r29 + ; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh1, bmh1, x8, x9, r29 + ; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29 + ; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh3, bmh3, x3, x9, r29 + ; CHECK-NEXT: add r3, r3, #34; vmac.f bmh2, bmh2, x10, x9, r29 + ; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29 + ; CHECK-NEXT: vmac.f bmh5, bmh5, x1, x5, r29 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bmh2, bmh2, x10, x9, r29 - ; CHECK-NEXT: nopa ; vldb wl8, [p0], m4; nopx ; vmac.f bml4, bml4, x8, x7, r29 + ; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bml2, bml2, x3, x5, r29 + ; CHECK-NEXT: nopa ; vldb wl8, [p0], m4; nopx ; vmac.f bml0, bml0, x10, x5, r29 ; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bml3, bml3, x1, x7, r29 ; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml6, bml6, x3, x7, r29 ; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml5, bml5, x10, x7, r29 ; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh6, bmh6, x8, x11, r29 ; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29 ; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml1, bml1, x3, x11, r29 - ; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29 - ; CHECK-NEXT: vlda wl9, [p4, #384] - ; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x2, x2, s0, x10, r3 - ; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x5, x0, x2, r25 - ; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3 - ; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x8, x0, x2, r9 - ; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3 - ; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9 + ; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29 + ; CHECK-NEXT: vlda wl7, [p4, #320] + ; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3 + ; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25 + ; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x4, x4, s0, x1, r3 + ; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x8, x0, x2, r9 + ; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x6, x6, s0, x3, r3 + ; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x3, x4, x6, r9 + ; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29 ; CHECK-NEXT: vshuffle x1, x3, x5, r13 - ; CHECK-NEXT: vshuffle x3, x3, x5, r24 - ; CHECK-NEXT: vshuffle x10, x4, x6, r25 - ; CHECK-NEXT: mov r3, p0; vmac.f bmh7, bmh7, x8, x5, r29 - ; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh5, bmh5, x1, x5, r29 - ; CHECK-NEXT: add r3, r3, #34; vmac.f bml2, bml2, x3, x5, r29 - ; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29 - ; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x9, r29 - ; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x9, r29 + ; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh1, bmh1, x8, x9, r29 + ; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29 + ; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh3, bmh3, x3, x9, r29 + ; CHECK-NEXT: add r3, r3, #34; vmac.f bmh2, bmh2, x10, x9, r29 + ; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29 ; CHECK-NEXT: .L_LEnd0: - ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh3, bmh3, x3, x9, r29 + ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh5, bmh5, x1, x5, r29 ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup - ; CHECK-NEXT: nopa ; nopb ; nopxm ; vmac.f bmh2, bmh2, x10, x9, r29 - ; CHECK-NEXT: vmac.f bml4, bml4, x8, x7, r29 + ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bml2, bml2, x3, x5, r29 + ; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29 ; CHECK-NEXT: vmac.f bml3, bml3, x1, x7, r29 ; CHECK-NEXT: vmac.f bml6, bml6, x3, x7, r29 ; CHECK-NEXT: vmac.f bml5, bml5, x10, x7, r29 @@ -94,9 +90,7 @@ ; CHECK-NEXT: vmac.f bmh4, bmh4, x1, x11, r29 ; CHECK-NEXT: vmac.f bml1, bml1, x3, x11, r29 ; CHECK-NEXT: vmac.f bmh8, bmh8, x10, x11, r29 - ; CHECK-NEXT: nop - ; CHECK-NEXT: nop - ; CHECK-NEXT: nop + ; CHECK-NEXT: nopx ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16.mir index 25321ddb2188..d85295575eea 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16.mir @@ -28,24 +28,24 @@ ; CHECK-NEXT: add.nc lc, r0, #-1 ; CHECK-NEXT: movxm ls, #.LBB0_2 ; CHECK-NEXT: movxm le, #.L_LEnd0 - ; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p5, p7; nopv - ; CHECK-NEXT: vldb wl8, [p0], m4; nopa ; nops ; nopx ; mov p4, p2; nopv + ; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p4, p2; nopv + ; CHECK-NEXT: vldb wl8, [p0], m4; nopa ; nops ; nopx ; mov p5, p7; nopv ; CHECK-NEXT: vldb wh10, [p0, #32]; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: vldb wl10, [p0], m4; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: vldb wh1, [p0, #32]; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: vldb wl1, [p0], m4; nopa ; nops ; nopxm ; nopv - ; CHECK-NEXT: vldb wh3, [p0, #32] + ; CHECK-NEXT: vldb wh3, [p0, #32]; nopx ; CHECK-NEXT: vldb.3d wl3, [p0], d1 - ; CHECK-NEXT: vldb wh7, [p7, #32]; vshift.align x0, x0, s0, x8, r3 - ; CHECK-NEXT: vlda wl7, [p7], #256; paddb [p4], #320; mov r1, p0 - ; CHECK-NEXT: and r2, r1, r0; vshift.align x2, x2, s0, x10, r3 - ; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x8, x0, x2, r9 + ; CHECK-NEXT: paddb [p4], #320; vshift.align x0, x0, s0, x8, r3 + ; CHECK-NEXT: vldb wh7, [p7, #32]; mov r1, p0 + ; CHECK-NEXT: vlda wl7, [p7], #256; and r2, r1, r0; vshift.align x2, x2, s0, x10, r3 + ; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x5, x0, x2, r25 ; CHECK-NEXT: vldb wh9, [p4, #32]; vshift.align x4, x4, s0, x1, r3 - ; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x5, x0, x2, r25 + ; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x8, x0, x2, r9 ; CHECK-NEXT: vlda wh5, [p2, #352]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3 ; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x3, x4, x6, r9 - ; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13; vmac.f bmh7, bmh7, x8, x7, r29 - ; CHECK-NEXT: vshuffle x3, x3, x5, r24 + ; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13 + ; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh7, bmh7, x8, x7, r29 ; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x7, r29 ; CHECK-NEXT: mov p2, p5; vmac.f bml2, bml2, x3, x7, r29 ; CHECK-NEXT: vmac.f bml0, bml0, x10, x7, r29 @@ -55,24 +55,24 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: nopa ; vldb wh8, [p0, #32]; nopx ; mov p5, p7; vmac.f bmh2, bmh2, x10, x9, r29 - ; CHECK-NEXT: vldb wl8, [p0], m4; mov p4, p2; vmac.f bml4, bml4, x8, x5, r29 + ; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p4, p2; vmac.f bmh2, bmh2, x10, x9, r29 + ; CHECK-NEXT: vldb wl8, [p0], m4; mov p5, p7; vmac.f bml4, bml4, x8, x5, r29 ; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bml3, bml3, x1, x5, r29 ; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml6, bml6, x3, x5, r29 ; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml5, bml5, x10, x5, r29 ; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh6, bmh6, x8, x11, r29 ; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29 ; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml1, bml1, x3, x11, r29 - ; CHECK-NEXT: vldb wh7, [p7, #32]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29 - ; CHECK-NEXT: vlda wl7, [p7], #256; paddb [p4], #320; mov r1, p0 - ; CHECK-NEXT: and r2, r1, r0; vshift.align x2, x2, s0, x10, r3 - ; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x8, x0, x2, r9 + ; CHECK-NEXT: paddb [p4], #320; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29 + ; CHECK-NEXT: vldb wh7, [p7, #32]; mov r1, p0 + ; CHECK-NEXT: vlda wl7, [p7], #256; and r2, r1, r0; vshift.align x2, x2, s0, x10, r3 + ; CHECK-NEXT: vldb wl5, [p4], #64; vshuffle x5, x0, x2, r25 ; CHECK-NEXT: vldb wh9, [p4, #32]; vshift.align x4, x4, s0, x1, r3 - ; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x5, x0, x2, r25 + ; CHECK-NEXT: vldb wl9, [p4], #64; vshuffle x8, x0, x2, r9 ; CHECK-NEXT: vlda wh5, [p2, #352]; add r3, r2, #34; vshift.align x6, x6, s0, x3, r3 ; CHECK-NEXT: vldb wh11, [p4, #32]; vshuffle x3, x4, x6, r9 - ; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13; vmac.f bmh7, bmh7, x8, x7, r29 - ; CHECK-NEXT: vshuffle x3, x3, x5, r24 + ; CHECK-NEXT: vldb wl11, [p4, #0]; vshuffle x1, x3, x5, r13 + ; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh7, bmh7, x8, x7, r29 ; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x7, r29 ; CHECK-NEXT: mov p2, p5; vmac.f bml2, bml2, x3, x7, r29 ; CHECK-NEXT: vmac.f bml0, bml0, x10, x7, r29 diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA-nopstinc.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA-nopstinc.mir index 6631eb1b3a5f..818a10b5bb4e 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA-nopstinc.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA-nopstinc.mir @@ -35,26 +35,26 @@ ; CHECK-NEXT: vldb wl8, [p1], m5; nopa ; padds [p0], #128; nopxm ; nopv ; CHECK-NEXT: vldb wl2, [p0], #32; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: vldb wh2, [p0], #32; nopx - ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb wh8, [p1], m6 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0 + ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb wl9, [p1], m5 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0 ; CHECK-NEXT: vldb wh9, [p1], m6 ; CHECK-NEXT: vldb wl10, [p1], m5 ; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3 ; CHECK-NEXT: vldb wl11, [p1], m5; vshuffle x5, x0, x2, r16 ; CHECK-NEXT: vldb.3d wh11, [p1], d1; vshuffle x8, x8, x8, r6 - ; CHECK-NEXT: vldb wl0, [p0, #0]; vshuffle x6, x1, x3, r3 - ; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2 - ; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 - ; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: vldb wl0, [p0, #0] + ; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 ; CHECK-NEXT: padds [p0], #128; vldb wl8, [p1], m5; vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2 ; CHECK-NEXT: vldb wl2, [p0], #32; vmac.f bmh4, bmh0, x4, x9, r2 - ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2 - ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh6, bmh2, x5, x9, r2 - ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh7, bmh3, x7, x9, r2 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh8, bmh0, x4, x10, r2 - ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bml0, bmh1, x6, x10, r2 + ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh5, bmh1, x6, x9, r2 + ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh7, bmh3, x7, x9, r2 + ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bmh8, bmh0, x4, x10, r2 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bmh1, x6, x10, r2 ; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml1, bmh2, x5, x10, r2 ; CHECK-NEXT: vldb wl10, [p1], m5; vmac.f bml2, bmh3, x7, x10, r2 ; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3; vmac.f bml3, bmh0, x4, x11, r2 @@ -63,17 +63,17 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: vldb wl0, [p0, #0]; nopa ; nops ; nopx ; vshuffle x6, x1, x3, r3; vmac.f bml6, bmh3, x7, x11, r2 - ; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2 - ; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 - ; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: nopa ; vldb wl0, [p0, #0]; nopxm ; vmac.f bml6, bmh3, x7, x11, r2 + ; CHECK-NEXT: vldb wh0, [p0, #32]; vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: padds [p0], m4; vldb wh1, [p0, #96]; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 ; CHECK-NEXT: padds [p0], #128; vldb wl8, [p1], m5; vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2 ; CHECK-NEXT: vldb wl2, [p0], #32; vmac.f bmh4, bmh0, x4, x9, r2 - ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2 - ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh6, bmh2, x5, x9, r2 - ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh7, bmh3, x7, x9, r2 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh8, bmh0, x4, x10, r2 - ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bml0, bmh1, x6, x10, r2 + ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh5, bmh1, x6, x9, r2 + ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh7, bmh3, x7, x9, r2 + ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bmh8, bmh0, x4, x10, r2 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bmh1, x6, x10, r2 ; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml1, bmh2, x5, x10, r2 ; CHECK-NEXT: vldb wl10, [p1], m5; vmac.f bml2, bmh3, x7, x10, r2 ; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3; vmac.f bml3, bmh0, x4, x11, r2 @@ -81,14 +81,14 @@ ; CHECK-NEXT: .L_LEnd0: ; CHECK-NEXT: vldb.3d wh11, [p1], d1; nopa ; nops ; nopx ; vshuffle x8, x8, x8, r6; vmac.f bml5, bmh2, x5, x11, r2 ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup - ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x6, x1, x3, r3; vmac.f bml6, bmh3, x7, x11, r2 - ; CHECK-NEXT: nopx ; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: nopa ; nopb ; nopxm ; vmac.f bml6, bmh3, x7, x11, r2 + ; CHECK-NEXT: vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2 ; CHECK-NEXT: vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 - ; CHECK-NEXT: vmac.f bmh2, bmh2, x5, x8, r2 ; CHECK-NEXT: vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2 ; CHECK-NEXT: vmac.f bmh4, bmh0, x4, x9, r2 - ; CHECK-NEXT: vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2 - ; CHECK-NEXT: vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vmac.f bmh5, bmh1, x6, x9, r2 ; CHECK-NEXT: vmac.f bmh7, bmh3, x7, x9, r2 ; CHECK-NEXT: vmac.f bmh8, bmh0, x4, x10, r2 ; CHECK-NEXT: vmac.f bml0, bmh1, x6, x10, r2 diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA.mir index 92ed051e861c..b9cb0bf36e9b 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-feasibleRA.mir @@ -35,26 +35,26 @@ ; CHECK-NEXT: vldb wl8, [p1], m5; nopa ; padds [p0], m4; nopxm ; nopv ; CHECK-NEXT: vldb wl2, [p0], #32; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: nopa ; vldb wh2, [p0], #32; nopx - ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb wh8, [p1], m6 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0 + ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb wl9, [p1], m5 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0 ; CHECK-NEXT: vldb wh9, [p1], m6 ; CHECK-NEXT: vldb wl10, [p1], m5 ; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3 ; CHECK-NEXT: vldb wl11, [p1], m5; vshuffle x5, x0, x2, r16 ; CHECK-NEXT: vldb.3d wh11, [p1], d1; vshuffle x8, x8, x8, r6 - ; CHECK-NEXT: vldb wl0, [p0], #32; vshuffle x6, x1, x3, r3 - ; CHECK-NEXT: vldb wh0, [p0], #32; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2 - ; CHECK-NEXT: vldb wl1, [p0], #32; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 - ; CHECK-NEXT: vldb wh1, [p0], #32; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: vldb wl0, [p0], #32 + ; CHECK-NEXT: vldb wh0, [p0], #32; vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: vldb wl1, [p0], #32; vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: vldb wh1, [p0], #32; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 ; CHECK-NEXT: padds [p0], m4; vldb wl8, [p1], m5; vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2 ; CHECK-NEXT: vldb wl2, [p0], #32; vmac.f bmh4, bmh0, x4, x9, r2 - ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2 - ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh6, bmh2, x5, x9, r2 - ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh7, bmh3, x7, x9, r2 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh8, bmh0, x4, x10, r2 - ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bml0, bmh1, x6, x10, r2 + ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh5, bmh1, x6, x9, r2 + ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh7, bmh3, x7, x9, r2 + ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bmh8, bmh0, x4, x10, r2 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bmh1, x6, x10, r2 ; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml1, bmh2, x5, x10, r2 ; CHECK-NEXT: vldb wl10, [p1], m5; vmac.f bml2, bmh3, x7, x10, r2 ; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3; vmac.f bml3, bmh0, x4, x11, r2 @@ -63,17 +63,17 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: vldb wl0, [p0], #32; nopa ; nops ; nopx ; vshuffle x6, x1, x3, r3; vmac.f bml6, bmh3, x7, x11, r2 - ; CHECK-NEXT: vldb wh0, [p0], #32; nopx ; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2 - ; CHECK-NEXT: vldb wl1, [p0], #32; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 - ; CHECK-NEXT: vldb wh1, [p0], #32; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: vldb wl0, [p0], #32; nopa ; nops ; nopxm ; vmac.f bml6, bmh3, x7, x11, r2 + ; CHECK-NEXT: vldb wh0, [p0], #32; vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: vldb wl1, [p0], #32; vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2 + ; CHECK-NEXT: vldb wh1, [p0], #32; vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 ; CHECK-NEXT: padds [p0], m4; vldb wl8, [p1], m5; vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2 ; CHECK-NEXT: vldb wl2, [p0], #32; vmac.f bmh4, bmh0, x4, x9, r2 - ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2 - ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh6, bmh2, x5, x9, r2 - ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh7, bmh3, x7, x9, r2 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh8, bmh0, x4, x10, r2 - ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bml0, bmh1, x6, x10, r2 + ; CHECK-NEXT: vldb wh2, [p0], #32; vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vldb wh8, [p1], m6; vmac.f bmh5, bmh1, x6, x9, r2 + ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh7, bmh3, x7, x9, r2 + ; CHECK-NEXT: vldb wl9, [p1], m5; vmac.f bmh8, bmh0, x4, x10, r2 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bml0, bmh1, x6, x10, r2 ; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml1, bmh2, x5, x10, r2 ; CHECK-NEXT: vldb wl10, [p1], m5; vmac.f bml2, bmh3, x7, x10, r2 ; CHECK-NEXT: vldb wh10, [p1], m6; vshuffle x4, x0, x2, r3; vmac.f bml3, bmh0, x4, x11, r2 @@ -81,14 +81,14 @@ ; CHECK-NEXT: .L_LEnd0: ; CHECK-NEXT: vldb.3d wh11, [p1], d1; nopa ; nops ; nopx ; vshuffle x8, x8, x8, r6; vmac.f bml5, bmh2, x5, x11, r2 ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup - ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x6, x1, x3, r3; vmac.f bml6, bmh3, x7, x11, r2 - ; CHECK-NEXT: nopx ; vshuffle x7, x1, x3, r16; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: nopa ; nopb ; nopxm ; vmac.f bml6, bmh3, x7, x11, r2 + ; CHECK-NEXT: vshuffle x6, x1, x3, r3; vmac.f bmh0, bmh0, x4, x8, r2 + ; CHECK-NEXT: vshuffle x7, x1, x3, r16; vmac.f bmh2, bmh2, x5, x8, r2 ; CHECK-NEXT: vshuffle x9, x9, x9, r6; vmac.f bmh1, bmh1, x6, x8, r2 - ; CHECK-NEXT: vmac.f bmh2, bmh2, x5, x8, r2 ; CHECK-NEXT: vshuffle x10, x10, x10, r6; vmac.f bmh3, bmh3, x7, x8, r2 ; CHECK-NEXT: vmac.f bmh4, bmh0, x4, x9, r2 - ; CHECK-NEXT: vshuffle x11, x11, x11, r6; vmac.f bmh5, bmh1, x6, x9, r2 - ; CHECK-NEXT: vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vshuffle x11, x11, x11, r6; vmac.f bmh6, bmh2, x5, x9, r2 + ; CHECK-NEXT: vmac.f bmh5, bmh1, x6, x9, r2 ; CHECK-NEXT: vmac.f bmh7, bmh3, x7, x9, r2 ; CHECK-NEXT: vmac.f bmh8, bmh0, x4, x10, r2 ; CHECK-NEXT: vmac.f bml0, bmh1, x6, x10, r2 diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-nopresched-nobanks.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-nopresched-nobanks.mir index ac136c57b989..07a45d2a0399 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-nopresched-nobanks.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm-nopresched-nobanks.mir @@ -34,7 +34,7 @@ ; CHECK-NEXT: paddb [p0], m4; vlda wh1, [p0, #96]; nops ; nopxm ; nopv ; CHECK-NEXT: paddb [p0], #128; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: vldb wl10, [p0], #32; nopa ; nops ; nopxm ; nopv - ; CHECK-NEXT: nopa ; vldb wh10, [p0], #32; nopx + ; CHECK-NEXT: nopa ; vldb wh10, [p0], #32; nopxm ; nops ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb.3d wh3, [p0], d0 ; CHECK-NEXT: nop @@ -46,18 +46,16 @@ ; CHECK-NEXT: vldb wh0, [p1], m6; vshuffle x0, x1, x3, r3 ; CHECK-NEXT: vldb wl7, [p1], m5; vshuffle x9, x1, x3, r16 ; CHECK-NEXT: vldb.3d wh7, [p1], d1 - ; CHECK-NEXT: vshuffle x3, x3, x3, r6 - ; CHECK-NEXT: nop ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: vldb wl8, [p0, #0]; nopa ; nops ; nopx ; vshuffle x5, x5, x5, r6; vmac.f bmh0, bmh0, x6, x3, r2 - ; CHECK-NEXT: vldb wh8, [p0, #32]; nopx ; vmac.f bmh1, bmh1, x11, x3, r2 - ; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x0, x0, x0, r6 - ; CHECK-NEXT: vlda wh1, [p0, #96]; paddb [p0], m4 - ; CHECK-NEXT: paddb [p0], #128; vshuffle x7, x7, x7, r6; vmac.f bmh2, bmh2, x0, x3, r2 - ; CHECK-NEXT: vldb wl10, [p0], #32; vmac.f bmh3, bmh3, x9, x3, r2 - ; CHECK-NEXT: vldb wh10, [p0], #32; vmac.f bmh4, bmh4, x6, x5, r2 + ; CHECK-NEXT: vldb wl8, [p0, #0]; vshuffle x3, x3, x3, r6 + ; CHECK-NEXT: vldb wh8, [p0, #32]; nopx + ; CHECK-NEXT: vldb wl1, [p0, #64]; vshuffle x5, x5, x5, r6; vmac.f bmh0, bmh0, x6, x3, r2 + ; CHECK-NEXT: vlda wh1, [p0, #96]; paddb [p0], m4; vmac.f bmh1, bmh1, x11, x3, r2 + ; CHECK-NEXT: paddb [p0], #128; vshuffle x0, x0, x0, r6; vmac.f bmh3, bmh3, x9, x3, r2 + ; CHECK-NEXT: vldb wl10, [p0], #32; vmac.f bmh4, bmh4, x6, x5, r2 + ; CHECK-NEXT: vldb wh10, [p0], #32; vshuffle x7, x7, x7, r6; vmac.f bmh2, bmh2, x0, x3, r2 ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bmh5, bmh5, x11, x5, r2 ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vmac.f bmh6, bmh6, x0, x5, r2 ; CHECK-NEXT: vmac.f bmh7, bmh7, x9, x5, r2 @@ -68,18 +66,16 @@ ; CHECK-NEXT: vldb wl0, [p1], m5; vshuffle x11, x8, x10, r16; vmac.f bml3, bml3, x6, x7, r2 ; CHECK-NEXT: vldb wh0, [p1], m6; vshuffle x0, x1, x3, r3; vmac.f bml4, bml4, x11, x7, r2 ; CHECK-NEXT: vldb wl7, [p1], m5; vshuffle x9, x1, x3, r16; vmac.f bml6, bml6, x0, x7, r2 - ; CHECK-NEXT: vldb.3d wh7, [p1], d1; vmac.f bml5, bml5, x9, x7, r2 - ; CHECK-NEXT: vshuffle x3, x3, x3, r6 ; CHECK-NEXT: .L_LEnd0: - ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv + ; CHECK-NEXT: vldb.3d wh7, [p1], d1; nopa ; nops ; nopxm ; vmac.f bml5, bml5, x9, x7, r2 ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup - ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x5, x5, x5, r6; vmac.f bmh0, bmh0, x6, x3, r2 - ; CHECK-NEXT: nopa ; nopx ; vmac.f bmh1, bmh1, x11, x3, r2 - ; CHECK-NEXT: vshuffle x0, x0, x0, r6 - ; CHECK-NEXT: nop - ; CHECK-NEXT: vshuffle x7, x7, x7, r6; vmac.f bmh2, bmh2, x0, x3, r2 - ; CHECK-NEXT: vmac.f bmh3, bmh3, x9, x3, r2 + ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x3, x3, x3, r6; nopv + ; CHECK-NEXT: nopx + ; CHECK-NEXT: vshuffle x5, x5, x5, r6; vmac.f bmh0, bmh0, x6, x3, r2 + ; CHECK-NEXT: vmac.f bmh1, bmh1, x11, x3, r2 + ; CHECK-NEXT: vshuffle x0, x0, x0, r6; vmac.f bmh3, bmh3, x9, x3, r2 ; CHECK-NEXT: vmac.f bmh4, bmh4, x6, x5, r2 + ; CHECK-NEXT: vshuffle x7, x7, x7, r6; vmac.f bmh2, bmh2, x0, x3, r2 ; CHECK-NEXT: vmac.f bmh5, bmh5, x11, x5, r2 ; CHECK-NEXT: vmac.f bmh6, bmh6, x0, x5, r2 ; CHECK-NEXT: vmac.f bmh7, bmh7, x9, x5, r2 @@ -91,8 +87,6 @@ ; CHECK-NEXT: vmac.f bml4, bml4, x11, x7, r2 ; CHECK-NEXT: vmac.f bml6, bml6, x0, x7, r2 ; CHECK-NEXT: vmac.f bml5, bml5, x9, x7, r2 - ; CHECK-NEXT: nop - ; CHECK-NEXT: nop ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup ; CHECK-NEXT: nopa ; ret lr diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm.mir index 84045383b00a..5aa2888b4501 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/gemm.mir @@ -32,53 +32,55 @@ ; CHECK-NEXT: vldb wl6, [p0, #64]; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: vldb wh6, [p0, #96]; nopa ; padds [p0], m4; nopxm ; nopv ; CHECK-NEXT: vldb wl9, [p1], m5; nopa ; padds [p0], #128; nopxm ; nopv - ; CHECK-NEXT: vldb wl10, [p0], #32; nopa ; nops ; nopxm ; nopv - ; CHECK-NEXT: vldb wh10, [p0], #32; nopx - ; CHECK-NEXT: vldb wh9, [p1], m6 - ; CHECK-NEXT: vldb wl3, [p0], #32 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0 + ; CHECK-NEXT: vldb wh9, [p1], m6; nopa ; nops ; nopxm ; nopv + ; CHECK-NEXT: vldb wl10, [p0], #32 ; CHECK-NEXT: vldb wl5, [p1], m5 + ; CHECK-NEXT: vldb wh10, [p0], #32 ; CHECK-NEXT: vldb wh5, [p1], m6 + ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb wl7, [p1], m5 - ; CHECK-NEXT: vldb wh7, [p1], m6; vshuffle x1, x8, x10, r4 - ; CHECK-NEXT: vldb wl3, [p1], m5; vshuffle x11, x9, x9, r2 - ; CHECK-NEXT: vldb.3d wh3, [p1], d1; vshuffle x8, x8, x10, r16 - ; CHECK-NEXT: vshuffle x10, x6, x3, r4; vmac.f bmh0, bmh0, x1, x11, r3 - ; CHECK-NEXT: vshuffle x6, x6, x3, r16; vmac.f bmh1, bmh1, x8, x11, r3 - ; CHECK-NEXT: vshuffle x5, x5, x5, r2; vmac.f bmh2, bmh2, x10, x11, r3 - ; CHECK-NEXT: vmac.f bmh3, bmh3, x6, x11, r3 - ; CHECK-NEXT: vshuffle x7, x7, x7, r2; vmac.f bmh4, bmh4, x1, x5, r3 - ; CHECK-NEXT: vmac.f bmh5, bmh5, x8, x5, r3 - ; CHECK-NEXT: vshuffle x3, x3, x3, r2; vmac.f bmh6, bmh6, x10, x5, r3 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vshuffle x11, x9, x9, r2 + ; CHECK-NEXT: vldb wh7, [p1], m6 + ; CHECK-NEXT: vldb wl3, [p1], m5 + ; CHECK-NEXT: vldb.3d wh3, [p1], d1; vshuffle x1, x8, x10, r4 + ; CHECK-NEXT: vshuffle x8, x8, x10, r16 + ; CHECK-NEXT: vshuffle x5, x5, x5, r2; vmac.f bmh0, bmh0, x1, x11, r3 + ; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x11, r3 + ; CHECK-NEXT: vshuffle x10, x6, x3, r4; vmac.f bmh4, bmh4, x1, x5, r3 + ; CHECK-NEXT: vshuffle x6, x6, x3, r16; vmac.f bmh5, bmh5, x8, x5, r3 + ; CHECK-NEXT: vshuffle x7, x7, x7, r2; vmac.f bmh2, bmh2, x10, x11, r3 + ; CHECK-NEXT: vshuffle x3, x3, x3, r2; vmac.f bmh3, bmh3, x6, x11, r3 + ; CHECK-NEXT: vmac.f bmh6, bmh6, x10, x5, r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: vldb wl8, [p0, #0]; nopa ; nops ; nopxm ; vmac.f bmh7, bmh7, x6, x5, r3 - ; CHECK-NEXT: vldb wh8, [p0, #32]; nopx ; vmac.f bmh8, bmh8, x1, x7, r3 + ; CHECK-NEXT: nopa ; vldb wl8, [p0, #0]; nopx ; vmac.f bmh7, bmh7, x6, x5, r3 + ; CHECK-NEXT: vldb wh8, [p0, #32]; vmac.f bmh8, bmh8, x1, x7, r3 ; CHECK-NEXT: vldb wl6, [p0, #64]; vmac.f bml0, bml0, x8, x7, r3 ; CHECK-NEXT: padds [p0], m4; vldb wh6, [p0, #96]; vmac.f bml1, bml1, x10, x7, r3 ; CHECK-NEXT: padds [p0], #128; vldb wl9, [p1], m5; vmac.f bml2, bml2, x6, x7, r3 - ; CHECK-NEXT: vldb wl10, [p0], #32; vmac.f bml3, bml3, x1, x3, r3 - ; CHECK-NEXT: vldb wh10, [p0], #32; vmac.f bml5, bml5, x8, x3, r3 - ; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml6, bml6, x10, x3, r3 - ; CHECK-NEXT: vldb wl3, [p0], #32; vmac.f bml4, bml4, x6, x3, r3 - ; CHECK-NEXT: vldb.3d wh3, [p0], d0 - ; CHECK-NEXT: vldb wl5, [p1], m5 + ; CHECK-NEXT: vldb wh9, [p1], m6; vmac.f bml3, bml3, x1, x3, r3 + ; CHECK-NEXT: vldb wl10, [p0], #32; vmac.f bml5, bml5, x8, x3, r3 + ; CHECK-NEXT: vldb wl5, [p1], m5; vmac.f bml6, bml6, x10, x3, r3 + ; CHECK-NEXT: vldb wh10, [p0], #32; vmac.f bml4, bml4, x6, x3, r3 ; CHECK-NEXT: vldb wh5, [p1], m6 + ; CHECK-NEXT: vldb wl3, [p0], #32 ; CHECK-NEXT: vldb wl7, [p1], m5 - ; CHECK-NEXT: vldb wh7, [p1], m6; vshuffle x1, x8, x10, r4 - ; CHECK-NEXT: vldb wl3, [p1], m5; vshuffle x11, x9, x9, r2 - ; CHECK-NEXT: vldb.3d wh3, [p1], d1; vshuffle x8, x8, x10, r16 - ; CHECK-NEXT: vshuffle x10, x6, x3, r4; vmac.f bmh0, bmh0, x1, x11, r3 - ; CHECK-NEXT: vshuffle x6, x6, x3, r16; vmac.f bmh1, bmh1, x8, x11, r3 - ; CHECK-NEXT: vshuffle x5, x5, x5, r2; vmac.f bmh2, bmh2, x10, x11, r3 - ; CHECK-NEXT: vmac.f bmh3, bmh3, x6, x11, r3 - ; CHECK-NEXT: vshuffle x7, x7, x7, r2; vmac.f bmh4, bmh4, x1, x5, r3 - ; CHECK-NEXT: vmac.f bmh5, bmh5, x8, x5, r3 + ; CHECK-NEXT: vldb.3d wh3, [p0], d0; vshuffle x11, x9, x9, r2 + ; CHECK-NEXT: vldb wh7, [p1], m6 + ; CHECK-NEXT: vldb wl3, [p1], m5 + ; CHECK-NEXT: vldb.3d wh3, [p1], d1; vshuffle x1, x8, x10, r4 + ; CHECK-NEXT: vshuffle x8, x8, x10, r16 + ; CHECK-NEXT: vshuffle x5, x5, x5, r2; vmac.f bmh0, bmh0, x1, x11, r3 + ; CHECK-NEXT: vmac.f bmh1, bmh1, x8, x11, r3 + ; CHECK-NEXT: vshuffle x10, x6, x3, r4; vmac.f bmh4, bmh4, x1, x5, r3 + ; CHECK-NEXT: vshuffle x6, x6, x3, r16; vmac.f bmh5, bmh5, x8, x5, r3 + ; CHECK-NEXT: vshuffle x7, x7, x7, r2; vmac.f bmh2, bmh2, x10, x11, r3 + ; CHECK-NEXT: vshuffle x3, x3, x3, r2; vmac.f bmh3, bmh3, x6, x11, r3 ; CHECK-NEXT: .L_LEnd0: - ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x3, x3, x3, r2; vmac.f bmh6, bmh6, x10, x5, r3 + ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh6, bmh6, x10, x5, r3 ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup - ; CHECK-NEXT: vmac.f bmh7, bmh7, x6, x5, r3 + ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh7, bmh7, x6, x5, r3 ; CHECK-NEXT: vmac.f bmh8, bmh8, x1, x7, r3 ; CHECK-NEXT: vmac.f bml0, bml0, x8, x7, r3 ; CHECK-NEXT: vmac.f bml1, bml1, x10, x7, r3 @@ -87,6 +89,7 @@ ; CHECK-NEXT: vmac.f bml5, bml5, x8, x3, r3 ; CHECK-NEXT: vmac.f bml6, bml6, x10, x3, r3 ; CHECK-NEXT: vmac.f bml4, bml4, x6, x3, r3 + ; CHECK-NEXT: nopx ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/round.mir b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/round.mir index ee49118a134a..acb4a7f09877 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/round.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/round.mir @@ -34,7 +34,7 @@ ; CHECK-NEXT: nop // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 ; CHECK-NEXT: // %bb.1: // %for.body.preheader - ; CHECK-NEXT: add.nc lc, r0, #-1 + ; CHECK-NEXT: add.nc lc, r0, #-3 ; CHECK-NEXT: movxm ls, #.LBB0_2 ; CHECK-NEXT: movxm le, #.L_LEnd0 ; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; nops ; nopxm ; nopv @@ -42,33 +42,41 @@ ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv - ; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; nopv - ; CHECK-NEXT: nopa ; nopb ; nopxm + ; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; nops ; nopxm ; nopv + ; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm1, s0, [p0], #32; nops ; nopxm ; nopv ; CHECK-NEXT: nop ; CHECK-NEXT: nop + ; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1 + ; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1 + ; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32 + ; CHECK-NEXT: nop + ; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1 + ; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1; vups.s32.s8 cm3, wh2, s1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 - ; CHECK-NEXT: vlda.ups.s32.s8 cm0, s0, [p0], #32; nopb ; vsrs.s8.s32 wh0, cm0, s1 - ; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1 - ; CHECK-NEXT: nop - ; CHECK-NEXT: nop + ; CHECK-NEXT: nopb ; vlda.ups.s32.s8 cm0, s0, [p0], #32; vsrs.s8.s32 wh2, cm1, s1; nopxm ; nopv + ; CHECK-NEXT: vlda.ups.s32.s8 cm1, s0, [p0], #32; nopb ; vst.srs.s8.s32 cm2, s0, [p1], #32 + ; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32 ; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1 - ; CHECK-NEXT: vups.s32.s8 cm3, wh2, s1 - ; CHECK-NEXT: nop - ; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32 ; CHECK-NEXT: .L_LEnd0: - ; CHECK-NEXT: nopb ; nopa ; vst.srs.s8.s32 cm3, s0, [p1], #32; nopxm ; nopv + ; CHECK-NEXT: nopb ; nopa ; vsrs.s8.s32 wh0, cm0, s1; nopx ; vups.s32.s8 cm3, wh2, s1; nopv ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup - ; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1; nopx + ; CHECK-NEXT: nopa ; vsrs.s8.s32 wh2, cm1, s1; nopx + ; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32 + ; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32 + ; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1 + ; CHECK-NEXT: vsrs.s8.s32 wh0, cm0, s1; vups.s32.s8 cm3, wh2, s1 ; CHECK-NEXT: vsrs.s8.s32 wh2, cm1, s1 - ; CHECK-NEXT: nop - ; CHECK-NEXT: nop + ; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32 + ; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32 ; CHECK-NEXT: vups.s32.s8 cm2, wh0, s1 ; CHECK-NEXT: vups.s32.s8 cm3, wh2, s1 ; CHECK-NEXT: nop ; CHECK-NEXT: vst.srs.s8.s32 cm2, s0, [p1], #32 ; CHECK-NEXT: vst.srs.s8.s32 cm3, s0, [p1], #32 + ; CHECK-NEXT: nop + ; CHECK-NEXT: nop ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup ; CHECK-NEXT: nopa ; ret lr diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/status_regs/srWAW.mir b/llvm/test/CodeGen/AIE/aie2/schedule/status_regs/srWAW.mir index ff6d666fbd80..e649db0afd1e 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/status_regs/srWAW.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/status_regs/srWAW.mir @@ -1,14 +1,17 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 # This file is licensed under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -# RUN: llc -mtriple=aie2 --run-pass=postmisched --issue-limit=1 -debug-only=machine-scheduler %s -o - 2>%t.log -# RUN: cat %t.log | FileCheck %s --check-prefix=CHECK-WAW # REQUIRES: asserts +# RUN: llc -mtriple=aie2 --run-pass=postmisched --issue-limit=1 \ +# RUN: -debug-only=machine-scheduler --aie-pipeliner-waw-sticky-registers=0 \ +# RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-WAW # This test checks the write-after-write(WAW) dependencies +# We have disabled the sticky version, since its dump confuses FileChecking the debug output --- # Here we have two WAW dependencies with srcarry W1->W3, W2->W3, where W3 is a live write @@ -18,12 +21,17 @@ tracksRegLiveness: true body: | bb.0.entry : liveins: $r1, $r2, $r3, $r4 - ; CHECK-WAW-LABEL: test_WAW WAW dependencies - ; CHECK-WAW: SU(0)->SU(2) Out Latency=1 $r0 - ; CHECK-WAW-NEXT: SU(0)->SU(2) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(1)->SU(2) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $r0 + ; CHECK-WAW-LABEL: name: test_WAW + ; CHECK-WAW: liveins: $r1, $r2, $r3, $r4 + ; CHECK-WAW-NEXT: {{ $}} + ; CHECK-WAW-NEXT: RET implicit $lr + ; CHECK-WAW-NEXT: NOP + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed $r1, killed $r2, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r1 = nsw ADD killed $r3, $r4, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed renamable $r0, killed $r4, implicit-def $srcarry + ; CHECK-WAW-NEXT: $r0 = nsw ADC killed renamable $r1, killed renamable $r0, implicit-def $srcarry, implicit $srcarry + ; CHECK-WAW-NEXT: DelayedSchedBarrier implicit killed $r0 renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry renamable $r1 = nsw ADD $r3, $r4, implicit-def $srcarry renamable $r0 = nsw ADD killed renamable $r0, $r4, implicit-def $srcarry @@ -40,14 +48,17 @@ tracksRegLiveness: true body: | bb.0.entry: liveins: $r1, $r2, $r3, $r4 - ; CHECK-WAW-LABEL: test_WAW_2 WAW dependencies - ; CHECK-WAW: SU(0)->SU(2) Out Latency=1 $r0 - ; CHECK-WAW-NEXT: SU(0)->SU(3) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(1)->SU(3) Out Latency=1 $r1 - ; CHECK-WAW-NEXT: SU(1)->SU(3) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(2)->SU(4) Out Latency=1 $r0 - ; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $srcarry + ; CHECK-WAW-LABEL: name: test_WAW_2 + ; CHECK-WAW: liveins: $r1, $r2, $r3, $r4 + ; CHECK-WAW-NEXT: {{ $}} + ; CHECK-WAW-NEXT: RET implicit $lr + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed $r1, $r2, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r1 = nsw ADD killed $r3, $r4, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r1 = nsw ADD killed $r2, killed $r4, implicit-def $srcarry + ; CHECK-WAW-NEXT: $r0 = nsw ADC killed renamable $r0, killed renamable $r1, implicit-def $srcarry, implicit $srcarry + ; CHECK-WAW-NEXT: DelayedSchedBarrier implicit killed $r0 renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry renamable $r1 = nsw ADD $r3, $r4, implicit-def $srcarry renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry @@ -65,13 +76,17 @@ tracksRegLiveness: true body: | bb.0.entry: liveins: $r1, $r2, $r3 - ; CHECK-WAW-LABEL: test_MOV_mv_scl WAW dependencies - ; CHECK-WAW: SU(0)->SU(2) Out Latency=1 $r0 - ; CHECK-WAW-NEXT: SU(0)->SU(2) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(1)->SU(3) Out Latency=1 $r1 - ; CHECK-WAW-NEXT: SU(1)->SU(2) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(2)->SU(4) Out Latency=1 $r0 + ; CHECK-WAW-LABEL: name: test_MOV_mv_scl + ; CHECK-WAW: liveins: $r1, $r2, $r3 + ; CHECK-WAW-NEXT: {{ $}} + ; CHECK-WAW-NEXT: RET implicit $lr + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed $r1, $r2, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r1 = nsw ADD killed $r2, killed $r3, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry + ; CHECK-WAW-NEXT: $r1 = MOV_mv_scl $srcarry + ; CHECK-WAW-NEXT: $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry + ; CHECK-WAW-NEXT: DelayedSchedBarrier implicit killed $r0 renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry renamable $r1 = nsw ADD $r2, $r3, implicit-def $srcarry renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry @@ -89,16 +104,19 @@ tracksRegLiveness: true body: | bb.0.entry: liveins: $r1, $r2, $r3, $r4 - ; CHECK-WAW-LABEL: test_MOV_mv_scl_2 WAW dependencies - ; CHECK-WAW: SU(0)->SU(2) Out Latency=1 $r0 - ; CHECK-WAW-NEXT: SU(0)->SU(3) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(1)->SU(4) Out Latency=1 $r1 - ; CHECK-WAW-NEXT: SU(1)->SU(3) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $r0 - ; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $srcarry - ; CHECK-WAW-NEXT: SU(3)->SU(5) Out Latency=1 $r0 + ; CHECK-WAW-LABEL: name: test_MOV_mv_scl_2 + ; CHECK-WAW: liveins: $r1, $r2, $r3, $r4 + ; CHECK-WAW-NEXT: {{ $}} + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed $r1, killed $r2, implicit-def $srcarry + ; CHECK-WAW-NEXT: RET implicit $lr + ; CHECK-WAW-NEXT: renamable $r1 = nsw ADD killed $r3, $r4, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry + ; CHECK-WAW-NEXT: renamable $r0 = nsw ADD killed renamable $r0, killed $r4, implicit-def $srcarry + ; CHECK-WAW-NEXT: $r1 = MOV_mv_scl $srcarry + ; CHECK-WAW-NEXT: $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry + ; CHECK-WAW-NEXT: DelayedSchedBarrier implicit killed $r0 renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry renamable $r1 = nsw ADD $r3, $r4, implicit-def $srcarry renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry