Skip to content

Commit e3ac9a9

Browse files
author
Martien de Jong
committed
[AIE] Add 'original order' strategy
1 parent 36157f8 commit e3ac9a9

File tree

3 files changed

+35
-34
lines changed

3 files changed

+35
-34
lines changed

llvm/lib/Target/AIE/AIEPostPipeliner.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -628,6 +628,7 @@ static const struct {
628628
} Strategies[] = {
629629
// Loosely speaking, a lower value of the first parameter targets
630630
// a lower stage count, which benefits code size.
631+
{1, false, {ConfigStrategy::NodeNum}},
631632
{1, false, {ConfigStrategy::Latest}},
632633
{1, true, {ConfigStrategy::Critical}},
633634
{1, true, {ConfigStrategy::Critical, ConfigStrategy::LCDLatest}},

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d.mir

+12-12
Original file line number | Diff line number | Diff line change
@@ -100,10 +100,10 @@
100100
; CHECK-NEXT: vldb wh6, [p0], m6; nopa ; nops ; nopxm ; nopv
101101
; CHECK-NEXT: vldb wl8, [p0], m6; nopa ; nops ; add r0, r5, #33; vshift.align x2, x2, s1, x8, r0; nopv
102102
; CHECK-NEXT: vldb.3d wh8, [p0], d0; nopa ; nops ; nopx ; vshuffle x9, x4, x2, r2; nopv
103-
; CHECK-NEXT: vldb wl10, [p1], #32; nopa ; nops ; nopx ; vshuffle x3, x4, x2, r3; nopv
104-
; CHECK-NEXT: vldb wh10, [p1], #32; nopx ; vshuffle x1, x9, x0, r8; vmac cm1, cm1, x9, x10, r4
105-
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm3, cm3, x3, x10, r4
106-
; CHECK-NEXT: vldb wh7, [p1], #32; mov r6, p0; vmac cm2, cm2, x1, x10, r4
103+
; CHECK-NEXT: vldb wl10, [p1], #32; nopa ; nops ; nopx ; vshuffle x1, x9, x0, r8; nopv
104+
; CHECK-NEXT: vldb wh10, [p1], #32; nopx ; vshuffle x3, x4, x2, r3; vmac cm1, cm1, x9, x10, r4
105+
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm2, cm2, x1, x10, r4
106+
; CHECK-NEXT: vldb wh7, [p1], #32; mov r6, p0; vmac cm3, cm3, x3, x10, r4
107107
; CHECK-NEXT: .p2align 4
108108
; CHECK-NEXT: .LBB0_2: // %inner.loop
109109
; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
@@ -112,21 +112,21 @@
112112
; CHECK-NEXT: nopa ; vldb wh6, [p0], m6; nopx ; vmac cm5, cm5, x9, x7, r4
113113
; CHECK-NEXT: vldb wl8, [p0], m6; add r0, r5, #33; vshift.align x2, x2, s1, x8, r0; vmac cm6, cm6, x1, x7, r4
114114
; CHECK-NEXT: vldb.3d wh8, [p0], d0; vshuffle x9, x4, x2, r2; vmac cm7, cm7, x3, x7, r4
115-
; CHECK-NEXT: vldb wl10, [p1], #32; vshuffle x3, x4, x2, r3; vmac cm0, cm0, x5, x7, r4
116-
; CHECK-NEXT: vldb wh10, [p1], #32; vshuffle x1, x9, x0, r8; vmac cm1, cm1, x9, x10, r4
117-
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm3, cm3, x3, x10, r4
115+
; CHECK-NEXT: vldb wl10, [p1], #32; vshuffle x1, x9, x0, r8; vmac cm0, cm0, x5, x7, r4
116+
; CHECK-NEXT: vldb wh10, [p1], #32; vshuffle x3, x4, x2, r3; vmac cm1, cm1, x9, x10, r4
117+
; CHECK-NEXT: vldb wl7, [p1], #32; vshuffle x5, x3, x0, r8; vmac cm2, cm2, x1, x10, r4
118118
; CHECK-NEXT: .L_LEnd0:
119-
; CHECK-NEXT: vldb wh7, [p1], #32; nopa ; nops ; nopx ; mov r6, p0; vmac cm2, cm2, x1, x10, r4
119+
; CHECK-NEXT: vldb wh7, [p1], #32; nopa ; nops ; nopx ; mov r6, p0; vmac cm3, cm3, x3, x10, r4
120120
; CHECK-NEXT: // %bb.3: // %outer.loop.latch
121121
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
122122
; CHECK-NEXT: nopa ; and r5, r6, r9; vshift.align x4, x4, s1, x6, r0; vmac cm4, cm4, x5, x10, r4
123123
; CHECK-NEXT: vmac cm5, cm5, x9, x7, r4
124124
; CHECK-NEXT: add r0, r5, #33; vshift.align x2, x2, s1, x8, r0; vmac cm6, cm6, x1, x7, r4
125125
; CHECK-NEXT: vshuffle x9, x4, x2, r2; vmac cm7, cm7, x3, x7, r4
126-
; CHECK-NEXT: vshuffle x3, x4, x2, r3; vmac cm0, cm0, x5, x7, r4
127-
; CHECK-NEXT: vshuffle x1, x9, x0, r8; vmac cm1, cm1, x9, x10, r4
128-
; CHECK-NEXT: vshuffle x5, x3, x0, r8; vmac cm3, cm3, x3, x10, r4
129-
; CHECK-NEXT: vmac cm2, cm2, x1, x10, r4
126+
; CHECK-NEXT: vshuffle x1, x9, x0, r8; vmac cm0, cm0, x5, x7, r4
127+
; CHECK-NEXT: vshuffle x3, x4, x2, r3; vmac cm1, cm1, x9, x10, r4
128+
; CHECK-NEXT: vshuffle x5, x3, x0, r8; vmac cm2, cm2, x1, x10, r4
129+
; CHECK-NEXT: vmac cm3, cm3, x3, x10, r4
130130
; CHECK-NEXT: vmac cm4, cm4, x5, x10, r4
131131
; CHECK-NEXT: vmac cm5, cm5, x9, x7, r4
132132
; CHECK-NEXT: vmac cm6, cm6, x1, x7, r4

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir

+22-22
Original file line number | Diff line number | Diff line change
@@ -36,24 +36,24 @@
3636
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3
3737
; CHECK-NEXT: vlda wl7, [p4, #320]; movxm le, #.L_LEnd0
3838
; CHECK-NEXT: nopb ; vlda wh9, [p4, #416]; nops ; nopx ; vshift.align x2, x2, s0, x10, r3; nopv
39-
; CHECK-NEXT: nopb ; vlda wl9, [p4, #384]; nops ; nopx ; vshuffle x5, x0, x2, r25; nopv
40-
; CHECK-NEXT: nopb ; vlda wh11, [p4, #480]; nops ; nopx ; vshift.align x4, x4, s0, x1, r3; nopv
41-
; CHECK-NEXT: nopb ; vlda wl11, [p4, #448]; nops ; nopx ; vshuffle x8, x0, x2, r9; nopv
42-
; CHECK-NEXT: vldb wh5, [p5, #32]; nopa ; nops ; nopx ; vshift.align x6, x6, s0, x3, r3; nopv
43-
; CHECK-NEXT: nopb ; vlda wl5, [p5], #256; nops ; nopx ; vshuffle x3, x4, x6, r9; nopv
39+
; CHECK-NEXT: nopb ; vlda wl9, [p4, #384]; nops ; nopx ; vshuffle x8, x0, x2, r9; nopv
40+
; CHECK-NEXT: vldb wh5, [p5, #32]; nopa ; nops ; nopx ; vshift.align x4, x4, s0, x1, r3; nopv
41+
; CHECK-NEXT: nopb ; vlda wl5, [p5], #256; nops ; nopx ; vshuffle x5, x0, x2, r25; nopv
42+
; CHECK-NEXT: nopb ; vlda wh11, [p4, #480]; nops ; nopx ; vshift.align x6, x6, s0, x3, r3; nopv
43+
; CHECK-NEXT: nopb ; vlda wl11, [p4, #448]; nops ; nopx ; vshuffle x3, x4, x6, r9; nopv
4444
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29
4545
; CHECK-NEXT: nopa ; vshuffle x1, x3, x5, r13
4646
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh1, bmh1, x8, x9, r29
4747
; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29
48-
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh3, bmh3, x3, x9, r29
49-
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh2, bmh2, x10, x9, r29
50-
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29
51-
; CHECK-NEXT: vmac.f bmh5, bmh5, x1, x5, r29
48+
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh7, bmh7, x8, x5, r29
49+
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh5, bmh5, x1, x5, r29
50+
; CHECK-NEXT: vmac.f bml2, bml2, x3, x5, r29
51+
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
5252
; CHECK-NEXT: .p2align 4
5353
; CHECK-NEXT: .LBB0_2: // %for.body
5454
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
55-
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bml2, bml2, x3, x5, r29
56-
; CHECK-NEXT: nopa ; vldb wl8, [p0], m4; nopx ; vmac.f bml0, bml0, x10, x5, r29
55+
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bmh3, bmh3, x3, x9, r29
56+
; CHECK-NEXT: nopa ; vldb wl8, [p0], m4; nopx ; vmac.f bmh2, bmh2, x10, x9, r29
5757
; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bml3, bml3, x1, x7, r29
5858
; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml6, bml6, x3, x7, r29
5959
; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml5, bml5, x10, x7, r29
@@ -63,23 +63,23 @@
6363
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x0, x0, s0, x8, r3; vmac.f bmh8, bmh8, x10, x11, r29
6464
; CHECK-NEXT: vlda wl7, [p4, #320]
6565
; CHECK-NEXT: vlda wh9, [p4, #416]; vshift.align x2, x2, s0, x10, r3
66-
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25
67-
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x4, x4, s0, x1, r3
68-
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x8, x0, x2, r9
69-
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x6, x6, s0, x3, r3
70-
; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x3, x4, x6, r9
66+
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x8, x0, x2, r9
67+
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x4, x4, s0, x1, r3
68+
; CHECK-NEXT: vlda wl5, [p5], #256; vshuffle x5, x0, x2, r25
69+
; CHECK-NEXT: vlda wh11, [p4, #480]; vshift.align x6, x6, s0, x3, r3
70+
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x3, x4, x6, r9
7171
; CHECK-NEXT: vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29
7272
; CHECK-NEXT: vshuffle x1, x3, x5, r13
7373
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh1, bmh1, x8, x9, r29
7474
; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29
75-
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh3, bmh3, x3, x9, r29
76-
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh2, bmh2, x10, x9, r29
77-
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29
75+
; CHECK-NEXT: and r3, r3, r0; mov p4, p7; vmac.f bmh7, bmh7, x8, x5, r29
76+
; CHECK-NEXT: add r3, r3, #34; vmac.f bmh5, bmh5, x1, x5, r29
77+
; CHECK-NEXT: vmac.f bml2, bml2, x3, x5, r29
7878
; CHECK-NEXT: .L_LEnd0:
79-
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh5, bmh5, x1, x5, r29
79+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bml0, bml0, x10, x5, r29
8080
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
81-
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bml2, bml2, x3, x5, r29
82-
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
81+
; CHECK-NEXT: nopb ; nopa ; nops ; nopxm ; vmac.f bmh3, bmh3, x3, x9, r29
82+
; CHECK-NEXT: vmac.f bmh2, bmh2, x10, x9, r29
8383
; CHECK-NEXT: vmac.f bml3, bml3, x1, x7, r29
8484
; CHECK-NEXT: vmac.f bml6, bml6, x3, x7, r29
8585
; CHECK-NEXT: vmac.f bml5, bml5, x10, x7, r29

0 commit comments

Comments
 (0)