@@ -16,14 +16,18 @@ module {
1616 // Specialize two inner-most for loops into the wrap-and-stride list, and leave one outer-most for loop unchanged.
1717
1818 // CHECK-LABEL: func0
19- // CHECK: scf.for %[[EVENT0:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%[[EVENT1:.*]] = %{{.*}}) -> (!air.async.token) {
20- // CHECK-NEXT: %[[EVENT2:.*]] = air.channel.put async{{.*}}@channel_0[%c0{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[EVENT0]], %c0{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId22} : (memref<512x512xbf16>)
21- // CHECK-NEXT: %[[EVENT3:.*]] = air.channel.put async{{.*}}@channel_0[%c1{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[EVENT0]], %c32768{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId23} : (memref<512x512xbf16>)
22- // CHECK-NEXT: %[[EVENT4:.*]] = air.channel.put async{{.*}}@channel_0[%c2{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[EVENT0]], %c65536{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId24} : (memref<512x512xbf16>)
23- // CHECK-NEXT: %[[EVENT5:.*]] = air.channel.put async{{.*}}@channel_0[%c3{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[EVENT0]], %c98304{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId25} : (memref<512x512xbf16>)
24- // CHECK-NEXT: %[[EVENT6:.*]] = air.wait_all async [%[[EVENT2]], %[[EVENT3]], %[[EVENT4]], %[[EVENT5]]]
25- // CHECK-NEXT: scf.yield %[[EVENT6]] : !air.async.token
26- // CHECK-NEXT: }
19+ // CHECK: scf.for %[[FOR0IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%[[FOR0IA:.*]] = %{{.*}})
20+ // CHECK: %[[PUT0:.*]] = air.channel.put async{{.*}}@channel_0[%c0{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[FOR0IV]], %c0{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId22} : (memref<512x512xbf16>)
21+ // CHECK: scf.yield %[[PUT0]] : !air.async.token
22+ // CHECK: scf.for %[[FOR1IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%[[FOR1IA:.*]] = %{{.*}})
23+ // CHECK: %[[PUT1:.*]] = air.channel.put async{{.*}}@channel_0[%c1{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[FOR1IV]], %c32768{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId23} : (memref<512x512xbf16>)
24+ // CHECK: scf.yield %[[PUT1]] : !air.async.token
25+ // CHECK: scf.for %[[FOR2IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%[[FOR2IA:.*]] = %{{.*}})
26+ // CHECK: %[[PUT2:.*]] = air.channel.put async{{.*}}@channel_0[%c2{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[FOR2IV]], %c65536{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId24} : (memref<512x512xbf16>)
27+ // CHECK: scf.yield %[[PUT2]] : !air.async.token
28+ // CHECK: scf.for %[[FOR3IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%[[FOR3IA:.*]] = %{{.*}})
29+ // CHECK: %[[PUT3:.*]] = air.channel.put async{{.*}}@channel_0[%c3{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %[[FOR3IV]], %c98304{{.*}}] [%c2{{.*}}, %c8{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId25} : (memref<512x512xbf16>)
30+ // CHECK: scf.yield %[[PUT3]] : !air.async.token
2731
2832 // AIE1-LABEL: func0
2933 // AIE1-COUNT-3: scf.for
@@ -68,16 +72,21 @@ module {
6872 // The second to fourth air.channel.puts can only fold one inner-most for loop into wrap-and-stride list due to having non-zero offsets at 3rd dimension.
6973
7074 // CHECK: air.channel.put async{{.*}}@channel_0[%c0{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c0{{.*}}, %c0{{.*}}, %c0{{.*}}] [%c2{{.*}}, %c2{{.*}}, %c512{{.*}}, %c64{{.*}}] [%c0{{.*}}, %c256{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId26} : (memref<512x512xbf16>)
71- // CHECK: scf.for %[[EVENT0:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
72- // CHECK-NEXT: %[[EVENT2:.*]] = scf.for %[[EVENT1:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
73- // CHECK: %[[EVENT3:.*]] = air.channel.put async{{.*}}@channel_0[%c1{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c1{{.*}}, %c0{{.*}}, %[[EVENT1]]] [%c8{{.*}}, %c1{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c32768{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId27} : (memref<512x512xbf16>)
74- // CHECK: %[[EVENT4:.*]] = air.channel.put async{{.*}}@channel_0[%c2{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c2{{.*}}, %c0{{.*}}, %[[EVENT1]]] [%c8{{.*}}, %c1{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c32768{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId28} : (memref<512x512xbf16>)
75- // CHECK: %[[EVENT5:.*]] = air.channel.put async{{.*}}@channel_0[%c3{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c3{{.*}}, %c0{{.*}}, %[[EVENT1]]] [%c8{{.*}}, %c1{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c32768{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId29} : (memref<512x512xbf16>)
76- // CHECK: %[[EVENT6:.*]] = air.wait_all async [%[[EVENT3]], %[[EVENT4]], %[[EVENT5]]]
77- // CHECK: scf.yield %[[EVENT6]] : !air.async.token
78- // CHECK: }
79- // CHECK: scf.yield %[[EVENT2]] : !air.async.token
80- // CHECK: }
75+ // CHECK: scf.for %[[FOR0IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
76+ // CHECK-NEXT: %[[FOR3IV:.*]] = scf.for %[[FOR0IA:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
77+ // CHECK: %[[PUT0:.*]] = air.channel.put async{{.*}}@channel_0[%c1{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c1{{.*}}, %c0{{.*}}, %[[FOR0IA]]] [%c8{{.*}}, %c1{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c32768{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId27} : (memref<512x512xbf16>)
78+ // CHECK: scf.yield %[[PUT0]] : !air.async.token
79+ // CHECK: scf.yield %[[FOR3IV]] : !air.async.token
80+ // CHECK: scf.for %[[FOR1IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
81+ // CHECK-NEXT: %[[FOR4IV:.*]] = scf.for %[[FOR1IA:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
82+ // CHECK: %[[PUT1:.*]] = air.channel.put async{{.*}}@channel_0[%c2{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c2{{.*}}, %c0{{.*}}, %[[FOR1IA]]] [%c8{{.*}}, %c1{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c32768{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId28} : (memref<512x512xbf16>)
83+ // CHECK: scf.yield %[[PUT1]] : !air.async.token
84+ // CHECK: scf.yield %[[FOR4IV]] : !air.async.token
85+ // CHECK: scf.for %[[FOR2IV:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
86+ // CHECK-NEXT: %[[FOR5IV:.*]] = scf.for %[[FOR2IA:.*]] = %c0{{.*}} to %c512{{.*}} step %c256{{.*}} iter_args(%{{.*}} = %{{.*}})
87+ // CHECK: %[[PUT2:.*]] = air.channel.put async{{.*}}@channel_0[%c3{{.*}}, %c0{{.*}}] (%{{.*}}[%c0{{.*}}, %c3{{.*}}, %c0{{.*}}, %[[FOR2IA]]] [%c8{{.*}}, %c1{{.*}}, %c64{{.*}}, %c64{{.*}}] [%c32768{{.*}}, %c64{{.*}}, %c512{{.*}}, %c1{{.*}}]) {metadata = @airMemcpyId29} : (memref<512x512xbf16>)
88+ // CHECK: scf.yield %[[PUT2]] : !air.async.token
89+ // CHECK: scf.yield %[[FOR5IV]] : !air.async.token
8190
8291 // AIE1-LABEL: func1
8392 // AIE1-COUNT-3: scf.for
0 commit comments