@@ -30,7 +30,7 @@ builtin.module {
30
30
}
31
31
}
32
32
33
- // CHECK-NEXT: builtin.module {
33
+ // CHECK: builtin.module {
34
34
// CHECK-NEXT: func.func @gauss_seidel_func(%a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, %b : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) {
35
35
// CHECK-NEXT: %0 = stencil.load %a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> -> !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>
36
36
// CHECK-NEXT: %pref = "csl_stencil.prefetch"(%0) <{"topo" = #dmp.topo<1022x510>, "swaps" = [#csl_stencil.exchange<to [1, 0]>, #csl_stencil.exchange<to [-1, 0]>, #csl_stencil.exchange<to [0, 1]>, #csl_stencil.exchange<to [0, -1]>]}> : (!stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>) -> memref<4xtensor<510xf32>>
@@ -59,8 +59,7 @@ builtin.module {
59
59
// CHECK-NEXT: }
60
60
// CHECK-NEXT: }
61
61
62
-
63
- // CHECK-GENERIC-NEXT: "builtin.module"() ({
62
+ // CHECK-GENERIC: "builtin.module"() ({
64
63
// CHECK-GENERIC-NEXT: "func.func"() <{"sym_name" = "gauss_seidel_func", "function_type" = (!stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) -> ()}> ({
65
64
// CHECK-GENERIC-NEXT: ^0(%a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, %b : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>):
66
65
// CHECK-GENERIC-NEXT: %0 = "stencil.load"(%a) : (!stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) -> !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>
@@ -90,3 +89,117 @@ builtin.module {
90
89
// CHECK-GENERIC-NEXT: "func.return"() : () -> ()
91
90
// CHECK-GENERIC-NEXT: }) : () -> ()
92
91
// CHECK-GENERIC-NEXT: }) : () -> ()
92
+
93
+ // -----
94
+
95
+ builtin.module { // Test input: one function whose body uses a two-region csl_stencil.apply op.
96
+ func.func @gauss_seidel (%a : !stencil.field <[-1 ,1023 ]x [-1 ,511 ]xtensor <512 xf32 >>, %b : !stencil.field <[-1 ,1023 ]x [-1 ,511 ]xtensor <512 xf32 >>) {
97
+ %0 = stencil.load %a : !stencil.field <[-1 ,1023 ]x [-1 ,511 ]xtensor <512 xf32 >> -> !stencil.temp <[-1 ,2 ]x [-1 ,2 ]xtensor <512 xf32 >>
98
+ // %1 is an empty tensor<510xf32> passed as the second operand of the apply below; the apply sets num_chunks = 2 and its first region receives tensor<255xf32> pieces through %recv.
99
+ %1 = tensor.empty () : tensor <510 xf32 >
100
+ %2 = csl_stencil.apply (%0 : !stencil.temp <[-1 ,2 ]x [-1 ,2 ]xtensor <512 xf32 >>, %1 : tensor <510 xf32 >) <{" num_chunks" = 2 , " topo" = #dmp.topo <1022 x510 >, " swaps" = [#csl_stencil.exchange <to [1 , 0 ]>, #csl_stencil.exchange <to [-1 , 0 ]>, #csl_stencil.exchange <to [0 , 1 ]>, #csl_stencil.exchange <to [0 , -1 ]>]}> -> (!stencil.temp <[0 ,1 ]x [0 ,1 ]xtensor <510 xf32 >>) ({
101
+ ^0 (%recv : memref <4 xtensor <255 xf32 >>, %offset : index , %iter_arg : tensor <510 xf32 >):
102
+ // First region: reduces chunks from neighbours into one chunk (clear_recv_buf_cb). It reads %recv at the four offsets that are also listed in the "swaps" property of the apply.
103
+ %4 = csl_stencil.access %recv [1 , 0 ] : memref <4 xtensor <255 xf32 >>
104
+ %5 = csl_stencil.access %recv [-1 , 0 ] : memref <4 xtensor <255 xf32 >>
105
+ %6 = csl_stencil.access %recv [0 , 1 ] : memref <4 xtensor <255 xf32 >>
106
+ %7 = csl_stencil.access %recv [0 , -1 ] : memref <4 xtensor <255 xf32 >>
107
+ // Sum the four accessed tensor<255xf32> values pairwise into %10.
108
+ %8 = arith.addf %4 , %5 : tensor <255 xf32 >
109
+ %9 = arith.addf %8 , %6 : tensor <255 xf32 >
110
+ %10 = arith.addf %9 , %7 : tensor <255 xf32 >
111
+ // Insert the 255-element sum into %iter_arg (with %offset passed as an operand) and yield the resulting tensor<510xf32>.
112
+ %11 = " tensor.insert_slice" (%10 , %iter_arg , %offset ) <{" static_offsets" = array<i64 : 0 >, " static_sizes" = array<i64 : 255 >, " static_strides" = array<i64 : 1 >, " operandSegmentSizes" = array<i32 : 1 , 1 , 1 , 0 , 0 >}> : (tensor <255 xf32 >, tensor <510 xf32 >, index ) -> tensor <510 xf32 >
113
+ csl_stencil.yield %11 : tensor <510 xf32 >
114
+ }, {
115
+ ^0 (%3 : !stencil.temp <[-1 ,2 ]x [-1 ,2 ]xtensor <512 xf32 >>, %rcv : tensor <510 xf32 >):
116
+ // Second region: takes combined chunks and applies further compute (communicate_cb). It accesses %3 at [0, 0] twice and extracts two 510-element slices with static offsets 1 and -1.
117
+ %12 = csl_stencil.access %3 [0 , 0 ] : !stencil.temp <[-1 ,2 ]x [-1 ,2 ]xtensor <512 xf32 >>
118
+ %13 = csl_stencil.access %3 [0 , 0 ] : !stencil.temp <[-1 ,2 ]x [-1 ,2 ]xtensor <512 xf32 >>
119
+ %14 = " tensor.extract_slice" (%12 ) <{" static_offsets" = array<i64 : 1 >, " static_sizes" = array<i64 : 510 >, " static_strides" = array<i64 : 1 >, " operandSegmentSizes" = array<i32 : 1 , 0 , 0 , 0 >}> : (tensor <512 xf32 >) -> tensor <510 xf32 >
120
+ %15 = " tensor.extract_slice" (%13 ) <{" static_offsets" = array<i64 : -1 >, " static_sizes" = array<i64 : 510 >, " static_strides" = array<i64 : 1 >, " operandSegmentSizes" = array<i32 : 1 , 0 , 0 , 0 >}> : (tensor <512 xf32 >) -> tensor <510 xf32 >
121
+ // Add both extracted slices to the %rcv block argument.
122
+ %16 = arith.addf %rcv , %14 : tensor <510 xf32 >
123
+ %17 = arith.addf %16 , %15 : tensor <510 xf32 >
124
+ // Multiply the sum element-wise by a tensor filled with the constant 1.666600e-01.
125
+ %18 = arith.constant 1.666600e-01 : f32
126
+ %19 = tensor.empty () : tensor <510 xf32 >
127
+ %20 = linalg.fill ins (%18 : f32 ) outs (%19 : tensor <510 xf32 >) -> tensor <510 xf32 >
128
+ %21 = arith.mulf %17 , %20 : tensor <510 xf32 >
129
+ // Yield the scaled tensor<510xf32> from the second region.
130
+ csl_stencil.yield %21 : tensor <510 xf32 >
131
+ })
132
+ // Store the apply result %2 into field %b over the bounds [0, 0] : [1, 1].
133
+ stencil.store %2 to %b ([0 , 0 ] : [1 , 1 ]) : !stencil.temp <[0 ,1 ]x [0 ,1 ]xtensor <510 xf32 >> to !stencil.field <[-1 ,1023 ]x [-1 ,511 ]xtensor <512 xf32 >>
134
+ func.return
135
+ }
136
+ }
137
+
138
+ // CHECK: builtin.module {
139
+ // CHECK-NEXT: func.func @gauss_seidel(%a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, %b : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) {
140
+ // CHECK-NEXT: %0 = stencil.load %a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> -> !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>
141
+ // CHECK-NEXT: %1 = tensor.empty() : tensor<510xf32>
142
+ // CHECK-NEXT: %2 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %1 : tensor<510xf32>) -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({
143
+ // CHECK-NEXT: ^0(%recv : memref<4xtensor<255xf32>>, %offset : index, %iter_arg : tensor<510xf32>):
144
+ // CHECK-NEXT: %3 = csl_stencil.access %recv[1, 0] : memref<4xtensor<255xf32>>
145
+ // CHECK-NEXT: %4 = csl_stencil.access %recv[-1, 0] : memref<4xtensor<255xf32>>
146
+ // CHECK-NEXT: %5 = csl_stencil.access %recv[0, 1] : memref<4xtensor<255xf32>>
147
+ // CHECK-NEXT: %6 = csl_stencil.access %recv[0, -1] : memref<4xtensor<255xf32>>
148
+ // CHECK-NEXT: %7 = arith.addf %3, %4 : tensor<255xf32>
149
+ // CHECK-NEXT: %8 = arith.addf %7, %5 : tensor<255xf32>
150
+ // CHECK-NEXT: %9 = arith.addf %8, %6 : tensor<255xf32>
151
+ // CHECK-NEXT: %10 = "tensor.insert_slice"(%9, %iter_arg, %offset) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32>
152
+ // CHECK-NEXT: csl_stencil.yield %10 : tensor<510xf32>
153
+ // CHECK-NEXT: }, {
154
+ // CHECK-NEXT: ^1(%11 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %rcv : tensor<510xf32>):
155
+ // CHECK-NEXT: %12 = csl_stencil.access %11[0, 0] : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>
156
+ // CHECK-NEXT: %13 = csl_stencil.access %11[0, 0] : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>
157
+ // CHECK-NEXT: %14 = "tensor.extract_slice"(%12) <{"static_offsets" = array<i64: 1>, "static_sizes" = array<i64: 510>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 0, 0, 0>}> : (tensor<512xf32>) -> tensor<510xf32>
158
+ // CHECK-NEXT: %15 = "tensor.extract_slice"(%13) <{"static_offsets" = array<i64: -1>, "static_sizes" = array<i64: 510>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 0, 0, 0>}> : (tensor<512xf32>) -> tensor<510xf32>
159
+ // CHECK-NEXT: %16 = arith.addf %rcv, %14 : tensor<510xf32>
160
+ // CHECK-NEXT: %17 = arith.addf %16, %15 : tensor<510xf32>
161
+ // CHECK-NEXT: %18 = arith.constant 1.666600e-01 : f32
162
+ // CHECK-NEXT: %19 = tensor.empty() : tensor<510xf32>
163
+ // CHECK-NEXT: %20 = linalg.fill ins(%18 : f32) outs(%19 : tensor<510xf32>) -> tensor<510xf32>
164
+ // CHECK-NEXT: %21 = arith.mulf %17, %20 : tensor<510xf32>
165
+ // CHECK-NEXT: csl_stencil.yield %21 : tensor<510xf32>
166
+ // CHECK-NEXT: })
167
+ // CHECK-NEXT: stencil.store %2 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>
168
+ // CHECK-NEXT: func.return
169
+ // CHECK-NEXT: }
170
+ // CHECK-NEXT: }
171
+
172
+ // CHECK-GENERIC: "builtin.module"() ({
173
+ // CHECK-GENERIC-NEXT: "func.func"() <{"sym_name" = "gauss_seidel", "function_type" = (!stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) -> ()}> ({
174
+ // CHECK-GENERIC-NEXT: ^0(%a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, %b : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>):
175
+ // CHECK-GENERIC-NEXT: %0 = "stencil.load"(%a) : (!stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) -> !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>
176
+ // CHECK-GENERIC-NEXT: %1 = "tensor.empty"() : () -> tensor<510xf32>
177
+ // CHECK-GENERIC-NEXT: %2 = "csl_stencil.apply"(%0, %1) <{"num_chunks" = 2 : i64, "topo" = #dmp.topo<1022x510>, "swaps" = [#csl_stencil.exchange<to [1, 0]>, #csl_stencil.exchange<to [-1, 0]>, #csl_stencil.exchange<to [0, 1]>, #csl_stencil.exchange<to [0, -1]>]}> ({
178
+ // CHECK-GENERIC-NEXT: ^1(%recv : memref<4xtensor<255xf32>>, %offset : index, %iter_arg : tensor<510xf32>):
179
+ // CHECK-GENERIC-NEXT: %3 = "csl_stencil.access"(%recv) <{"offset" = #stencil.index[1, 0], "offset_mapping" = #stencil.index[0, 1]}> : (memref<4xtensor<255xf32>>) -> tensor<255xf32>
180
+ // CHECK-GENERIC-NEXT: %4 = "csl_stencil.access"(%recv) <{"offset" = #stencil.index[-1, 0], "offset_mapping" = #stencil.index[0, 1]}> : (memref<4xtensor<255xf32>>) -> tensor<255xf32>
181
+ // CHECK-GENERIC-NEXT: %5 = "csl_stencil.access"(%recv) <{"offset" = #stencil.index[0, 1], "offset_mapping" = #stencil.index[0, 1]}> : (memref<4xtensor<255xf32>>) -> tensor<255xf32>
182
+ // CHECK-GENERIC-NEXT: %6 = "csl_stencil.access"(%recv) <{"offset" = #stencil.index[0, -1], "offset_mapping" = #stencil.index[0, 1]}> : (memref<4xtensor<255xf32>>) -> tensor<255xf32>
183
+ // CHECK-GENERIC-NEXT: %7 = "arith.addf"(%3, %4) <{"fastmath" = #arith.fastmath<none>}> : (tensor<255xf32>, tensor<255xf32>) -> tensor<255xf32>
184
+ // CHECK-GENERIC-NEXT: %8 = "arith.addf"(%7, %5) <{"fastmath" = #arith.fastmath<none>}> : (tensor<255xf32>, tensor<255xf32>) -> tensor<255xf32>
185
+ // CHECK-GENERIC-NEXT: %9 = "arith.addf"(%8, %6) <{"fastmath" = #arith.fastmath<none>}> : (tensor<255xf32>, tensor<255xf32>) -> tensor<255xf32>
186
+ // CHECK-GENERIC-NEXT: %10 = "tensor.insert_slice"(%9, %iter_arg, %offset) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32>
187
+ // CHECK-GENERIC-NEXT: "csl_stencil.yield"(%10) : (tensor<510xf32>) -> ()
188
+ // CHECK-GENERIC-NEXT: }, {
189
+ // CHECK-GENERIC-NEXT: ^2(%11 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %rcv : tensor<510xf32>):
190
+ // CHECK-GENERIC-NEXT: %12 = "csl_stencil.access"(%11) <{"offset" = #stencil.index[0, 0], "offset_mapping" = #stencil.index[0, 1]}> : (!stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>) -> tensor<512xf32>
191
+ // CHECK-GENERIC-NEXT: %13 = "csl_stencil.access"(%11) <{"offset" = #stencil.index[0, 0], "offset_mapping" = #stencil.index[0, 1]}> : (!stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>) -> tensor<512xf32>
192
+ // CHECK-GENERIC-NEXT: %14 = "tensor.extract_slice"(%12) <{"static_offsets" = array<i64: 1>, "static_sizes" = array<i64: 510>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 0, 0, 0>}> : (tensor<512xf32>) -> tensor<510xf32>
193
+ // CHECK-GENERIC-NEXT: %15 = "tensor.extract_slice"(%13) <{"static_offsets" = array<i64: -1>, "static_sizes" = array<i64: 510>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 0, 0, 0>}> : (tensor<512xf32>) -> tensor<510xf32>
194
+ // CHECK-GENERIC-NEXT: %16 = "arith.addf"(%rcv, %14) <{"fastmath" = #arith.fastmath<none>}> : (tensor<510xf32>, tensor<510xf32>) -> tensor<510xf32>
195
+ // CHECK-GENERIC-NEXT: %17 = "arith.addf"(%16, %15) <{"fastmath" = #arith.fastmath<none>}> : (tensor<510xf32>, tensor<510xf32>) -> tensor<510xf32>
196
+ // CHECK-GENERIC-NEXT: %18 = "arith.constant"() <{"value" = 1.666600e-01 : f32}> : () -> f32
197
+ // CHECK-GENERIC-NEXT: %19 = "tensor.empty"() : () -> tensor<510xf32>
198
+ // CHECK-GENERIC-NEXT: %20 = "linalg.fill"(%18, %19) <{"operandSegmentSizes" = array<i32: 1, 1>}> : (f32, tensor<510xf32>) -> tensor<510xf32>
199
+ // CHECK-GENERIC-NEXT: %21 = "arith.mulf"(%17, %20) <{"fastmath" = #arith.fastmath<none>}> : (tensor<510xf32>, tensor<510xf32>) -> tensor<510xf32>
200
+ // CHECK-GENERIC-NEXT: "csl_stencil.yield"(%21) : (tensor<510xf32>) -> ()
201
+ // CHECK-GENERIC-NEXT: }) : (!stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, tensor<510xf32>) -> !stencil.temp<[0,1]x[0,1]xtensor<510xf32>>
202
+ // CHECK-GENERIC-NEXT: "stencil.store"(%2, %b) {"bounds" = #stencil.bounds[0, 0] : [1, 1]} : (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>, !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) -> ()
203
+ // CHECK-GENERIC-NEXT: "func.return"() : () -> ()
204
+ // CHECK-GENERIC-NEXT: }) : () -> ()
205
+ // CHECK-GENERIC-NEXT: }) : () -> ()
0 commit comments