|
| 1 | +// RUN: xdsl-opt %s -p canonicalize --split-input-file | filecheck %s |
| 2 | + |
| 3 | +// Canonicalization input: @gauss_seidel loads %a once and runs three identical csl_stencil.apply ops, each given its own tensor.empty() accumulator, storing every result to %b. |
| 4 | +builtin.module { |
| 5 | + func.func @gauss_seidel(%a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, %b : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) { |
| 6 | + %0 = stencil.load %a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> -> !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>> |
| 7 | + // First apply: region 1 reads the 255-element slice at [1, 0] and inserts it into the 510-element accumulator (%4 is passed as the offset operand); region 2 yields the accumulator unchanged. |
| 8 | + %1 = tensor.empty() : tensor<510xf32> |
| 9 | + %2 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %1 : tensor<510xf32>) <{"num_chunks" = 2, "topo" = #dmp.topo<1022x510>, "swaps" = [#csl_stencil.exchange<to [1, 0]>, #csl_stencil.exchange<to [-1, 0]>, #csl_stencil.exchange<to [0, 1]>, #csl_stencil.exchange<to [0, -1]>]}> -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({ |
| 10 | + ^0(%3 : memref<4xtensor<255xf32>>, %4 : index, %5 : tensor<510xf32>): |
| 11 | + %6 = csl_stencil.access %3[1, 0] : memref<4xtensor<255xf32>> |
| 12 | + %7 = "tensor.insert_slice"(%6, %5, %4) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32> |
| 13 | + csl_stencil.yield %7 : tensor<510xf32> |
| 14 | + }, { |
| 15 | + ^0(%8 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %9 : tensor<510xf32>): |
| 16 | + csl_stencil.yield %9 : tensor<510xf32> |
| 17 | + }) |
| 18 | + stencil.store %2 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> |
| 19 | + // Second apply: same attributes and region bodies as the first, but fed by a separate tensor.empty() (%10). |
| 20 | + %10 = tensor.empty() : tensor<510xf32> |
| 21 | + %11 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %10 : tensor<510xf32>) <{"num_chunks" = 2, "topo" = #dmp.topo<1022x510>, "swaps" = [#csl_stencil.exchange<to [1, 0]>, #csl_stencil.exchange<to [-1, 0]>, #csl_stencil.exchange<to [0, 1]>, #csl_stencil.exchange<to [0, -1]>]}> -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({ |
| 22 | + ^0(%12 : memref<4xtensor<255xf32>>, %13 : index, %14 : tensor<510xf32>): |
| 23 | + %15 = csl_stencil.access %12[1, 0] : memref<4xtensor<255xf32>> |
| 24 | + %16 = "tensor.insert_slice"(%15, %14, %13) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32> |
| 25 | + csl_stencil.yield %16 : tensor<510xf32> |
| 26 | + }, { |
| 27 | + ^0(%17 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %18 : tensor<510xf32>): |
| 28 | + csl_stencil.yield %18 : tensor<510xf32> |
| 29 | + }) |
| 30 | + stencil.store %11 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> |
| 31 | + // Third apply: again identical apart from using its own tensor.empty() (%19). |
| 32 | + %19 = tensor.empty() : tensor<510xf32> |
| 33 | + %20 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %19 : tensor<510xf32>) <{"num_chunks" = 2, "topo" = #dmp.topo<1022x510>, "swaps" = [#csl_stencil.exchange<to [1, 0]>, #csl_stencil.exchange<to [-1, 0]>, #csl_stencil.exchange<to [0, 1]>, #csl_stencil.exchange<to [0, -1]>]}> -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({ |
| 34 | + ^0(%21 : memref<4xtensor<255xf32>>, %22 : index, %23 : tensor<510xf32>): |
| 35 | + %24 = csl_stencil.access %21[1, 0] : memref<4xtensor<255xf32>> |
| 36 | + %25 = "tensor.insert_slice"(%24, %23, %22) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32> |
| 37 | + csl_stencil.yield %25 : tensor<510xf32> |
| 38 | + }, { |
| 39 | + ^0(%26 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %27 : tensor<510xf32>): |
| 40 | + csl_stencil.yield %27 : tensor<510xf32> |
| 41 | + }) |
| 42 | + stencil.store %20 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> |
| 43 | + func.return |
| 44 | + } |
| 45 | +} |
| 46 | + |
| 47 | +// Expected output: only one tensor.empty() (%1) remains and it is passed to all three applies; the ops inside the regions and the stores are the same as in the input. |
| 48 | +// CHECK-NEXT: builtin.module { |
| 49 | +// CHECK-NEXT: func.func @gauss_seidel(%a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>, %b : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>>) { |
| 50 | +// CHECK-NEXT: %0 = stencil.load %a : !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> -> !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>> |
| 51 | +// CHECK-NEXT: %1 = tensor.empty() : tensor<510xf32> |
| 52 | +// CHECK-NEXT: %2 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %1 : tensor<510xf32>) -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({ |
| 53 | +// CHECK-NEXT: ^0(%3 : memref<4xtensor<255xf32>>, %4 : index, %5 : tensor<510xf32>): |
| 54 | +// CHECK-NEXT: %6 = csl_stencil.access %3[1, 0] : memref<4xtensor<255xf32>> |
| 55 | +// CHECK-NEXT: %7 = "tensor.insert_slice"(%6, %5, %4) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32> |
| 56 | +// CHECK-NEXT: csl_stencil.yield %7 : tensor<510xf32> |
| 57 | +// CHECK-NEXT: }, { |
| 58 | +// CHECK-NEXT: ^1(%8 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %9 : tensor<510xf32>): |
| 59 | +// CHECK-NEXT: csl_stencil.yield %9 : tensor<510xf32> |
| 60 | +// CHECK-NEXT: }) |
| 61 | +// CHECK-NEXT: stencil.store %2 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> |
| 62 | +// CHECK-NEXT: %3 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %1 : tensor<510xf32>) -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({ |
| 63 | +// CHECK-NEXT: ^0(%4 : memref<4xtensor<255xf32>>, %5 : index, %6 : tensor<510xf32>): |
| 64 | +// CHECK-NEXT: %7 = csl_stencil.access %4[1, 0] : memref<4xtensor<255xf32>> |
| 65 | +// CHECK-NEXT: %8 = "tensor.insert_slice"(%7, %6, %5) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32> |
| 66 | +// CHECK-NEXT: csl_stencil.yield %8 : tensor<510xf32> |
| 67 | +// CHECK-NEXT: }, { |
| 68 | +// CHECK-NEXT: ^1(%9 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %10 : tensor<510xf32>): |
| 69 | +// CHECK-NEXT: csl_stencil.yield %10 : tensor<510xf32> |
| 70 | +// CHECK-NEXT: }) |
| 71 | +// CHECK-NEXT: stencil.store %3 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> |
| 72 | +// CHECK-NEXT: %4 = csl_stencil.apply(%0 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %1 : tensor<510xf32>) -> (!stencil.temp<[0,1]x[0,1]xtensor<510xf32>>) ({ |
| 73 | +// CHECK-NEXT: ^0(%5 : memref<4xtensor<255xf32>>, %6 : index, %7 : tensor<510xf32>): |
| 74 | +// CHECK-NEXT: %8 = csl_stencil.access %5[1, 0] : memref<4xtensor<255xf32>> |
| 75 | +// CHECK-NEXT: %9 = "tensor.insert_slice"(%8, %7, %6) <{"static_offsets" = array<i64: 0>, "static_sizes" = array<i64: 255>, "static_strides" = array<i64: 1>, "operandSegmentSizes" = array<i32: 1, 1, 1, 0, 0>}> : (tensor<255xf32>, tensor<510xf32>, index) -> tensor<510xf32> |
| 76 | +// CHECK-NEXT: csl_stencil.yield %9 : tensor<510xf32> |
| 77 | +// CHECK-NEXT: }, { |
| 78 | +// CHECK-NEXT: ^1(%10 : !stencil.temp<[-1,2]x[-1,2]xtensor<512xf32>>, %11 : tensor<510xf32>): |
| 79 | +// CHECK-NEXT: csl_stencil.yield %11 : tensor<510xf32> |
| 80 | +// CHECK-NEXT: }) |
| 81 | +// CHECK-NEXT: stencil.store %4 to %b ([0, 0] : [1, 1]) : !stencil.temp<[0,1]x[0,1]xtensor<510xf32>> to !stencil.field<[-1,1023]x[-1,511]xtensor<512xf32>> |
| 82 | +// CHECK-NEXT: func.return |
| 83 | +// CHECK-NEXT: } |
| 84 | +// CHECK-NEXT: } |
0 commit comments