From f268f0e493f9bf0bb1932cce7f03982c250b2bf6 Mon Sep 17 00:00:00 2001 From: abisca Date: Fri, 3 Mar 2023 06:18:10 -0700 Subject: [PATCH] Added channel unit-tests from jgmelber's mlir-air fork. --- .../00_worker2worker/aie.mlir | 18 +++++ .../00_worker2worker/air.mlir | 26 ++++++ test/50_air_channel_move/01_one_tile/aie.mlir | 20 +++++ test/50_air_channel_move/01_one_tile/air.mlir | 22 +++++ .../01_one_tile/channels.mlir | 28 +++++++ .../02_whole_vector/aie.mlir | 22 +++++ .../02_whole_vector/air.mlir | 23 ++++++ .../02_whole_vector/channels.mlir | 34 ++++++++ test/50_air_channel_move/03_reduce/aie.mlir | 24 ++++++ test/50_air_channel_move/03_reduce/air.mlir | 27 +++++++ .../03_reduce/channels.mlir | 35 ++++++++ .../04_two_inputs/aie.mlir | 28 +++++++ .../04_two_inputs/air.mlir | 29 +++++++ .../04_two_inputs/channels.mlir | 43 ++++++++++ .../05_mini_transformer_pipe/aie.mlir | 57 +++++++++++++ .../05_mini_transformer_pipe/channels.mlir | 80 +++++++++++++++++++ .../06_multiple_worker_to_worker/aie.mlir | 32 ++++++++ .../channels.mlir | 22 +++++ test/50_air_channel_move/config.make | 40 ++++++++++ 19 files changed, 610 insertions(+) create mode 100755 test/50_air_channel_move/00_worker2worker/aie.mlir create mode 100755 test/50_air_channel_move/00_worker2worker/air.mlir create mode 100755 test/50_air_channel_move/01_one_tile/aie.mlir create mode 100755 test/50_air_channel_move/01_one_tile/air.mlir create mode 100755 test/50_air_channel_move/01_one_tile/channels.mlir create mode 100755 test/50_air_channel_move/02_whole_vector/aie.mlir create mode 100755 test/50_air_channel_move/02_whole_vector/air.mlir create mode 100755 test/50_air_channel_move/02_whole_vector/channels.mlir create mode 100755 test/50_air_channel_move/03_reduce/aie.mlir create mode 100755 test/50_air_channel_move/03_reduce/air.mlir create mode 100755 test/50_air_channel_move/03_reduce/channels.mlir create mode 100755 test/50_air_channel_move/04_two_inputs/aie.mlir create mode 100755 test/50_air_channel_move/04_two_inputs/air.mlir create mode 100755 test/50_air_channel_move/04_two_inputs/channels.mlir create mode 100755 test/50_air_channel_move/05_mini_transformer_pipe/aie.mlir create mode 100755 test/50_air_channel_move/05_mini_transformer_pipe/channels.mlir create mode 100755 test/50_air_channel_move/06_multiple_worker_to_worker/aie.mlir create mode 100755 test/50_air_channel_move/06_multiple_worker_to_worker/channels.mlir create mode 100755 test/50_air_channel_move/config.make diff --git a/test/50_air_channel_move/00_worker2worker/aie.mlir b/test/50_air_channel_move/00_worker2worker/aie.mlir new file mode 100755 index 000000000..72404d846 --- /dev/null +++ b/test/50_air_channel_move/00_worker2worker/aie.mlir @@ -0,0 +1,18 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(1, 2) + %2 = AIE.objectFifo.createObjectFifo(%0, {%1}, 2) : !AIE.objectFifo> + %3 = AIE.core(%1) { + %5 = AIE.objectFifo.acquire (%2 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %6 = AIE.objectFifo.subview.access %5[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + AIE.objectFifo.release (%2 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_1_2.elf"} + %4 = AIE.core(%0) { + %5 = AIE.objectFifo.acquire (%2 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %6 = AIE.objectFifo.subview.access %5[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + AIE.objectFifo.release (%2 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} + diff --git a/test/50_air_channel_move/00_worker2worker/air.mlir b/test/50_air_channel_move/00_worker2worker/air.mlir new file mode 100755 index 000000000..ce192d703 --- /dev/null +++ b/test/50_air_channel_move/00_worker2worker/air.mlir @@ -0,0 +1,26 @@ +#set = affine_set<()[s0, s1] : (s0 >= 0, s1 == 0)> +#set1 = affine_set<()[s0, s1] : (s0 >= 0, s1 - 1 == 0)> +module { + air.channel @channel_0 [1, 1] + func.func @graph() { + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + air.herd @herd_0 tile (%arg0, %arg1) in (%arg2=%c1, %arg3=%c2) { + %c0 = arith.constant 0 : index + %c32 = arith.constant 32 : index + affine.if #set()[%arg0, %arg1] { + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + air.channel.put @channel_0[] (%alloc[%c0] [%c32] [%c0]) : (memref<32xi32, 2>) + memref.dealloc %alloc : memref<32xi32, 2> + } + affine.if #set1()[%arg0, %arg1] { + %alloc = memref.alloc() {sym_name = "scratch_copy"} : memref<32xi32, 2> + air.channel.get @channel_0[] (%alloc[%c0] [%c32] [%c0]) : (memref<32xi32, 2>) + memref.dealloc %alloc : memref<32xi32, 2> + } + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/01_one_tile/aie.mlir b/test/50_air_channel_move/01_one_tile/aie.mlir new file mode 100755 index 000000000..b372a24d8 --- /dev/null +++ b/test/50_air_channel_move/01_one_tile/aie.mlir @@ -0,0 +1,20 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(2, 0) + %2 = AIE.objectFifo.createObjectFifo(%1, {%0}, 1) : !AIE.objectFifo> + %3 = AIE.objectFifo.createObjectFifo(%0, {%1}, 1) : !AIE.objectFifo> + %4 = AIE.core(%0) { + %5 = AIE.objectFifo.acquire (%2 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %6 = AIE.objectFifo.subview.access %5[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %7 = AIE.objectFifo.acquire (%3 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %8 = AIE.objectFifo.subview.access %7[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + affine.for %arg0 = 0 to 32 { + %9 = affine.load %6[%arg0] : memref<32xi32, 2> + affine.store %9, %8[%arg0] : memref<32xi32, 2> + } + AIE.objectFifo.release (%2 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%3 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} + diff --git a/test/50_air_channel_move/01_one_tile/air.mlir b/test/50_air_channel_move/01_one_tile/air.mlir new file mode 100755 index 000000000..c99cbf1de --- /dev/null +++ b/test/50_air_channel_move/01_one_tile/air.mlir @@ -0,0 +1,22 @@ +module { + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>) { + %c1 = arith.constant 1 : index + air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) args(%arg6=%arg0, %arg7=%arg1) : memref<4096xi32>, memref<4096xi32> { + %c0 = arith.constant 0 : index + %c32 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + %alloc_0 = memref.alloc() {sym_name = "scratch_copy"} : memref<32xi32, 2> + air.dma_memcpy_nd (%alloc[%c0] [%c32] [%c0], %arg6[%c0] [%c32] [%c0]) {id = 1 : i32} : (memref<32xi32, 2>, memref<4096xi32>) + affine.for %arg8 = 0 to 32 { + %0 = affine.load %alloc[%arg8] : memref<32xi32, 2> + affine.store %0, %alloc_0[%arg8] : memref<32xi32, 2> + } + air.dma_memcpy_nd (%arg7[%c0] [%c32] [%c0], %alloc_0[%c0] [%c32] [%c0]) {id = 2 : i32} : (memref<4096xi32>, memref<32xi32, 2>) + memref.dealloc %alloc_0 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/01_one_tile/channels.mlir b/test/50_air_channel_move/01_one_tile/channels.mlir new file mode 100755 index 000000000..888c9c91d --- /dev/null +++ b/test/50_air_channel_move/01_one_tile/channels.mlir @@ -0,0 +1,28 @@ +module { + air.channel @channel_1 [1, 1] + air.channel @channel_0 [1, 1] + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>) { + %c32 = arith.constant 32 : index + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + air.channel.put @channel_0[] (%arg0[%c0] [%c32] [%c0]) : (memref<4096xi32>) + air.channel.get @channel_1[] (%arg1[%c0] [%c32] [%c0]) : (memref<4096xi32>) + air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) { + %c0_0 = arith.constant 0 : index + %c32_1 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + %alloc_2 = memref.alloc() {sym_name = "scratch_copy"} : memref<32xi32, 2> + air.channel.get @channel_0[] (%alloc[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + affine.for %arg6 = 0 to 32 { + %0 = affine.load %alloc[%arg6] : memref<32xi32, 2> + affine.store %0, %alloc_2[%arg6] : memref<32xi32, 2> + } + air.channel.put @channel_1[] (%alloc_2[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + memref.dealloc %alloc_2 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/02_whole_vector/aie.mlir b/test/50_air_channel_move/02_whole_vector/aie.mlir new file mode 100755 index 000000000..b59b8064e --- /dev/null +++ b/test/50_air_channel_move/02_whole_vector/aie.mlir @@ -0,0 +1,22 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(2, 0) + %2 = AIE.objectFifo.createObjectFifo(%1, {%0}, 1) : !AIE.objectFifo> + %3 = AIE.objectFifo.createObjectFifo(%0, {%1}, 1) : !AIE.objectFifo> + %4 = AIE.core(%0) { + affine.for %arg0 = 0 to 4096 step 32 { + %5 = AIE.objectFifo.acquire (%2 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %6 = AIE.objectFifo.subview.access %5[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %7 = AIE.objectFifo.acquire (%3 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %8 = AIE.objectFifo.subview.access %7[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + affine.for %arg1 = 0 to 32 { + %9 = affine.load %6[%arg1] : memref<32xi32, 2> + affine.store %9, %8[%arg1] : memref<32xi32, 2> + } + AIE.objectFifo.release (%2 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%3 : !AIE.objectFifo>, 1) + } + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} + diff --git a/test/50_air_channel_move/02_whole_vector/air.mlir b/test/50_air_channel_move/02_whole_vector/air.mlir new file mode 100755 index 000000000..c42ea7060 --- /dev/null +++ b/test/50_air_channel_move/02_whole_vector/air.mlir @@ -0,0 +1,23 @@ +module { + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>) { + %c1 = arith.constant 1 : index + air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) args(%arg6=%arg0, %arg7=%arg1) : memref<4096xi32>, memref<4096xi32> { + %c0 = arith.constant 0 : index + %c32 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + %alloc_0 = memref.alloc() {sym_name = "scratch_copy"} : memref<32xi32, 2> + affine.for %arg8 = 0 to 4096 step 32 { + air.dma_memcpy_nd (%alloc[%c0] [%c32] [%c0], %arg6[%arg8] [%c32] [%c0]) {id = 1 : i32} : (memref<32xi32, 2>, memref<4096xi32>) + affine.for %arg9 = 0 to 32 { + %0 = affine.load %alloc[%arg9] : memref<32xi32, 2> + affine.store %0, %alloc_0[%arg9] : memref<32xi32, 2> + } + air.dma_memcpy_nd (%arg7[%arg8] [%c32] [%c0], %alloc_0[%c0] [%c32] [%c0]) {id = 2 : i32} : (memref<4096xi32>, memref<32xi32, 2>) + } + memref.dealloc %alloc_0 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} diff --git a/test/50_air_channel_move/02_whole_vector/channels.mlir b/test/50_air_channel_move/02_whole_vector/channels.mlir new file mode 100755 index 000000000..56e95e91f --- /dev/null +++ b/test/50_air_channel_move/02_whole_vector/channels.mlir @@ -0,0 +1,34 @@ +module { + air.channel @channel_1 [1, 1] + air.channel @channel_0 [1, 1] + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>) { + %c32 = arith.constant 32 : index + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + affine.for %arg2 = 0 to 4096 step 32 { + air.channel.put @channel_0[] (%arg0[%arg2] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg2 = 0 to 4096 step 32 { + air.channel.get @channel_1[] (%arg1[%arg2] [%c32] [%c0]) : (memref<4096xi32>) + } + air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) { + %c0_0 = arith.constant 0 : index + %c32_1 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + %alloc_2 = memref.alloc() {sym_name = "scratch_copy"} : memref<32xi32, 2> + affine.for %arg6 = 0 to 4096 step 32 { + air.channel.get @channel_0[] (%alloc[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + affine.for %arg7 = 0 to 32 { + %0 = affine.load %alloc[%arg7] : memref<32xi32, 2> + affine.store %0, %alloc_2[%arg7] : memref<32xi32, 2> + } + air.channel.put @channel_1[] (%alloc_2[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + } + memref.dealloc %alloc_2 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/03_reduce/aie.mlir b/test/50_air_channel_move/03_reduce/aie.mlir new file mode 100755 index 000000000..a31f62bb2 --- /dev/null +++ b/test/50_air_channel_move/03_reduce/aie.mlir @@ -0,0 +1,24 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(2, 0) + %2 = AIE.objectFifo.createObjectFifo(%1, {%0}, 1) : !AIE.objectFifo> + %3 = AIE.objectFifo.createObjectFifo(%0, {%1}, 1) : !AIE.objectFifo> + %4 = AIE.core(%0) { + %5 = AIE.objectFifo.acquire (%3 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %6 = AIE.objectFifo.subview.access %5[0] : !AIE.objectFifoSubview> -> memref<1xi32, 2> + affine.for %arg0 = 0 to 4096 step 32 { + %7 = AIE.objectFifo.acquire (%2 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %8 = AIE.objectFifo.subview.access %7[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + affine.for %arg1 = 0 to 32 { + %9 = affine.load %8[%arg1] : memref<32xi32, 2> + %10 = affine.load %6[0] : memref<1xi32, 2> + %11 = arith.addi %9, %10 : i32 + affine.store %11, %6[0] : memref<1xi32, 2> + } + AIE.objectFifo.release (%2 : !AIE.objectFifo>, 1) + } + AIE.objectFifo.release (%3 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} + diff --git a/test/50_air_channel_move/03_reduce/air.mlir b/test/50_air_channel_move/03_reduce/air.mlir new file mode 100755 index 000000000..b76f9e1ba --- /dev/null +++ b/test/50_air_channel_move/03_reduce/air.mlir @@ -0,0 +1,27 @@ +module { + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<1xi32>) { + %c1 = arith.constant 1 : index + air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) args(%arg6=%arg0, %arg7=%arg1) : memref<4096xi32>, memref<1xi32> { + %c0 = arith.constant 0 : index + %c1_0 = arith.constant 1 : index + %c32 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + %alloc_1 = memref.alloc() {sym_name = "scratch_copy"} : memref<1xi32, 2> + affine.for %arg8 = 0 to 4096 step 32 { + air.dma_memcpy_nd (%alloc[%c0] [%c32] [%c0], %arg6[%arg8] [%c32] [%c0]) {id = 1 : i32} : (memref<32xi32, 2>, memref<4096xi32>) + affine.for %arg9 = 0 to 32 { + %0 = affine.load %alloc[%arg9] : memref<32xi32, 2> + %1 = affine.load %alloc_1[0] : memref<1xi32, 2> + %2 = arith.addi %0, %1 : i32 + affine.store %2, %alloc_1[0] : memref<1xi32, 2> + } + } + air.dma_memcpy_nd (%arg7[%c0] [%c1_0] [%c0], %alloc_1[%c0] [%c1_0] [%c0]) {id = 2 : i32} : (memref<1xi32>, memref<1xi32, 2>) + memref.dealloc %alloc_1 : memref<1xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/03_reduce/channels.mlir b/test/50_air_channel_move/03_reduce/channels.mlir new file mode 100755 index 000000000..ea837d820 --- /dev/null +++ b/test/50_air_channel_move/03_reduce/channels.mlir @@ -0,0 +1,35 @@ +module { + air.channel @channel_1 [1, 1] + air.channel @channel_0 [1, 1] + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<1xi32>) { + %c32 = arith.constant 32 : index + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + affine.for %arg2 = 0 to 4096 step 32 { + air.channel.put @channel_0[] (%arg0[%arg2] [%c32] [%c0]) : (memref<4096xi32>) + } + air.channel.get @channel_1[] (%arg1[%c0] [%c1] [%c0]) : (memref<1xi32>) + air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) { + %c0_0 = arith.constant 0 : index + %c1_1 = arith.constant 1 : index + %c32_2 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + %alloc_3 = memref.alloc() {sym_name = "scratch_copy"} : memref<1xi32, 2> + affine.for %arg6 = 0 to 4096 step 32 { + air.channel.get @channel_0[] (%alloc[%c0_0] [%c32_2] [%c0_0]) : (memref<32xi32, 2>) + affine.for %arg7 = 0 to 32 { + %0 = affine.load %alloc[%arg7] : memref<32xi32, 2> + %1 = affine.load %alloc_3[0] : memref<1xi32, 2> + %2 = arith.addi %0, %1 : i32 + affine.store %2, %alloc_3[0] : memref<1xi32, 2> + } + } + air.channel.put @channel_1[] (%alloc_3[%c0_0] [%c1_1] [%c0_0]) : (memref<1xi32, 2>) + memref.dealloc %alloc_3 : memref<1xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/04_two_inputs/aie.mlir b/test/50_air_channel_move/04_two_inputs/aie.mlir new file mode 100755 index 000000000..9b6dcdd24 --- /dev/null +++ b/test/50_air_channel_move/04_two_inputs/aie.mlir @@ -0,0 +1,28 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(2, 0) + %2 = AIE.objectFifo.createObjectFifo(%1, {%0}, 2) {sym_name = "inA_0_0"} : !AIE.objectFifo> + %3 = AIE.objectFifo.createObjectFifo(%1, {%0}, 2) {sym_name = "inB_0_0"} : !AIE.objectFifo> + %4 = AIE.objectFifo.createObjectFifo(%0, {%1}, 2) {sym_name = "outC_0_0"} : !AIE.objectFifo> + %5 = AIE.core(%0) { + affine.for %arg0 = 0 to 4096 step 32 { + %6 = AIE.objectFifo.acquire (%2 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %7 = AIE.objectFifo.subview.access %6[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %8 = AIE.objectFifo.acquire (%3 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %9 = AIE.objectFifo.subview.access %8[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %10 = AIE.objectFifo.acquire (%4 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %11 = AIE.objectFifo.subview.access %10[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + affine.for %arg1 = 0 to 32 { + %12 = affine.load %7[%arg1] : memref<32xi32, 2> + %13 = affine.load %9[%arg1] : memref<32xi32, 2> + %14 = arith.addi %13, %12 : i32 + affine.store %14, %11[%arg1] : memref<32xi32, 2> + } + AIE.objectFifo.release (%2 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%3 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%4 : !AIE.objectFifo>, 1) + } + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} + diff --git a/test/50_air_channel_move/04_two_inputs/air.mlir b/test/50_air_channel_move/04_two_inputs/air.mlir new file mode 100755 index 000000000..0592debc8 --- /dev/null +++ b/test/50_air_channel_move/04_two_inputs/air.mlir @@ -0,0 +1,29 @@ +module { + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>, %arg2: memref<4096xi32>) { + %c1 = arith.constant 1 : index + air.herd @herd_0 tile (%arg3, %arg4) in (%arg5=%c1, %arg6=%c1) args(%arg7=%arg0, %arg8=%arg1, %arg9=%arg2) : memref<4096xi32>, memref<4096xi32>, memref<4096xi32> { + %c0 = arith.constant 0 : index + %c32 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "inA"} : memref<32xi32, 2> + %alloc_0 = memref.alloc() {sym_name = "inB"} : memref<32xi32, 2> + %alloc_1 = memref.alloc() {sym_name = "outC"} : memref<32xi32, 2> + affine.for %arg10 = 0 to 4096 step 32 { + air.dma_memcpy_nd (%alloc[%c0] [%c32] [%c0], %arg7[%arg10] [%c32] [%c0]) {id = 1 : i32} : (memref<32xi32, 2>, memref<4096xi32>) + air.dma_memcpy_nd (%alloc_0[%c0] [%c32] [%c0], %arg8[%arg10] [%c32] [%c0]) {id = 1 : i32} : (memref<32xi32, 2>, memref<4096xi32>) + affine.for %arg11 = 0 to 32 { + %0 = affine.load %alloc[%arg11] : memref<32xi32, 2> + %1 = affine.load %alloc_0[%arg11] : memref<32xi32, 2> + %2 = arith.addi %1, %0 : i32 + affine.store %2, %alloc_1[%arg11] : memref<32xi32, 2> + } + air.dma_memcpy_nd (%arg9[%arg10] [%c32] [%c0], %alloc_1[%c0] [%c32] [%c0]) {id = 2 : i32} : (memref<4096xi32>, memref<32xi32, 2>) + } + memref.dealloc %alloc_1 : memref<32xi32, 2> + memref.dealloc %alloc_0 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/04_two_inputs/channels.mlir b/test/50_air_channel_move/04_two_inputs/channels.mlir new file mode 100755 index 000000000..b3369f3ae --- /dev/null +++ b/test/50_air_channel_move/04_two_inputs/channels.mlir @@ -0,0 +1,43 @@ +module { + air.channel @channel_2 [1, 1] + air.channel @channel_1 [1, 1] + air.channel @channel_0 [1, 1] + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>, %arg2: memref<4096xi32>) { + %c32 = arith.constant 32 : index + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.put @channel_0[] (%arg0[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.put @channel_1[] (%arg1[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.get @channel_2[] (%arg2[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + air.herd @herd_0 tile (%arg3, %arg4) in (%arg5=%c1, %arg6=%c1) { + %c0_0 = arith.constant 0 : index + %c32_1 = arith.constant 32 : index + %alloc = memref.alloc() {sym_name = "inA"} : memref<32xi32, 2> + %alloc_2 = memref.alloc() {sym_name = "inB"} : memref<32xi32, 2> + %alloc_3 = memref.alloc() {sym_name = "outC"} : memref<32xi32, 2> + affine.for %arg7 = 0 to 4096 step 32 { + air.channel.get @channel_0[] (%alloc[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + air.channel.get @channel_1[] (%alloc_2[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + affine.for %arg8 = 0 to 32 { + %0 = affine.load %alloc[%arg8] : memref<32xi32, 2> + %1 = affine.load %alloc_2[%arg8] : memref<32xi32, 2> + %2 = arith.addi %1, %0 : i32 + affine.store %2, %alloc_3[%arg8] : memref<32xi32, 2> + } + air.channel.put @channel_2[] (%alloc_3[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + } + memref.dealloc %alloc_3 : memref<32xi32, 2> + memref.dealloc %alloc_2 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + air.herd_terminator + } + return + } +} + diff --git a/test/50_air_channel_move/05_mini_transformer_pipe/aie.mlir b/test/50_air_channel_move/05_mini_transformer_pipe/aie.mlir new file mode 100755 index 000000000..4c69ec28f --- /dev/null +++ b/test/50_air_channel_move/05_mini_transformer_pipe/aie.mlir @@ -0,0 +1,57 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(1, 2) + %2 = AIE.tile(2, 0) + %3 = AIE.tile(3, 0) + %4 = AIE.objectFifo.createObjectFifo(%1, {%2}, 1) : !AIE.objectFifo> + %5 = AIE.objectFifo.createObjectFifo(%2, {%1}, 1) : !AIE.objectFifo> + %6 = AIE.objectFifo.createObjectFifo(%2, {%1}, 1) : !AIE.objectFifo> + %7 = AIE.objectFifo.createObjectFifo(%0, {%1}, 1) : !AIE.objectFifo> + %8 = AIE.objectFifo.createObjectFifo(%3, {%0}, 1) : !AIE.objectFifo> + %9 = AIE.objectFifo.createObjectFifo(%3, {%0}, 1) : !AIE.objectFifo> + %10 = AIE.core(%1) { + affine.for %arg0 = 0 to 4096 step 32 { + %12 = AIE.objectFifo.acquire (%7 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %13 = AIE.objectFifo.subview.access %12[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %14 = AIE.objectFifo.acquire (%6 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %15 = AIE.objectFifo.subview.access %14[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %16 = AIE.objectFifo.acquire (%5 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %17 = AIE.objectFifo.subview.access %16[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %18 = AIE.objectFifo.acquire (%4 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %19 = AIE.objectFifo.subview.access %18[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + affine.for %arg1 = 0 to 32 { + %20 = affine.load %15[%arg1] : memref<32xi32, 2> + %21 = affine.load %17[%arg1] : memref<32xi32, 2> + %22 = arith.addi %21, %20 : i32 + %23 = affine.load %13[%arg1] : memref<32xi32, 2> + %24 = arith.addi %22, %23 : i32 + affine.store %24, %19[%arg1] : memref<32xi32, 2> + } + AIE.objectFifo.release (%4 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%5 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%6 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%7 : !AIE.objectFifo>, 1) + } + AIE.end + } {elf_file = "partition_0_core_1_2.elf"} + %11 = AIE.core(%0) { + affine.for %arg0 = 0 to 4096 step 32 { + %12 = AIE.objectFifo.acquire (%9 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %13 = AIE.objectFifo.subview.access %12[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %14 = AIE.objectFifo.acquire (%8 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %15 = AIE.objectFifo.subview.access %14[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + %16 = AIE.objectFifo.acquire (%7 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %17 = AIE.objectFifo.subview.access %16[0] : !AIE.objectFifoSubview> -> memref<32xi32, 2> + affine.for %arg1 = 0 to 32 { + %18 = affine.load %13[%arg1] : memref<32xi32, 2> + %19 = affine.load %15[%arg1] : memref<32xi32, 2> + %20 = arith.addi %19, %18 : i32 + affine.store %20, %17[%arg1] : memref<32xi32, 2> + } + AIE.objectFifo.release (%7 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%8 : !AIE.objectFifo>, 1) + AIE.objectFifo.release (%9 : !AIE.objectFifo>, 1) + } + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} diff --git a/test/50_air_channel_move/05_mini_transformer_pipe/channels.mlir b/test/50_air_channel_move/05_mini_transformer_pipe/channels.mlir new file mode 100755 index 000000000..020ec586b --- /dev/null +++ b/test/50_air_channel_move/05_mini_transformer_pipe/channels.mlir @@ -0,0 +1,80 @@ +#set = affine_set<()[s0, s1] : (s0 >= 0, s1 == 0)> +#set1 = affine_set<()[s0, s1] : (s0 >= 0, s1 - 1 == 0)> +module { + air.channel @channel_5 [1, 1] // D + air.channel @channel_4 [1, 1] // C + air.channel @channel_3 [1, 1] // partial + air.channel @channel_2 [1, 1] // output + air.channel @channel_1 [1, 1] // B + air.channel @channel_0 [1, 1] // A + func.func @graph(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>, %arg01: memref<4096xi32>, %arg11: memref<4096xi32>, %arg2: memref<4096xi32>) { + %c32 = arith.constant 32 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %c0 = arith.constant 0 : index + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.put @channel_0[] (%arg0[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.put @channel_1[] (%arg1[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.put @channel_4[] (%arg01[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.put @channel_5[] (%arg11[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + affine.for %arg3 = 0 to 4096 step 32 { + air.channel.get @channel_2[] (%arg2[%arg3] [%c32] [%c0]) : (memref<4096xi32>) + } + air.herd @herd_0 tile (%arg3, %arg4) in (%arg5=%c1, %arg6=%c2) { + %c0_0 = arith.constant 0 : index + %c32_1 = arith.constant 32 : index + affine.if #set()[%arg3, %arg4] { + %alloc = memref.alloc() {sym_name = "inA"} : memref<32xi32, 2> + %alloc_2 = memref.alloc() {sym_name = "inB"} : memref<32xi32, 2> + %alloc_3 = memref.alloc() {sym_name = "outC"} : memref<32xi32, 2> + affine.for %arg7 = 0 to 4096 step 32 { + air.channel.get @channel_0[] (%alloc[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + air.channel.get @channel_1[] (%alloc_2[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + affine.for %arg8 = 0 to 32 { + %0 = affine.load %alloc[%arg8] : memref<32xi32, 2> + %1 = affine.load %alloc_2[%arg8] : memref<32xi32, 2> + %2 = arith.addi %1, %0 : i32 + affine.store %2, %alloc_3[%arg8] : memref<32xi32, 2> + } + air.channel.put @channel_3[] (%alloc_3[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + } + memref.dealloc %alloc_3 : memref<32xi32, 2> + memref.dealloc %alloc_2 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + } + affine.if #set1()[%arg3, %arg4] { + %alloc = memref.alloc() {sym_name = "inA"} : memref<32xi32, 2> + %alloc_1 = memref.alloc() {sym_name = "inB"} : memref<32xi32, 2> + %alloc_2 = memref.alloc() {sym_name = "inC"} : memref<32xi32, 2> + %alloc_3 = memref.alloc() {sym_name = "outC"} : memref<32xi32, 2> + affine.for %arg7 = 0 to 4096 step 32 { + air.channel.get @channel_4[] (%alloc[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + air.channel.get @channel_5[] (%alloc_1[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + air.channel.get @channel_3[] (%alloc_2[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + affine.for %arg8 = 0 to 32 { + %0 = affine.load %alloc[%arg8] : memref<32xi32, 2> + %1 = affine.load %alloc_1[%arg8] : memref<32xi32, 2> + %2 = arith.addi %1, %0 : i32 + %3 = affine.load %alloc_2[%arg8] : memref<32xi32, 2> + %4 = arith.addi %2, %3 : i32 + affine.store %4, %alloc_3[%arg8] : memref<32xi32, 2> + } + air.channel.put @channel_2[] (%alloc_3[%c0_0] [%c32_1] [%c0_0]) : (memref<32xi32, 2>) + } + memref.dealloc %alloc_3 : memref<32xi32, 2> + memref.dealloc %alloc_2 : memref<32xi32, 2> + memref.dealloc %alloc_1 : memref<32xi32, 2> + memref.dealloc %alloc : memref<32xi32, 2> + } + air.herd_terminator + } + return + } +} diff --git a/test/50_air_channel_move/06_multiple_worker_to_worker/aie.mlir b/test/50_air_channel_move/06_multiple_worker_to_worker/aie.mlir new file mode 100755 index 000000000..cfa35de27 --- /dev/null +++ b/test/50_air_channel_move/06_multiple_worker_to_worker/aie.mlir @@ -0,0 +1,32 @@ +module @aie.partition_0 { + %0 = AIE.tile(1, 1) + %1 = AIE.tile(2, 1) + %2 = AIE.tile(1, 2) + %3 = AIE.tile(2, 2) + %4 = AIE.objectFifo.createObjectFifo(%0, {%1}, 1) : !AIE.objectFifo> + %5 = AIE.objectFifo.createObjectFifo(%2, {%3}, 1) : !AIE.objectFifo> + %6 = AIE.core(%3) { + %10 = AIE.objectFifo.acquire (%5 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %11 = AIE.objectFifo.subview.access %10[0] : !AIE.objectFifoSubview> -> memref<32xi32> + AIE.objectFifo.release (%5 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_2_2.elf"} + %7 = AIE.core(%2) { + %10 = AIE.objectFifo.acquire (%5 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %11 = AIE.objectFifo.subview.access %10[0] : !AIE.objectFifoSubview> -> memref<32xi32> + AIE.objectFifo.release (%5 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_1_2.elf"} + %8 = AIE.core(%1) { + %10 = AIE.objectFifo.acquire (%4 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %11 = AIE.objectFifo.subview.access %10[0] : !AIE.objectFifoSubview> -> memref<32xi32> + AIE.objectFifo.release (%4 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_2_1.elf"} + %9 = AIE.core(%0) { + %10 = AIE.objectFifo.acquire (%4 : !AIE.objectFifo>, 1) : !AIE.objectFifoSubview> + %11 = AIE.objectFifo.subview.access %10[0] : !AIE.objectFifoSubview> -> memref<32xi32> + AIE.objectFifo.release (%4 : !AIE.objectFifo>, 1) + AIE.end + } {elf_file = "partition_0_core_1_1.elf"} +} \ No newline at end of file diff --git a/test/50_air_channel_move/06_multiple_worker_to_worker/channels.mlir b/test/50_air_channel_move/06_multiple_worker_to_worker/channels.mlir new file mode 100755 index 000000000..28b217792 --- /dev/null +++ b/test/50_air_channel_move/06_multiple_worker_to_worker/channels.mlir @@ -0,0 +1,22 @@ +#set = affine_set<()[s0, s1] : (s0 == 0, s1 >= 0, -s1 + 2 >= 0)> +module @aie.partition_0 { + air.channel @channel_0 [1, 2] + func.func @test() { + %c2 = arith.constant 2 : index + air.herd @herd_0 tile (%arg0, %arg1) in (%arg2=%c2, %arg3=%c2) { + %c0 = arith.constant 0 : index + %c32 = arith.constant 32 : index + affine.if #set()[%arg0, %arg1] { + %alloc = memref.alloc() {sym_name = "scratch"} : memref<32xi32, 2> + air.channel.put @channel_0[%c0, %arg1] (%alloc[%c0] [%c32] [%c0]) : (memref<32xi32, 2>) + memref.dealloc %alloc : memref<32xi32, 2> + } else { + %alloc = memref.alloc() {sym_name = "scratch_copy"} : memref<32xi32, 2> + air.channel.get @channel_0[%c0, %arg1] (%alloc[%c0] [%c32] [%c0]) : (memref<32xi32, 2>) + memref.dealloc %alloc : memref<32xi32, 2> + } + air.herd_terminator + } + return + } +} \ No newline at end of file diff --git a/test/50_air_channel_move/config.make b/test/50_air_channel_move/config.make new file mode 100755 index 000000000..898616126 --- /dev/null +++ b/test/50_air_channel_move/config.make @@ -0,0 +1,40 @@ +# Copyright (C) 2022, Xilinx Inc. +# Copyright (C) 2022, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +AIE_SRC_DIR = acdc_project +ARM_SRC_DIR = . +ARM_OBJ_DIR = . + +AIE_OPT = aie-opt +AIE_XLATE = aie-translate +AIR_OPT = air-opt +ATEN_OPT = aten-opt +AIECC = aiecc.py + +AIR_INSTALL_PATH = $(dir $(shell which air-opt))/.. +AIE_INSTALL_PATH = $(dir $(shell which aie-opt))/.. + +LDFLAGS = -fuse-ld=lld \ + -rdynamic \ + -lxaiengine \ + -Wl,--whole-archive -lairhost -Wl,--no-whole-archive \ + -lstdc++ \ + -ldl + +CC = clang +CFLAGS += -g -I/opt/xaiengine/include +LDFLAGS += -L/opt/xaiengine/lib + +CFLAGS += -std=c++17 \ + -I$(AIR_INSTALL_PATH)/runtime_lib/airhost/include \ + -I${AIE_INSTALL_PATH}/runtime_lib \ + -DAIR_LIBXAIE_ENABLE \ + -DLIBXAIENGINEV2 + +LDFLAGS += -L$(AIR_INSTALL_PATH)/runtime_lib/airhost + +default: all + +test_library.o: ${AIE_INSTALL_PATH}/runtime_lib/test_library.cpp + $(CC) $^ $(CFLAGS) -c -o $@