// File: aie.mlir
//===- aie.mlir ------------------------------------------------*- MLIR -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates
// Copyright (C) 2020-2022, Xilinx Inc.
//
//===----------------------------------------------------------------------===//
module {
aie.device(npu1_1col) {
// Globals named after each object FIFO and its "_cons" counterpart. The
// in0/out0 pairs are memref<16xi32>; the in1/out1 pairs are memref<8xi32>.
// @objFifo_in0 and @objFifo_out0 are the symbols referenced by the
// aie.shim_dma_allocation ops and by the `metadata` attributes in
// @bobsyouruncle; the other globals are not referenced by any other op
// visible in this file.
memref.global "public" @objFifo_in0 : memref<16xi32>
memref.global "public" @objFifo_in0_cons : memref<16xi32>
memref.global "public" @objFifo_in1 : memref<8xi32>
memref.global "public" @objFifo_in1_cons : memref<8xi32>
memref.global "public" @objFifo_out0 : memref<16xi32>
memref.global "public" @objFifo_out0_cons : memref<16xi32>
memref.global "public" @objFifo_out1 : memref<8xi32>
memref.global "public" @objFifo_out1_cons : memref<8xi32>
// Three tiles in column 0, rows 0 through 2. In this file %tile_0_0 is used
// only as a flow endpoint, %tile_0_1 hosts the aie.memtile_dma, and %tile_0_2
// hosts the aie.core and aie.mem.
%tile_0_0 = aie.tile(0, 0)
%tile_0_1 = aie.tile(0, 1)
%tile_0_2 = aie.tile(0, 2)
// Two 8-element input buffers and two 8-element output buffers on %tile_0_2.
// The core reads the *_in1_cons_* pair and writes the *_out1_* pair.
%objFifo_in1_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "objFifo_in1_cons_buff_0"} : memref<8xi32>
%objFifo_in1_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "objFifo_in1_cons_buff_1"} : memref<8xi32>
%objFifo_out1_buff_0 = aie.buffer(%tile_0_2) {sym_name = "objFifo_out1_buff_0"} : memref<8xi32>
%objFifo_out1_buff_1 = aie.buffer(%tile_0_2) {sym_name = "objFifo_out1_buff_1"} : memref<8xi32>
// Locks 0-3 on %tile_0_2: one prod/cons pair per FIFO. Each prod lock starts
// at 2 and each cons lock at 0.
// NOTE(review): init = 2 presumably corresponds to the two buffers per FIFO
// (both initially free to fill) - confirm.
%objFifo_in1_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 2 : i32, sym_name = "objFifo_in1_cons_prod_lock"}
%objFifo_in1_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "objFifo_in1_cons_cons_lock"}
%objFifo_out1_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "objFifo_out1_prod_lock"}
%objFifo_out1_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "objFifo_out1_cons_lock"}
// Stream routing between the tiles' DMA ports:
//   input  path: tile(0,0) DMA 0 -> tile(0,1) DMA 0 -> tile(0,2) DMA 0
//   output path: tile(0,2) DMA 0 -> tile(0,1) DMA 1 -> tile(0,0) DMA 0
aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0)
aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0)
aie.flow(%tile_0_1, DMA : 1, %tile_0_0, DMA : 0)
aie.flow(%tile_0_2, DMA : 0, %tile_0_1, DMA : 1)
%core_0_2 = aie.core(%tile_0_2) {
  // Add-one kernel for tile (0, 2): every element read from the input
  // buffers is incremented by 1 and written to the matching output buffer.
  // The outer loop runs 4 times and each pass handles buffer 0 then buffer 1,
  // 8 elements each, i.e. 64 elements in total.

  // Loop bounds / steps.
  %lb = arith.constant 0 : index
  %elem_step = arith.constant 1 : index
  %pair_step = arith.constant 2 : index
  %ub = arith.constant 8 : index
  // Value added to every element.
  %one_i32 = arith.constant 1 : i32

  scf.for %pair = %lb to %ub step %pair_step {
    // ---- Buffer 0 --------------------------------------------------------
    // Acquire one filled input buffer and one free output buffer.
    aie.use_lock(%objFifo_in1_cons_cons_lock, AcquireGreaterEqual, 1)
    aie.use_lock(%objFifo_out1_prod_lock, AcquireGreaterEqual, 1)
    scf.for %i = %lb to %ub step %elem_step {
      %src0 = memref.load %objFifo_in1_cons_buff_0[%i] : memref<8xi32>
      %sum0 = arith.addi %src0, %one_i32 : i32
      memref.store %sum0, %objFifo_out1_buff_0[%i] : memref<8xi32>
    }
    // Release the input lock's producer side and the output lock's consumer
    // side by 1 each.
    aie.use_lock(%objFifo_in1_cons_prod_lock, Release, 1)
    aie.use_lock(%objFifo_out1_cons_lock, Release, 1)

    // ---- Buffer 1 --------------------------------------------------------
    // Same lock sequence, applied to the second buffer of each pair.
    aie.use_lock(%objFifo_in1_cons_cons_lock, AcquireGreaterEqual, 1)
    aie.use_lock(%objFifo_out1_prod_lock, AcquireGreaterEqual, 1)
    scf.for %j = %lb to %ub step %elem_step {
      %src1 = memref.load %objFifo_in1_cons_buff_1[%j] : memref<8xi32>
      %sum1 = arith.addi %src1, %one_i32 : i32
      memref.store %sum1, %objFifo_out1_buff_1[%j] : memref<8xi32>
    }
    aie.use_lock(%objFifo_in1_cons_prod_lock, Release, 1)
    aie.use_lock(%objFifo_out1_cons_lock, Release, 1)
  }
  aie.end
}
// Shim DMA allocation bound to the symbol @objFifo_in0, direction MM2S.
// @bobsyouruncle selects this allocation through `metadata = @objFifo_in0`.
// NOTE(review): the two trailing integers are presumably the DMA channel (0)
// and the column (0) - confirm against the op definition.
aie.shim_dma_allocation @objFifo_in0(MM2S, 0, 0)
// Host-side transfer sequence.
//
//   %arg0 : 64 x i32 input, sent through the @objFifo_in0 shim allocation.
//   %arg1 : 32 x i32, not used by this sequence (kept for the caller-visible
//           signature).
//   %arg2 : 64 x i32 output, received through the @objFifo_out0 shim
//           allocation.
//
// Both transfers use offsets [0, 0, 0, 0], sizes [1, 1, 1, 64] and strides
// [0, 0, 0], i.e. one contiguous run of 64 elements.
func.func @bobsyouruncle(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) {
  %c0_i64 = arith.constant 0 : i64
  %c1_i64 = arith.constant 1 : i64
  %c64_i64 = arith.constant 64 : i64
  // Queue the input transfer (id 0) and the output transfer (id 1).
  aiex.npu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c64_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32>
  aiex.npu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c64_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32>
  // Wait for the output transfer to finish before the sequence returns.
  // Without this the sequence ends as soon as the transfers are queued and
  // the host may read %arg2 before all 64 results have landed. The sync
  // targets column 0, row 0, channel 0 with direction = 0 (S2MM), matching
  // the @objFifo_out0 shim allocation (S2MM, 0, 0).
  aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
  return
}
%memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) {
  // DMA program for tile (0, 1). It owns two 16-element buffers per
  // direction and runs four channels, each cycling forever over a two-BD
  // ring (buffer 0 -> buffer 1 -> buffer 0 ...):
  //   S2MM 0 : stream -> objFifo_in0_cons_buff_{0,1}
  //   MM2S 0 : objFifo_in0_cons_buff_{0,1} -> stream
  //   MM2S 1 : objFifo_out0_buff_{0,1} -> stream
  //   S2MM 1 : stream -> objFifo_out0_buff_{0,1}

  // Buffers local to this tile.
  %objFifo_in0_cons_buff_0 = aie.buffer(%tile_0_1) {sym_name = "objFifo_in0_cons_buff_0"} : memref<16xi32>
  %objFifo_in0_cons_buff_1 = aie.buffer(%tile_0_1) {sym_name = "objFifo_in0_cons_buff_1"} : memref<16xi32>
  %objFifo_out0_buff_0 = aie.buffer(%tile_0_1) {sym_name = "objFifo_out0_buff_0"} : memref<16xi32>
  %objFifo_out0_buff_1 = aie.buffer(%tile_0_1) {sym_name = "objFifo_out0_buff_1"} : memref<16xi32>

  // Locks 0-3 on this tile: prod locks start at 2, cons locks at 0.
  %objFifo_in0_cons_prod_lock = aie.lock(%tile_0_1, 0) {init = 2 : i32, sym_name = "objFifo_in0_cons_prod_lock"}
  %objFifo_in0_cons_cons_lock = aie.lock(%tile_0_1, 1) {init = 0 : i32, sym_name = "objFifo_in0_cons_cons_lock"}
  %objFifo_out0_prod_lock = aie.lock(%tile_0_1, 2) {init = 2 : i32, sym_name = "objFifo_out0_prod_lock"}
  %objFifo_out0_cons_lock = aie.lock(%tile_0_1, 3) {init = 0 : i32, sym_name = "objFifo_out0_cons_lock"}

  // ---- S2MM channel 0: fill the in0 buffers ------------------------------
  // Each BD acquires the in0 prod lock, covers 16 elements from offset 0,
  // then releases the in0 cons lock.
  %in_s2mm = aie.dma_start(S2MM, 0, ^in_fill_0, ^start_in_mm2s)
^in_fill_0:
  aie.use_lock(%objFifo_in0_cons_prod_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_in0_cons_buff_0 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_in0_cons_cons_lock, Release, 1)
  aie.next_bd ^in_fill_1
^in_fill_1:
  aie.use_lock(%objFifo_in0_cons_prod_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_in0_cons_buff_1 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_in0_cons_cons_lock, Release, 1)
  aie.next_bd ^in_fill_0

  // ---- MM2S channel 0: drain the in0 buffers -----------------------------
  // Mirror image of the ring above: acquire the cons lock, release the prod
  // lock.
^start_in_mm2s:
  %in_mm2s = aie.dma_start(MM2S, 0, ^in_drain_0, ^start_out_mm2s)
^in_drain_0:
  aie.use_lock(%objFifo_in0_cons_cons_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_in0_cons_buff_0 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_in0_cons_prod_lock, Release, 1)
  aie.next_bd ^in_drain_1
^in_drain_1:
  aie.use_lock(%objFifo_in0_cons_cons_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_in0_cons_buff_1 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_in0_cons_prod_lock, Release, 1)
  aie.next_bd ^in_drain_0

  // ---- MM2S channel 1: drain the out0 buffers ----------------------------
^start_out_mm2s:
  %out_mm2s = aie.dma_start(MM2S, 1, ^out_drain_0, ^start_out_s2mm)
^out_drain_0:
  aie.use_lock(%objFifo_out0_cons_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_out0_buff_0 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_out0_prod_lock, Release, 1)
  aie.next_bd ^out_drain_1
^out_drain_1:
  aie.use_lock(%objFifo_out0_cons_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_out0_buff_1 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_out0_prod_lock, Release, 1)
  aie.next_bd ^out_drain_0

  // ---- S2MM channel 1: fill the out0 buffers -----------------------------
^start_out_s2mm:
  %out_s2mm = aie.dma_start(S2MM, 1, ^out_fill_0, ^done)
^out_fill_0:
  aie.use_lock(%objFifo_out0_prod_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_out0_buff_0 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_out0_cons_lock, Release, 1)
  aie.next_bd ^out_fill_1
^out_fill_1:
  aie.use_lock(%objFifo_out0_prod_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_out0_buff_1 : memref<16xi32>, 0, 16)
  aie.use_lock(%objFifo_out0_cons_lock, Release, 1)
  aie.next_bd ^out_fill_0

^done:
  aie.end
}
// Shim DMA allocation bound to the symbol @objFifo_out0, direction S2MM.
// @bobsyouruncle selects this allocation through `metadata = @objFifo_out0`.
// NOTE(review): the two trailing integers are presumably the DMA channel (0)
// and the column (0) - confirm against the op definition.
aie.shim_dma_allocation @objFifo_out0(S2MM, 0, 0)
%mem_0_2 = aie.mem(%tile_0_2) {
  // DMA program for tile (0, 2). Two channels, each cycling forever over a
  // two-BD ring of 8-element buffers:
  //   S2MM 0 : stream -> objFifo_in1_cons_buff_{0,1}  (read by the core)
  //   MM2S 0 : objFifo_out1_buff_{0,1} -> stream      (written by the core)

  // ---- S2MM channel 0: fill the core's input buffers ---------------------
  // Each BD acquires the in1 prod lock, covers 8 elements from offset 0,
  // then releases the in1 cons lock that the core acquires.
  %recv = aie.dma_start(S2MM, 0, ^fill_0, ^start_send)
^fill_0:
  aie.use_lock(%objFifo_in1_cons_prod_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_in1_cons_buff_0 : memref<8xi32>, 0, 8)
  aie.use_lock(%objFifo_in1_cons_cons_lock, Release, 1)
  aie.next_bd ^fill_1
^fill_1:
  aie.use_lock(%objFifo_in1_cons_prod_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_in1_cons_buff_1 : memref<8xi32>, 0, 8)
  aie.use_lock(%objFifo_in1_cons_cons_lock, Release, 1)
  aie.next_bd ^fill_0

  // ---- MM2S channel 0: drain the core's output buffers -------------------
  // Each BD acquires the out1 cons lock released by the core, covers
  // 8 elements from offset 0, then releases the out1 prod lock.
^start_send:
  %send = aie.dma_start(MM2S, 0, ^drain_0, ^done)
^drain_0:
  aie.use_lock(%objFifo_out1_cons_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_out1_buff_0 : memref<8xi32>, 0, 8)
  aie.use_lock(%objFifo_out1_prod_lock, Release, 1)
  aie.next_bd ^drain_1
^drain_1:
  aie.use_lock(%objFifo_out1_cons_lock, AcquireGreaterEqual, 1)
  aie.dma_bd(%objFifo_out1_buff_1 : memref<8xi32>, 0, 8)
  aie.use_lock(%objFifo_out1_prod_lock, Release, 1)
  aie.next_bd ^drain_0

^done:
  aie.end
}
}
}