Skip to content

Commit b01ca3c

Browse files
committed
[AIE] Add addressing intrinsics using dims_2d_t/dims_3d_t
1 parent 614da1d commit b01ca3c

File tree

3 files changed

+140
-1
lines changed

3 files changed

+140
-1
lines changed

clang/lib/Headers/aie2p_addr.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ struct dims_2d_t {
3333
: num1(size1), inc1(inc1), inc2(inc2), count1(0) {};
3434
// Constructs a dims_2d_t with an explicit starting counter: size1 is stored in
// num1, inc1/inc2 are stored as given, and count1 initializes the count1 field.
dims_2d_t(unsigned int size1, int inc1, int inc2, addr_t count1)
3535
: num1(size1), inc1(inc1), inc2(inc2), count1(count1) {};
36+
// Default constructor: initializes num1, inc1, inc2 and count1 to 0.
37+
dims_2d_t() : num1(0), inc1(0), inc2(0), count1(0) {};
3638
};
3739

3840
INTRINSIC(dims_2d_t)
@@ -114,6 +116,9 @@ struct dims_3d_t {
114116
int inc3, addr_t count1, addr_t count2)
115117
: num1(size1), inc1(inc1), num2(size2), inc2(inc2), inc3(inc3),
116118
count1(count1), count2(count2) {};
119+
// Default constructor: initializes num1, inc1, num2, inc2, inc3, count1 and
// count2 to 0.
120+
dims_3d_t()
121+
: num1(0), inc1(0), num2(0), inc2(0), inc3(0), count1(0), count2(0) {};
117122
};
118123

119124
INTRINSIC(dims_3d_t)
@@ -131,6 +136,17 @@ dims_3d_from_steps(unsigned int size1, int step1, unsigned int size2, int step2,
131136
count1, count2);
132137
};
133138

139+
// Convenience overload of add_2d_byte that takes its parameters bundled in a
// dims_2d_t. Forwards to the five-argument add_2d_byte with, in order:
// a, params.inc2, params.num1, params.count1 (by reference) and params.inc1.
// params is taken by non-const reference and count1 is handed on as an
// addr_t&, so the callee is able to write the counter back into params.
// NOTE(review): the (addr_t &) C-style cast is a no-op if count1 is already
// declared as addr_t, and a reinterpretation otherwise -- confirm the member
// type and prefer a named cast (or no cast) if possible.
// NOTE(review): this commit adds an identical template to aiev2_addr.h;
// confirm both headers are never included in the same translation unit.
template <typename T> INTRINSIC(T *) add_2d_byte(T *a, dims_2d_t &params) {
140+
return add_2d_byte(a, params.inc2, params.num1, (addr_t &)params.count1,
141+
params.inc1);
142+
}
143+
144+
// Convenience overload of add_3d_byte that takes its parameters bundled in a
// dims_3d_t. Forwards to the eight-argument add_3d_byte with, in order:
// a, params.inc3, params.num1, params.count1 (by reference), params.inc1,
// params.num2, params.count2 (by reference) and params.inc2.
// params is taken by non-const reference and both counters are handed on as
// addr_t&, so the callee is able to write them back into params.
// NOTE(review): the (addr_t &) C-style casts are no-ops if count1/count2 are
// already declared as addr_t, and reinterpretations otherwise -- confirm the
// member types and prefer a named cast (or no cast) if possible.
// NOTE(review): this commit adds an identical template to aiev2_addr.h;
// confirm both headers are never included in the same translation unit.
template <typename T> INTRINSIC(T *) add_3d_byte(T *a, dims_3d_t &params) {
145+
return add_3d_byte(a, params.inc3, params.num1, (addr_t &)params.count1,
146+
params.inc1, params.num2, (addr_t &)params.count2,
147+
params.inc2);
148+
}
149+
134150
// Thin wrapper: passes addr straight to __builtin_aie2p_load_4x16_lo and
// returns the builtin's v8int32 result unchanged.
INTRINSIC(v8int32) load_4x16_lo(v8int32 addr) {
135151
return __builtin_aie2p_load_4x16_lo(addr);
136152
}

clang/lib/Headers/aiev2_addr.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,4 +264,15 @@ load_lut_2x_float(const void *lut1, const void *lut2, v16uint32 offset,
264264
(v32bfloat16)insert(v2, 1, (v16bfloat16)read_lut64_3(lut1, lut2, offset));
265265
}
266266

267+
// Convenience overload of add_2d_byte that takes its parameters bundled in a
// dims_2d_t. Forwards to the five-argument add_2d_byte with, in order:
// a, params.inc2, params.num1, params.count1 (by reference) and params.inc1.
// params is taken by non-const reference and count1 is handed on as an
// addr_t&, so the callee is able to write the counter back into params.
// NOTE(review): the (addr_t &) C-style cast is a no-op if count1 is already
// declared as addr_t, and a reinterpretation otherwise -- confirm the member
// type and prefer a named cast (or no cast) if possible.
// NOTE(review): this commit adds an identical template to aie2p_addr.h;
// confirm both headers are never included in the same translation unit.
template <typename T> INTRINSIC(T *) add_2d_byte(T *a, dims_2d_t &params) {
268+
return add_2d_byte(a, params.inc2, params.num1, (addr_t &)params.count1,
269+
params.inc1);
270+
}
271+
272+
// Convenience overload of add_3d_byte that takes its parameters bundled in a
// dims_3d_t. Forwards to the eight-argument add_3d_byte with, in order:
// a, params.inc3, params.num1, params.count1 (by reference), params.inc1,
// params.num2, params.count2 (by reference) and params.inc2.
// params is taken by non-const reference and both counters are handed on as
// addr_t&, so the callee is able to write them back into params.
// NOTE(review): the (addr_t &) C-style casts are no-ops if count1/count2 are
// already declared as addr_t, and reinterpretations otherwise -- confirm the
// member types and prefer a named cast (or no cast) if possible.
// NOTE(review): this commit adds an identical template to aie2p_addr.h;
// confirm both headers are never included in the same translation unit.
template <typename T> INTRINSIC(T *) add_3d_byte(T *a, dims_3d_t &params) {
273+
return add_3d_byte(a, params.inc3, params.num1, (addr_t &)params.count1,
274+
params.inc1, params.num2, (addr_t &)params.count2,
275+
params.inc2);
276+
}
277+
267278
#endif /*__AIEV2_ADDR_H__*/

clang/test/CodeGen/aie/aie-addr-intrinsic.cpp

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ dims_2d_t test_dims_2d_t (unsigned int size1, int inc1, int inc2) {
2323
return dims_2d_t(size1,inc1, inc2);
2424
}
2525

26-
//
2726
// CHECK-COMMON-LABEL: @_Z15test2_dims_2d_tjiii(
2827
// CHECK-COMMON-NEXT: entry:
2928
// CHECK-COMMON-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DIMS_2D_T:%.*]] poison, i32 [[SIZE1:%.*]], 0
@@ -225,6 +224,119 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t&
225224
return add_2d_byte(a,off,size1,count1,inc1);
226225
}
227226

227+
// AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
228+
// AIE2-NEXT: entry:
229+
// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
230+
// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
231+
// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
232+
// AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
233+
// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
234+
// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
235+
// AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
236+
// AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
237+
// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
238+
// AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
239+
// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
240+
// AIE2-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
241+
// AIE2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
242+
// AIE2-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
243+
// AIE2-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
244+
// AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
245+
// AIE2-NEXT: ret ptr [[TMP11]]
246+
//
247+
// AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
248+
// AIE2P-NEXT: entry:
249+
// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
250+
// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
251+
// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
252+
// AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
253+
// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
254+
// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
255+
// AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
256+
// AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
257+
// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
258+
// AIE2P-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
259+
// AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
260+
// AIE2P-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2p.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
261+
// AIE2P-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
262+
// AIE2P-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
263+
// AIE2P-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
264+
// AIE2P-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
265+
// AIE2P-NEXT: ret ptr [[TMP11]]
266+
//
267+
// Exercises the add_2d_byte overload that takes a dims_2d_t reference; the
// expected IR for both targets is given by the directive block directly above.
v16int32* test_add_2d_byte(v16int32* a, dims_2d_t &params){
268+
return add_2d_byte(a,params);
269+
}
270+
271+
// AIE2-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t(
272+
// AIE2-NEXT: entry:
273+
// AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
274+
// AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
275+
// AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
276+
// AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
277+
// AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
278+
// AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
279+
// AIE2-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
280+
// AIE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
281+
// AIE2-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
282+
// AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
283+
// AIE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
284+
// AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP0]] to i20
285+
// AIE2-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP2]] to i20
286+
// AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP4]] to i20
287+
// AIE2-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
288+
// AIE2-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
289+
// AIE2-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
290+
// AIE2-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
291+
// AIE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
292+
// AIE2-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
293+
// AIE2-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
294+
// AIE2-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
295+
// AIE2-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
296+
// AIE2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
297+
// AIE2-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
298+
// AIE2-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
299+
// AIE2-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
300+
// AIE2-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
301+
// AIE2-NEXT: ret ptr [[TMP19]]
302+
//
303+
// AIE2P-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t(
304+
// AIE2P-NEXT: entry:
305+
// AIE2P-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
306+
// AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
307+
// AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
308+
// AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
309+
// AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
310+
// AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
311+
// AIE2P-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
312+
// AIE2P-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
313+
// AIE2P-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
314+
// AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
315+
// AIE2P-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
316+
// AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP0]] to i20
317+
// AIE2P-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP2]] to i20
318+
// AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP4]] to i20
319+
// AIE2P-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
320+
// AIE2P-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
321+
// AIE2P-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
322+
// AIE2P-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
323+
// AIE2P-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
324+
// AIE2P-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
325+
// AIE2P-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
326+
// AIE2P-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
327+
// AIE2P-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
328+
// AIE2P-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
329+
// AIE2P-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
330+
// AIE2P-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
331+
// AIE2P-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
332+
// AIE2P-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
333+
// AIE2P-NEXT: ret ptr [[TMP19]]
334+
//
335+
// Exercises the add_3d_byte overload that takes a dims_3d_t reference; the
// expected IR for both targets is given by the directive block directly above.
v16int32* test_add_3d_byte(v16int32* a, dims_3d_t &params){
336+
return add_3d_byte(a,params);
337+
}
338+
339+
228340
// AIE2-LABEL: @_Z15test_add_3d_ptrPDv16_iiiRiiiS1_i(
229341
// AIE2-NEXT: entry:
230342
// AIE2-NEXT: [[MUL_I:%.*]] = shl i32 [[OFF:%.*]], 6

0 commit comments

Comments
 (0)