@@ -23,7 +23,6 @@ dims_2d_t test_dims_2d_t (unsigned int size1, int inc1, int inc2) {
2323 return dims_2d_t (size1,inc1, inc2);
2424}
2525
26- //
2726// CHECK-COMMON-LABEL: @_Z15test2_dims_2d_tjiii(
2827// CHECK-COMMON-NEXT: entry:
2928// CHECK-COMMON-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DIMS_2D_T:%.*]] poison, i32 [[SIZE1:%.*]], 0
@@ -225,6 +224,119 @@ const v16int32* test_add_2d_byte(const v16int32* a, int off, int size1, addr_t&
225224 return add_2d_byte (a,off,size1,count1,inc1);
226225}
227226
227+ // AIE2-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
228+ // AIE2-NEXT: entry:
229+ // AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
230+ // AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
231+ // AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
232+ // AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
233+ // AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
234+ // AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
235+ // AIE2-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
236+ // AIE2-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
237+ // AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
238+ // AIE2-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
239+ // AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
240+ // AIE2-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
241+ // AIE2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
242+ // AIE2-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
243+ // AIE2-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
244+ // AIE2-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
245+ // AIE2-NEXT: ret ptr [[TMP11]]
246+ //
247+ // AIE2P-LABEL: @_Z16test_add_2d_bytePDv16_iR9dims_2d_t(
248+ // AIE2P-NEXT: entry:
249+ // AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 8
250+ // AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA14:![0-9]+]]
251+ // AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA16:![0-9]+]]
252+ // AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
253+ // AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
254+ // AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
255+ // AIE2P-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i20
256+ // AIE2P-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i20
257+ // AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP1]] to i20
258+ // AIE2P-NEXT: [[TMP6:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
259+ // AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i20
260+ // AIE2P-NEXT: [[TMP8:%.*]] = tail call { ptr, i20 } @llvm.aie2p.add.2d(ptr [[A:%.*]], i20 [[TMP3]], i20 [[TMP4]], i20 [[TMP5]], i20 [[TMP7]])
261+ // AIE2P-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 1
262+ // AIE2P-NEXT: [[TMP10:%.*]] = zext i20 [[TMP9]] to i32
263+ // AIE2P-NEXT: store i32 [[TMP10]], ptr [[COUNT1_I]], align 4
264+ // AIE2P-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i20 } [[TMP8]], 0
265+ // AIE2P-NEXT: ret ptr [[TMP11]]
266+ //
267+ v16int32* test_add_2d_byte (v16int32* a, dims_2d_t &params){ // params is an lvalue reference (mangled as R9dims_2d_t in the checks above)
268+ return add_2d_byte (a,params); // forwards both arguments unchanged; the expected IR writes one i32 back through params
269+ }
270+
271+ // AIE2-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t(
272+ // AIE2-NEXT: entry:
273+ // AIE2-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
274+ // AIE2-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
275+ // AIE2-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
276+ // AIE2-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
277+ // AIE2-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
278+ // AIE2-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
279+ // AIE2-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
280+ // AIE2-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
281+ // AIE2-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
282+ // AIE2-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
283+ // AIE2-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
284+ // AIE2-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP0]] to i20
285+ // AIE2-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP2]] to i20
286+ // AIE2-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP4]] to i20
287+ // AIE2-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
288+ // AIE2-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
289+ // AIE2-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
290+ // AIE2-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
291+ // AIE2-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
292+ // AIE2-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
293+ // AIE2-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
294+ // AIE2-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
295+ // AIE2-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
296+ // AIE2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
297+ // AIE2-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
298+ // AIE2-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
299+ // AIE2-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
300+ // AIE2-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
301+ // AIE2-NEXT: ret ptr [[TMP19]]
302+ //
303+ // AIE2P-LABEL: @_Z16test_add_3d_bytePDv16_iR9dims_3d_t(
304+ // AIE2P-NEXT: entry:
305+ // AIE2P-NEXT: [[INC3_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i20 16
306+ // AIE2P-NEXT: [[TMP0:%.*]] = load i32, ptr [[INC3_I]], align 4, !tbaa [[TBAA10]]
307+ // AIE2P-NEXT: [[TMP1:%.*]] = load i32, ptr [[PARAMS]], align 4, !tbaa [[TBAA2]]
308+ // AIE2P-NEXT: [[COUNT1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 20
309+ // AIE2P-NEXT: [[INC1_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 4
310+ // AIE2P-NEXT: [[TMP2:%.*]] = load i32, ptr [[INC1_I]], align 4, !tbaa [[TBAA7]]
311+ // AIE2P-NEXT: [[NUM2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 8
312+ // AIE2P-NEXT: [[TMP3:%.*]] = load i32, ptr [[NUM2_I]], align 4, !tbaa [[TBAA8]]
313+ // AIE2P-NEXT: [[COUNT2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 24
314+ // AIE2P-NEXT: [[INC2_I:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i20 12
315+ // AIE2P-NEXT: [[TMP4:%.*]] = load i32, ptr [[INC2_I]], align 4, !tbaa [[TBAA9]]
316+ // AIE2P-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP0]] to i20
317+ // AIE2P-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP2]] to i20
318+ // AIE2P-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP4]] to i20
319+ // AIE2P-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP1]] to i20
320+ // AIE2P-NEXT: [[TMP9:%.*]] = load i32, ptr [[COUNT1_I]], align 4, !tbaa [[TBAA13]]
321+ // AIE2P-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i20
322+ // AIE2P-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP3]] to i20
323+ // AIE2P-NEXT: [[TMP12:%.*]] = load i32, ptr [[COUNT2_I]], align 4, !tbaa [[TBAA13]]
324+ // AIE2P-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i20
325+ // AIE2P-NEXT: [[TMP14:%.*]] = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr [[A:%.*]], i20 [[TMP5]], i20 [[TMP6]], i20 [[TMP7]], i20 [[TMP8]], i20 [[TMP10]], i20 [[TMP11]], i20 [[TMP13]])
326+ // AIE2P-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 1
327+ // AIE2P-NEXT: [[TMP16:%.*]] = zext i20 [[TMP15]] to i32
328+ // AIE2P-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 2
329+ // AIE2P-NEXT: [[TMP18:%.*]] = zext i20 [[TMP17]] to i32
330+ // AIE2P-NEXT: store i32 [[TMP16]], ptr [[COUNT1_I]], align 4
331+ // AIE2P-NEXT: store i32 [[TMP18]], ptr [[COUNT2_I]], align 4
332+ // AIE2P-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i20, i20 } [[TMP14]], 0
333+ // AIE2P-NEXT: ret ptr [[TMP19]]
334+ //
335+ v16int32* test_add_3d_byte (v16int32* a, dims_3d_t &params){ // params is an lvalue reference (mangled as R9dims_3d_t in the checks above)
336+ return add_3d_byte (a,params); // forwards both arguments unchanged; the expected IR writes two i32 values back through params
337+ }
338+
339+
228340// AIE2-LABEL: @_Z15test_add_3d_ptrPDv16_iiiRiiiS1_i(
229341// AIE2-NEXT: entry:
230342// AIE2-NEXT: [[MUL_I:%.*]] = shl i32 [[OFF:%.*]], 6
0 commit comments