@@ -119,61 +119,17 @@ entry:
119119
120120; CHECK: define internal { double*, double* } @[[augMod]](i32 %len, double* noalias %m, double* %"m'", i32 %incm, double* noalias %n, double* %"n'", i32 %incn)
121121; CHECK-NEXT: entry:
122- ; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %len, 8
123- ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize)
124- ; CHECK-NEXT: %0 = bitcast i8* %malloccall to double*
125- ; CHECK-NEXT: %1 = icmp eq i32 %len, 0
126- ; CHECK-NEXT: br i1 %1, label %__enzyme_memcpy_double_32_da0sa0stride.exit, label %init.idx.i
127-
128- ; CHECK: init.idx.i: ; preds = %entry
129- ; CHECK-NEXT: %a.i = sub nsw i32 1, %len
130- ; CHECK-NEXT: %negidx.i = mul nsw i32 %a.i, %incm
131- ; CHECK-NEXT: %is.neg.i = icmp slt i32 %incm, 0
132- ; CHECK-NEXT: %startidx.i = select i1 %is.neg.i, i32 %negidx.i, i32 0
133- ; CHECK-NEXT: br label %for.body.i
134-
135- ; CHECK: for.body.i: ; preds = %for.body.i, %init.idx.i
136- ; CHECK-NEXT: %idx.i = phi i32 [ 0, %init.idx.i ], [ %idx.next.i, %for.body.i ]
137- ; CHECK-NEXT: %sidx.i = phi i32 [ %startidx.i, %init.idx.i ], [ %sidx.next.i, %for.body.i ]
138- ; CHECK-NEXT: %dst.i.i = getelementptr inbounds double, double* %0, i32 %idx.i
139- ; CHECK-NEXT: %src.i.i = getelementptr inbounds double, double* %m, i32 %sidx.i
140- ; CHECK-NEXT: %src.i.l.i = load double, double* %src.i.i
141- ; CHECK-NEXT: store double %src.i.l.i, double* %dst.i.i
142- ; CHECK-NEXT: %idx.next.i = add nsw i32 %idx.i, 1
143- ; CHECK-NEXT: %sidx.next.i = add nsw i32 %sidx.i, %incm
144- ; CHECK-NEXT: %2 = icmp eq i32 %len, %idx.next.i
145- ; CHECK-NEXT: br i1 %2, label %__enzyme_memcpy_double_32_da0sa0stride.exit, label %for.body.i
146-
147- ; CHECK: __enzyme_memcpy_double_32_da0sa0stride.exit: ; preds = %entry, %for.body.i
148- ; CHECK-NEXT: %mallocsize1 = mul nuw nsw i32 %len, 8
149- ; CHECK-NEXT: %malloccall2 = tail call noalias nonnull i8* @malloc(i32 %mallocsize1)
150- ; CHECK-NEXT: %3 = bitcast i8* %malloccall2 to double*
151- ; CHECK-NEXT: %4 = icmp eq i32 %len, 0
152- ; CHECK-NEXT: br i1 %4, label %__enzyme_memcpy_double_32_da0sa0stride.exit14, label %init.idx.i5
153-
154- ; CHECK: init.idx.i5: ; preds = %__enzyme_memcpy_double_32_da0sa0stride.exit
155- ; CHECK-NEXT: %a.i1 = sub nsw i32 1, %len
156- ; CHECK-NEXT: %negidx.i2 = mul nsw i32 %a.i1, %incn
157- ; CHECK-NEXT: %is.neg.i3 = icmp slt i32 %incn, 0
158- ; CHECK-NEXT: %startidx.i4 = select i1 %is.neg.i3, i32 %negidx.i2, i32 0
159- ; CHECK-NEXT: br label %for.body.i13
160-
161- ; CHECK: for.body.i13: ; preds = %for.body.i13, %init.idx.i5
162- ; CHECK-NEXT: %idx.i6 = phi i32 [ 0, %init.idx.i5 ], [ %idx.next.i11, %for.body.i13 ]
163- ; CHECK-NEXT: %sidx.i7 = phi i32 [ %startidx.i4, %init.idx.i5 ], [ %sidx.next.i12, %for.body.i13 ]
164- ; CHECK-NEXT: %dst.i.i8 = getelementptr inbounds double, double* %3, i32 %idx.i6
165- ; CHECK-NEXT: %src.i.i9 = getelementptr inbounds double, double* %n, i32 %sidx.i7
166- ; CHECK-NEXT: %src.i.l.i10 = load double, double* %src.i.i9
167- ; CHECK-NEXT: store double %src.i.l.i10, double* %dst.i.i8
168- ; CHECK-NEXT: %idx.next.i11 = add nsw i32 %idx.i6, 1
169- ; CHECK-NEXT: %sidx.next.i12 = add nsw i32 %sidx.i7, %incn
170- ; CHECK-NEXT: %5 = icmp eq i32 %len, %idx.next.i11
171- ; CHECK-NEXT: br i1 %5, label %__enzyme_memcpy_double_32_da0sa0stride.exit14, label %for.body.i13
172-
173- ; CHECK: __enzyme_memcpy_double_32_da0sa0stride.exit14: ; preds = %__enzyme_memcpy_double_32_da0sa0stride.exit, %for.body.i13
174- ; CHECK-NEXT: %6 = insertvalue { double*, double* } undef, double* %0, 0
175- ; CHECK-NEXT: %7 = insertvalue { double*, double* } %6, double* %3, 1
176- ; CHECK-NEXT: ret { double*, double* } %7
122+ ; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %len, 8
123+ ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize)
124+ ; CHECK-NEXT: %0 = bitcast i8* %malloccall to double*
125+ ; CHECK-NEXT: call void @cblas_dcopy(i32 %len, double* %m, i32 %incm, double* %0, i32 1)
126+ ; CHECK-NEXT: %mallocsize1 = mul nuw nsw i32 %len, 8
127+ ; CHECK-NEXT: %malloccall2 = tail call noalias nonnull i8* @malloc(i32 %mallocsize1)
128+ ; CHECK-NEXT: %1 = bitcast i8* %malloccall2 to double*
129+ ; CHECK-NEXT: call void @cblas_dcopy(i32 %len, double* %n, i32 %incn, double* %1, i32 1)
130+ ; CHECK-NEXT: %2 = insertvalue { double*, double* } undef, double* %0, 0
131+ ; CHECK-NEXT: %3 = insertvalue { double*, double* } %2, double* %1, 1
132+ ; CHECK-NEXT: ret { double*, double* } %3
177133; CHECK-NEXT: }
178134
179135; CHECK: define internal void @[[revMod]](i32 %len, double* noalias %m, double* %"m'", i32 %incm, double* noalias %n, double* %"n'", i32 %incn, double %differeturn, { double*, double* }
@@ -198,33 +154,11 @@ entry:
198154
199155; CHECK: define internal double* @augmented_f.6(i32 %len, double* noalias %m, i32 %incm, double* noalias %n, double* %"n'", i32 %incn)
200156; CHECK-NEXT: entry:
201- ; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %len, 8
202- ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize)
203- ; CHECK-NEXT: %0 = bitcast i8* %malloccall to double*
204- ; CHECK-NEXT: %1 = icmp eq i32 %len, 0
205- ; CHECK-NEXT: br i1 %1, label %__enzyme_memcpy_double_32_da0sa0stride.exit, label %init.idx.i
206-
207- ; CHECK: init.idx.i: ; preds = %entry
208- ; CHECK-NEXT: %a.i = sub nsw i32 1, %len
209- ; CHECK-NEXT: %negidx.i = mul nsw i32 %a.i, %incm
210- ; CHECK-NEXT: %is.neg.i = icmp slt i32 %incm, 0
211- ; CHECK-NEXT: %startidx.i = select i1 %is.neg.i, i32 %negidx.i, i32 0
212- ; CHECK-NEXT: br label %for.body.i
213-
214- ; CHECK: for.body.i: ; preds = %for.body.i, %init.idx.i
215- ; CHECK-NEXT: %idx.i = phi i32 [ 0, %init.idx.i ], [ %idx.next.i, %for.body.i ]
216- ; CHECK-NEXT: %sidx.i = phi i32 [ %startidx.i, %init.idx.i ], [ %sidx.next.i, %for.body.i ]
217- ; CHECK-NEXT: %dst.i.i = getelementptr inbounds double, double* %0, i32 %idx.i
218- ; CHECK-NEXT: %src.i.i = getelementptr inbounds double, double* %m, i32 %sidx.i
219- ; CHECK-NEXT: %src.i.l.i = load double, double* %src.i.i
220- ; CHECK-NEXT: store double %src.i.l.i, double* %dst.i.i
221- ; CHECK-NEXT: %idx.next.i = add nsw i32 %idx.i, 1
222- ; CHECK-NEXT: %sidx.next.i = add nsw i32 %sidx.i, %incm
223- ; CHECK-NEXT: %2 = icmp eq i32 %len, %idx.next.i
224- ; CHECK-NEXT: br i1 %2, label %__enzyme_memcpy_double_32_da0sa0stride.exit, label %for.body.i
225-
226- ; CHECK: __enzyme_memcpy_double_32_da0sa0stride.exit: ; preds = %entry, %for.body.i
227- ; CHECK-NEXT: ret double* %0
157+ ; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %len, 8
158+ ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize)
159+ ; CHECK-NEXT: %0 = bitcast i8* %malloccall to double*
160+ ; CHECK-NEXT: call void @cblas_dcopy(i32 %len, double* %m, i32 %incm, double* %0, i32 1)
161+ ; CHECK-NEXT: ret double* %0
228162; CHECK-NEXT: }
229163
230164; CHECK: define internal void @[[revModFirst]](i32 %len, double* noalias %m, i32 %incm, double* noalias %n, double* %"n'", i32 %incn, double %differeturn, double*
@@ -244,33 +178,11 @@ entry:
244178
245179; CHECK: define internal double* @[[augModSecond]](i32 %len, double* noalias %m, double* %"m'", i32 %incm, double* noalias %n, i32 %incn)
246180; CHECK-NEXT: entry:
247- ; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %len, 8
248- ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize)
249- ; CHECK-NEXT: %0 = bitcast i8* %malloccall to double*
250- ; CHECK-NEXT: %1 = icmp eq i32 %len, 0
251- ; CHECK-NEXT: br i1 %1, label %__enzyme_memcpy_double_32_da0sa0stride.exit, label %init.idx.i
252-
253- ; CHECK: init.idx.i: ; preds = %entry
254- ; CHECK-NEXT: %a.i = sub nsw i32 1, %len
255- ; CHECK-NEXT: %negidx.i = mul nsw i32 %a.i, %incn
256- ; CHECK-NEXT: %is.neg.i = icmp slt i32 %incn, 0
257- ; CHECK-NEXT: %startidx.i = select i1 %is.neg.i, i32 %negidx.i, i32 0
258- ; CHECK-NEXT: br label %for.body.i
259-
260- ; CHECK: for.body.i: ; preds = %for.body.i, %init.idx.i
261- ; CHECK-NEXT: %idx.i = phi i32 [ 0, %init.idx.i ], [ %idx.next.i, %for.body.i ]
262- ; CHECK-NEXT: %sidx.i = phi i32 [ %startidx.i, %init.idx.i ], [ %sidx.next.i, %for.body.i ]
263- ; CHECK-NEXT: %dst.i.i = getelementptr inbounds double, double* %0, i32 %idx.i
264- ; CHECK-NEXT: %src.i.i = getelementptr inbounds double, double* %n, i32 %sidx.i
265- ; CHECK-NEXT: %src.i.l.i = load double, double* %src.i.i
266- ; CHECK-NEXT: store double %src.i.l.i, double* %dst.i.i
267- ; CHECK-NEXT: %idx.next.i = add nsw i32 %idx.i, 1
268- ; CHECK-NEXT: %sidx.next.i = add nsw i32 %sidx.i, %incn
269- ; CHECK-NEXT: %2 = icmp eq i32 %len, %idx.next.i
270- ; CHECK-NEXT: br i1 %2, label %__enzyme_memcpy_double_32_da0sa0stride.exit, label %for.body.i
271-
272- ; CHECK: __enzyme_memcpy_double_32_da0sa0stride.exit: ; preds = %entry, %for.body.i
273- ; CHECK-NEXT: ret double* %0
181+ ; CHECK-NEXT: %mallocsize = mul nuw nsw i32 %len, 8
182+ ; CHECK-NEXT: %malloccall = tail call noalias nonnull i8* @malloc(i32 %mallocsize)
183+ ; CHECK-NEXT: %0 = bitcast i8* %malloccall to double*
184+ ; CHECK-NEXT: call void @cblas_dcopy(i32 %len, double* %n, i32 %incn, double* %0, i32 1)
185+ ; CHECK-NEXT: ret double* %0
274186; CHECK-NEXT: }
275187
276188
0 commit comments