Skip to content

Commit 8530c4a

Browse files
authored
Perform shadow alloc and null init first (#1319)
1 parent f5dc158 commit 8530c4a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+143
-111
lines changed

enzyme/Enzyme/AdjointGenerator.h

Lines changed: 57 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8938,6 +8938,7 @@ class AdjointGenerator
89388938
anti = gutils->cacheForReverse(
89398939
bb, anti, getIndex(&call, CacheType::Shadow));
89408940
} else {
8941+
bool zeroed = false;
89418942
auto rule = [&]() {
89428943
#if LLVM_VERSION_MAJOR >= 11
89438944
Value *anti = bb.CreateCall(call.getFunctionType(),
@@ -9006,6 +9007,19 @@ class AdjointGenerator
90069007
#endif
90079008
}
90089009
}
9010+
if (Mode == DerivativeMode::ReverseModeCombined ||
9011+
(Mode == DerivativeMode::ReverseModePrimal &&
9012+
forwardsShadow) ||
9013+
(Mode == DerivativeMode::ReverseModeGradient &&
9014+
backwardsShadow) ||
9015+
(Mode == DerivativeMode::ForwardModeSplit &&
9016+
backwardsShadow)) {
9017+
if (!inLoop) {
9018+
zeroKnownAllocation(bb, anti, args, funcName, gutils->TLI,
9019+
&call);
9020+
zeroed = true;
9021+
}
9022+
}
90099023
}
90109024
return anti;
90119025
};
@@ -9024,6 +9038,7 @@ class AdjointGenerator
90249038
else {
90259039
if (auto MD = hasMetadata(&call, "enzyme_fromstack")) {
90269040
isAlloca = true;
9041+
bb.SetInsertPoint(cast<Instruction>(anti));
90279042
Value *Size;
90289043
if (funcName == "malloc")
90299044
Size = args[0];
@@ -9058,55 +9073,60 @@ class AdjointGenerator
90589073
#if LLVM_VERSION_MAJOR >= 15
90599074
}
90609075
#endif
9061-
Value *replacement = bb.CreateAlloca(elTy, Size, name);
9062-
if (name.size() == 0)
9063-
replacement->takeName(anti);
9064-
else
9065-
anti->setName("");
9066-
auto Alignment = cast<ConstantInt>(cast<ConstantAsMetadata>(
9067-
MD->getOperand(0))
9068-
->getValue())
9069-
->getLimitedValue();
9070-
if (Alignment) {
9076+
auto rule = [&](Value *anti) {
9077+
Value *replacement = bb.CreateAlloca(elTy, Size, name);
9078+
if (name.size() == 0)
9079+
replacement->takeName(anti);
9080+
else
9081+
anti->setName("");
9082+
auto Alignment = cast<ConstantInt>(cast<ConstantAsMetadata>(
9083+
MD->getOperand(0))
9084+
->getValue())
9085+
->getLimitedValue();
9086+
if (Alignment) {
90719087
#if LLVM_VERSION_MAJOR >= 10
9072-
cast<AllocaInst>(replacement)
9073-
->setAlignment(Align(Alignment));
9088+
cast<AllocaInst>(replacement)
9089+
->setAlignment(Align(Alignment));
90749090
#else
9075-
cast<AllocaInst>(replacement)->setAlignment(Alignment);
9091+
cast<AllocaInst>(replacement)->setAlignment(Alignment);
90769092
#endif
9077-
}
9093+
}
90789094
#if LLVM_VERSION_MAJOR >= 15
9079-
if (call.getContext().supportsTypedPointers()) {
9095+
if (call.getContext().supportsTypedPointers()) {
90809096
#endif
9081-
if (anti->getType()->getPointerElementType() != elTy)
9082-
replacement = bb.CreatePointerCast(
9083-
replacement,
9084-
PointerType::getUnqual(
9085-
anti->getType()->getPointerElementType()));
9097+
if (anti->getType()->getPointerElementType() != elTy)
9098+
replacement = bb.CreatePointerCast(
9099+
replacement,
9100+
PointerType::getUnqual(
9101+
anti->getType()->getPointerElementType()));
90869102
#if LLVM_VERSION_MAJOR >= 15
9087-
}
9103+
}
90889104
#endif
90899105

9090-
if (int AS = cast<PointerType>(anti->getType())
9091-
->getAddressSpace()) {
9092-
llvm::PointerType *PT;
9106+
if (int AS = cast<PointerType>(anti->getType())
9107+
->getAddressSpace()) {
9108+
llvm::PointerType *PT;
90939109
#if LLVM_VERSION_MAJOR >= 15
9094-
if (call.getContext().supportsTypedPointers()) {
9110+
if (call.getContext().supportsTypedPointers()) {
90959111
#endif
9096-
PT = PointerType::get(
9097-
anti->getType()->getPointerElementType(), AS);
9112+
PT = PointerType::get(
9113+
anti->getType()->getPointerElementType(), AS);
90989114
#if LLVM_VERSION_MAJOR >= 15
9099-
} else {
9100-
PT = PointerType::get(anti->getContext(), AS);
9101-
}
9115+
} else {
9116+
PT = PointerType::get(anti->getContext(), AS);
9117+
}
91029118
#endif
9103-
replacement = bb.CreateAddrSpaceCast(replacement, PT);
9104-
cast<Instruction>(replacement)
9105-
->setMetadata(
9106-
"enzyme_backstack",
9107-
MDNode::get(replacement->getContext(), {}));
9108-
}
9119+
replacement = bb.CreateAddrSpaceCast(replacement, PT);
9120+
cast<Instruction>(replacement)
9121+
->setMetadata(
9122+
"enzyme_backstack",
9123+
MDNode::get(replacement->getContext(), {}));
9124+
}
9125+
return replacement;
9126+
};
91099127

9128+
auto replacement =
9129+
applyChainRule(call.getType(), bb, rule, anti);
91109130
gutils->replaceAWithB(cast<Instruction>(anti), replacement);
91119131
gutils->erase(cast<Instruction>(anti));
91129132
anti = replacement;
@@ -9121,13 +9141,7 @@ class AdjointGenerator
91219141
(Mode == DerivativeMode::ForwardModeSplit &&
91229142
backwardsShadow)) {
91239143
if (!inLoop) {
9124-
applyChainRule(
9125-
bb,
9126-
[&](Value *anti) {
9127-
zeroKnownAllocation(bb, anti, args, funcName,
9128-
gutils->TLI, &call);
9129-
},
9130-
anti);
9144+
assert(zeroed);
91319145
}
91329146
}
91339147
}

enzyme/Enzyme/GradientUtils.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8213,14 +8213,30 @@ void GradientUtils::forceAugmentedReturns() {
82138213

82148214
IRBuilder<> BuilderZ(inst);
82158215
getForwardBuilder(BuilderZ);
8216+
8217+
// Shadow allocations must strictly precede the primal, lest Julia have
8218+
// GC issues. Consider the following: %r = gc_alloc() init %r
8219+
// ...
8220+
// if the shadow did not precede
8221+
// %r = gc_alloc()
8222+
// %dr = gc_alloc()
8223+
// zero %dr
8224+
// init %r, %dr
8225+
// ...
8226+
// After %r, before %dr the %r memory would be uninit, so the allocator
8227+
// inside %dr would hit garbage and segfault. However, by having the %dr
8228+
// first, then it will be zero'd before the %r allocation, preventing the
8229+
// issue.
8230+
if (isAllocationCall(inst, TLI))
8231+
BuilderZ.SetInsertPoint(getNewFromOriginal(inst));
82168232
Type *antiTy = getShadowType(inst->getType());
82178233

82188234
PHINode *anti = BuilderZ.CreatePHI(antiTy, 1, op->getName() + "'ip_phi");
82198235
anti->setDebugLoc(getNewFromOriginal(op->getDebugLoc()));
82208236
invertedPointers.insert(
82218237
std::make_pair((const Value *)inst, InvertedPointerVH(this, anti)));
82228238

8223-
if (called && isAllocationFunction(called->getName(), TLI)) {
8239+
if (isAllocationCall(inst, TLI)) {
82248240
anti->setName(op->getName() + "'mi");
82258241
}
82268242
}

enzyme/test/Enzyme/ReverseMode/allocacache.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,9 @@ attributes #11 = { cold }
484484

485485
; CHECK: define internal { { <2 x double>, i8*, i8*, <2 x double> }, <2 x double> } @augmented_subfn(<2 x double>* %W, <2 x double>* %"W'", double %B1, double %B2, i64 %row)
486486
; CHECK-NEXT: entry:
487-
; CHECK-NEXT: %malloccall = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
488487
; CHECK-NEXT: %"malloccall'mi" = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
489488
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* {{(noundef )?}}nonnull align 1 dereferenceable(16) dereferenceable_or_null(16) %"malloccall'mi", i8 0, i64 16, i1 false)
489+
; CHECK-NEXT: %malloccall = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
490490
; CHECK-NEXT: %"Bref'ipc" = bitcast i8* %"malloccall'mi" to <2 x double>*
491491
; CHECK-NEXT: %Bref = bitcast i8* %malloccall to <2 x double>*
492492
; CHECK-NEXT: %W34p = getelementptr inbounds <2 x double>, <2 x double>* %W, i64 1
@@ -512,8 +512,8 @@ attributes #11 = { cold }
512512

513513
; CHECK: define internal { double, double } @diffesubfn(<2 x double>* %W, <2 x double>* %"W'", double %B1, double %B2, i64 %row, <2 x double> %differeturn, { <2 x double>, i8*, i8*, <2 x double> } %tapeArg)
514514
; CHECK-NEXT: entry:
515-
; CHECK-NEXT: %[[malloccall:.+]] = extractvalue { <2 x double>, i8*, i8*, <2 x double> } %tapeArg, 2
516515
; CHECK-NEXT: %[[malloccallmi:.+]] = extractvalue { <2 x double>, i8*, i8*, <2 x double> } %tapeArg, 1
516+
; CHECK-NEXT: %[[malloccall:.+]] = extractvalue { <2 x double>, i8*, i8*, <2 x double> } %tapeArg, 2
517517
; CHECK-NEXT: %[[Brefipc:.+]] = bitcast i8* %[[malloccallmi]] to <2 x double>*
518518
; CHECK-NEXT: %[[Bref:.+]] = bitcast i8* %[[malloccall]] to <2 x double>*
519519
; CHECK-NEXT: %[[W34pipge:.+]] = getelementptr inbounds <2 x double>, <2 x double>* %"W'", i64 1

enzyme/test/Enzyme/ReverseMode/alloctomalloc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,9 +495,9 @@ attributes #9 = { cold }
495495

496496
; CHECK: define internal { <2 x double>*, i8*, i8*, <2 x double>, double, <2 x double>, double } @augmented_subfn(<2 x double>* %dst, <2 x double>* %"dst'", %"class.Eigen::Matrix"* %W, %"class.Eigen::Matrix"* %"W'", double* %B, double* %"B'")
497497
; CHECK-NEXT: entry:
498-
; CHECK-NEXT: %malloccall = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
499498
; CHECK-NEXT: %"malloccall'mi" = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
500499
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* {{(noundef )?}}nonnull align 1 dereferenceable(16) dereferenceable_or_null(16) %"malloccall'mi", i8 0, i64 16, i1 false)
500+
; CHECK-NEXT: %malloccall = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
501501
; CHECK-NEXT: %"tmp.i'ipc" = bitcast i8* %"malloccall'mi" to <2 x double>*
502502
; CHECK-NEXT: %tmp.i = bitcast i8* %malloccall to <2 x double>*
503503
; CHECK-NEXT: %subcast_augmented = call { <2 x double>*, <2 x double>* } @augmented_subcast(<2 x double>*{{( nonnull)?}} %tmp.i, <2 x double>*{{( nonnull)?}} %"tmp.i'ipc")
@@ -534,8 +534,8 @@ attributes #9 = { cold }
534534

535535
; CHECK: define internal void @diffesubfn(<2 x double>* %dst, <2 x double>* %"dst'", %"class.Eigen::Matrix"* %W, %"class.Eigen::Matrix"* %"W'", double* %B, double* %"B'", { <2 x double>*, i8*, i8*, <2 x double>, double, <2 x double>, double } %tapeArg)
536536
; CHECK-NEXT: entry:
537-
; CHECK-NEXT: %[[malloccall:.+]] = extractvalue { <2 x double>*, i8*, i8*, <2 x double>, double, <2 x double>, double } %tapeArg, 2
538537
; CHECK-NEXT: %[[malloccallmi:.+]] = extractvalue { <2 x double>*, i8*, i8*, <2 x double>, double, <2 x double>, double } %tapeArg, 1
538+
; CHECK-NEXT: %[[malloccall:.+]] = extractvalue { <2 x double>*, i8*, i8*, <2 x double>, double, <2 x double>, double } %tapeArg, 2
539539

540540
; CHECK-NEXT: %[[tmpiipc:.+]] = bitcast i8* %[[malloccallmi]] to <2 x double>*
541541
; CHECK-NEXT: %[[tmpi:.+]] = bitcast i8* %[[malloccall]] to <2 x double>*

enzyme/test/Enzyme/ReverseMode/badconstmalloc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,9 +753,9 @@ attributes #12 = { cold }
753753

754754
; CHECK: define internal { { i8*, i8* }, double*, double* } @augmented_inneralloc()
755755
; CHECK-NEXT: entry:
756-
; CHECK-NEXT: %call.i.i = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
757756
; CHECK-NEXT: %"call.i.i'mi" = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
758757
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* {{(noundef )?}}nonnull align 1 dereferenceable(16) dereferenceable_or_null(16) %"call.i.i'mi", i8 0, i64 16, i1 false)
758+
; CHECK-NEXT: %call.i.i = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @malloc(i64 16)
759759
; CHECK-NEXT: %"'ipc" = bitcast i8* %"call.i.i'mi" to double*
760760
; CHECK-NEXT: %0 = bitcast i8* %call.i.i to double*
761761
; CHECK-NEXT: %.fca.0.0.insert = insertvalue { { i8*, i8* }, double*, double* } {{(undef|poison)}}, i8* %"call.i.i'mi", 0, 0

enzyme/test/Enzyme/ReverseMode/callundefinphi.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,9 @@ attributes #22 = { readnone speculatable }
447447

448448
; CHECK: define internal void @diffe_ZL6matvecPKN5Eigen6MatrixIdLin1ELin1ELi0ELin1ELin1EEES3_(double* noalias %W, double* %"W'", double* noalias %M, double* %"M'", double %differeturn)
449449
; CHECK-NEXT: entry:
450-
; CHECK-NEXT: %call.i.i.i.i.i.i.i = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
451450
; CHECK-NEXT: %"call.i.i.i.i.i.i.i'mi" = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
452451
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* {{(noundef )?}}nonnull dereferenceable(128) dereferenceable_or_null(128) %"call.i.i.i.i.i.i.i'mi", i8 0, i64 128, i1 false)
452+
; CHECK-NEXT: %call.i.i.i.i.i.i.i = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
453453
; CHECK-NEXT: %"'ipc" = bitcast i8* %"call.i.i.i.i.i.i.i'mi" to double*
454454
; CHECK-NEXT: %0 = bitcast i8* %call.i.i.i.i.i.i.i to double*
455455
; CHECK-NEXT: br label %for.body.i.i
@@ -468,9 +468,9 @@ attributes #22 = { readnone speculatable }
468468
; CHECK-NEXT: br i1 %exitcond.i.i, label %_ZN5Eigen8internal26call_dense_assignment_loopINS_6MatrixIdLin1ELin1ELi0ELin1ELin1EEENS_13CwiseBinaryOpINS0_20scalar_difference_opIddEEKS3_S7_EENS0_9assign_opIddEEEEvRT_RKT0_RKT1_.exit, label %for.body.i.i
469469

470470
; CHECK: _ZN5Eigen8internal26call_dense_assignment_loopINS_6MatrixIdLin1ELin1ELi0ELin1ELin1EEENS_13CwiseBinaryOpINS0_20scalar_difference_opIddEEKS3_S7_EENS0_9assign_opIddEEEEvRT_RKT0_RKT1_.exit: ; preds = %for.body.i.i
471-
; CHECK-NEXT: %call.i.i.i.i.i.i.i13 = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
472471
; CHECK-NEXT: %"call.i.i.i.i.i.i.i13'mi" = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
473472
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* {{(noundef )?}}nonnull dereferenceable(128) dereferenceable_or_null(128) %"call.i.i.i.i.i.i.i13'mi", i8 0, i64 128, i1 false)
473+
; CHECK-NEXT: %call.i.i.i.i.i.i.i13 = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
474474
; CHECK-NEXT: %[[ipc8:.+]] = bitcast i8* %"call.i.i.i.i.i.i.i13'mi" to double*
475475
; CHECK-NEXT: %[[unwrap:.+]] = bitcast i8* %call.i.i.i.i.i.i.i13 to double*
476476
; CHECK-NEXT: br label %for.body.i

enzyme/test/Enzyme/ReverseMode/callundefinphi2.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,9 @@ attributes #22 = { readnone speculatable }
237237

238238
; CHECK: define internal void @diffe_ZL6matvecPKN5Eigen6MatrixIdLin1ELin1ELi0ELin1ELin1EEES3_(double* noalias %W, double* %"W'", double* noalias %M, double* %"M'", double %differeturn)
239239
; CHECK-NEXT: entry:
240-
; CHECK-NEXT: %call.i.i.i.i.i.i.i = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
241240
; CHECK-NEXT: %"call.i.i.i.i.i.i.i'mi" = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
242241
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull dereferenceable(128) dereferenceable_or_null(128) %"call.i.i.i.i.i.i.i'mi", i8 0, i64 128, i1 false)
242+
; CHECK-NEXT: %call.i.i.i.i.i.i.i = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
243243
; CHECK-NEXT: %"'ipc" = bitcast i8* %"call.i.i.i.i.i.i.i'mi" to double*
244244
; CHECK-NEXT: %0 = bitcast i8* %call.i.i.i.i.i.i.i to double*
245245
; CHECK-NEXT: br label %for.body.i.i
@@ -258,9 +258,9 @@ attributes #22 = { readnone speculatable }
258258
; CHECK-NEXT: br i1 %exitcond.i.i, label %_ZN5Eigen8internal26call_dense_assignment_loopINS_6MatrixIdLin1ELin1ELi0ELin1ELin1EEENS_13CwiseBinaryOpINS0_20scalar_difference_opIddEEKS3_S7_EENS0_9assign_opIddEEEEvRT_RKT0_RKT1_.exit, label %for.body.i.i
259259

260260
; CHECK: _ZN5Eigen8internal26call_dense_assignment_loopINS_6MatrixIdLin1ELin1ELi0ELin1ELin1EEENS_13CwiseBinaryOpINS0_20scalar_difference_opIddEEKS3_S7_EENS0_9assign_opIddEEEEvRT_RKT0_RKT1_.exit: ; preds = %for.body.i.i
261-
; CHECK-NEXT: %call.i.i.i.i.i.i.i13 = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
262261
; CHECK-NEXT: %"call.i.i.i.i.i.i.i13'mi" = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
263262
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull dereferenceable(128) dereferenceable_or_null(128) %"call.i.i.i.i.i.i.i13'mi", i8 0, i64 128, i1 false)
263+
; CHECK-NEXT: %call.i.i.i.i.i.i.i13 = call noalias nonnull dereferenceable(128) dereferenceable_or_null(128) i8* @malloc(i64 128)
264264
; CHECK-NEXT: %[[ipc8:.+]] = bitcast i8* %"call.i.i.i.i.i.i.i13'mi" to double*
265265
; CHECK-NEXT: %3 = bitcast i8* %call.i.i.i.i.i.i.i13 to double*
266266
; CHECK-NEXT: %_augmented = call double** @augmented_subfn(double* nonnull %3, double* nonnull %[[ipc8]], double* nonnull %0, double* nonnull %"'ipc")

enzyme/test/Enzyme/ReverseMode/cppllist.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,9 @@ attributes #8 = { builtin nounwind }
182182
; CHECK-NEXT: %[[nodevar:.+]] = phi %class.node* [ %"'ipc", %for.body ], [ null, %entry ]
183183
; CHECK-NEXT: %list.09 = phi %class.node* [ %[[bcnode:.+]], %for.body ], [ null, %entry ]
184184
; CHECK-NEXT: %[[ivnext]] = add nuw nsw i64 %[[iv]], 1
185-
; CHECK-NEXT: %call = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @_Znwm(i64 16)
186185
; CHECK-NEXT: %"call'mi" = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @_Znwm(i64 16)
187186
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* {{(noundef )?}}nonnull {{(align 1 )?}}dereferenceable(16) dereferenceable_or_null(16) %"call'mi", i8 0, i64 16, {{(i32 1, )?}}i1 false)
187+
; CHECK-NEXT: %call = tail call noalias nonnull dereferenceable(16) dereferenceable_or_null(16) i8* @_Znwm(i64 16)
188188
; CHECK-NEXT: %"'ipc" = bitcast i8* %"call'mi" to %class.node*
189189
; CHECK-NEXT: %[[bcnode]] = bitcast i8* %call to %class.node*
190190
; CHECK-NEXT: %value.i = bitcast i8* %call to double*

enzyme/test/Enzyme/ReverseMode/duplicatemalloc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ attributes #4 = { nounwind }
8282
; CHECK: define internal { double } @diffemalloced(double %x, i64 %n, double %differeturn)
8383
; CHECK-NEXT: entry:
8484
; CHECK-NEXT: %mul = shl i64 %n, 3
85-
; CHECK-NEXT: %call = tail call i8* @malloc(i64 %mul)
8685
; CHECK-NEXT: %[[dcall:.+]] = tail call noalias nonnull i8* @malloc(i64 %mul)
8786
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull {{(align 1 )?}}%"call'mi", i8 0, i64 %mul, {{(i32 1, )?}}i1 false)
87+
; CHECK-NEXT: %call = tail call i8* @malloc(i64 %mul)
8888
; CHECK-NEXT: %[[ipci:.+]] = bitcast i8* %[[dcall]] to double*
8989
; CHECK-NEXT: %[[bccall:.+]] = bitcast i8* %call to double*
9090
; CHECK-NEXT: store double %x, double* %[[bccall]], align 8, !tbaa !2

enzyme/test/Enzyme/ReverseMode/duplicatemallocloop.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ attributes #6 = { nounwind }
5656

5757
; CHECK: define internal void @diffemalloced(double* noalias nocapture %a0, double* nocapture %"a0'", double* noalias nocapture readonly %a1, double* nocapture %"a1'", i32 %a2)
5858
; CHECK-NEXT: entry:
59-
; CHECK-NEXT: %a5 = call noalias nonnull dereferenceable(8) dereferenceable_or_null(8) i8* @malloc(i32 8)
6059
; CHECK-NEXT: %"a5'mi" = call noalias nonnull dereferenceable(8) dereferenceable_or_null(8) i8* @malloc(i32 8)
6160
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull dereferenceable(8) dereferenceable_or_null(8) %"a5'mi", i8 0, i64 8, i1 false)
61+
; CHECK-NEXT: %a5 = call noalias nonnull dereferenceable(8) dereferenceable_or_null(8) i8* @malloc(i32 8)
6262
; CHECK-NEXT: %"a6'ipc" = bitcast i8* %"a5'mi" to double*
6363
; CHECK-NEXT: %a6 = bitcast i8* %a5 to double*
6464
; CHECK-NEXT: br label %loop

0 commit comments

Comments
 (0)