Skip to content

Commit e8ccecc

Browse files
artagnon and nikic authored
[IVDesc] Fix off-by-one error in FindFirstIV ranges (#174441)
ConstantRange::getNonEmpty was excluding both MAX and MAX - 1 from the valid range in FindFirstIV vectorization. This was discovered through an i1 miscompile, where getNonEmpty returns the full range. Fix it to exclude only MAX. The change also necessitated fixing a test that is not supposed to be vectorized. Fixes #173459. Co-authored-by: Nikita Popov <[email protected]>
1 parent 774ea53 commit e8ccecc

File tree

3 files changed

+260
-155
lines changed

3 files changed

+260
-155
lines changed

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -796,20 +796,15 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
796796
const ConstantRange IVRange =
797797
IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR);
798798
unsigned NumBits = Ty->getIntegerBitWidth();
799-
ConstantRange ValidRange = ConstantRange::getEmpty(NumBits);
799+
APInt Sentinel;
800800
if (isFindLastIVRecurrenceKind(Kind)) {
801-
APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
802-
: APInt::getMinValue(NumBits);
803-
ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
801+
Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
802+
: APInt::getMinValue(NumBits);
804803
} else {
805-
if (IsSigned)
806-
ValidRange =
807-
ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
808-
APInt::getSignedMaxValue(NumBits) - 1);
809-
else
810-
ValidRange = ConstantRange::getNonEmpty(
811-
APInt::getMinValue(NumBits), APInt::getMaxValue(NumBits) - 1);
804+
Sentinel = IsSigned ? APInt::getSignedMaxValue(NumBits)
805+
: APInt::getMaxValue(NumBits);
812806
}
807+
ConstantRange ValidRange = ConstantRange(Sentinel).inverse();
813808

814809
LLVM_DEBUG(dbgs() << "LV: "
815810
<< (isFindLastIVRecurrenceKind(Kind) ? "FindLastIV"

llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll

Lines changed: 239 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr
970970
; CHECK-NEXT: [[ENTRY:.*]]:
971971
; CHECK-NEXT: br label %[[LOOP:.*]]
972972
; CHECK: [[LOOP]]:
973-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
973+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
974974
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[LOOP]] ]
975975
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
976976
; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
@@ -988,6 +988,244 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr
988988
entry:
989989
br label %loop
990990

991+
loop:
992+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
993+
%rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %loop ]
994+
%iv.next = add i64 %iv, -1
995+
%gep.a.iv = getelementptr inbounds i8, ptr %a, i64 %iv.next
996+
%ld.a = load i8, ptr %gep.a.iv, align 1
997+
%gep.b.iv = getelementptr inbounds i8, ptr %b, i64 %iv.next
998+
%ld.b = load i8, ptr %gep.b.iv, align 1
999+
%cmp.a.b = icmp sgt i8 %ld.a, %ld.b
1000+
%cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx
1001+
%exit.cond = icmp eq i64 %iv.next, 0
1002+
br i1 %exit.cond, label %exit, label %loop
1003+
1004+
exit:
1005+
ret i64 %cond
1006+
}
1007+
1008+
; 0 is the sentinel value, and -1 is just within the bounds for vectorization.
1009+
define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr %b, i64 %rdx.start) {
1010+
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
1011+
; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
1012+
; IC1VF4-NEXT: [[ENTRY:.*:]]
1013+
; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
1014+
; IC1VF4: [[VECTOR_PH]]:
1015+
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1016+
; IC1VF4: [[VECTOR_BODY]]:
1017+
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1018+
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -1, i64 -2, i64 -3, i64 -4>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1019+
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
1020+
; IC1VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
1021+
; IC1VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
1022+
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1023+
; IC1VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
1024+
; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
1025+
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
1026+
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1027+
; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1028+
; IC1VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
1029+
; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3
1030+
; IC1VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
1031+
; IC1VF4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1032+
; IC1VF4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE2]]
1033+
; IC1VF4-NEXT: [[TMP9]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP0]], <4 x i64> [[VEC_PHI]]
1034+
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1035+
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
1036+
; IC1VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], -4
1037+
; IC1VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1038+
; IC1VF4: [[MIDDLE_BLOCK]]:
1039+
; IC1VF4-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP9]])
1040+
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP11]], -1
1041+
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP11]], i64 [[RDX_START]]
1042+
; IC1VF4-NEXT: br label %[[SCALAR_PH:.*]]
1043+
; IC1VF4: [[SCALAR_PH]]:
1044+
; IC1VF4-NEXT: br label %[[LOOP:.*]]
1045+
; IC1VF4: [[LOOP]]:
1046+
; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1047+
; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
1048+
; IC1VF4-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
1049+
; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
1050+
; IC1VF4-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
1051+
; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
1052+
; IC1VF4-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
1053+
; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
1054+
; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
1055+
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
1056+
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1057+
; IC1VF4: [[EXIT]]:
1058+
; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
1059+
; IC1VF4-NEXT: ret i64 [[COND_LCSSA]]
1060+
;
1061+
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
1062+
; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
1063+
; IC4VF4-NEXT: [[ENTRY:.*:]]
1064+
; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
1065+
; IC4VF4: [[VECTOR_PH]]:
1066+
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1067+
; IC4VF4: [[VECTOR_BODY]]:
1068+
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1069+
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -1, i64 -2, i64 -3, i64 -4>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1070+
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
1071+
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
1072+
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
1073+
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
1074+
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
1075+
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
1076+
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
1077+
; IC4VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
1078+
; IC4VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
1079+
; IC4VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -1)
1080+
; IC4VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -1)
1081+
; IC4VF4-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -1)
1082+
; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1083+
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
1084+
; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3
1085+
; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -4
1086+
; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 -3
1087+
; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -8
1088+
; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 -3
1089+
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -12
1090+
; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3
1091+
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
1092+
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
1093+
; IC4VF4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
1094+
; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
1095+
; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1096+
; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD4]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1097+
; IC4VF4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1098+
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1099+
; IC4VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1100+
; IC4VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 0
1101+
; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 -3
1102+
; IC4VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -4
1103+
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 -3
1104+
; IC4VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -8
1105+
; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -3
1106+
; IC4VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -12
1107+
; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i64 -3
1108+
; IC4VF4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP16]], align 1
1109+
; IC4VF4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
1110+
; IC4VF4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1
1111+
; IC4VF4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1
1112+
; IC4VF4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD10]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1113+
; IC4VF4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD11]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1114+
; IC4VF4-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD12]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1115+
; IC4VF4-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1116+
; IC4VF4-NEXT: [[TMP23:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE14]]
1117+
; IC4VF4-NEXT: [[TMP24:%.*]] = icmp sgt <4 x i8> [[REVERSE7]], [[REVERSE15]]
1118+
; IC4VF4-NEXT: [[TMP25:%.*]] = icmp sgt <4 x i8> [[REVERSE8]], [[REVERSE16]]
1119+
; IC4VF4-NEXT: [[TMP26:%.*]] = icmp sgt <4 x i8> [[REVERSE9]], [[REVERSE17]]
1120+
; IC4VF4-NEXT: [[TMP27]] = select <4 x i1> [[TMP23]], <4 x i64> [[TMP0]], <4 x i64> [[VEC_PHI]]
1121+
; IC4VF4-NEXT: [[TMP28]] = select <4 x i1> [[TMP24]], <4 x i64> [[TMP2]], <4 x i64> [[VEC_PHI1]]
1122+
; IC4VF4-NEXT: [[TMP29]] = select <4 x i1> [[TMP25]], <4 x i64> [[TMP3]], <4 x i64> [[VEC_PHI2]]
1123+
; IC4VF4-NEXT: [[TMP30]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI3]]
1124+
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1125+
; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
1126+
; IC4VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -16
1127+
; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1128+
; IC4VF4: [[MIDDLE_BLOCK]]:
1129+
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP27]], <4 x i64> [[TMP28]])
1130+
; IC4VF4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP29]])
1131+
; IC4VF4-NEXT: [[RDX_MINMAX19:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX18]], <4 x i64> [[TMP30]])
1132+
; IC4VF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX19]])
1133+
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP32]], -1
1134+
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP32]], i64 [[RDX_START]]
1135+
; IC4VF4-NEXT: br label %[[SCALAR_PH:.*]]
1136+
; IC4VF4: [[SCALAR_PH]]:
1137+
; IC4VF4-NEXT: br label %[[LOOP:.*]]
1138+
; IC4VF4: [[LOOP]]:
1139+
; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 15, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1140+
; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
1141+
; IC4VF4-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
1142+
; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
1143+
; IC4VF4-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
1144+
; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
1145+
; IC4VF4-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
1146+
; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
1147+
; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
1148+
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
1149+
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1150+
; IC4VF4: [[EXIT]]:
1151+
; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
1152+
; IC4VF4-NEXT: ret i64 [[COND_LCSSA]]
1153+
;
1154+
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
1155+
; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
1156+
; IC4VF1-NEXT: [[ENTRY:.*:]]
1157+
; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
1158+
; IC4VF1: [[VECTOR_PH]]:
1159+
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
1160+
; IC4VF1: [[VECTOR_BODY]]:
1161+
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1162+
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
1163+
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
1164+
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
1165+
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
1166+
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 -1, [[INDEX]]
1167+
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
1168+
; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
1169+
; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
1170+
; IC4VF1-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
1171+
; IC4VF1-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -1
1172+
; IC4VF1-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], -1
1173+
; IC4VF1-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], -1
1174+
; IC4VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
1175+
; IC4VF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
1176+
; IC4VF1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
1177+
; IC4VF1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
1178+
; IC4VF1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP7]], align 1
1179+
; IC4VF1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
1180+
; IC4VF1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
1181+
; IC4VF1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
1182+
; IC4VF1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
1183+
; IC4VF1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
1184+
; IC4VF1-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
1185+
; IC4VF1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
1186+
; IC4VF1-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP15]], align 1
1187+
; IC4VF1-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP16]], align 1
1188+
; IC4VF1-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP17]], align 1
1189+
; IC4VF1-NEXT: [[TMP22:%.*]] = load i8, ptr [[TMP18]], align 1
1190+
; IC4VF1-NEXT: [[TMP23:%.*]] = icmp sgt i8 [[TMP11]], [[TMP19]]
1191+
; IC4VF1-NEXT: [[TMP24:%.*]] = icmp sgt i8 [[TMP12]], [[TMP20]]
1192+
; IC4VF1-NEXT: [[TMP25:%.*]] = icmp sgt i8 [[TMP13]], [[TMP21]]
1193+
; IC4VF1-NEXT: [[TMP26:%.*]] = icmp sgt i8 [[TMP14]], [[TMP22]]
1194+
; IC4VF1-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP3]], i64 [[VEC_PHI]]
1195+
; IC4VF1-NEXT: [[TMP28]] = select i1 [[TMP24]], i64 [[TMP4]], i64 [[VEC_PHI1]]
1196+
; IC4VF1-NEXT: [[TMP29]] = select i1 [[TMP25]], i64 [[TMP5]], i64 [[VEC_PHI2]]
1197+
; IC4VF1-NEXT: [[TMP30]] = select i1 [[TMP26]], i64 [[TMP6]], i64 [[VEC_PHI3]]
1198+
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1199+
; IC4VF1-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -4
1200+
; IC4VF1-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1201+
; IC4VF1: [[MIDDLE_BLOCK]]:
1202+
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP27]], i64 [[TMP28]])
1203+
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX]], i64 [[TMP29]])
1204+
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP30]])
1205+
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1
1206+
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
1207+
; IC4VF1-NEXT: br label %[[SCALAR_PH:.*]]
1208+
; IC4VF1: [[SCALAR_PH]]:
1209+
; IC4VF1-NEXT: br label %[[LOOP:.*]]
1210+
; IC4VF1: [[LOOP]]:
1211+
; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1212+
; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
1213+
; IC4VF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
1214+
; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
1215+
; IC4VF1-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
1216+
; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
1217+
; IC4VF1-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
1218+
; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
1219+
; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
1220+
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
1221+
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1222+
; IC4VF1: [[EXIT]]:
1223+
; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
1224+
; IC4VF1-NEXT: ret i64 [[COND_LCSSA]]
1225+
;
1226+
entry:
1227+
br label %loop
1228+
9911229
loop:
9921230
%iv = phi i64 [ -1, %entry ], [ %iv.next, %loop ]
9931231
%rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %loop ]

0 commit comments

Comments
 (0)