@@ -970,7 +970,7 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr
970970; CHECK-NEXT: [[ENTRY:.*]]:
971971; CHECK-NEXT: br label %[[LOOP:.*]]
972972; CHECK: [[LOOP]]:
973- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -1 , %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
973+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0 , %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
974974; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[LOOP]] ]
975975; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
976976; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
@@ -988,6 +988,244 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr
988988entry:
989989 br label %loop
990990
991+ loop:
992+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
993+ %rdx = phi i64 [ %rdx.start , %entry ], [ %cond , %loop ]
994+ %iv.next = add i64 %iv , -1
995+ %gep.a.iv = getelementptr inbounds i8 , ptr %a , i64 %iv.next
996+ %ld.a = load i8 , ptr %gep.a.iv , align 1
997+ %gep.b.iv = getelementptr inbounds i8 , ptr %b , i64 %iv.next
998+ %ld.b = load i8 , ptr %gep.b.iv , align 1
999+ %cmp.a.b = icmp sgt i8 %ld.a , %ld.b
1000+ %cond = select i1 %cmp.a.b , i64 %iv.next , i64 %rdx
1001+ %exit.cond = icmp eq i64 %iv.next , 0
1002+ br i1 %exit.cond , label %exit , label %loop
1003+
1004+ exit:
1005+ ret i64 %cond
1006+ }
1007+
1008+ ; The sentinel value is -1 (all ones): an IV start of 0 makes the first %iv.next hit it, whereas an IV start of -1 is just within the bounds for vectorization.
1009+ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds (ptr %a , ptr %b , i64 %rdx.start ) {
1010+ ; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
1011+ ; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
1012+ ; IC1VF4-NEXT: [[ENTRY:.*:]]
1013+ ; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
1014+ ; IC1VF4: [[VECTOR_PH]]:
1015+ ; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1016+ ; IC1VF4: [[VECTOR_BODY]]:
1017+ ; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1018+ ; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -1, i64 -2, i64 -3, i64 -4>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1019+ ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
1020+ ; IC1VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
1021+ ; IC1VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
1022+ ; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1023+ ; IC1VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
1024+ ; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
1025+ ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
1026+ ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1027+ ; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1028+ ; IC1VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
1029+ ; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3
1030+ ; IC1VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
1031+ ; IC1VF4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1032+ ; IC1VF4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE2]]
1033+ ; IC1VF4-NEXT: [[TMP9]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP0]], <4 x i64> [[VEC_PHI]]
1034+ ; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1035+ ; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
1036+ ; IC1VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], -4
1037+ ; IC1VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1038+ ; IC1VF4: [[MIDDLE_BLOCK]]:
1039+ ; IC1VF4-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP9]])
1040+ ; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP11]], -1
1041+ ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP11]], i64 [[RDX_START]]
1042+ ; IC1VF4-NEXT: br label %[[SCALAR_PH:.*]]
1043+ ; IC1VF4: [[SCALAR_PH]]:
1044+ ; IC1VF4-NEXT: br label %[[LOOP:.*]]
1045+ ; IC1VF4: [[LOOP]]:
1046+ ; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1047+ ; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
1048+ ; IC1VF4-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
1049+ ; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
1050+ ; IC1VF4-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
1051+ ; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
1052+ ; IC1VF4-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
1053+ ; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
1054+ ; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
1055+ ; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
1056+ ; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1057+ ; IC1VF4: [[EXIT]]:
1058+ ; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
1059+ ; IC1VF4-NEXT: ret i64 [[COND_LCSSA]]
1060+ ;
1061+ ; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
1062+ ; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
1063+ ; IC4VF4-NEXT: [[ENTRY:.*:]]
1064+ ; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
1065+ ; IC4VF4: [[VECTOR_PH]]:
1066+ ; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
1067+ ; IC4VF4: [[VECTOR_BODY]]:
1068+ ; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1069+ ; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -1, i64 -2, i64 -3, i64 -4>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1070+ ; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
1071+ ; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
1072+ ; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
1073+ ; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
1074+ ; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
1075+ ; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
1076+ ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
1077+ ; IC4VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
1078+ ; IC4VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
1079+ ; IC4VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -1)
1080+ ; IC4VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -1)
1081+ ; IC4VF4-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -1)
1082+ ; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
1083+ ; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
1084+ ; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -3
1085+ ; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -4
1086+ ; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 -3
1087+ ; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -8
1088+ ; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 -3
1089+ ; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -12
1090+ ; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3
1091+ ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
1092+ ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
1093+ ; IC4VF4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
1094+ ; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
1095+ ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1096+ ; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD4]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1097+ ; IC4VF4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1098+ ; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1099+ ; IC4VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
1100+ ; IC4VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 0
1101+ ; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 -3
1102+ ; IC4VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -4
1103+ ; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 -3
1104+ ; IC4VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -8
1105+ ; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -3
1106+ ; IC4VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -12
1107+ ; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i64 -3
1108+ ; IC4VF4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP16]], align 1
1109+ ; IC4VF4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
1110+ ; IC4VF4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1
1111+ ; IC4VF4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1
1112+ ; IC4VF4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD10]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1113+ ; IC4VF4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD11]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1114+ ; IC4VF4-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD12]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1115+ ; IC4VF4-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1116+ ; IC4VF4-NEXT: [[TMP23:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE14]]
1117+ ; IC4VF4-NEXT: [[TMP24:%.*]] = icmp sgt <4 x i8> [[REVERSE7]], [[REVERSE15]]
1118+ ; IC4VF4-NEXT: [[TMP25:%.*]] = icmp sgt <4 x i8> [[REVERSE8]], [[REVERSE16]]
1119+ ; IC4VF4-NEXT: [[TMP26:%.*]] = icmp sgt <4 x i8> [[REVERSE9]], [[REVERSE17]]
1120+ ; IC4VF4-NEXT: [[TMP27]] = select <4 x i1> [[TMP23]], <4 x i64> [[TMP0]], <4 x i64> [[VEC_PHI]]
1121+ ; IC4VF4-NEXT: [[TMP28]] = select <4 x i1> [[TMP24]], <4 x i64> [[TMP2]], <4 x i64> [[VEC_PHI1]]
1122+ ; IC4VF4-NEXT: [[TMP29]] = select <4 x i1> [[TMP25]], <4 x i64> [[TMP3]], <4 x i64> [[VEC_PHI2]]
1123+ ; IC4VF4-NEXT: [[TMP30]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI3]]
1124+ ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1125+ ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
1126+ ; IC4VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -16
1127+ ; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1128+ ; IC4VF4: [[MIDDLE_BLOCK]]:
1129+ ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP27]], <4 x i64> [[TMP28]])
1130+ ; IC4VF4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP29]])
1131+ ; IC4VF4-NEXT: [[RDX_MINMAX19:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX18]], <4 x i64> [[TMP30]])
1132+ ; IC4VF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX19]])
1133+ ; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP32]], -1
1134+ ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP32]], i64 [[RDX_START]]
1135+ ; IC4VF4-NEXT: br label %[[SCALAR_PH:.*]]
1136+ ; IC4VF4: [[SCALAR_PH]]:
1137+ ; IC4VF4-NEXT: br label %[[LOOP:.*]]
1138+ ; IC4VF4: [[LOOP]]:
1139+ ; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 15, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1140+ ; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
1141+ ; IC4VF4-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
1142+ ; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
1143+ ; IC4VF4-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
1144+ ; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
1145+ ; IC4VF4-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
1146+ ; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
1147+ ; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
1148+ ; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
1149+ ; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1150+ ; IC4VF4: [[EXIT]]:
1151+ ; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
1152+ ; IC4VF4-NEXT: ret i64 [[COND_LCSSA]]
1153+ ;
1154+ ; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
1155+ ; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
1156+ ; IC4VF1-NEXT: [[ENTRY:.*:]]
1157+ ; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
1158+ ; IC4VF1: [[VECTOR_PH]]:
1159+ ; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
1160+ ; IC4VF1: [[VECTOR_BODY]]:
1161+ ; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1162+ ; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
1163+ ; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
1164+ ; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
1165+ ; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
1166+ ; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 -1, [[INDEX]]
1167+ ; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
1168+ ; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
1169+ ; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
1170+ ; IC4VF1-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
1171+ ; IC4VF1-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -1
1172+ ; IC4VF1-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], -1
1173+ ; IC4VF1-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], -1
1174+ ; IC4VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
1175+ ; IC4VF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
1176+ ; IC4VF1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
1177+ ; IC4VF1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
1178+ ; IC4VF1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP7]], align 1
1179+ ; IC4VF1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
1180+ ; IC4VF1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
1181+ ; IC4VF1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
1182+ ; IC4VF1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
1183+ ; IC4VF1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
1184+ ; IC4VF1-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
1185+ ; IC4VF1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
1186+ ; IC4VF1-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP15]], align 1
1187+ ; IC4VF1-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP16]], align 1
1188+ ; IC4VF1-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP17]], align 1
1189+ ; IC4VF1-NEXT: [[TMP22:%.*]] = load i8, ptr [[TMP18]], align 1
1190+ ; IC4VF1-NEXT: [[TMP23:%.*]] = icmp sgt i8 [[TMP11]], [[TMP19]]
1191+ ; IC4VF1-NEXT: [[TMP24:%.*]] = icmp sgt i8 [[TMP12]], [[TMP20]]
1192+ ; IC4VF1-NEXT: [[TMP25:%.*]] = icmp sgt i8 [[TMP13]], [[TMP21]]
1193+ ; IC4VF1-NEXT: [[TMP26:%.*]] = icmp sgt i8 [[TMP14]], [[TMP22]]
1194+ ; IC4VF1-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP3]], i64 [[VEC_PHI]]
1195+ ; IC4VF1-NEXT: [[TMP28]] = select i1 [[TMP24]], i64 [[TMP4]], i64 [[VEC_PHI1]]
1196+ ; IC4VF1-NEXT: [[TMP29]] = select i1 [[TMP25]], i64 [[TMP5]], i64 [[VEC_PHI2]]
1197+ ; IC4VF1-NEXT: [[TMP30]] = select i1 [[TMP26]], i64 [[TMP6]], i64 [[VEC_PHI3]]
1198+ ; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1199+ ; IC4VF1-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -4
1200+ ; IC4VF1-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1201+ ; IC4VF1: [[MIDDLE_BLOCK]]:
1202+ ; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP27]], i64 [[TMP28]])
1203+ ; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX]], i64 [[TMP29]])
1204+ ; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP30]])
1205+ ; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1
1206+ ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
1207+ ; IC4VF1-NEXT: br label %[[SCALAR_PH:.*]]
1208+ ; IC4VF1: [[SCALAR_PH]]:
1209+ ; IC4VF1-NEXT: br label %[[LOOP:.*]]
1210+ ; IC4VF1: [[LOOP]]:
1211+ ; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1212+ ; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
1213+ ; IC4VF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
1214+ ; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
1215+ ; IC4VF1-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
1216+ ; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
1217+ ; IC4VF1-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
1218+ ; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
1219+ ; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
1220+ ; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
1221+ ; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1222+ ; IC4VF1: [[EXIT]]:
1223+ ; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
1224+ ; IC4VF1-NEXT: ret i64 [[COND_LCSSA]]
1225+ ;
1226+ entry:
1227+ br label %loop
1228+
9911229loop:
9921230 %iv = phi i64 [ -1 , %entry ], [ %iv.next , %loop ]
9931231 %rdx = phi i64 [ %rdx.start , %entry ], [ %cond , %loop ]
0 commit comments