
Conversation

@wangpc-pp
Contributor

We disabled unrolling for vectorized loops in #151525, but that PR only
checked the instruction's result type.

For some loops, no instruction has a vector result type, yet they still
perform vector operations (like the memset-zero case added in the
precommit test).

Here we check the operand types as well to cover these cases.
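
For illustration, a minimal sketch of the check this PR applies, assuming LLVM's Instruction and STLExtras APIs (the helper name is hypothetical; the actual patch inlines the condition in RISCVTTIImpl::getUnrollingPreferences, as shown in the diff below):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Type.h"

  // Treat an instruction as a vector operation if either its result type or
  // any of its operand types is a vector. A store of a vector value, for
  // example, has a void result but a vector operand, so checking only the
  // result type misses it.
  static bool touchesVectorType(const llvm::Instruction &I) {
    if (I.getType()->isVectorTy())
      return true;
    return llvm::any_of(I.operand_values(), [](const llvm::Value *V) {
      return V->getType()->isVectorTy();
    });
  }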

@llvmbot
Member

llvmbot commented Dec 8, 2025

@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-transforms

Author: Pengcheng Wang (wangpc-pp)

Changes

We disabled unrolling for vectorized loops in #151525, but that PR only
checked the instruction's result type.

For some loops, no instruction has a vector result type, yet they still
perform vector operations (like the memset-zero case added in the
precommit test).

Here we check the operand types as well to cover these cases.


Full diff: https://github.com/llvm/llvm-project/pull/171089.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+4-1)
  • (modified) llvm/test/Transforms/LoopUnroll/RISCV/vector.ll (+129-7)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index afc2f2c9cd07b..79cd651febf85 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2802,7 +2802,10 @@ void RISCVTTIImpl::getUnrollingPreferences(
       // Both auto-vectorized loops and the scalar remainder have the
       // isvectorized attribute, so differentiate between them by the presence
       // of vector instructions.
-      if (IsVectorized && I.getType()->isVectorTy())
+      if (IsVectorized && (I.getType()->isVectorTy() ||
+                           llvm::any_of(I.operand_values(), [](Value *V) {
+                             return V->getType()->isVectorTy();
+                           })))
         return;
 
       if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
index b575057ff6d15..b441f42f267af 100644
--- a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -p loop-unroll -mtriple riscv64 -mattr=+v,+f -S %s | FileCheck %s --check-prefixes=COMMON,CHECK
-; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-s76 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
+; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-p870 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
 
 define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; CHECK-LABEL: define void @reverse(
@@ -248,7 +248,7 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
 ; SIFIVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; SIFIVE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; SIFIVE:       [[VECTOR_BODY]]:
-; SIFIVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; SIFIVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT_15:%.*]], %[[VECTOR_BODY]] ]
 ; SIFIVE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
 ; SIFIVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; SIFIVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
@@ -276,9 +276,93 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
 ; SIFIVE-NEXT:    [[WIDE_LOAD12_3:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
 ; SIFIVE-NEXT:    [[TMP11:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_3]], <4 x float> [[WIDE_LOAD12_3]])
 ; SIFIVE-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
-; SIFIVE-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 16
-; SIFIVE-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; SIFIVE-NEXT:    br i1 [[TMP3]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
+; SIFIVE-NEXT:    [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
+; SIFIVE-NEXT:    [[TMP49:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
+; SIFIVE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_4:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
+; SIFIVE-NEXT:    [[TMP14:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_4]], <4 x float> [[WIDE_LOAD12_4]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP14]], ptr [[TMP13]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX]], 20
+; SIFIVE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
+; SIFIVE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_5:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; SIFIVE-NEXT:    [[TMP17:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_5]], <4 x float> [[WIDE_LOAD12_5]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP17]], ptr [[TMP16]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX]], 24
+; SIFIVE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
+; SIFIVE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_6:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
+; SIFIVE-NEXT:    [[TMP20:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_6]], <4 x float> [[WIDE_LOAD12_6]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP20]], ptr [[TMP19]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_6:%.*]] = add nuw nsw i64 [[INDEX]], 28
+; SIFIVE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
+; SIFIVE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
+; SIFIVE-NEXT:    [[TMP23:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_7]], <4 x float> [[WIDE_LOAD12_7]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP23]], ptr [[TMP22]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX]], 32
+; SIFIVE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP24]], align 4
+; SIFIVE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_8:%.*]] = load <4 x float>, ptr [[TMP25]], align 4
+; SIFIVE-NEXT:    [[TMP26:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_8]], <4 x float> [[WIDE_LOAD12_8]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP26]], ptr [[TMP25]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_8:%.*]] = add nuw nsw i64 [[INDEX]], 36
+; SIFIVE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
+; SIFIVE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_9:%.*]] = load <4 x float>, ptr [[TMP28]], align 4
+; SIFIVE-NEXT:    [[TMP29:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_9]], <4 x float> [[WIDE_LOAD12_9]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP29]], ptr [[TMP28]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_9:%.*]] = add nuw nsw i64 [[INDEX]], 40
+; SIFIVE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_10:%.*]] = load <4 x float>, ptr [[TMP30]], align 4
+; SIFIVE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_10:%.*]] = load <4 x float>, ptr [[TMP31]], align 4
+; SIFIVE-NEXT:    [[TMP32:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_10]], <4 x float> [[WIDE_LOAD12_10]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP32]], ptr [[TMP31]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_10:%.*]] = add nuw nsw i64 [[INDEX]], 44
+; SIFIVE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_11:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
+; SIFIVE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
+; SIFIVE-NEXT:    [[TMP35:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_11]], <4 x float> [[WIDE_LOAD12_11]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP35]], ptr [[TMP34]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_11:%.*]] = add nuw nsw i64 [[INDEX]], 48
+; SIFIVE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_12:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
+; SIFIVE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_12:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
+; SIFIVE-NEXT:    [[TMP38:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_12]], <4 x float> [[WIDE_LOAD12_12]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP38]], ptr [[TMP37]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_12:%.*]] = add nuw nsw i64 [[INDEX]], 52
+; SIFIVE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_13:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
+; SIFIVE-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_13:%.*]] = load <4 x float>, ptr [[TMP40]], align 4
+; SIFIVE-NEXT:    [[TMP41:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_13]], <4 x float> [[WIDE_LOAD12_13]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP41]], ptr [[TMP40]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_13:%.*]] = add nuw nsw i64 [[INDEX]], 56
+; SIFIVE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_14:%.*]] = load <4 x float>, ptr [[TMP42]], align 4
+; SIFIVE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_14:%.*]] = load <4 x float>, ptr [[TMP43]], align 4
+; SIFIVE-NEXT:    [[TMP44:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_14]], <4 x float> [[WIDE_LOAD12_14]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP44]], ptr [[TMP43]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_14:%.*]] = add nuw nsw i64 [[INDEX]], 60
+; SIFIVE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT:    [[WIDE_LOAD_15:%.*]] = load <4 x float>, ptr [[TMP45]], align 4
+; SIFIVE-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT:    [[WIDE_LOAD12_15:%.*]] = load <4 x float>, ptr [[TMP46]], align 4
+; SIFIVE-NEXT:    [[TMP47:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_15]], <4 x float> [[WIDE_LOAD12_15]])
+; SIFIVE-NEXT:    store <4 x float> [[TMP47]], ptr [[TMP46]], align 4
+; SIFIVE-NEXT:    [[INDEX_NEXT_15]] = add nuw nsw i64 [[INDEX]], 64
+; SIFIVE-NEXT:    [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT_15]], 1024
+; SIFIVE-NEXT:    br i1 [[TMP48]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
 ; SIFIVE:       [[EXIT]]:
 ; SIFIVE-NEXT:    ret void
 ;
@@ -345,8 +429,6 @@ vector.body:                                      ; preds = %vector.body, %entry
 exit:                                 ; preds = %vector.body
   ret void
 }
-!0 = !{!0, !1}
-!1 = !{!"llvm.loop.isvectorized", i32 1}
 
 ; On SiFive we should runtime unroll the scalar epilogue loop, but not the
 ; vector loop.
@@ -587,6 +669,46 @@ exit:
   ret void
 }
 
+define void @vector_operands(ptr %p, i64 %n) {
+; COMMON-LABEL: define void @vector_operands(
+; COMMON-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT:  [[ENTRY:.*]]:
+; COMMON-NEXT:    br label %[[VECTOR_BODY:.*]]
+; COMMON:       [[VECTOR_BODY]]:
+; COMMON-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; COMMON-NEXT:    [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; COMMON-NEXT:    [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; COMMON-NEXT:    [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
+; COMMON-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
+; COMMON-NEXT:    [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
+; COMMON-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
+; COMMON-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
+; COMMON-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; COMMON-NEXT:    br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    ret void
+;
+entry:
+  br label %vector.body
+
+vector.body:
+  %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+  %avl = phi i64 [ %n, %entry ], [ %avl.next, %vector.body ]
+  %vl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+  %addr = getelementptr i64, ptr %p, i64 %evl.based.iv
+  call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> splat (i64 0), ptr align 8 %addr, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+  %vl.zext = zext i32 %vl to i64
+  %index.evl.next = add nuw i64 %vl.zext, %evl.based.iv
+  %avl.next = sub nuw i64 %avl, %vl.zext
+  %3 = icmp eq i64 %avl.next, 0
+  br i1 %3, label %exit, label %vector.body, !llvm.loop !2
+
+exit:
+  ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.isvectorized", i32 1}
 !2 = distinct !{!2, !1}
 !3 = distinct !{!3, !1}
 
