[LV] Extend FindLastIV to unsigned case #141752

artagnon · 2025-05-28T12:33:31Z

Split the FindLastIV RecurKind into SMax and UMax variants, depending on the reduction op produced.

llvmbot · 2025-05-28T12:34:04Z

@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Ramkumar Ramachandra (artagnon)

Changes

In an effort to not have two different RecurKinds, one for the signed case, and another for the unsigned case, introduce RecurrenceDescriptor::isReduxSigned() to indicate whether the RecurKind is of the signed or unsigned variant. Demonstrate its use by extending FindLastIV to the unsigned case.

Patch is 31.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141752.diff

5 Files Affected:

(modified) llvm/include/llvm/Analysis/IVDescriptors.h (+24-13)
(modified) llvm/lib/Analysis/IVDescriptors.cpp (+35-21)
(modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1-1)
(modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+4-3)
(modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+146-56)

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index d94ffa7287db3..db2731869ed20 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -79,12 +79,13 @@ class RecurrenceDescriptor {
 
   RecurrenceDescriptor(Value *Start, Instruction *Exit, StoreInst *Store,
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
-                       Type *RT, bool Signed, bool Ordered,
-                       SmallPtrSetImpl<Instruction *> &CI,
+                       Type *RT, bool IsResultSigned, bool IsReduxSigned,
+                       bool Ordered, SmallPtrSetImpl<Instruction *> &CI,
                        unsigned MinWidthCastToRecurTy)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
-        IsSigned(Signed), IsOrdered(Ordered),
+        IsResultSigned(IsResultSigned), IsReduxSigned(IsReduxSigned),
+        IsOrdered(Ordered),
         MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
     CastInsts.insert_range(CI);
   }
@@ -96,12 +97,14 @@ class RecurrenceDescriptor {
         : IsRecurrence(IsRecur), PatternLastInst(I),
           RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
-        : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP) {}
+    InstDesc(Instruction *I, RecurKind K, bool IsSigned = false)
+        : IsRecurrence(true), IsSigned(IsSigned), PatternLastInst(I),
+          RecKind(K) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
+    bool isSigned() const { return IsSigned; }
+
     bool needsExactFPMath() const { return ExactFPMathInst != nullptr; }
 
     Instruction *getExactFPMathInst() const { return ExactFPMathInst; }
@@ -113,13 +116,15 @@ class RecurrenceDescriptor {
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
+    // Is this recurrence a signed variant.
+    bool IsSigned = false;
     // The last instruction in a min/max pattern (select of the select(icmp())
     // pattern), or the current recurrence instruction otherwise.
     Instruction *PatternLastInst;
     // If this is a min/max pattern.
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
-    Instruction *ExactFPMathInst;
+    Instruction *ExactFPMathInst = nullptr;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -268,8 +273,9 @@ class RecurrenceDescriptor {
   Value *getSentinelValue() const {
     assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
     Type *Ty = StartValue->getType();
-    return ConstantInt::get(Ty,
-                            APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
+    unsigned BW = Ty->getIntegerBitWidth();
+    return ConstantInt::get(Ty, isReduxSigned() ? APInt::getSignedMinValue(BW)
+                                                : APInt::getMinValue(BW));
   }
 
   /// Returns a reference to the instructions used for type-promoting the
@@ -281,8 +287,11 @@ class RecurrenceDescriptor {
     return MinWidthCastToRecurrenceType;
   }
 
-  /// Returns true if all source operands of the recurrence are SExtInsts.
-  bool isSigned() const { return IsSigned; }
+  /// Returns true if the reduction result is signed.
+  bool isResultSigned() const { return IsResultSigned; }
+
+  /// Returns true if the reduction redux is signed.
+  bool isReduxSigned() const { return IsReduxSigned; }
 
   /// Expose an ordered FP reduction to the instance users.
   bool isOrdered() const { return IsOrdered; }
@@ -318,8 +327,10 @@ class RecurrenceDescriptor {
   Instruction *ExactFPMathInst = nullptr;
   // The type of the recurrence.
   Type *RecurrenceType = nullptr;
-  // True if all source operands of the recurrence are SExtInsts.
-  bool IsSigned = false;
+  // True if reduction result is signed.
+  bool IsResultSigned = false;
+  // True if reduction redux is signed.
+  bool IsReduxSigned = false;
   // True if this recurrence can be treated as an in-order reduction.
   // Currently only a non-reassociative FAdd can be considered in-order,
   // if it is also the only FAdd in the PHI's use chain.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b7c7bcab168cc..8d568236f0053 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -88,7 +88,7 @@ static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
 }
 
 /// Compute the minimal bit width needed to represent a reduction whose exit
-/// instruction is given by Exit.
+/// instruction is given by Exit, along with its signedness.
 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                      DemandedBits *DB,
                                                      AssumptionCache *AC,
@@ -255,7 +255,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
-  bool IsSigned = false;
+  bool IsResultSigned = false, IsReduxSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
   SmallVector<Instruction *, 8> Worklist;
@@ -396,6 +396,7 @@ bool RecurrenceDescriptor::AddReductionVar(
       //       state accurate while processing the worklist?
       if (ReduxDesc.getRecKind() != RecurKind::None)
         Kind = ReduxDesc.getRecKind();
+      IsReduxSigned = ReduxDesc.isSigned();
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
@@ -565,7 +566,7 @@ bool RecurrenceDescriptor::AddReductionVar(
     //       smaller type. We should just generate a correctly typed expression
     //       to begin with.
     Type *ComputedType;
-    std::tie(ComputedType, IsSigned) =
+    std::tie(ComputedType, IsResultSigned) =
         computeRecurrenceType(ExitInstruction, DB, AC, DT);
     if (ComputedType != RecurrenceType)
       return false;
@@ -595,8 +596,9 @@ bool RecurrenceDescriptor::AddReductionVar(
 
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
-                          FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+                          FMF, ExactFPMathInst, RecurrenceType, IsResultSigned,
+                          IsReduxSigned, IsOrdered, CastInsts,
+                          MinWidthCastToRecurrenceType);
   RedDes = RD;
 
   return true;
@@ -700,47 +702,59 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
                                      m_Value(NonRdxPhi)))))
     return InstDesc(false, I);
 
-  auto IsIncreasingLoopInduction = [&](Value *V) {
+  // Returns a non-nullopt boolean indicating the signedness of the recurrence
+  // when a valid FindLastIV pattern is found.
+  auto GetInductionSignedness = [&](Value *V) -> std::optional<bool> {
     Type *Ty = V->getType();
     if (!SE.isSCEVable(Ty))
-      return false;
+      return std::nullopt;
 
     auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
     if (!AR || AR->getLoop() != TheLoop)
-      return false;
+      return std::nullopt;
 
     const SCEV *Step = AR->getStepRecurrence(SE);
     if (!SE.isKnownPositive(Step))
-      return false;
+      return std::nullopt;
 
-    const ConstantRange IVRange = SE.getSignedRange(AR);
+    const ConstantRange SignedIVRange = SE.getSignedRange(AR);
+    const ConstantRange UnsignedIVRange = SE.getUnsignedRange(AR);
     unsigned NumBits = Ty->getIntegerBitWidth();
     // Keep the minimum value of the recurrence type as the sentinel value.
     // The maximum acceptable range for the increasing induction variable,
     // called the valid range, will be defined as
     //   [<sentinel value> + 1, <sentinel value>)
-    // where <sentinel value> is SignedMin(<recurrence type>)
+    // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>)
     // TODO: This range restriction can be lifted by adding an additional
     // virtual OR reduction.
-    const APInt Sentinel = APInt::getSignedMinValue(NumBits);
-    const ConstantRange ValidRange =
-        ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
-    LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
-                      << ", and the signed range of " << *AR << " is "
-                      << IVRange << "\n");
+    const APInt SignedSentinel = APInt::getSignedMinValue(NumBits);
+    const APInt UnsignedSentinel = APInt::getMinValue(NumBits);
+    const ConstantRange ValidSignedRange =
+        ConstantRange::getNonEmpty(SignedSentinel + 1, SignedSentinel);
+    const ConstantRange ValidUnsignedRange =
+        ConstantRange::getNonEmpty(UnsignedSentinel + 1, UnsignedSentinel);
+    LLVM_DEBUG(dbgs() << "LV: FindLastIV valid signed range is "
+                      << ValidSignedRange << ", valid unsigned range is "
+                      << ValidUnsignedRange << ", " << *AR
+                      << " signed range is " << SignedIVRange
+                      << ", and unsigned range is " << UnsignedIVRange << "\n");
     // Ensure the induction variable does not wrap around by verifying that its
     // range is fully contained within the valid range.
-    return ValidRange.contains(IVRange);
+    if (ValidSignedRange.contains(SignedIVRange))
+      return true;
+    if (ValidUnsignedRange.contains(UnsignedIVRange))
+      return false;
+    return std::nullopt;
   };
 
   // We are looking for selects of the form:
   //   select(cmp(), phi, increasing_loop_induction) or
   //   select(cmp(), increasing_loop_induction, phi)
   // TODO: Support for monotonically decreasing induction variable
-  if (!IsIncreasingLoopInduction(NonRdxPhi))
-    return InstDesc(false, I);
+  if (auto IsSigned = GetInductionSignedness(NonRdxPhi))
+    return InstDesc(I, RecurKind::FindLastIV, *IsSigned);
 
-  return InstDesc(I, RecurKind::FindLastIV);
+  return InstDesc(false, I);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 05b5764ffcafc..e5c13f7ddfb68 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9535,7 +9535,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       auto *Trunc =
           new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
       auto *Extnd =
-          RdxDesc.isSigned()
+          RdxDesc.isResultSigned()
               ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
               : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 18229780bc4a5..9404423c2afcd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -621,8 +621,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
     unsigned UF = getNumOperands() - 2;
     Value *ReducedPartRdx = State.get(getOperand(2));
     for (unsigned Part = 1; Part < UF; ++Part) {
-      ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
-                                      State.get(getOperand(2 + Part)));
+      ReducedPartRdx = createMinMaxOp(
+          Builder, RdxDesc.isReduxSigned() ? RecurKind::SMax : RecurKind::UMax,
+          ReducedPartRdx, State.get(getOperand(2 + Part)));
     }
 
     return createFindLastIVReduction(Builder, ReducedPartRdx,
@@ -696,7 +697,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
       // If the reduction can be performed in a smaller type, we need to extend
       // the reduction to the wider type before we branch to the original loop.
       if (PhiTy != RdxDesc.getRecurrenceType())
-        ReducedPartRdx = RdxDesc.isSigned()
+        ReducedPartRdx = RdxDesc.isResultSigned()
                              ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                              : Builder.CreateZExt(ReducedPartRdx, PhiTy);
     }
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
index e4597ebfe7dc8..a497ffcb9923a 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4
@@ -843,66 +843,184 @@ exit:                                             ; preds = %for.body, %entry
 ; This test cannot be vectorized, even with a runtime check.
 ; The construct that are introduced by IndVarSimplify is:
 ;   %1 = trunc i64 %iv to i32
-; However, the loop exit condition is a constant that overflows i32:
+; The loop exit condition is a constant that overflows signed i32,
+; but not unsigned i32:
 ;   %exitcond.not = icmp eq i64 %inc, 4294967294
-; Hence, the i32 will most certainly wrap and hit the sentinel value, and we
-; cannot vectorize this case.
-define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
-; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; Hence, we can vectorize with the unsigned variant of FindLastIV.
+define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) {
+; CHECK-VF4IC1-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range(
 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
 ; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
+; CHECK-VF4IC1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-VF4IC1:       [[VECTOR_PH]]:
+; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK-VF4IC1:       [[VECTOR_BODY]]:
+; CHECK-VF4IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 2147483646, i32 2147483647, i32 -2147483648, i32 -2147483647>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = add i64 2147483646, [[INDEX]]
+; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0
+; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-VF4IC1-NEXT:    [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-VF4IC1-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
+; CHECK-VF4IC1-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
+; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-VF4IC1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP5]], i32 331
+; CHECK-VF4IC1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-VF4IC1:       [[SCALAR_PH]]:
+; CHECK-VF4IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967294, %[[MIDDLE_BLOCK]] ], [ 2147483646, %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
 ; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC1:       [[FOR_BODY]]:
-; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
-; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4IC1-NEXT:    [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]]
+; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 ; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
-; CHECK-VF4IC1-NEXT:    [[CONV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-VF4IC1-NEXT:    [[CONV:%.*]] = trunc i64 [[IV1]] to i32
 ; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
-; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
-; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-VF4IC1:       [[EXIT]]:
-; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-VF4IC1-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
 ;
-; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; CHECK-VF4IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range(
 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
 ; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
+; CHECK-VF4IC4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-VF4IC4:       [[VECTOR_PH]]:
+; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK-VF4IC4:       [[VECTOR_BODY]]:
+; CHECK-VF4IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 2147483646, i32 2147483647, i32 -2147483648, i32 -2147483647>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-VF4IC4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
+; CHECK-VF4IC4-NEXT:    [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]]
+; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
+; CHECK-VF4IC4-NEXT:    [[TMP4:%.*]] = getelemen...
[truncated]

fhahn

Hmm, introducing another boolean to distinguish between signed/unsigned ops seems inconsistent with some of the current code, that has dedicated kinds for SMin/UMin, SMax/UMax.

Some recent changes have been towards removing some unnecessary state from RecurrenceDescriptor (e.g. not storing the opcode as it is implied from the kind).

It seems like having more state that does not apply to all kinds makes it slightly more complex to construct and reason about the code in RecurrenceDescriptor

fhahn

(Accidentally marked as approved)

artagnon · 2025-06-04T08:41:15Z

> Hmm, introducing another boolean to distinguish between signed/unsigned ops seems inconsistent with some of the current code, that has dedicated kinds for SMin/UMin, SMax/UMax.

I could try to unify SMin/UMin and SMax/UMax in a follow-up? The issue here is that addReductionVar is called on each of these kinds separately, leading to unnecessary compile-time bloat. The other alternative is a strange asymmetry in calls to addReductionVar.

> It seems like having more state that does not apply to all kinds makes it slightly more complex to construct and reason about the code in RecurrenceDescriptor

I agree, but I'm trying to de-duplicate RecurKinds, so that addReductionVar is only called the necessary number of times.

artagnon · 2025-06-04T10:09:39Z

> The other alternative is a strange asymmetry in calls to addReductionVar.

After thinking some more, I think this is the best compromise: I've updated the patch.

llvm/include/llvm/Analysis/IVDescriptors.h

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll

lukel97

I think this makes sense to me. To make sure I'm understanding this correctly, the benefit of using the umax reduction is that it allows us to handle IVs whose range contains the signed min. Whereas the smax reduction allows us to handle IVs whose range contains the unsigned min?

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll

In an effort to not have two different RecurKinds, one for the signed case, and another for the unsigned case, introduce RecurrenceDescriptor::isReduxSigned() to indicate whether the RecurKind is of the signed or unsigned variant. Demonstrate its use by extending FindLastIV to the unsigned case.

artagnon · 2025-06-20T09:25:41Z

> To make sure I'm understanding this correctly, the benefit of using the umax reduction is that it allows us to handle IVs whose range contains the signed min. Whereas the smax reduction allows to handle IVs whose range contains the unsigned min?

Yes, that's correct.

lukel97

LGTM

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll

fhahn

LGTM, thanks!

> The other alternative is a strange asymmetry in calls to addReductionVar.
>
> After thinking some more, I think this is the best compromise: I've updated the patch.

Yep that matches the approach in #140451 I think.

llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll

Split the FindLastIV RecurKind into SMax and UMax variants, depending on the reduction op produced.

artagnon requested review from fhahn and Mel-Chen May 28, 2025 12:33

llvmbot added vectorizers llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels May 28, 2025

artagnon mentioned this pull request May 28, 2025

[LV] Add support for cmp reductions with decreasing IVs. #140451

Merged

artagnon force-pushed the lv-findlast-unsigned branch from f92d1a3 to 1b89079 Compare June 3, 2025 15:47

artagnon requested a review from lukel97 June 3, 2025 15:48

fhahn approved these changes Jun 4, 2025

View reviewed changes

fhahn requested changes Jun 4, 2025

View reviewed changes

artagnon force-pushed the lv-findlast-unsigned branch from 1b89079 to 717adeb Compare June 4, 2025 10:08

lukel97 reviewed Jun 11, 2025

View reviewed changes

llvm/include/llvm/Analysis/IVDescriptors.h Outdated Show resolved Hide resolved

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll Outdated Show resolved Hide resolved

artagnon force-pushed the lv-findlast-unsigned branch from 717adeb to 2b0c5b9 Compare June 15, 2025 12:22

lukel97 reviewed Jun 19, 2025

View reviewed changes

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll Outdated Show resolved Hide resolved

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll Outdated Show resolved Hide resolved

artagnon and others added 2 commits June 20, 2025 09:54

[LV] Address review; preserve test coverage

935b383

artagnon force-pushed the lv-findlast-unsigned branch from 2b0c5b9 to 935b383 Compare June 20, 2025 09:18

lukel97 approved these changes Jun 20, 2025

View reviewed changes

fhahn reviewed Jun 20, 2025

View reviewed changes

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll Show resolved Hide resolved

[LV] Add test for unsigned_iv_range in non-trunc case

0cd6b96

fhahn approved these changes Jun 22, 2025

View reviewed changes

llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll Outdated Show resolved Hide resolved

[LV] Tweak test (NFC)

4ef4f93

artagnon merged commit bb8c42e into llvm:main Jun 23, 2025
7 checks passed

artagnon deleted the lv-findlast-unsigned branch June 23, 2025 14:27

miguelcsx pushed a commit to miguelcsx/llvm-project that referenced this pull request Jun 23, 2025

[LV] Extend FindLastIV to unsigned case (llvm#141752)

ef411a6

Split the FindLastIV RecurKind into SMax and UMax variants, depending on the reduction op produced.

Jaddyen pushed a commit to Jaddyen/llvm-project that referenced this pull request Jun 23, 2025

[LV] Extend FindLastIV to unsigned case (llvm#141752)

f49bb32

Split the FindLastIV RecurKind into SMax and UMax variants, depending on the reduction op produced.

anthonyhatran pushed a commit to anthonyhatran/llvm-project that referenced this pull request Jun 26, 2025

[LV] Extend FindLastIV to unsigned case (llvm#141752)

421e6fe

Split the FindLastIV RecurKind into SMax and UMax variants, depending on the reduction op produced.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LV] Extend FindLastIV to unsigned case #141752

[LV] Extend FindLastIV to unsigned case #141752

Uh oh!

artagnon commented May 28, 2025 •

edited

Loading

Uh oh!

llvmbot commented May 28, 2025 •

edited

Loading

Uh oh!

fhahn left a comment

Uh oh!

fhahn left a comment

Uh oh!

artagnon commented Jun 4, 2025

Uh oh!

artagnon commented Jun 4, 2025

Uh oh!

Uh oh!

Uh oh!

lukel97 left a comment

Uh oh!

Uh oh!

Uh oh!

artagnon commented Jun 20, 2025

Uh oh!

lukel97 left a comment

Uh oh!

Uh oh!

fhahn left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

[LV] Extend FindLastIV to unsigned case #141752

[LV] Extend FindLastIV to unsigned case #141752

Uh oh!

Conversation

artagnon commented May 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

artagnon commented Jun 4, 2025

Uh oh!

artagnon commented Jun 4, 2025

Uh oh!

Uh oh!

Uh oh!

lukel97 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

artagnon commented Jun 20, 2025

Uh oh!

lukel97 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

artagnon commented May 28, 2025 •

edited

Loading

llvmbot commented May 28, 2025 •

edited

Loading