-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[VPlan] Fold NOT into predicate of wide compares." #130347
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This reverts commit cb3ce30.
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesReverts llvm/llvm-project#129430 this seems to have introduced a divergence between legacy and VPlan-based cost model https://lab.llvm.org/buildbot/#/builders/30/builds/17159 Patch is 123.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130347.diff 28 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 50e6209738c1f..b277ed4816b8e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -459,9 +459,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Returns the debug location of the recipe.
DebugLoc getDebugLoc() const { return DL; }
- /// Set the recipe's debug location to \p NewDL.
- void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
-
protected:
/// Compute the cost of this recipe either using a recipe's specialized
/// implementation or using the legacy cost model and the underlying
@@ -797,12 +794,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return CmpPredicate;
}
- void setPredicate(CmpInst::Predicate Pred) {
- assert(OpType == OperationType::Cmp &&
- "recipe doesn't have a compare predicate");
- CmpPredicate = Pred;
- }
-
GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }
/// Returns true if the recipe has fast-math flags.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5c9aa9220ca1d..7646350ca0ed2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -972,25 +972,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
- if (match(&R, m_Not(m_VPValue(A)))) {
- if (match(A, m_Not(m_VPValue(A))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
-
- // Try to fold Not into compares by adjusting the predicate in-place.
- if (isa<VPWidenRecipe>(A) && A->getNumUsers() == 1) {
- auto *WideCmp = cast<VPWidenRecipe>(A);
- if (WideCmp->getOpcode() == Instruction::ICmp ||
- WideCmp->getOpcode() == Instruction::FCmp) {
- WideCmp->setPredicate(
- CmpInst::getInversePredicate(WideCmp->getPredicate()));
- R.getVPSingleValue()->replaceAllUsesWith(WideCmp);
- // If WideCmp doesn't have a debug location, use the one from the
- // negation, to preserve the location.
- if (!WideCmp->getDebugLoc() && R.getDebugLoc())
- WideCmp->setDebugLoc(R.getDebugLoc());
- }
- }
- }
+ if (match(&R, m_Not(m_Not(m_VPValue(A)))))
+ return R.getVPSingleValue()->replaceAllUsesWith(A);
// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(&R,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 0850648c518b0..07873fba86b6d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -1037,7 +1037,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFALWAYS-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
; TFALWAYS-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
; TFALWAYS-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
-; TFALWAYS-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
+; TFALWAYS-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
+; TFALWAYS-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
; TFALWAYS-NEXT: [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
; TFALWAYS-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
; TFALWAYS-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -1081,7 +1082,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFFALLBACK-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
; TFFALLBACK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
; TFFALLBACK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
-; TFFALLBACK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
+; TFFALLBACK-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
+; TFFALLBACK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
; TFFALLBACK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
; TFFALLBACK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
; TFFALLBACK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -1131,8 +1133,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
-; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
-; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = fcmp ule <2 x double> [[TMP12]], zeroinitializer
+; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
+; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
+; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP13]], splat (i1 true)
+; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP14]], splat (i1 true)
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK2]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
index 1bacae764f760..93bc131ee5c5a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
@@ -5,8 +5,8 @@
target triple = "aarch64-unknown-linux-gnu"
; CHECK-COST: Checking a loop in 'fixed_width'
-; CHECK-COST: Cost of 10 for VF 2: WIDEN store vp<{{.+}}>, ir<2>, ir<{{.+}}>
-; CHECK-COST: Cost of 20 for VF 4: WIDEN store vp<{{.+}}>, ir<2>, ir<{{.+}}>
+; CHECK-COST: Cost of 10 for VF 2: WIDEN store vp<%6>, ir<2>, vp<%5>
+; CHECK-COST: Cost of 20 for VF 4: WIDEN store vp<%6>, ir<2>, vp<%5>
; CHECK-COST: Selecting VF: 1.
; We should decide this loop is not worth vectorising using fixed width vectors
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 53065a062328b..5f926db1131f6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -43,8 +43,9 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]]
+; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[TMP15]], splat (i1 true)
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
@@ -125,8 +126,9 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 1
-; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i64> [[VEC_IND]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <2 x i64> [[VEC_IND]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
@@ -202,8 +204,9 @@ define i64 @loop_contains_safe_call() #1 {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]])
-; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast oge <4 x float> [[TMP3]], splat (float 3.000000e+00)
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ult <4 x float> [[TMP3]], splat (float 3.000000e+00)
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
@@ -296,8 +299,9 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = udiv <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 20000)
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 4 x i32> [[TMP13]], splat (i32 1)
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i32> [[TMP13]], splat (i32 1)
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[TMP14]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
@@ -377,11 +381,12 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 1)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P2]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
index 1018bdd7a4ea1..daf29847e81d6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
@@ -10,8 +10,9 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1: vector.body:
; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
+; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3)
+; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
@@ -24,14 +25,18 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp ne <vscale x 4 x i32> {{.*}}, splat (i32 3)
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp ne <vscale x 4 x i32> {{.*}}, splat (i32 3)
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp ne <vscale x 4 x i32> {{.*}}, splat (i32 3)
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp ne <vscale x 4 x i32> {{.*}}, splat (i32 3)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <vscale x 4 x i1> [[VEC_PHI1]], [[VEC_ICMP1]]
-; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <vscale x 4 x i1> [[VEC_PHI2]], [[VEC_ICMP2]]
-; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <vscale x 4 x i1> [[VEC_PHI3]], [[VEC_ICMP3]]
-; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <vscale x 4 x i1> [[VEC_PHI4]], [[VEC_ICMP4]]
+; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
+; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
+; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
+; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3)
+; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP1]], splat (i1 true)
+; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP2]], splat (i1 true)
+; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP3]], splat (i1 true)
+; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP4]], splat (i1 true)
+; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <vscale x 4 x i1> [[VEC_PHI1]], [[NOT1]]
+; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <vscale x 4 x i1> [[VEC_PHI2]], [[NOT2]]
+; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <vscale x 4 x i1> [[VEC_PHI3]], [[NOT3]]
+; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <vscale x 4 x i1> [[VEC_PHI4]], [[NOT4]]
; CHECK-VF4IC4: middle.block:
; CHECK-VF4IC4-NEXT: [[OR1:%.*]] = or <vscale x 4 x i1> [[VEC_SEL2]], [[VEC_SEL1]]
; CHECK-VF4IC4-NEXT: [[OR2:%.*]] = or <vscale x 4 x i1> [[VEC_SEL3]], [[OR1]]
@@ -66,8 +71,9 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
; CHECK-VF4IC1: vector.body:
; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
+; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3)
+; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
@@ -99,8 +105,9 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1: vector.body:
; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x float>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast one <vscale x 4 x float> [[VEC_LOAD]], splat (float 3.000000e+00)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
+; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], splat (float 3.000000e+00)
+; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 6de629aea25c6..75b2df93c9350 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -460,7 +460,8 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP13]], splat (i1 true)
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
index bcbc65f50cf2f..ed5467258c71f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
@@ -62,8 +62,9 @@ define void @cond_uniform_load(ptr nocapture %dst, ptr nocapture readonly %src,
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[INIT_ACTIVE_LANE_MASK]], %vector.ph ], [ [[NEXT_ACTIVE_LANE_MASK:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0
; CHECK: [[COND_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{%.*}}, i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[COND_LOAD]], zeroinitializer
-; CHECK-NEXT: [[MASK:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32>...
[truncated]
|
Reverts llvm#129430 this seems to have introduced a divergence between legacy and VPlan-based cost model https://lab.llvm.org/buildbot/#/builders/30/builds/17159
) This reverts commit 8dd160f.
) This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: llvm#129430
This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: #129430
) This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: llvm#129430
) This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: llvm#129430
) This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: llvm#129430
) This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: llvm#129430
) This reverts commit 8dd160f. The recommit contains an adjustment to planContainsAdditionalSimplifications, which considers changes to the original predicate for compares. Original commit message: Add simplification to fold negation into a compare, if the negation is the only user of the compare. This removes a number of redundant negations. Alive2 Proofs for FPCMP test changes: https://alive2.llvm.org/ce/z/WGDz9U PR: llvm#129430
Reverts #129430
this seems to have introduced a divergence between legacy and VPlan-based cost model
https://lab.llvm.org/buildbot/#/builders/30/builds/17159