VPlan: increase simplification power of simplifyRecipe #93998

artagnon · 2024-05-31T18:00:00Z

Use a worklist in simplifyRecipes to break up patterns in simplifyRecipe, increasing its simplification power.

-- 8< --
Based on #105699.

llvmbot · 2024-05-31T18:00:29Z

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-systemz

Author: Ramkumar Ramachandra (artagnon)

Changes

Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture newly-created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.

Patch is 20.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93998.diff

3 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+5-2)
(modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+108-46)
(modified) llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll (+37-63)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 0587468807435..fe296a6b70a95 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -70,8 +70,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
     if (!CI)
       return false;
 
-    assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
-           "Trying the match constant with unexpected bitwidth.");
+    if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
+      return false;
+
     return APInt::isSameValue(CI->getValue(), Val);
   }
 };
@@ -82,6 +83,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
 
 inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
 
+inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
+
 /// Matching combinators
 template <typename LTy, typename RTy> struct match_combine_or {
   LTy L;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 422579ea8b84f..977aef26d20e7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -877,55 +877,53 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
   }
 }
 
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+/// Try to simplify recipe \p R. Returns candidates for further simplification.
+static SmallVector<VPRecipeBase *>
+simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
   using namespace llvm::VPlanPatternMatch;
   // Try to remove redundant blend recipes.
-  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+  if (auto *Blend = dyn_cast<VPBlendRecipe>(R)) {
     VPValue *Inc0 = Blend->getIncomingValue(0);
     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
       if (Inc0 != Blend->getIncomingValue(I) &&
           !match(Blend->getMask(I), m_False()))
-        return;
+        return {};
     Blend->replaceAllUsesWith(Inc0);
     Blend->eraseFromParent();
-    return;
+    return {};
   }
 
-  VPValue *A;
-  if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
-    VPValue *Trunc = R.getVPSingleValue();
+  VPValue *X, *X1, *Y, *Z;
+  if (match(R, m_Trunc(m_ZExtOrSExt(m_VPValue(X))))) {
+    VPValue *Trunc = R->getVPSingleValue();
     Type *TruncTy = TypeInfo.inferScalarType(Trunc);
-    Type *ATy = TypeInfo.inferScalarType(A);
-    if (TruncTy == ATy) {
-      Trunc->replaceAllUsesWith(A);
+    Type *XTy = TypeInfo.inferScalarType(X);
+    VPWidenCastRecipe *VPC = nullptr;
+    if (TruncTy == XTy) {
+      Trunc->replaceAllUsesWith(X);
     } else {
       // Don't replace a scalarizing recipe with a widened cast.
-      if (isa<VPReplicateRecipe>(&R))
-        return;
-      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-
-        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
-                                 ? Instruction::SExt
-                                 : Instruction::ZExt;
-        auto *VPC =
-            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
-        VPC->insertBefore(&R);
-        Trunc->replaceAllUsesWith(VPC);
-      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
-        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
-        VPC->insertBefore(&R);
-        Trunc->replaceAllUsesWith(VPC);
-      }
+      if (isa<VPReplicateRecipe>(R))
+        return {};
+
+      unsigned ExtOpcode = match(R->getOperand(0), m_SExt(m_VPValue()))
+                               ? Instruction::SExt
+                               : Instruction::ZExt;
+      VPC = XTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()
+                ? new VPWidenCastRecipe(Instruction::Trunc, X, TruncTy)
+                : new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), X,
+                                        TruncTy);
+      VPC->insertBefore(R);
+      Trunc->replaceAllUsesWith(VPC);
     }
 #ifndef NDEBUG
     // Verify that the cached type info is for both A and its users is still
     // accurate by comparing it to freshly computed types.
     VPTypeAnalysis TypeInfo2(
-        R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
+        R->getParent()->getPlan()->getCanonicalIV()->getScalarType(),
         TypeInfo.getContext());
-    assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
-    for (VPUser *U : A->users()) {
+    assert(TypeInfo.inferScalarType(X) == TypeInfo2.inferScalarType(X));
+    for (VPUser *U : X->users()) {
       auto *R = dyn_cast<VPRecipeBase>(U);
       if (!R)
         continue;
@@ -933,24 +931,80 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
         assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
     }
 #endif
+    if (VPC)
+      return {VPC};
+    return {};
   }
 
-  // Simplify (X && Y) || (X && !Y) -> X.
-  // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
-  // && (Y || Z) and (X || !X) into true. This requires queuing newly created
-  // recipes to be visited during simplification.
-  VPValue *X, *Y, *X1, *Y1;
-  if (match(&R,
-            m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
-                         m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
-      X == X1 && Y == Y1) {
-    R.getVPSingleValue()->replaceAllUsesWith(X);
-    return;
+  // (X || !X) -> true.
+  if (match(R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
+    auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
+    R->getVPSingleValue()->replaceAllUsesWith(VPV);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X || true) -> true.
+  if (match(R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
+    auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
+    R->getVPSingleValue()->replaceAllUsesWith(VPV);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X || false) -> X.
+  if (match(R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
+    R->getVPSingleValue()->replaceAllUsesWith(X);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X && !X) -> false.
+  if (match(R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
+    auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
+    R->getVPSingleValue()->replaceAllUsesWith(VPV);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X && true) -> X.
+  if (match(R, m_LogicalAnd(m_VPValue(X), m_True()))) {
+    R->getVPSingleValue()->replaceAllUsesWith(X);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X && false) -> false.
+  if (match(R, m_LogicalAnd(m_VPValue(X), m_False()))) {
+    auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
+    R->getVPSingleValue()->replaceAllUsesWith(VPV);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X * 1) -> X.
+  if (match(R, m_CombineOr(m_Mul(m_VPValue(X), m_SpecificInt(1)),
+                           m_Mul(m_SpecificInt(1), m_VPValue(X))))) {
+    R->getVPSingleValue()->replaceAllUsesWith(X);
+    R->eraseFromParent();
+    return {};
+  }
+
+  // (X && Y) || (X && Z) -> X && (Y || Z).
+  if (match(R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+                          m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
+      X == X1) {
+    auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R->getDebugLoc());
+    YorZ->insertBefore(R);
+    auto *VPI = new VPInstruction(VPInstruction::LogicalAnd, {X, YorZ},
+                                  R->getDebugLoc());
+    VPI->insertBefore(R);
+    R->getVPSingleValue()->replaceAllUsesWith(VPI);
+    R->eraseFromParent();
+    return {VPI, YorZ};
   }
 
-  if (match(&R, m_CombineOr(m_Mul(m_VPValue(A), m_SpecificInt(1)),
-                            m_Mul(m_SpecificInt(1), m_VPValue(A)))))
-    return R.getVPSingleValue()->replaceAllUsesWith(A);
+  return {};
 }
 
 /// Try to simplify the recipes in \p Plan.
@@ -959,8 +1013,16 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
       Plan.getEntry());
   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
-    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      simplifyRecipe(R, TypeInfo);
+    // Populate a Worklist, as simplifyRecipe might return a new recipe that we
+    // need to re-process.
+    SmallVector<VPRecipeBase *> Worklist;
+    for (auto &R : VPBB->getRecipeList())
+      Worklist.push_back(&R);
+
+    while (!Worklist.empty()) {
+      VPRecipeBase *R = Worklist.pop_back_val();
+      for (VPRecipeBase *Cand : simplifyRecipe(R, TypeInfo, Ctx))
+        Worklist.push_back(Cand);
     }
   }
 }
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index 07a1cca1bc21e..203abe6c91312 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -7,8 +7,6 @@ define void @test(ptr %p, i40 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
@@ -16,126 +14,102 @@ define void @test(ptr %p, i40 %a) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
-; CHECK-NEXT:    store i1 [[TMP10]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK:       pred.store.if3:
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
-; CHECK-NEXT:    store i1 [[TMP12]], ptr [[P]], align 1
+; CHECK:       pred.store.if1:
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue2:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; CHECK:       pred.store.continue4:
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
 ; CHECK:       pred.store.if5:
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
-; CHECK-NEXT:    store i1 [[TMP14]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
 ; CHECK:       pred.store.if7:
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
-; CHECK-NEXT:    store i1 [[TMP16]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; CHECK:       pred.store.continue8:
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
 ; CHECK:       pred.store.if9:
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
-; CHECK-NEXT:    store i1 [[TMP18]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE10]]
 ; CHECK:       pred.store.continue10:
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
 ; CHECK:       pred.store.if11:
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
-; CHECK-NEXT:    store i1 [[TMP20]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE12]]
 ; CHECK:       pred.store.continue12:
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
 ; CHECK:       pred.store.if13:
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
-; CHECK-NEXT:    store i1 [[TMP22]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; CHECK:       pred.store.continue14:
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
 ; CHECK-NEXT:    br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
 ; CHECK:       pred.store.if15:
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
-; CHECK-NEXT:    store i1 [[TMP24]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE16]]
 ; CHECK:       pred.store.continue16:
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
 ; CHECK:       pred.store.if17:
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
-; CHECK-NEXT:    store i1 [[TMP26]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE18]]
 ; CHECK:       pred.store.continue18:
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
 ; CHECK:       pred.store.if19:
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
-; CHECK-NEXT:    store i1 [[TMP28]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE20]]
 ; CHECK:       pred.store.continue20:
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
 ; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
 ; CHECK:       pred.store.if21:
-; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
-; CHECK-NEXT:    store i1 [[TMP30]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE22]]
 ; CHECK:       pred.store.continue22:
-; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
+; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
 ; CHECK-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
 ; CHECK:       pred.store.if23:
-; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
-; CHECK-NEXT:    store i1 [[TMP32]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE24]]
 ; CHECK:       pred.store.continue24:
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
 ; CHECK-NEXT:    br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
 ; CHECK:       pred.store.if25:
-; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
-; CHECK-NEXT:    store i1 [[TMP34]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE26]]
 ; CHECK:       pred.store.continue26:
-; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
 ; CHECK-NEXT:    br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
 ; CHECK:       pred.store.if27:
-; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
-; CHECK-NEXT:    store i1 [[TMP36]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE28]]
 ; CHECK:       pred.store.continue28:
-; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
-; CHECK-NEXT:    br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
+; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE32]]
 ; CHECK:       pred.store.if29:
-; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
-; CHECK-NEXT:    store i1 [[TMP38]], ptr [[P]], align 1
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE30]]
-; CHECK:       pred.store.continue30:
-; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
-; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
-; CHECK:       pred.store.if31:
-; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
-; CHECK-NEXT:    store i1 [[TMP40]], ptr [[P]], align 1
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE32]]
-; CHECK:       pred....
[truncated]

artagnon · 2024-06-06T10:18:03Z

Rebase and ping.

artagnon · 2024-06-24T09:06:43Z

Gentle ping.

artagnon · 2024-07-03T10:56:56Z

Gentle ping. The objective of this patch should be straightforward.

artagnon · 2024-07-22T13:17:54Z

Rebase and ping.

artagnon · 2024-08-19T11:26:25Z

Gentle ping.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll

fhahn · 2024-08-22T10:56:19Z

If it is not too much extra work, it would probably also be good to introduce the worklist as NFC commit without test changes and then replace the patterns.

artagnon · 2024-08-22T11:36:04Z

> If it is not too much extra work, it would probably also be good to introduce the worklist as NFC commit without test changes and then replace the patterns.

Not sure what you mean by introducing the worklist as an NFC. Doesn't the introduction of worklist mean that we return candidates for further simplification, reducing recursively, and by definition, the change has higher simplification power? I might be able to introduce the worklist without any test changes, that isn't technically an NFC, with some difficulty; however, I don't see the value of such a patch.

fhahn · 2024-08-22T16:34:49Z

> > If it is not too much extra work, it would probably also be good to introduce the worklist as NFC commit without test changes and then replace the patterns.

> Not sure what you mean by introducing the worklist as an NFC. Doesn't the introduction of worklist mean that we return candidates for further simplification, reducing recursively, and by definition, the change has higher simplification power? I might be able to introduce the worklist without any test changes, that isn't technically an NFC, with some difficulty; however, I don't see the value of such a patch.

Ah right, probably not entirely NFC. The main point would be to separate introducing the worklist from also adjusting the patterns, which should hopefully be fairly straightforward to separate as 2 distinct patches (the PR title already implies that the PR introduces a worklist only).

artagnon · 2024-08-22T17:10:18Z

> > > If it is not too much extra work, it would probably also be good to introduce the worklist as NFC commit without test changes and then replace the patterns.

> > Not sure what you mean by introducing the worklist as an NFC. Doesn't the introduction of worklist mean that we return candidates for further simplification, reducing recursively, and by definition, the change has higher simplification power? I might be able to introduce the worklist without any test changes, that isn't technically an NFC, with some difficulty; however, I don't see the value of such a patch.

> Ah Right, probably not entirely NFC, the main point would be to separate introducing the worklist from also adjusting the patterns, which should hopefully be fairly straightforward to separate as 2 distinct patches (the PR title already implies that the PR introduces a worklist only)

Thanks, it was much simpler than I originally thought.

github-actions · 2024-08-26T11:23:03Z

✅ With the latest revision this PR passed the C/C++ code formatter.

In order to break up patterns in simplifyRecipes, and increase its simplification power, introduce a worklist keeping a running list of candidates for simplification, as a prelude to breaking up patterns in simplifyRecipe.

Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture newly-created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.

artagnon · 2024-09-04T15:22:37Z

Honestly, I'm not sure what value #105699 adds, but this patch should be good now, either to be merged after #105699, or subsuming #105699 altogether.

artagnon requested review from fhahn and ayalz May 31, 2024 18:00

llvmbot added backend:SystemZ vectorizers llvm:transforms labels May 31, 2024

artagnon changed the title ~~VPlan: use Worklist in simplifyRecipes~~ VPlan: use worklist in simplifyRecipes May 31, 2024

artagnon force-pushed the vplan-worklist branch from fd3d596 to 6187a56 Compare June 6, 2024 10:17

artagnon force-pushed the vplan-worklist branch from 6187a56 to db940e1 Compare July 22, 2024 13:17

artagnon requested review from rengolin, alexey-bataev and aniragil July 22, 2024 13:17

fhahn reviewed Aug 20, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

fhahn reviewed Aug 21, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll Show resolved Hide resolved

artagnon force-pushed the vplan-worklist branch from d18e05e to 755e78c Compare August 21, 2024 13:35

fhahn reviewed Aug 22, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp Outdated Show resolved Hide resolved

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll Show resolved Hide resolved

fhahn mentioned this pull request Aug 22, 2024

[LLVM][VPlan] Pick more optimal initial value for VPBlend. #104019

Merged

artagnon force-pushed the vplan-worklist branch from e9267cc to 0597bc3 Compare August 22, 2024 17:07

artagnon changed the title ~~VPlan: use worklist in simplifyRecipes~~ VPlan: increase simplification power of simplifyRecipe Aug 22, 2024

artagnon mentioned this pull request Aug 23, 2024

VPlan: prepare to recursively simplify recipes (NFC) #105699

Closed

artagnon force-pushed the vplan-worklist branch from 0597bc3 to d873ba1 Compare August 26, 2024 11:19

artagnon force-pushed the vplan-worklist branch from d873ba1 to 958b973 Compare August 26, 2024 11:24

artagnon and others added 2 commits September 4, 2024 14:06

VPlan: introduce worklist in simplifyRecipes

0f28761

In order to break up patterns in simplifyRecipes, and increase its simplification power, introduce a worklist keeping a running list of candidates for simplification, as a prelude to breaking up patterns in simplifyRecipe.

artagnon force-pushed the vplan-worklist branch from 958b973 to 3167410 Compare September 4, 2024 15:20

artagnon mentioned this pull request Sep 17, 2024

[VPlan] Refine return types in VPBuilder (NFC) #108858

Merged

artagnon closed this Jan 9, 2025

artagnon deleted the vplan-worklist branch January 9, 2025 18:33

artagnon mentioned this pull request Apr 2, 2025

[VPlan] Process simplifyRecipes via a worklist #133977

Open

VPlan: increase simplification power of simplifyRecipe #93998

VPlan: increase simplification power of simplifyRecipe #93998

Uh oh!

Conversation

artagnon commented May 31, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 31, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

artagnon commented Jun 6, 2024

Uh oh!

artagnon commented Jun 24, 2024

Uh oh!

artagnon commented Jul 3, 2024

Uh oh!

artagnon commented Jul 22, 2024

Uh oh!

artagnon commented Aug 19, 2024

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

fhahn commented Aug 22, 2024

Uh oh!

artagnon commented Aug 22, 2024

Uh oh!

fhahn commented Aug 22, 2024

Uh oh!

artagnon commented Aug 22, 2024

Uh oh!

github-actions bot commented Aug 26, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

artagnon commented Sep 4, 2024

Uh oh!

Uh oh!

artagnon commented May 31, 2024 •

edited

Loading

llvmbot commented May 31, 2024 •

edited

Loading

github-actions bot commented Aug 26, 2024 •

edited

Loading