Skip to content

Commit 3167410

Browse files
committed
VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture the newly created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.
1 parent 0f28761 commit 3167410

File tree

7 files changed

+146
-109
lines changed

7 files changed

+146
-109
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -161,33 +161,34 @@ class VPBuilder {
161161
return tryInsertInstruction(
162162
new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
163163
}
164-
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
165-
const Twine &Name = "") {
164+
VPInstruction *createNot(VPValue *Operand, DebugLoc DL = {},
165+
const Twine &Name = "") {
166166
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
167167
}
168168

169-
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170-
const Twine &Name = "") {
169+
VPInstruction *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170+
const Twine &Name = "") {
171171
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
172172
}
173173

174-
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175-
const Twine &Name = "") {
174+
VPInstruction *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175+
const Twine &Name = "") {
176176

177177
return tryInsertInstruction(new VPInstruction(
178178
Instruction::BinaryOps::Or, {LHS, RHS},
179179
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
180180
}
181181

182-
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183-
const Twine &Name = "") {
182+
VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183+
const Twine &Name = "") {
184184
return tryInsertInstruction(
185185
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
186186
}
187187

188-
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
189-
DebugLoc DL = {}, const Twine &Name = "",
190-
std::optional<FastMathFlags> FMFs = std::nullopt) {
188+
VPInstruction *
189+
createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
190+
DebugLoc DL = {}, const Twine &Name = "",
191+
std::optional<FastMathFlags> FMFs = std::nullopt) {
191192
auto *Select =
192193
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
193194
*FMFs, DL, Name)
@@ -199,8 +200,8 @@ class VPBuilder {
199200
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
200201
/// and \p B.
201202
/// TODO: add createFCmp when needed.
202-
VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
203-
DebugLoc DL = {}, const Twine &Name = "");
203+
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
204+
DebugLoc DL = {}, const Twine &Name = "");
204205

205206
//===--------------------------------------------------------------------===//
206207
// RAII helpers.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6931,8 +6931,9 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
69316931
}
69326932
}
69336933

6934-
VPValue *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
6935-
DebugLoc DL, const Twine &Name) {
6934+
VPInstruction *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A,
6935+
VPValue *B, DebugLoc DL,
6936+
const Twine &Name) {
69366937
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
69376938
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
69386939
return tryInsertInstruction(

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
7575
if (!CI)
7676
return false;
7777

78-
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
79-
"Trying the match constant with unexpected bitwidth.");
78+
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
79+
return false;
80+
8081
return APInt::isSameValue(CI->getValue(), Val);
8182
}
8283
};
@@ -87,6 +88,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
8788

8889
/// Matcher for the boolean constant false: a specific_intval restricted to
/// bit width 1 (the template argument) whose value to compare against is 0.
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
8990

91+
/// Matcher for the boolean constant true: a specific_intval restricted to
/// bit width 1 (the template argument) whose value to compare against is 1.
inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
92+
9093
/// Matching combinators
9194
template <typename LTy, typename RTy> struct match_combine_or {
9295
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Analysis/VectorUtils.h"
2626
#include "llvm/IR/Intrinsics.h"
2727
#include "llvm/IR/PatternMatch.h"
28+
#include <deque>
2829

2930
using namespace llvm;
3031

@@ -852,9 +853,10 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
852853
}
853854
}
854855

855-
/// Try to simplify recipe \p R. Returns any new recipes introduced during
856-
/// simplification, as a candidate for further simplification.
857-
static VPRecipeBase *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
856+
/// Try to simplify recipe \p R. Returns any new recipes introduced during
857+
/// simplification, as candidates for further simplification.
858+
static SmallVector<VPRecipeBase *>
859+
simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, VPlan &Plan) {
858860
using namespace llvm::VPlanPatternMatch;
859861

860862
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
@@ -869,11 +871,11 @@ static VPRecipeBase *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
869871
if (UniqueValues.size() == 1) {
870872
Blend->replaceAllUsesWith(*UniqueValues.begin());
871873
Blend->eraseFromParent();
872-
return nullptr;
874+
return {};
873875
}
874876

875877
if (Blend->isNormalized())
876-
return nullptr;
878+
return {};
877879

878880
// Normalize the blend so its first incoming value is used as the initial
879881
// value with the others blended into it.
@@ -908,7 +910,7 @@ static VPRecipeBase *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
908910
Blend->replaceAllUsesWith(NewBlend);
909911
Blend->eraseFromParent();
910912
recursivelyDeleteDeadRecipes(DeadMask);
911-
return nullptr;
913+
return {};
912914
}
913915

914916
VPValue *A;
@@ -921,7 +923,7 @@ static VPRecipeBase *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
921923
} else {
922924
// Don't replace a scalarizing recipe with a widened cast.
923925
if (isa<VPReplicateRecipe>(&R))
924-
return nullptr;
926+
return {};
925927
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
926928

927929
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
@@ -956,26 +958,73 @@ static VPRecipeBase *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
956958
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
957959
}
958960
#endif
959-
return nullptr;
961+
return {};
962+
}
963+
964+
VPValue *X, *X1, *Y, *Z;
965+
LLVMContext &Ctx = TypeInfo.getContext();
966+
967+
// (X || !X) -> true.
968+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
969+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
970+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
971+
return {};
972+
}
973+
974+
// (X || true) -> true.
975+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
976+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
977+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
978+
return {};
979+
}
980+
981+
// (X || false) -> X.
982+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
983+
R.getVPSingleValue()->replaceAllUsesWith(X);
984+
return {};
985+
}
986+
987+
// (X && !X) -> false.
988+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
989+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getFalse(Ctx));
990+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
991+
return {};
992+
}
993+
994+
// (X && true) -> X.
995+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_True()))) {
996+
R.getVPSingleValue()->replaceAllUsesWith(X);
997+
return {};
998+
}
999+
1000+
// (X && false) -> false.
1001+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_False()))) {
1002+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getFalse(Ctx));
1003+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1004+
return {};
9601005
}
9611006

962-
// Simplify (X && Y) || (X && !Y) -> X.
963-
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
964-
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
965-
// recipes to be visited during simplification.
966-
VPValue *X, *Y, *X1, *Y1;
967-
if (match(&R,
968-
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
969-
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
970-
X == X1 && Y == Y1) {
1007+
// (X * 1) -> X.
1008+
if (match(&R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
9711009
R.getVPSingleValue()->replaceAllUsesWith(X);
1010+
return {};
1011+
}
1012+
1013+
// (X && Y) || (X && Z) -> X && (Y || Z).
1014+
if (match(&R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1015+
m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
1016+
X == X1) {
1017+
VPBuilder Builder(&R);
1018+
VPInstruction *YorZ = Builder.createOr(Y, Z, R.getDebugLoc());
1019+
VPInstruction *VPI = Builder.createLogicalAnd(X, YorZ, R.getDebugLoc());
1020+
R.getVPSingleValue()->replaceAllUsesWith(VPI);
9721021
R.eraseFromParent();
973-
return nullptr;
1022+
// Order of simplification matters: simplify sub-recipes before root
1023+
// recipes.
1024+
return {YorZ, VPI};
9741025
}
9751026

976-
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
977-
R.getVPSingleValue()->replaceAllUsesWith(A);
978-
return nullptr;
1027+
return {};
9791028
}
9801029

9811030
/// Try to simplify the recipes in \p Plan.
@@ -984,10 +1033,17 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
9841033
Plan.getEntry());
9851034
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
9861035
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1036+
// Order of simplification matters: add new candidates for simplification to
1037+
// the back of the Worklist, while the Worklist processes recipes from the
1038+
// front.
1039+
std::deque<VPRecipeBase *> Worklist;
9871040
for (auto &R : make_early_inc_range(*VPBB)) {
988-
VPRecipeBase *NewR = simplifyRecipe(R, TypeInfo);
989-
while (NewR)
990-
NewR = simplifyRecipe(*NewR, TypeInfo);
1041+
Worklist.emplace_front(&R);
1042+
while (!Worklist.empty()) {
1043+
VPRecipeBase *R = Worklist.front();
1044+
Worklist.pop_front();
1045+
append_range(Worklist, simplifyRecipe(*R, TypeInfo, Plan));
1046+
}
9911047
}
9921048
}
9931049
}

0 commit comments

Comments
 (0)