Skip to content

Commit 755e78c

Browse files
committed
VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture the newly created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.
1 parent 4f07508 commit 755e78c

File tree

6 files changed

+145
-105
lines changed

6 files changed

+145
-105
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -161,33 +161,34 @@ class VPBuilder {
161161
return tryInsertInstruction(
162162
new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
163163
}
164-
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
165-
const Twine &Name = "") {
164+
VPInstruction *createNot(VPValue *Operand, DebugLoc DL = {},
165+
const Twine &Name = "") {
166166
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
167167
}
168168

169-
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170-
const Twine &Name = "") {
169+
VPInstruction *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170+
const Twine &Name = "") {
171171
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
172172
}
173173

174-
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175-
const Twine &Name = "") {
174+
VPInstruction *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175+
const Twine &Name = "") {
176176

177177
return tryInsertInstruction(new VPInstruction(
178178
Instruction::BinaryOps::Or, {LHS, RHS},
179179
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
180180
}
181181

182-
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183-
const Twine &Name = "") {
182+
VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183+
const Twine &Name = "") {
184184
return tryInsertInstruction(
185185
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
186186
}
187187

188-
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
189-
DebugLoc DL = {}, const Twine &Name = "",
190-
std::optional<FastMathFlags> FMFs = std::nullopt) {
188+
VPInstruction *
189+
createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
190+
DebugLoc DL = {}, const Twine &Name = "",
191+
std::optional<FastMathFlags> FMFs = std::nullopt) {
191192
auto *Select =
192193
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
193194
*FMFs, DL, Name)
@@ -199,8 +200,8 @@ class VPBuilder {
199200
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
200201
/// and \p B.
201202
/// TODO: add createFCmp when needed.
202-
VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
203-
DebugLoc DL = {}, const Twine &Name = "");
203+
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
204+
DebugLoc DL = {}, const Twine &Name = "");
204205

205206
//===--------------------------------------------------------------------===//
206207
// RAII helpers.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6903,8 +6903,9 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
69036903
}
69046904
}
69056905

6906-
VPValue *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
6907-
DebugLoc DL, const Twine &Name) {
6906+
VPInstruction *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A,
6907+
VPValue *B, DebugLoc DL,
6908+
const Twine &Name) {
69086909
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
69096910
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
69106911
return tryInsertInstruction(

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
7575
if (!CI)
7676
return false;
7777

78-
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
79-
"Trying the match constant with unexpected bitwidth.");
78+
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
79+
return false;
80+
8081
return APInt::isSameValue(CI->getValue(), Val);
8182
}
8283
};
@@ -87,6 +88,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
8788

8889
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
8990

91+
inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
92+
9093
/// Matching combinators
9194
template <typename LTy, typename RTy> struct match_combine_or {
9295
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -892,8 +892,9 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
892892
}
893893
}
894894

895-
/// Try to simplify recipe \p R.
896-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
895+
/// Try to simplify recipe \p R. Returns candidates for further simplification.
896+
static SmallVector<VPRecipeBase *>
897+
simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
897898
using namespace llvm::VPlanPatternMatch;
898899

899900
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
@@ -908,11 +909,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
908909
if (UniqueValues.size() == 1) {
909910
Blend->replaceAllUsesWith(*UniqueValues.begin());
910911
Blend->eraseFromParent();
911-
return;
912+
return {};
912913
}
913914

914915
if (Blend->isNormalized())
915-
return;
916+
return {};
916917

917918
// Normalize the blend so its first incoming value is used as the initial
918919
// value with the others blended into it.
@@ -936,26 +937,27 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
936937
Blend->replaceAllUsesWith(NewBlend);
937938
Blend->eraseFromParent();
938939
recursivelyDeleteDeadRecipes(DeadMask);
939-
return;
940+
return {};
940941
}
941942

942943
VPValue *A;
943944
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
944945
VPValue *Trunc = R.getVPSingleValue();
945946
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
946947
Type *ATy = TypeInfo.inferScalarType(A);
948+
VPWidenCastRecipe *VPC = nullptr;
947949
if (TruncTy == ATy) {
948950
Trunc->replaceAllUsesWith(A);
949951
} else {
950952
// Don't replace a scalarizing recipe with a widened cast.
951953
if (isa<VPReplicateRecipe>(&R))
952-
return;
954+
return {};
953955
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
954956

955957
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
956958
? Instruction::SExt
957959
: Instruction::ZExt;
958-
auto *VPC =
960+
VPC =
959961
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
960962
if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
961963
// UnderlyingExt has distinct return type, used to retain legacy cost.
@@ -964,7 +966,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
964966
VPC->insertBefore(&R);
965967
Trunc->replaceAllUsesWith(VPC);
966968
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
967-
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
969+
VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
968970
VPC->insertBefore(&R);
969971
Trunc->replaceAllUsesWith(VPC);
970972
}
@@ -984,23 +986,71 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
984986
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
985987
}
986988
#endif
989+
if (VPC)
990+
return {VPC};
991+
return {};
992+
}
993+
994+
VPValue *X, *X1, *Y, *Z;
995+
996+
// (X || !X) -> true.
997+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
998+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
999+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1000+
return {};
9871001
}
9881002

989-
// Simplify (X && Y) || (X && !Y) -> X.
990-
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
991-
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
992-
// recipes to be visited during simplification.
993-
VPValue *X, *Y, *X1, *Y1;
994-
if (match(&R,
995-
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
996-
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
997-
X == X1 && Y == Y1) {
1003+
// (X || true) -> true.
1004+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
1005+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
1006+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1007+
return {};
1008+
}
1009+
1010+
// (X || false) -> X.
1011+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
9981012
R.getVPSingleValue()->replaceAllUsesWith(X);
999-
return;
1013+
return {};
1014+
}
1015+
1016+
// (X && !X) -> false.
1017+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
1018+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
1019+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1020+
return {};
1021+
}
1022+
1023+
// (X && true) -> X.
1024+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_True()))) {
1025+
R.getVPSingleValue()->replaceAllUsesWith(X);
1026+
return {};
1027+
}
1028+
1029+
// (X && false) -> false.
1030+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_False()))) {
1031+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
1032+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1033+
return {};
1034+
}
1035+
1036+
// (X * 1) -> X.
1037+
if (match(&R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
1038+
R.getVPSingleValue()->replaceAllUsesWith(X);
1039+
return {};
1040+
}
1041+
1042+
// (X && Y) || (X && Z) -> X && (Y || Z).
1043+
if (match(&R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1044+
m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
1045+
X == X1) {
1046+
VPBuilder Builder(&R);
1047+
VPInstruction *YorZ = Builder.createOr(Y, Z, R.getDebugLoc());
1048+
VPInstruction *VPI = Builder.createLogicalAnd(X, YorZ, R.getDebugLoc());
1049+
R.getVPSingleValue()->replaceAllUsesWith(VPI);
1050+
return {VPI, YorZ};
10001051
}
10011052

1002-
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1003-
return R.getVPSingleValue()->replaceAllUsesWith(A);
1053+
return {};
10041054
}
10051055

10061056
/// Try to simplify the recipes in \p Plan.
@@ -1009,8 +1059,16 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
10091059
Plan.getEntry());
10101060
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10111061
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1012-
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1013-
simplifyRecipe(R, TypeInfo);
1062+
// Populate a Worklist, as simplifyRecipe might return a new recipe that we
1063+
// need to re-process.
1064+
SmallVector<VPRecipeBase *> Worklist;
1065+
for (auto &R : VPBB->getRecipeList())
1066+
Worklist.push_back(&R);
1067+
1068+
while (!Worklist.empty()) {
1069+
VPRecipeBase *R = Worklist.pop_back_val();
1070+
for (VPRecipeBase *Cand : simplifyRecipe(*R, TypeInfo, Ctx))
1071+
Worklist.push_back(Cand);
10141072
}
10151073
}
10161074
}

0 commit comments

Comments
 (0)