Skip to content

Commit 0597bc3

Browse files
committed
VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture the newly created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.
1 parent 714673c commit 0597bc3

File tree

6 files changed

+124
-97
lines changed

6 files changed

+124
-97
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -161,33 +161,34 @@ class VPBuilder {
161161
return tryInsertInstruction(
162162
new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
163163
}
164-
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
165-
const Twine &Name = "") {
164+
VPInstruction *createNot(VPValue *Operand, DebugLoc DL = {},
165+
const Twine &Name = "") {
166166
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
167167
}
168168

169-
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170-
const Twine &Name = "") {
169+
VPInstruction *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
170+
const Twine &Name = "") {
171171
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
172172
}
173173

174-
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175-
const Twine &Name = "") {
174+
VPInstruction *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
175+
const Twine &Name = "") {
176176

177177
return tryInsertInstruction(new VPInstruction(
178178
Instruction::BinaryOps::Or, {LHS, RHS},
179179
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
180180
}
181181

182-
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183-
const Twine &Name = "") {
182+
VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183+
const Twine &Name = "") {
184184
return tryInsertInstruction(
185185
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
186186
}
187187

188-
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
189-
DebugLoc DL = {}, const Twine &Name = "",
190-
std::optional<FastMathFlags> FMFs = std::nullopt) {
188+
VPInstruction *
189+
createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
190+
DebugLoc DL = {}, const Twine &Name = "",
191+
std::optional<FastMathFlags> FMFs = std::nullopt) {
191192
auto *Select =
192193
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
193194
*FMFs, DL, Name)
@@ -199,8 +200,8 @@ class VPBuilder {
199200
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
200201
/// and \p B.
201202
/// TODO: add createFCmp when needed.
202-
VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
203-
DebugLoc DL = {}, const Twine &Name = "");
203+
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
204+
DebugLoc DL = {}, const Twine &Name = "");
204205

205206
//===--------------------------------------------------------------------===//
206207
// RAII helpers.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6903,8 +6903,9 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
69036903
}
69046904
}
69056905

6906-
VPValue *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
6907-
DebugLoc DL, const Twine &Name) {
6906+
VPInstruction *VPBuilder::createICmp(CmpInst::Predicate Pred, VPValue *A,
6907+
VPValue *B, DebugLoc DL,
6908+
const Twine &Name) {
69086909
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
69096910
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
69106911
return tryInsertInstruction(

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
7575
if (!CI)
7676
return false;
7777

78-
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
79-
"Trying the match constant with unexpected bitwidth.");
78+
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
79+
return false;
80+
8081
return APInt::isSameValue(CI->getValue(), Val);
8182
}
8283
};
@@ -87,6 +88,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
8788

8889
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
8990

91+
inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
92+
9093
/// Matching combinators
9194
template <typename LTy, typename RTy> struct match_combine_or {
9295
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -893,8 +893,8 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
893893
}
894894

895895
/// Try to simplify recipe \p R. Returns candidates for further simplification.
896-
static SmallVector<VPRecipeBase *> simplifyRecipe(VPRecipeBase &R,
897-
VPTypeAnalysis &TypeInfo) {
896+
static SmallVector<VPRecipeBase *>
897+
simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, VPlan &Plan) {
898898
using namespace llvm::VPlanPatternMatch;
899899

900900
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
@@ -991,21 +991,66 @@ static SmallVector<VPRecipeBase *> simplifyRecipe(VPRecipeBase &R,
991991
return {};
992992
}
993993

994-
// Simplify (X && Y) || (X && !Y) -> X.
995-
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
996-
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
997-
// recipes to be visited during simplification.
998-
VPValue *X, *Y, *X1, *Y1;
999-
if (match(&R,
1000-
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1001-
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
1002-
X == X1 && Y == Y1) {
994+
VPValue *X, *X1, *Y, *Z;
995+
LLVMContext &Ctx = TypeInfo.getContext();
996+
997+
// (X || !X) -> true.
998+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
999+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1000+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1001+
return {};
1002+
}
1003+
1004+
// (X || true) -> true.
1005+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
1006+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1007+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1008+
return {};
1009+
}
1010+
1011+
// (X || false) -> X.
1012+
if (match(&R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
10031013
R.getVPSingleValue()->replaceAllUsesWith(X);
10041014
return {};
10051015
}
10061016

1007-
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1008-
R.getVPSingleValue()->replaceAllUsesWith(A);
1017+
// (X && !X) -> false.
1018+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
1019+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getFalse(Ctx));
1020+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1021+
return {};
1022+
}
1023+
1024+
// (X && true) -> X.
1025+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_True()))) {
1026+
R.getVPSingleValue()->replaceAllUsesWith(X);
1027+
return {};
1028+
}
1029+
1030+
// (X && false) -> false.
1031+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_False()))) {
1032+
VPValue *VPV = Plan.getOrAddLiveIn(ConstantInt::getFalse(Ctx));
1033+
R.getVPSingleValue()->replaceAllUsesWith(VPV);
1034+
return {};
1035+
}
1036+
1037+
// (X * 1) -> X.
1038+
if (match(&R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
1039+
R.getVPSingleValue()->replaceAllUsesWith(X);
1040+
return {};
1041+
}
1042+
1043+
// (X && Y) || (X && Z) -> X && (Y || Z).
1044+
if (match(&R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1045+
m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
1046+
X == X1) {
1047+
VPBuilder Builder(&R);
1048+
VPInstruction *YorZ = Builder.createOr(Y, Z, R.getDebugLoc());
1049+
VPInstruction *VPI = Builder.createLogicalAnd(X, YorZ, R.getDebugLoc());
1050+
R.getVPSingleValue()->replaceAllUsesWith(VPI);
1051+
return {VPI, YorZ};
1052+
}
1053+
10091054
return {};
10101055
}
10111056

@@ -1023,7 +1068,7 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
10231068

10241069
while (!Worklist.empty()) {
10251070
VPRecipeBase *R = Worklist.pop_back_val();
1026-
for (VPRecipeBase *Cand : simplifyRecipe(*R, TypeInfo))
1071+
for (VPRecipeBase *Cand : simplifyRecipe(*R, TypeInfo, Plan))
10271072
Worklist.push_back(Cand);
10281073
}
10291074
}

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll

Lines changed: 37 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,135 +7,109 @@ define void @test(ptr %p, i40 %a) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
11-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
1210
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1311
; CHECK: vector.body:
1412
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
1513
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
1614
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1715
; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1816
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
19-
; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
20-
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
21-
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
22-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
23-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
24-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
25-
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
26-
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
2717
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
2818
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
2919
; CHECK: pred.store.if:
30-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
31-
; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
20+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3221
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
3322
; CHECK: pred.store.continue:
3423
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
3524
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
36-
; CHECK: pred.store.if3:
37-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
38-
; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
25+
; CHECK: pred.store.if1:
26+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3927
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
28+
; CHECK: pred.store.continue2:
29+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
30+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
31+
; CHECK: pred.store.if3:
32+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
33+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
4034
; CHECK: pred.store.continue4:
41-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
35+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
4236
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
4337
; CHECK: pred.store.if5:
44-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
45-
; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
38+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
4639
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
4740
; CHECK: pred.store.continue6:
48-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
41+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
4942
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
5043
; CHECK: pred.store.if7:
51-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
52-
; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
44+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
5345
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
5446
; CHECK: pred.store.continue8:
55-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
47+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
5648
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
5749
; CHECK: pred.store.if9:
58-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
59-
; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
50+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6051
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
6152
; CHECK: pred.store.continue10:
62-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
53+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
6354
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
6455
; CHECK: pred.store.if11:
65-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
66-
; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
56+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6757
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
6858
; CHECK: pred.store.continue12:
69-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
59+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
7060
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
7161
; CHECK: pred.store.if13:
72-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
73-
; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
62+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
7463
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
7564
; CHECK: pred.store.continue14:
76-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
65+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
7766
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
7867
; CHECK: pred.store.if15:
79-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
80-
; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
68+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8169
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
8270
; CHECK: pred.store.continue16:
83-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
71+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
8472
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
8573
; CHECK: pred.store.if17:
86-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
87-
; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
74+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8875
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
8976
; CHECK: pred.store.continue18:
90-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
77+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
9178
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
9279
; CHECK: pred.store.if19:
93-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
94-
; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
80+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
9581
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
9682
; CHECK: pred.store.continue20:
97-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
83+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
9884
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
9985
; CHECK: pred.store.if21:
100-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
101-
; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
86+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10287
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
10388
; CHECK: pred.store.continue22:
104-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
89+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
10590
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
10691
; CHECK: pred.store.if23:
107-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
108-
; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
92+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10993
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
11094
; CHECK: pred.store.continue24:
111-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
95+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
11296
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
11397
; CHECK: pred.store.if25:
114-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
115-
; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
98+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
11699
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
117100
; CHECK: pred.store.continue26:
118-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
101+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
119102
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
120103
; CHECK: pred.store.if27:
121-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
122-
; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
104+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
123105
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
124106
; CHECK: pred.store.continue28:
125-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
126-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
107+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
108+
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE32]]
127109
; CHECK: pred.store.if29:
128-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
129-
; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
130-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
131-
; CHECK: pred.store.continue30:
132-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
133-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
134-
; CHECK: pred.store.if31:
135-
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
136-
; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
110+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
137111
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
138-
; CHECK: pred.store.continue32:
112+
; CHECK: pred.store.continue30:
139113
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
140114
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
141115
; CHECK: middle.block:

0 commit comments

Comments
 (0)