Skip to content

Commit fd3d596

Browse files
committed
VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture the newly created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify matched patterns, call eraseFromParent when applicable, and simplify the logic.
1 parent fecf5c7 commit fd3d596

File tree

3 files changed

+150
-111
lines changed

3 files changed

+150
-111
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
7070
if (!CI)
7171
return false;
7272

73-
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
74-
"Trying the match constant with unexpected bitwidth.");
73+
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
74+
return false;
75+
7576
return APInt::isSameValue(CI->getValue(), Val);
7677
}
7778
};
@@ -82,6 +83,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
8283

8384
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
8485

86+
inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
87+
8588
/// Matching combinators
8689
template <typename LTy, typename RTy> struct match_combine_or {
8790
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 108 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -877,80 +877,134 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
877877
}
878878
}
879879

880-
/// Try to simplify recipe \p R.
881-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
880+
/// Try to simplify recipe \p R. Returns candidates for further simplification.
881+
static SmallVector<VPRecipeBase *>
882+
simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
882883
using namespace llvm::VPlanPatternMatch;
883884
// Try to remove redundant blend recipes.
884-
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
885+
if (auto *Blend = dyn_cast<VPBlendRecipe>(R)) {
885886
VPValue *Inc0 = Blend->getIncomingValue(0);
886887
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
887888
if (Inc0 != Blend->getIncomingValue(I) &&
888889
!match(Blend->getMask(I), m_False()))
889-
return;
890+
return {};
890891
Blend->replaceAllUsesWith(Inc0);
891892
Blend->eraseFromParent();
892-
return;
893+
return {};
893894
}
894895

895-
VPValue *A;
896-
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
897-
VPValue *Trunc = R.getVPSingleValue();
896+
VPValue *X, *X1, *Y, *Z;
897+
if (match(R, m_Trunc(m_ZExtOrSExt(m_VPValue(X))))) {
898+
VPValue *Trunc = R->getVPSingleValue();
898899
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
899-
Type *ATy = TypeInfo.inferScalarType(A);
900-
if (TruncTy == ATy) {
901-
Trunc->replaceAllUsesWith(A);
900+
Type *XTy = TypeInfo.inferScalarType(X);
901+
VPWidenCastRecipe *VPC = nullptr;
902+
if (TruncTy == XTy) {
903+
Trunc->replaceAllUsesWith(X);
902904
} else {
903905
// Don't replace a scalarizing recipe with a widened cast.
904-
if (isa<VPReplicateRecipe>(&R))
905-
return;
906-
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
907-
908-
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
909-
? Instruction::SExt
910-
: Instruction::ZExt;
911-
auto *VPC =
912-
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
913-
VPC->insertBefore(&R);
914-
Trunc->replaceAllUsesWith(VPC);
915-
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
916-
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
917-
VPC->insertBefore(&R);
918-
Trunc->replaceAllUsesWith(VPC);
919-
}
906+
if (isa<VPReplicateRecipe>(R))
907+
return {};
908+
909+
unsigned ExtOpcode = match(R->getOperand(0), m_SExt(m_VPValue()))
910+
? Instruction::SExt
911+
: Instruction::ZExt;
912+
VPC = XTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()
913+
? new VPWidenCastRecipe(Instruction::Trunc, X, TruncTy)
914+
: new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), X,
915+
TruncTy);
916+
VPC->insertBefore(R);
917+
Trunc->replaceAllUsesWith(VPC);
920918
}
921919
#ifndef NDEBUG
922920
// Verify that the cached type info for both X and its users is still
923921
// accurate by comparing it to freshly computed types.
924922
VPTypeAnalysis TypeInfo2(
925-
R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
923+
R->getParent()->getPlan()->getCanonicalIV()->getScalarType(),
926924
TypeInfo.getContext());
927-
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
928-
for (VPUser *U : A->users()) {
925+
assert(TypeInfo.inferScalarType(X) == TypeInfo2.inferScalarType(X));
926+
for (VPUser *U : X->users()) {
929927
auto *R = dyn_cast<VPRecipeBase>(U);
930928
if (!R)
931929
continue;
932930
for (VPValue *VPV : R->definedValues())
933931
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
934932
}
935933
#endif
934+
if (VPC)
935+
return {VPC};
936+
return {};
936937
}
937938

938-
// Simplify (X && Y) || (X && !Y) -> X.
939-
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
940-
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
941-
// recipes to be visited during simplification.
942-
VPValue *X, *Y, *X1, *Y1;
943-
if (match(&R,
944-
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
945-
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
946-
X == X1 && Y == Y1) {
947-
R.getVPSingleValue()->replaceAllUsesWith(X);
948-
return;
939+
// (X || !X) -> true.
940+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
941+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
942+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
943+
R->eraseFromParent();
944+
return {};
945+
}
946+
947+
// (X || true) -> true.
948+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
949+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
950+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
951+
R->eraseFromParent();
952+
return {};
953+
}
954+
955+
// (X || false) -> X.
956+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
957+
R->getVPSingleValue()->replaceAllUsesWith(X);
958+
R->eraseFromParent();
959+
return {};
960+
}
961+
962+
// (X && !X) -> false.
963+
if (match(R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
964+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
965+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
966+
R->eraseFromParent();
967+
return {};
968+
}
969+
970+
// (X && true) -> X.
971+
if (match(R, m_LogicalAnd(m_VPValue(X), m_True()))) {
972+
R->getVPSingleValue()->replaceAllUsesWith(X);
973+
R->eraseFromParent();
974+
return {};
975+
}
976+
977+
// (X && false) -> false.
978+
if (match(R, m_LogicalAnd(m_VPValue(X), m_False()))) {
979+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
980+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
981+
R->eraseFromParent();
982+
return {};
983+
}
984+
985+
// (X * 1) -> X.
986+
if (match(R, m_CombineOr(m_Mul(m_VPValue(X), m_SpecificInt(1)),
987+
m_Mul(m_SpecificInt(1), m_VPValue(X))))) {
988+
R->getVPSingleValue()->replaceAllUsesWith(X);
989+
R->eraseFromParent();
990+
return {};
991+
}
992+
993+
// (X && Y) || (X && Z) -> X && (Y || Z).
994+
if (match(R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
995+
m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
996+
X == X1) {
997+
auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R->getDebugLoc());
998+
YorZ->insertBefore(R);
999+
auto *VPI = new VPInstruction(VPInstruction::LogicalAnd, {X, YorZ},
1000+
R->getDebugLoc());
1001+
VPI->insertBefore(R);
1002+
R->getVPSingleValue()->replaceAllUsesWith(VPI);
1003+
R->eraseFromParent();
1004+
return {VPI, YorZ};
9491005
}
9501006

951-
if (match(&R, m_CombineOr(m_Mul(m_VPValue(A), m_SpecificInt(1)),
952-
m_Mul(m_SpecificInt(1), m_VPValue(A)))))
953-
return R.getVPSingleValue()->replaceAllUsesWith(A);
1007+
return {};
9541008
}
9551009

9561010
/// Try to simplify the recipes in \p Plan.
@@ -959,8 +1013,16 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
9591013
Plan.getEntry());
9601014
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
9611015
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
962-
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
963-
simplifyRecipe(R, TypeInfo);
1016+
// Populate a Worklist, as simplifyRecipe might return a new recipe that we
1017+
// need to re-process.
1018+
SmallVector<VPRecipeBase *> Worklist;
1019+
for (auto &R : VPBB->getRecipeList())
1020+
Worklist.push_back(&R);
1021+
1022+
while (!Worklist.empty()) {
1023+
VPRecipeBase *R = Worklist.pop_back_val();
1024+
for (VPRecipeBase *Cand : simplifyRecipe(R, TypeInfo, Ctx))
1025+
Worklist.push_back(Cand);
9641026
}
9651027
}
9661028
}

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll

Lines changed: 37 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,135 +7,109 @@ define void @test(ptr %p, i40 %a) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
11-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
1210
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1311
; CHECK: vector.body:
1412
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
1513
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
1614
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1715
; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1816
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
19-
; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
20-
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
21-
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
22-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
23-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
24-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
25-
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
26-
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
2717
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
2818
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
2919
; CHECK: pred.store.if:
30-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
31-
; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
20+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3221
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
3322
; CHECK: pred.store.continue:
3423
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
3524
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
36-
; CHECK: pred.store.if3:
37-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
38-
; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
25+
; CHECK: pred.store.if1:
26+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3927
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
28+
; CHECK: pred.store.continue2:
29+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
30+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
31+
; CHECK: pred.store.if3:
32+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
33+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
4034
; CHECK: pred.store.continue4:
41-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
35+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
4236
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
4337
; CHECK: pred.store.if5:
44-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
45-
; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
38+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
4639
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
4740
; CHECK: pred.store.continue6:
48-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
41+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
4942
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
5043
; CHECK: pred.store.if7:
51-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
52-
; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
44+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
5345
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
5446
; CHECK: pred.store.continue8:
55-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
47+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
5648
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
5749
; CHECK: pred.store.if9:
58-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
59-
; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
50+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6051
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
6152
; CHECK: pred.store.continue10:
62-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
53+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
6354
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
6455
; CHECK: pred.store.if11:
65-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
66-
; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
56+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6757
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
6858
; CHECK: pred.store.continue12:
69-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
59+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
7060
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
7161
; CHECK: pred.store.if13:
72-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
73-
; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
62+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
7463
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
7564
; CHECK: pred.store.continue14:
76-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
65+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
7766
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
7867
; CHECK: pred.store.if15:
79-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
80-
; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
68+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8169
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
8270
; CHECK: pred.store.continue16:
83-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
71+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
8472
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
8573
; CHECK: pred.store.if17:
86-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
87-
; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
74+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8875
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
8976
; CHECK: pred.store.continue18:
90-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
77+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
9178
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
9279
; CHECK: pred.store.if19:
93-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
94-
; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
80+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
9581
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
9682
; CHECK: pred.store.continue20:
97-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
83+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
9884
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
9985
; CHECK: pred.store.if21:
100-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
101-
; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
86+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10287
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
10388
; CHECK: pred.store.continue22:
104-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
89+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
10590
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
10691
; CHECK: pred.store.if23:
107-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
108-
; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
92+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10993
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
11094
; CHECK: pred.store.continue24:
111-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
95+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
11296
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
11397
; CHECK: pred.store.if25:
114-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
115-
; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
98+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
11699
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
117100
; CHECK: pred.store.continue26:
118-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
101+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
119102
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
120103
; CHECK: pred.store.if27:
121-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
122-
; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
104+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
123105
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
124106
; CHECK: pred.store.continue28:
125-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
126-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
107+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
108+
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE32]]
127109
; CHECK: pred.store.if29:
128-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
129-
; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
130-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
131-
; CHECK: pred.store.continue30:
132-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
133-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
134-
; CHECK: pred.store.if31:
135-
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
136-
; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
110+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
137111
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
138-
; CHECK: pred.store.continue32:
112+
; CHECK: pred.store.continue30:
139113
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
140114
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
141115
; CHECK: middle.block:

0 commit comments

Comments
 (0)