Skip to content

Commit db940e1

Browse files
committed
VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist in its caller to capture the newly-created recipes and add them to the Worklist as candidates for further simplification. This patch thoroughly rewrites simplifyRecipe to simplify the matched patterns, call eraseFromParent when applicable, and simplify the logic.
1 parent c1622ca commit db940e1

File tree

3 files changed

+147
-105
lines changed

3 files changed

+147
-105
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
7070
if (!CI)
7171
return false;
7272

73-
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
74-
"Trying the match constant with unexpected bitwidth.");
73+
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
74+
return false;
75+
7576
return APInt::isSameValue(CI->getValue(), Val);
7677
}
7778
};
@@ -82,6 +83,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
8283

8384
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
8485

86+
inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
87+
8588
/// Matching combinators
8689
template <typename LTy, typename RTy> struct match_combine_or {
8790
LTy L;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 105 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -986,83 +986,140 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
986986
}
987987
}
988988

989-
/// Try to simplify recipe \p R.
990-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
989+
/// Try to simplify recipe \p R. Returns candidates for further simplification.
990+
static SmallVector<VPRecipeBase *>
991+
simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
991992
using namespace llvm::VPlanPatternMatch;
992993
// Try to remove redundant blend recipes.
993-
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
994+
if (auto *Blend = dyn_cast<VPBlendRecipe>(R)) {
994995
VPValue *Inc0 = Blend->getIncomingValue(0);
995996
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
996997
if (Inc0 != Blend->getIncomingValue(I) &&
997998
!match(Blend->getMask(I), m_False()))
998-
return;
999+
return {};
9991000
Blend->replaceAllUsesWith(Inc0);
10001001
Blend->eraseFromParent();
1001-
return;
1002+
return {};
10021003
}
10031004

1004-
VPValue *A;
1005-
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
1006-
VPValue *Trunc = R.getVPSingleValue();
1005+
VPValue *X, *X1, *Y, *Z;
1006+
if (match(R, m_Trunc(m_ZExtOrSExt(m_VPValue(X))))) {
1007+
VPValue *Trunc = R->getVPSingleValue();
10071008
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
1008-
Type *ATy = TypeInfo.inferScalarType(A);
1009-
if (TruncTy == ATy) {
1010-
Trunc->replaceAllUsesWith(A);
1009+
Type *XTy = TypeInfo.inferScalarType(X);
1010+
VPWidenCastRecipe *VPC = nullptr;
1011+
if (TruncTy == XTy) {
1012+
Trunc->replaceAllUsesWith(X);
10111013
} else {
10121014
// Don't replace a scalarizing recipe with a widened cast.
1013-
if (isa<VPReplicateRecipe>(&R))
1014-
return;
1015-
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
1016-
1017-
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
1015+
if (isa<VPReplicateRecipe>(R))
1016+
return {};
1017+
if (XTy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
1018+
unsigned ExtOpcode = match(R->getOperand(0), m_SExt(m_VPValue()))
10181019
? Instruction::SExt
10191020
: Instruction::ZExt;
10201021
auto *VPC =
1021-
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
1022-
if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
1022+
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), X, TruncTy);
1023+
if (auto *UnderlyingExt = R->getOperand(0)->getUnderlyingValue()) {
10231024
// UnderlyingExt has distinct return type, used to retain legacy cost.
10241025
VPC->setUnderlyingValue(UnderlyingExt);
10251026
}
1026-
VPC->insertBefore(&R);
1027+
VPC->insertBefore(R);
10271028
Trunc->replaceAllUsesWith(VPC);
1028-
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
1029-
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
1030-
VPC->insertBefore(&R);
1029+
} else if (XTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
1030+
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, X, TruncTy);
1031+
VPC->insertBefore(R);
10311032
Trunc->replaceAllUsesWith(VPC);
10321033
}
10331034
}
10341035
#ifndef NDEBUG
10351036
// Verify that the cached type info for both A and its users is still
10361037
// accurate by comparing it to freshly computed types.
10371038
VPTypeAnalysis TypeInfo2(
1038-
R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
1039+
R->getParent()->getPlan()->getCanonicalIV()->getScalarType(),
10391040
TypeInfo.getContext());
1040-
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
1041-
for (VPUser *U : A->users()) {
1041+
assert(TypeInfo.inferScalarType(X) == TypeInfo2.inferScalarType(X));
1042+
for (VPUser *U : X->users()) {
10421043
auto *R = dyn_cast<VPRecipeBase>(U);
10431044
if (!R)
10441045
continue;
10451046
for (VPValue *VPV : R->definedValues())
10461047
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
10471048
}
10481049
#endif
1050+
if (VPC)
1051+
return {VPC};
1052+
return {};
10491053
}
10501054

1051-
// Simplify (X && Y) || (X && !Y) -> X.
1052-
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
1053-
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
1054-
// recipes to be visited during simplification.
1055-
VPValue *X, *Y, *X1, *Y1;
1056-
if (match(&R,
1057-
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1058-
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
1059-
X == X1 && Y == Y1) {
1060-
R.getVPSingleValue()->replaceAllUsesWith(X);
1061-
return;
1055+
// (X || !X) -> true.
1056+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
1057+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
1058+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1059+
R->eraseFromParent();
1060+
return {};
1061+
}
1062+
1063+
// (X || true) -> true.
1064+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
1065+
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
1066+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1067+
R->eraseFromParent();
1068+
return {};
1069+
}
1070+
1071+
// (X || false) -> X.
1072+
if (match(R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
1073+
R->getVPSingleValue()->replaceAllUsesWith(X);
1074+
R->eraseFromParent();
1075+
return {};
1076+
}
1077+
1078+
// (X && !X) -> false.
1079+
if (match(R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
1080+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
1081+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1082+
R->eraseFromParent();
1083+
return {};
1084+
}
1085+
1086+
// (X && true) -> X.
1087+
if (match(R, m_LogicalAnd(m_VPValue(X), m_True()))) {
1088+
R->getVPSingleValue()->replaceAllUsesWith(X);
1089+
R->eraseFromParent();
1090+
return {};
1091+
}
1092+
1093+
// (X && false) -> false.
1094+
if (match(R, m_LogicalAnd(m_VPValue(X), m_False()))) {
1095+
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
1096+
R->getVPSingleValue()->replaceAllUsesWith(VPV);
1097+
R->eraseFromParent();
1098+
return {};
1099+
}
1100+
1101+
// (X * 1) -> X.
1102+
if (match(R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
1103+
R->getVPSingleValue()->replaceAllUsesWith(X);
1104+
R->eraseFromParent();
1105+
return {};
1106+
}
1107+
1108+
// (X && Y) || (X && Z) -> X && (Y || Z).
1109+
if (match(R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1110+
m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
1111+
X == X1) {
1112+
auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R->getDebugLoc());
1113+
YorZ->insertBefore(R);
1114+
auto *VPI = new VPInstruction(VPInstruction::LogicalAnd, {X, YorZ},
1115+
R->getDebugLoc());
1116+
VPI->insertBefore(R);
1117+
R->getVPSingleValue()->replaceAllUsesWith(VPI);
1118+
R->eraseFromParent();
1119+
return {VPI, YorZ};
10621120
}
10631121

1064-
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1065-
return R.getVPSingleValue()->replaceAllUsesWith(A);
1122+
return {};
10661123
}
10671124

10681125
/// Try to simplify the recipes in \p Plan.
@@ -1071,8 +1128,16 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
10711128
Plan.getEntry());
10721129
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
10731130
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1074-
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1075-
simplifyRecipe(R, TypeInfo);
1131+
// Populate a Worklist, as simplifyRecipe might return a new recipe that we
1132+
// need to re-process.
1133+
SmallVector<VPRecipeBase *> Worklist;
1134+
for (auto &R : VPBB->getRecipeList())
1135+
Worklist.push_back(&R);
1136+
1137+
while (!Worklist.empty()) {
1138+
VPRecipeBase *R = Worklist.pop_back_val();
1139+
for (VPRecipeBase *Cand : simplifyRecipe(R, TypeInfo, Ctx))
1140+
Worklist.push_back(Cand);
10761141
}
10771142
}
10781143
}

llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll

Lines changed: 37 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,135 +7,109 @@ define void @test(ptr %p, i40 %a) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
11-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
1210
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1311
; CHECK: vector.body:
1412
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
1513
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
1614
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1715
; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1816
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
19-
; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
20-
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
21-
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
22-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
23-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
24-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
25-
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
26-
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
2717
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
2818
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
2919
; CHECK: pred.store.if:
30-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
31-
; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
20+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3221
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
3322
; CHECK: pred.store.continue:
3423
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
3524
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
36-
; CHECK: pred.store.if3:
37-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
38-
; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
25+
; CHECK: pred.store.if1:
26+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
3927
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
28+
; CHECK: pred.store.continue2:
29+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
30+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
31+
; CHECK: pred.store.if3:
32+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
33+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
4034
; CHECK: pred.store.continue4:
41-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
35+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
4236
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
4337
; CHECK: pred.store.if5:
44-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
45-
; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
38+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
4639
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
4740
; CHECK: pred.store.continue6:
48-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
41+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
4942
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
5043
; CHECK: pred.store.if7:
51-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
52-
; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
44+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
5345
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
5446
; CHECK: pred.store.continue8:
55-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
47+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
5648
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
5749
; CHECK: pred.store.if9:
58-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
59-
; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
50+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6051
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
6152
; CHECK: pred.store.continue10:
62-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
53+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
6354
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
6455
; CHECK: pred.store.if11:
65-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
66-
; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
56+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
6757
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
6858
; CHECK: pred.store.continue12:
69-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
59+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
7060
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
7161
; CHECK: pred.store.if13:
72-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
73-
; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
62+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
7463
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
7564
; CHECK: pred.store.continue14:
76-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
65+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
7766
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
7867
; CHECK: pred.store.if15:
79-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
80-
; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
68+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8169
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
8270
; CHECK: pred.store.continue16:
83-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
71+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
8472
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
8573
; CHECK: pred.store.if17:
86-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
87-
; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
74+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
8875
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
8976
; CHECK: pred.store.continue18:
90-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
77+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
9178
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
9279
; CHECK: pred.store.if19:
93-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
94-
; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
80+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
9581
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
9682
; CHECK: pred.store.continue20:
97-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
83+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
9884
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
9985
; CHECK: pred.store.if21:
100-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
101-
; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
86+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10287
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
10388
; CHECK: pred.store.continue22:
104-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
89+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
10590
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
10691
; CHECK: pred.store.if23:
107-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
108-
; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
92+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
10993
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
11094
; CHECK: pred.store.continue24:
111-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
95+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
11296
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
11397
; CHECK: pred.store.if25:
114-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
115-
; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
98+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
11699
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
117100
; CHECK: pred.store.continue26:
118-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
101+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
119102
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
120103
; CHECK: pred.store.if27:
121-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
122-
; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
104+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
123105
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
124106
; CHECK: pred.store.continue28:
125-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
126-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
107+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
108+
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE32]]
127109
; CHECK: pred.store.if29:
128-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
129-
; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
130-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
131-
; CHECK: pred.store.continue30:
132-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
133-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
134-
; CHECK: pred.store.if31:
135-
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
136-
; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
110+
; CHECK-NEXT: store i1 false, ptr [[P]], align 1
137111
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
138-
; CHECK: pred.store.continue32:
112+
; CHECK: pred.store.continue30:
139113
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
140114
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
141115
; CHECK: middle.block:

0 commit comments

Comments
 (0)