Skip to content

Commit b050048

Browse files
authored
[VPlan] Simplify (X && Y) || (X && !Y) -> X. (#89386)
Simplify a common pattern generated for masks when folding the tail. PR: #89386
1 parent c1c1567 commit b050048

File tree

8 files changed

+37
-54
lines changed

8 files changed

+37
-54
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,15 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
270270

271271
template <typename Op0_t, typename Op1_t>
272272
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
273-
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
273+
m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
274274
return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
275275
}
276+
277+
template <typename Op0_t, typename Op1_t>
278+
inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd>
279+
m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
280+
return m_VPInstruction<VPInstruction::LogicalAnd, Op0_t, Op1_t>(Op0, Op1);
281+
}
276282
} // namespace VPlanPatternMatch
277283
} // namespace llvm
278284

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,19 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
935935
#endif
936936
}
937937

938+
// Simplify (X && Y) || (X && !Y) -> X.
939+
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
940+
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
941+
// recipes to be visited during simplification.
942+
VPValue *X, *Y, *X1, *Y1;
943+
if (match(&R,
944+
m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
945+
m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
946+
X == X1 && Y == Y1) {
947+
R.getVPSingleValue()->replaceAllUsesWith(X);
948+
return;
949+
}
950+
938951
if (match(&R, m_CombineOr(m_Mul(m_VPValue(A), m_SpecificInt(1)),
939952
m_Mul(m_SpecificInt(1), m_VPValue(A)))))
940953
return R.getVPSingleValue()->replaceAllUsesWith(A);
@@ -1402,7 +1415,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
14021415
// for dependence analysis). Instead, replace it with an equivalent Add.
14031416
// This is possible as all users of the disjoint OR only access lanes
14041417
// where the operands are disjoint or poison otherwise.
1405-
if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) &&
1418+
if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
14061419
RecWithFlags->isDisjoint()) {
14071420
VPBuilder Builder(RecWithFlags);
14081421
VPInstruction *New = Builder.createOverflowingOp(

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
223223
; TFCOMMON-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP9]])
224224
; TFCOMMON-NEXT: [[TMP11:%.*]] = xor <vscale x 2 x i1> [[TMP8]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
225225
; TFCOMMON-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer
226-
; TFCOMMON-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i1> [[TMP9]], [[TMP12]]
227226
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP10]]
228227
; TFCOMMON-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
229-
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[TMP13]])
228+
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
230229
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
231230
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
232231
; TFCOMMON-NEXT: [[TMP15:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -272,16 +271,14 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
272271
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[TMP14]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
273272
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i1> zeroinitializer
274273
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i1> zeroinitializer
275-
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = or <vscale x 2 x i1> [[TMP15]], [[TMP21]]
276-
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = or <vscale x 2 x i1> [[TMP16]], [[TMP22]]
277274
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP21]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP17]]
278275
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP18]]
279276
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
280277
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
281278
; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 2
282279
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP25]], i64 [[TMP27]]
283-
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[TMP23]])
284-
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[TMP24]])
280+
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
281+
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
285282
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
286283
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
287284
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,9 +1241,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
12411241
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x float> poison)
12421242
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
12431243
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
1244-
; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP18]]
12451244
; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[WIDE_MASKED_LOAD1]]
1246-
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
1245+
; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
12471246
; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP20]])
12481247
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]]
12491248
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])

llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -462,13 +462,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
462462
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
463463
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
464464
; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> poison)
465-
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[TMP12]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
466-
; TF-SCALABLE-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
467-
; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or <vscale x 2 x i1> [[TMP13]], [[TMP15]]
468465
; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer
469466
; TF-SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]]
470467
; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0
471-
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[TMP17]])
468+
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
472469
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]]
473470
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
474471
; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -510,13 +507,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
510507
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
511508
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
512509
; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
513-
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
514-
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
515-
; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
516510
; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
517511
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
518512
; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
519-
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
513+
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
520514
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
521515
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
522516
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
@@ -1296,12 +1290,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
12961290
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
12971291
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
12981292
; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP13]])
1299-
; TF-SCALABLE-NEXT: [[TMP15:%.*]] = xor <vscale x 2 x i1> [[TMP12]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
1300-
; TF-SCALABLE-NEXT: [[TMP16:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i1> zeroinitializer
1301-
; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or <vscale x 2 x i1> [[TMP13]], [[TMP16]]
13021293
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]]
13031294
; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0
1304-
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[TMP17]])
1295+
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
13051296
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]]
13061297
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
13071298
; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1344,12 +1335,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
13441335
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
13451336
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
13461337
; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]])
1347-
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
1348-
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
1349-
; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP5]]
13501338
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
13511339
; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
1352-
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
1340+
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
13531341
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
13541342
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
13551343
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028

llvm/test/Transforms/LoopVectorize/uniform-blend.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,8 @@ define void @blend_chain_iv(i1 %c) {
8686
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
8787
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
8888
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[MASK1]], <4 x i1> zeroinitializer
89-
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[MASK1]], <i1 true, i1 true, i1 true, i1 true>
90-
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
91-
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]]
9289
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> undef
93-
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[PREDPHI]], <4 x i64> undef
90+
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[MASK1]], <4 x i64> [[PREDPHI]], <4 x i64> undef
9491
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 0
9592
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP9]]
9693
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 1

llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,6 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A
172172
; CHECK: vector.ph:
173173
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
174174
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
175-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[Y]], i64 0
176-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
177175
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
178176
; CHECK: vector.body:
179177
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -184,14 +182,8 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A
184182
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
185183
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
186184
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
187-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]]
188-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
189-
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
190-
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
191-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
192-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
193-
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
194-
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], ptr [[B]], ptr poison
185+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
186+
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], ptr [[B]], ptr poison
195187
; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2
196188
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i64 0
197189
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT3]], <4 x i16> poison, <4 x i32> zeroinitializer

0 commit comments

Comments
 (0)