Skip to content

Commit 8cc66ea

Browse files
committed
[VPlan] Simplify VPBlendRecipes to select instructions
Stacked on llvm#133977.

When looking at some EVL tail-folded code in SPEC CPU 2017, I noticed that we sometimes have both VPBlendRecipes and select VPInstructions in the same plan:

    EMIT vp<%active.lane.mask> = active lane mask vp<%5>, vp<%3>
    EMIT vp<%7> = icmp ...
    EMIT vp<%8> = logical-and vp<%active.lane.mask>, vp<%7>
    BLEND ir<%8> = ir<%n.015> ir<%foo>/vp<%8>
    EMIT vp<%9> = select vp<%active.lane.mask>, ir<%8>, ir<%n.015>

Since a blend will ultimately generate a chain of selects, we could fold the blend into the select:

    EMIT vp<%active.lane.mask> = active lane mask vp<%5>, vp<%3>
    EMIT vp<%7> = icmp ...
    EMIT vp<%8> = logical-and vp<%active.lane.mask>, vp<%7>
    EMIT ir<%8> = select vp<%8>, ir<%foo>, ir<%n.015>

So this patch canonicalizes blends to a series of select instructions, which allows them to be simplified further together with other select instructions.

Eventually we may be able to remove VPBlendRecipes altogether and emit the select VPInstructions directly, but I've left that out of this patch for now, as it needs the normalization logic to be rewritten in terms of selects.

The existing `BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask` simplification is converted to operate on selects (`select !c, x, y -> select c, y, x`).

A `select c1, (select c2, x, y), x -> select (c1 & c2), x, y` combine is also added to prevent regressions; without it we end up with extra selects for some reason.

We also need to mark VPInstructions with underlying values as generating vectors in willGenerateVectors to prevent a regression.
1 parent 43e9f29 commit 8cc66ea

13 files changed

+67
-106
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4468,7 +4468,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
44684468
case VPDef::VPScalarIVStepsSC:
44694469
case VPDef::VPScalarCastSC:
44704470
case VPDef::VPReplicateSC:
4471-
case VPDef::VPInstructionSC:
44724471
case VPDef::VPCanonicalIVPHISC:
44734472
case VPDef::VPVectorPointerSC:
44744473
case VPDef::VPVectorEndPointerSC:
@@ -4477,6 +4476,10 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
44774476
case VPDef::VPPredInstPHISC:
44784477
case VPDef::VPBranchOnMaskSC:
44794478
continue;
4479+
case VPDef::VPInstructionSC:
4480+
if (!cast<VPInstruction>(R).getUnderlyingValue())
4481+
continue;
4482+
break;
44804483
case VPDef::VPReductionSC:
44814484
case VPDef::VPActiveLaneMaskPHISC:
44824485
case VPDef::VPWidenCallSC:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,10 +2150,6 @@ class VPBlendRecipe : public VPSingleDefRecipe {
21502150
/// Generate the phi/select nodes.
21512151
void execute(VPTransformState &State) override;
21522152

2153-
/// Return the cost of this VPWidenMemoryRecipe.
2154-
InstructionCost computeCost(ElementCount VF,
2155-
VPCostContext &Ctx) const override;
2156-
21572153
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
21582154
/// Print the recipe.
21592155
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 14 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,19 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
797797
return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
798798
Ctx.CostKind);
799799
}
800+
case Instruction::Select: {
801+
if (!getUnderlyingValue())
802+
return 0;
803+
// Handle cases where only the first lane is used the same way as the legacy
804+
// cost model.
805+
if (vputils::onlyFirstLaneUsed(this))
806+
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
807+
Type *ResTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
808+
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
809+
return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResTy, CmpTy,
810+
CmpInst::BAD_ICMP_PREDICATE,
811+
Ctx.CostKind);
812+
}
800813
case VPInstruction::AnyOf: {
801814
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
802815
return Ctx.TTI.getArithmeticReductionCost(
@@ -2276,54 +2289,7 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
22762289
#endif
22772290

22782291
void VPBlendRecipe::execute(VPTransformState &State) {
2279-
assert(isNormalized() && "Expected blend to be normalized!");
2280-
State.setDebugLocFrom(getDebugLoc());
2281-
// We know that all PHIs in non-header blocks are converted into
2282-
// selects, so we don't have to worry about the insertion order and we
2283-
// can just use the builder.
2284-
// At this point we generate the predication tree. There may be
2285-
// duplications since this is a simple recursive scan, but future
2286-
// optimizations will clean it up.
2287-
2288-
unsigned NumIncoming = getNumIncomingValues();
2289-
2290-
// Generate a sequence of selects of the form:
2291-
// SELECT(Mask3, In3,
2292-
// SELECT(Mask2, In2,
2293-
// SELECT(Mask1, In1,
2294-
// In0)))
2295-
// Note that Mask0 is never used: lanes for which no path reaches this phi and
2296-
// are essentially undef are taken from In0.
2297-
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2298-
Value *Result = nullptr;
2299-
for (unsigned In = 0; In < NumIncoming; ++In) {
2300-
// We might have single edge PHIs (blocks) - use an identity
2301-
// 'select' for the first PHI operand.
2302-
Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2303-
if (In == 0)
2304-
Result = In0; // Initialize with the first incoming value.
2305-
else {
2306-
// Select between the current value and the previous incoming edge
2307-
// based on the incoming mask.
2308-
Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2309-
Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2310-
}
2311-
}
2312-
State.set(this, Result, OnlyFirstLaneUsed);
2313-
}
2314-
2315-
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
2316-
VPCostContext &Ctx) const {
2317-
// Handle cases where only the first lane is used the same way as the legacy
2318-
// cost model.
2319-
if (vputils::onlyFirstLaneUsed(this))
2320-
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2321-
2322-
Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2323-
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2324-
return (getNumIncomingValues() - 1) *
2325-
Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2326-
CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
2292+
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
23272293
}
23282294

23292295
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,26 @@ static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
990990
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y))))))
991991
return X;
992992

993+
// select c1, (select c2, x, y), x -> select (c1 & c2), x, y
994+
VPBuilder Builder(&R);
995+
VPValue *C1, *C2;
996+
if (match(&R, m_Select(m_VPValue(C1),
997+
m_Select(m_VPValue(C2), m_VPValue(X), m_VPValue(Y)),
998+
m_Deferred(X))) &&
999+
!R.getOperand(1)->hasMoreThanOneUniqueUser()) {
1000+
auto *S = Builder.createSelect(Builder.createLogicalAnd(C1, C2), X, Y,
1001+
R.getDebugLoc());
1002+
S->setUnderlyingValue(R.getVPSingleValue()->getUnderlyingValue());
1003+
return S;
1004+
}
1005+
1006+
// select !c, x, y -> select c, y, x
1007+
if (match(&R, m_Select(m_Not(m_VPValue(C1)), m_VPValue(X), m_VPValue(Y)))) {
1008+
auto *S = Builder.createSelect(C1, Y, X, R.getDebugLoc());
1009+
S->setUnderlyingValue(R.getVPSingleValue()->getUnderlyingValue());
1010+
return S;
1011+
}
1012+
9931013
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
9941014
return A;
9951015

@@ -1075,38 +1095,17 @@ void VPlanTransforms::simplifyBlends(VPlan &Plan) {
10751095
}
10761096
}
10771097

1078-
SmallVector<VPValue *, 4> OperandsWithMask;
1079-
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1080-
1098+
VPBuilder Builder(&R);
1099+
VPValue *Select = Blend->getIncomingValue(StartIndex);
10811100
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
10821101
if (I == StartIndex)
10831102
continue;
1084-
OperandsWithMask.push_back(Blend->getIncomingValue(I));
1085-
OperandsWithMask.push_back(Blend->getMask(I));
1086-
}
1087-
1088-
auto *NewBlend = new VPBlendRecipe(
1089-
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
1090-
NewBlend->insertBefore(&R);
1091-
1092-
VPValue *DeadMask = Blend->getMask(StartIndex);
1093-
Blend->replaceAllUsesWith(NewBlend);
1094-
Blend->eraseFromParent();
1095-
recursivelyDeleteDeadRecipes(DeadMask);
1096-
1097-
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1098-
VPValue *NewMask;
1099-
if (NewBlend->getNumOperands() == 3 &&
1100-
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
1101-
VPValue *Inc0 = NewBlend->getOperand(0);
1102-
VPValue *Inc1 = NewBlend->getOperand(1);
1103-
VPValue *OldMask = NewBlend->getOperand(2);
1104-
NewBlend->setOperand(0, Inc1);
1105-
NewBlend->setOperand(1, Inc0);
1106-
NewBlend->setOperand(2, NewMask);
1107-
if (OldMask->getNumUsers() == 0)
1108-
cast<VPInstruction>(OldMask)->eraseFromParent();
1103+
Select =
1104+
Builder.createSelect(Blend->getMask(I), Blend->getIncomingValue(I),
1105+
Select, R.getDebugLoc(), "predphi");
1106+
Select->setUnderlyingValue(Blend->getUnderlyingValue());
11091107
}
1108+
Blend->replaceAllUsesWith(Select);
11101109
}
11111110
}
11121111
}

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
300300
; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[VP_OP_LOAD]]
301301
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[TMP18]], splat (i1 true)
302302
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i1> zeroinitializer
303-
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]]
304-
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
303+
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i1> zeroinitializer
304+
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP22]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]]
305305
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64
306306
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
307307
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]

llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,11 +1360,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
13601360
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
13611361
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
13621362
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
1363-
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
13641363
; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
13651364
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
1366-
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
1367-
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
1365+
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
1366+
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP10]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
13681367
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
13691368
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
13701369
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]

llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,11 +695,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
695695
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
696696
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
697697
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
698-
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
699698
; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
700699
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
701-
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
702-
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
700+
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
701+
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP10]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
703702
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
704703
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
705704
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]

llvm/test/Transforms/LoopVectorize/reduction-predselect.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
876876
; CHECK: pred.load.continue6:
877877
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
878878
; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
879-
; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
879+
; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[TMP23]]
880880
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
881881
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
882882
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
@@ -962,7 +962,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
962962
; CHECK: pred.load.continue6:
963963
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
964964
; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
965-
; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
965+
; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[TMP23]]
966966
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
967967
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
968968
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260

llvm/test/Transforms/LoopVectorize/reduction.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -766,11 +766,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
766766
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
767767
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
768768
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
769-
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
770769
; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
771770
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
772-
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
773-
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
771+
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
772+
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP10]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
774773
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
775774
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
776775
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]

llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,7 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
107107
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1
108108
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
109109
; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
110-
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
111-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]]
110+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer
112111
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
113112
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]]
114113
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0
@@ -297,8 +296,7 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
297296
; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
298297
; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
299298
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
300-
; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
301-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]]
299+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer
302300
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
303301
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]]
304302
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0

llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,15 +166,17 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A
166166
; CHECK-NEXT: entry:
167167
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
168168
; CHECK: vector.ph:
169+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
170+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
169171
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
170172
; CHECK: vector.body:
171173
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
172174
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
173175
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[OFFSET_IDX]]
174176
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
175177
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
176-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 0
177-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP3]], [[X]]
178+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
179+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
178180
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP4]], ptr poison, ptr [[B]]
179181
; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2
180182
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i64 0

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
186186
; CHECK-NEXT: Successor(s): if.then.0
187187
; CHECK-EMPTY:
188188
; CHECK-NEXT: if.then.0:
189-
; CHECK-NEXT: BLEND ir<%d> = ir<0> vp<[[PRED]]>/ir<%cmp>
189+
; CHECK-NEXT: EMIT ir<%d> = select ir<%cmp>, vp<[[PRED]]>, ir<0>
190190
; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]>
191191
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
192192
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d>
@@ -375,7 +375,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
375375
; CHECK-NEXT: if.then.0:
376376
; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp2>
377377
; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = logical-and vp<[[NOT1]]>, vp<[[NOT2]]>
378-
; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]> ir<%psd>/vp<[[SEL2]]>
378+
; CHECK-NEXT: EMIT ir<%ysd.0> = select vp<[[SEL2]]>, ir<%psd>, vp<[[PHI]]>
379379
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd>
380380
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0>
381381
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
@@ -756,7 +756,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
756756
; CHECK-EMPTY:
757757
; CHECK-NEXT: if.then.1:
758758
; CHECK-NEXT: WIDEN ir<%fadd> = fadd vp<[[PHI1]]>, vp<[[PHI2]]>
759-
; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value> ir<%fadd>/ir<%ifcond>
759+
; CHECK-NEXT: EMIT ir<%st.value> = select ir<%ifcond>, ir<%fadd>, ir<%ld.value>
760760
; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]>
761761
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr>
762762
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value>

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
369369
; CHECK-NEXT: Successor(s): then.0.0
370370
; CHECK-EMPTY:
371371
; CHECK-NEXT: then.0.0:
372-
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
372+
; CHECK-NEXT: EMIT ir<%p> = select vp<[[MASK2]]>, vp<[[PRED]]>, ir<0>
373373
; CHECK-NEXT: Successor(s): pred.store
374374
; CHECK-EMPTY:
375375
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -468,7 +468,7 @@ define void @pred_cfg2(i32 %k, i32 %j) {
468468
; CHECK-NEXT: Successor(s): then.0.0
469469
; CHECK-EMPTY:
470470
; CHECK-NEXT: then.0.0:
471-
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
471+
; CHECK-NEXT: EMIT ir<%p> = select vp<[[MASK2]]>, vp<[[PRED]]>, ir<0>
472472
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.1>
473473
; CHECK-NEXT: Successor(s): pred.store
474474
; CHECK-EMPTY:
@@ -574,7 +574,7 @@ define void @pred_cfg3(i32 %k, i32 %j) {
574574
; CHECK-NEXT: Successor(s): then.0.0
575575
; CHECK-EMPTY:
576576
; CHECK-NEXT: then.0.0:
577-
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
577+
; CHECK-NEXT: EMIT ir<%p> = select vp<[[MASK2]]>, vp<[[PRED]]>, ir<0>
578578
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0>
579579
; CHECK-NEXT: Successor(s): pred.store
580580
; CHECK-EMPTY:

0 commit comments

Comments
 (0)