Skip to content

Commit 4277c21

Browse files
authored
[VPlan] Introduce explicit broadcasts for live-ins. (#124644)
Add a new VPInstruction::Broadcast opcode and use it to materialize explicit broadcasts of live-ins. The initial patch only materializes the broadcasts if the vector preheader dominates all uses that need it. Later patches will pick the best valid insert point, thus retiring implicit hoisting of broadcasts from VPTransformState::get(). PR: #124644
1 parent 15fbdc2 commit 4277c21

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+437
-334
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7697,7 +7697,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76977697
((VectorizingEpilogue && ExpandedSCEVs) ||
76987698
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
76997699
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7700-
7700+
VPlanTransforms::materializeLiveInBroadcasts(BestVPlan);
77017701
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
77027702
// cost model is complete for better cost estimates.
77037703
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ VPlan::~VPlan() {
874874
}
875875
delete VPB;
876876
}
877-
for (VPValue *VPV : VPLiveInsToFree)
877+
for (VPValue *VPV : getLiveIns())
878878
delete VPV;
879879
if (BackedgeTakenCount)
880880
delete BackedgeTakenCount;
@@ -1245,7 +1245,7 @@ VPlan *VPlan::duplicate() {
12451245
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
12461246
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
12471247
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1248-
for (VPValue *OldLiveIn : VPLiveInsToFree) {
1248+
for (VPValue *OldLiveIn : getLiveIns()) {
12491249
Old2NewVPValues[OldLiveIn] =
12501250
NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
12511251
}
@@ -1549,7 +1549,7 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
15491549
assignName(&Plan.VectorTripCount);
15501550
if (Plan.BackedgeTakenCount)
15511551
assignName(Plan.BackedgeTakenCount);
1552-
for (VPValue *LI : Plan.VPLiveInsToFree)
1552+
for (VPValue *LI : Plan.getLiveIns())
15531553
assignName(LI);
15541554

15551555
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
864864
CanonicalIVIncrementForPart,
865865
BranchOnCount,
866866
BranchOnCond,
867+
Broadcast,
867868
ComputeReductionResult,
868869
// Takes the VPValue to extract from as first operand and the lane or part
869870
// to extract as second operand, counting from the end starting with 1 for
@@ -1460,6 +1461,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
14601461
bool isInvariantCond() const {
14611462
return getCond()->isDefinedOutsideLoopRegions();
14621463
}
1464+
1465+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1466+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1467+
assert(is_contained(operands(), Op) &&
1468+
"Op must be an operand of the recipe");
1469+
return Op == getCond() && isInvariantCond();
1470+
}
14631471
};
14641472

14651473
/// A recipe for handling GEP instructions.
@@ -1507,6 +1515,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
15071515
void print(raw_ostream &O, const Twine &Indent,
15081516
VPSlotTracker &SlotTracker) const override;
15091517
#endif
1518+
1519+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1520+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1521+
assert(is_contained(operands(), Op) &&
1522+
"Op must be an operand of the recipe");
1523+
return Op == getOperand(0) && isPointerLoopInvariant();
1524+
}
15101525
};
15111526

15121527
/// A recipe to compute the pointers for widened memory accesses of IndexTy
@@ -1822,6 +1837,16 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18221837
VPValue *getLastUnrolledPartOperand() {
18231838
return getNumOperands() == 5 ? getOperand(4) : this;
18241839
}
1840+
1841+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1842+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1843+
assert(is_contained(operands(), Op) &&
1844+
"Op must be an operand of the recipe");
1845+
// The recipe creates its own wide start value, so it only requests the
1846+
// first lane of the operand.
1847+
// TODO: Remove once creating the start value is modeled separately.
1848+
return Op == getStartValue();
1849+
}
18251850
};
18261851

18271852
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
@@ -1854,6 +1879,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
18541879
/// Returns true if only scalar values will be generated.
18551880
bool onlyScalarsGenerated(bool IsScalable);
18561881

1882+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1883+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1884+
assert(is_contained(operands(), Op) &&
1885+
"Op must be an operand of the recipe");
1886+
return Op == getOperand(0);
1887+
}
1888+
18571889
/// Returns the VPValue representing the value of this induction at
18581890
/// the first unrolled part, if it exists. Returns itself if unrolling did not
18591891
/// take place.
@@ -1975,6 +2007,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
19752007
void print(raw_ostream &O, const Twine &Indent,
19762008
VPSlotTracker &SlotTracker) const override;
19772009
#endif
2010+
2011+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2012+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2013+
assert(is_contained(operands(), Op) &&
2014+
"Op must be an operand of the recipe");
2015+
return Op == getStartValue();
2016+
}
19782017
};
19792018

19802019
/// A recipe for handling reduction phis. The start value is the first operand
@@ -2041,6 +2080,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
20412080

20422081
/// Returns true, if the phi is part of an in-loop reduction.
20432082
bool isInLoop() const { return IsInLoop; }
2083+
2084+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2085+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2086+
assert(is_contained(operands(), Op) &&
2087+
"Op must be an operand of the recipe");
2088+
return Op == getStartValue();
2089+
}
20442090
};
20452091

20462092
/// A recipe for forming partial reductions. In the loop, an accumulator and
@@ -3464,7 +3510,7 @@ class VPlan {
34643510

34653511
/// Contains all the external definitions created for this VPlan. External
34663512
/// definitions are VPValues that hold a pointer to their underlying IR.
3467-
SmallVector<VPValue *, 16> VPLiveInsToFree;
3513+
SmallVector<VPValue *, 16> VPLiveIns;
34683514

34693515
/// Mapping from SCEVs to the VPValues representing their expansions.
34703516
/// NOTE: This mapping is temporary and will be removed once all users have
@@ -3656,7 +3702,7 @@ class VPlan {
36563702
auto [It, Inserted] = Value2VPValue.try_emplace(V);
36573703
if (Inserted) {
36583704
VPValue *VPV = new VPValue(V);
3659-
VPLiveInsToFree.push_back(VPV);
3705+
VPLiveIns.push_back(VPV);
36603706
assert(VPV->isLiveIn() && "VPV must be a live-in.");
36613707
It->second = VPV;
36623708
}
@@ -3668,6 +3714,16 @@ class VPlan {
36683714
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
36693715
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
36703716

3717+
/// Return the list of live-in VPValues available in the VPlan.
3718+
ArrayRef<VPValue *> getLiveIns() const {
3719+
assert(all_of(Value2VPValue,
3720+
[this](const auto &P) {
3721+
return is_contained(VPLiveIns, P.second);
3722+
}) &&
3723+
"all VPValues in Value2VPValue must also be in VPLiveIns");
3724+
return VPLiveIns;
3725+
}
3726+
36713727
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
36723728
/// Print the live-ins of this VPlan to \p O.
36733729
void printLiveIns(raw_ostream &O) const;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
9090
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
9191
"LogicalAnd operands should be bool");
9292
return IntegerType::get(Ctx, 1);
93+
case VPInstruction::Broadcast:
9394
case VPInstruction::PtrAdd:
94-
// Return the type based on the pointer argument (i.e. first operand).
95+
// Return the type based on first operand.
9596
return inferScalarType(R->getOperand(0));
9697
case VPInstruction::BranchOnCond:
9798
case VPInstruction::BranchOnCount:

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
587587
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
588588
return CondBr;
589589
}
590+
case VPInstruction::Broadcast: {
591+
return Builder.CreateVectorSplat(
592+
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
593+
}
590594
case VPInstruction::ComputeReductionResult: {
591595
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
592596
// and will be removed by breaking up the recipe further.
@@ -837,7 +841,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
837841
case Instruction::ICmp:
838842
case Instruction::Select:
839843
case Instruction::Or:
840-
case VPInstruction::PtrAdd:
841844
// TODO: Cover additional opcodes.
842845
return vputils::onlyFirstLaneUsed(this);
843846
case VPInstruction::ActiveLaneMask:
@@ -848,6 +851,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
848851
case VPInstruction::BranchOnCond:
849852
case VPInstruction::ResumePhi:
850853
return true;
854+
case VPInstruction::PtrAdd:
855+
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
851856
};
852857
llvm_unreachable("switch should return");
853858
}
@@ -920,6 +925,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
920925
case VPInstruction::BranchOnCount:
921926
O << "branch-on-count";
922927
break;
928+
case VPInstruction::Broadcast:
929+
O << "broadcast";
930+
break;
931+
923932
case VPInstruction::ExtractFromEnd:
924933
O << "extract-from-end";
925934
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2112,3 +2112,37 @@ void VPlanTransforms::handleUncountableEarlyExit(
21122112
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21132113
LatchExitingBranch->eraseFromParent();
21142114
}
2115+
2116+
void VPlanTransforms::materializeLiveInBroadcasts(VPlan &Plan) {
2117+
if (Plan.hasScalarVFOnly())
2118+
return;
2119+
2120+
VPDominatorTree VPDT;
2121+
VPDT.recalculate(Plan);
2122+
auto *VectorPreheader = Plan.getVectorPreheader();
2123+
VPBuilder Builder(VectorPreheader);
2124+
for (VPValue *LiveIn : Plan.getLiveIns()) {
2125+
if (all_of(LiveIn->users(),
2126+
[LiveIn](VPUser *U) {
2127+
return cast<VPRecipeBase>(U)->usesScalars(LiveIn);
2128+
}) ||
2129+
!LiveIn->getLiveInIRValue() ||
2130+
isa<Constant>(LiveIn->getLiveInIRValue()))
2131+
continue;
2132+
2133+
// Add explicit broadcast if the vector preheader dominates all users.
2134+
// TODO: Find valid insert point for all users.
2135+
if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) {
2136+
return VectorPreheader != cast<VPRecipeBase>(U)->getParent() &&
2137+
VPDT.dominates(VectorPreheader,
2138+
cast<VPRecipeBase>(U)->getParent());
2139+
})) {
2140+
auto *Broadcast =
2141+
Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
2142+
LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U,
2143+
unsigned Idx) {
2144+
return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn);
2145+
});
2146+
}
2147+
}
2148+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ struct VPlanTransforms {
173173
static void
174174
optimizeInductionExitUsers(VPlan &Plan,
175175
DenseMap<VPValue *, VPValue *> &EndValues);
176+
177+
/// Add explicit broadcasts for live-ins used as vectors.
178+
static void materializeLiveInBroadcasts(VPlan &Plan);
176179
};
177180

178181
} // namespace llvm

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2022
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2123
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2224
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2325
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2426
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
2527
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,14 +103,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
103103
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
104104
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
105105
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
106+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
107+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
106108
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
107109
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
108110
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
109111
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
110112
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
111113
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
112-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
113-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115115
; CHECK: vector.body:
116116
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
124124
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
125125
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
126126
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
127+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
128+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
127129
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
128130
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
129131
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
130132
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
131133
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
132134
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
133-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136136
; CHECK: [[VECTOR_BODY]]:
137137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -248,21 +248,21 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
248248
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
249249
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
250250
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
251+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
252+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251253
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252254
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253255
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254256
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
255-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
256-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
257-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
257+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
258258
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
259259
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
260260
; CHECK: [[VECTOR_BODY]]:
261261
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
262262
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
263263
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264264
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
265-
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
265+
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
266266
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
267267
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
268268
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
283283
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
284284
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
285285
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
286-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
286+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
287287
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
288288
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
289289
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)