Skip to content

Commit f4174c0

Browse files
committed
[VPlan] Introduce explicit broadcasts for live-ins.
Add a new VPInstruction::Broadcast opcode and use it to materialize explicit broadcasts of live-ins. The initial patch only materializes the broadcasts if the vector preheader dominates all uses that need it. Later patches will pick the best valid insert point, thus retiring implicit hoisting of broadcasts from VPTransformState::get().
1 parent 7469032 commit f4174c0

35 files changed

+360
-271
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7658,7 +7658,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76587658
((VectorizingEpilogue && ExpandedSCEVs) ||
76597659
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
76607660
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7661-
7661+
VPlanTransforms::materializeBroadcasts(BestVPlan);
76627662
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
76637663
// cost model is complete for better cost estimates.
76647664
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,6 +1210,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
12101210
CanonicalIVIncrementForPart,
12111211
BranchOnCount,
12121212
BranchOnCond,
1213+
Broadcast,
12131214
ComputeReductionResult,
12141215
// Takes the VPValue to extract from as first operand and the lane or part
12151216
// to extract as second operand, counting from the end starting with 1 for
@@ -1858,6 +1859,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
18581859
bool isInvariantCond() const {
18591860
return getCond()->isDefinedOutsideLoopRegions();
18601861
}
1862+
1863+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// Only the condition operand can qualify, and only when isInvariantCond()
/// holds (i.e. the condition is defined outside the loop regions); every
/// other operand yields false. Asserts that \p Op is an operand of this
/// recipe.
1864+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1865+
assert(is_contained(operands(), Op) &&
1866+
"Op must be an operand of the recipe");
1867+
return Op == getCond() && isInvariantCond();
1868+
}
18611869
};
18621870

18631871
/// A recipe for handling GEP instructions.
@@ -1905,6 +1913,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
19051913
void print(raw_ostream &O, const Twine &Indent,
19061914
VPSlotTracker &SlotTracker) const override;
19071915
#endif
1916+
1917+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// Only operand 0 is reported as first-lane-only; every other operand
/// yields false. Asserts that \p Op is an operand of this recipe.
/// NOTE(review): the result for operand 0 does not depend on whether that
/// operand is loop-invariant -- confirm this is intended when operand 0 is a
/// per-lane (widened) value.
1918+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1919+
assert(is_contained(operands(), Op) &&
1920+
"Op must be an operand of the recipe");
1921+
return Op == getOperand(0);
1922+
}
19081923
};
19091924

19101925
/// A recipe to compute the pointers for widened memory accesses of IndexTy
@@ -2220,6 +2235,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
22202235
VPValue *getLastUnrolledPartOperand() {
22212236
return getNumOperands() == 5 ? getOperand(4) : this;
22222237
}
2238+
2239+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// This holds solely for the start value (getStartValue()); every other
/// operand yields false. Asserts that \p Op is an operand of this recipe.
2240+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2241+
assert(is_contained(operands(), Op) &&
2242+
"Op must be an operand of the recipe");
2243+
return Op == getStartValue();
2244+
}
22232245
};
22242246

22252247
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
@@ -2252,6 +2274,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
22522274
/// Returns true if only scalar values will be generated.
22532275
bool onlyScalarsGenerated(bool IsScalable);
22542276

2277+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// This holds solely for operand 0; every other operand yields false.
/// Asserts that \p Op is an operand of this recipe.
/// NOTE(review): operand 0 is presumably the induction's start value --
/// confirm against the recipe's constructor.
2278+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2279+
assert(is_contained(operands(), Op) &&
2280+
"Op must be an operand of the recipe");
2281+
return Op == getOperand(0);
2282+
}
2283+
22552284
/// Returns the VPValue representing the value of this induction at
22562285
/// the first unrolled part, if it exists. Returns itself if unrolling did not
22572286
/// take place.
@@ -2380,6 +2409,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
23802409
void print(raw_ostream &O, const Twine &Indent,
23812410
VPSlotTracker &SlotTracker) const override;
23822411
#endif
2412+
2413+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// This holds solely for the start value (getStartValue()); every other
/// operand yields false. Asserts that \p Op is an operand of this recipe.
2414+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2415+
assert(is_contained(operands(), Op) &&
2416+
"Op must be an operand of the recipe");
2417+
return Op == getStartValue();
2418+
}
23832419
};
23842420

23852421
/// A recipe for handling reduction phis. The start value is the first operand
@@ -2446,6 +2482,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
24462482

24472483
/// Returns true, if the phi is part of an in-loop reduction.
24482484
bool isInLoop() const { return IsInLoop; }
2485+
2486+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// This holds solely for the start value (getStartValue()); every other
/// operand yields false. Asserts that \p Op is an operand of this recipe.
2487+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2488+
assert(is_contained(operands(), Op) &&
2489+
"Op must be an operand of the recipe");
2490+
return Op == getStartValue();
2491+
}
24492492
};
24502493

24512494
/// A recipe for forming partial reductions. In the loop, an accumulator and
@@ -4063,6 +4106,8 @@ class VPlan {
40634106
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
40644107
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
40654108

4109+
/// Return the VPValues stored in VPLiveInsToFree as an ArrayRef; callers
/// iterate it to visit the plan's live-ins.
ArrayRef<VPValue *> getLiveIns() const { return VPLiveInsToFree; }
4110+
40664111
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
40674112
/// Print the live-ins of this VPlan to \p O.
40684113
void printLiveIns(raw_ostream &O) const;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
9090
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
9191
"LogicalAnd operands should be bool");
9292
return IntegerType::get(Ctx, 1);
93+
case VPInstruction::Broadcast:
9394
case VPInstruction::PtrAdd:
9495
// Return the type based on the pointer argument (i.e. first operand).
9596
return inferScalarType(R->getOperand(0));

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
575575
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
576576
return CondBr;
577577
}
578+
case VPInstruction::Broadcast: {
579+
return Builder.CreateVectorSplat(
580+
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
581+
}
578582
case VPInstruction::ComputeReductionResult: {
579583
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
580584
// and will be removed by breaking up the recipe further.
@@ -798,7 +802,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
798802
case Instruction::ICmp:
799803
case Instruction::Select:
800804
case Instruction::Or:
801-
case VPInstruction::PtrAdd:
802805
// TODO: Cover additional opcodes.
803806
return vputils::onlyFirstLaneUsed(this);
804807
case VPInstruction::ActiveLaneMask:
@@ -809,6 +812,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
809812
case VPInstruction::BranchOnCond:
810813
case VPInstruction::ResumePhi:
811814
return true;
815+
case VPInstruction::PtrAdd:
816+
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
812817
};
813818
llvm_unreachable("switch should return");
814819
}
@@ -881,6 +886,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
881886
case VPInstruction::BranchOnCount:
882887
O << "branch-on-count";
883888
break;
889+
case VPInstruction::Broadcast:
890+
O << "broadcast";
891+
break;
892+
884893
case VPInstruction::ExtractFromEnd:
885894
O << "extract-from-end";
886895
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,3 +2153,34 @@ void VPlanTransforms::handleUncountableEarlyExit(
21532153
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21542154
LatchExitingBranch->eraseFromParent();
21552155
}
2156+
2157+
// Materialize explicit VPInstruction::Broadcast recipes for the live-ins of
// \p Plan. For each eligible live-in, a Broadcast of it is created through a
// VPBuilder constructed on the vector preheader, and the users that do not
// use the live-in's scalars are rewired to the Broadcast. Live-ins whose
// users are not all dominated by the vector preheader are left untouched.
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
2158+
VPDominatorTree VPDT;
2159+
VPDT.recalculate(Plan);
2160+
auto *VectorPreheader = Plan.getVectorPreheader();
2161+
VPBuilder Builder(VectorPreheader);
2162+
for (VPValue *LiveIn : Plan.getLiveIns()) {
2163+
// Skip live-ins that need no broadcast: every user uses scalars of the
// live-in, there is no underlying IR value, or the IR value is a Constant.
if (all_of(LiveIn->users(),
2164+
[LiveIn](VPUser *U) {
2165+
return cast<VPRecipeBase>(U)->usesScalars(LiveIn);
2166+
}) ||
2167+
!LiveIn->getLiveInIRValue() ||
2168+
isa<Constant>(LiveIn->getLiveInIRValue()))
2169+
continue;
2170+
2171+
// Add explicit broadcast if the vector preheader dominates all users.
2172+
// TODO: Find valid insert point for all users.
2173+
// A user located in the vector preheader itself also disqualifies the
// live-in, hence the explicit inequality check before the dominance query.
if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) {
2174+
return VectorPreheader != cast<VPRecipeBase>(U)->getParent() &&
2175+
VPDT.dominates(VectorPreheader,
2176+
cast<VPRecipeBase>(U)->getParent());
2177+
})) {
2178+
auto *Broadcast =
2179+
Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
2180+
// Rewire only the users that do not use scalars of the live-in, and never
// the Broadcast itself, which must keep the live-in as its operand.
LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U,
2181+
unsigned Idx) {
2182+
return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn);
2183+
});
2184+
}
2185+
}
2186+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,9 @@ struct VPlanTransforms {
169169
static void
170170
optimizeInductionExitUsers(VPlan &Plan,
171171
DenseMap<VPValue *, VPValue *> &EndValues);
172+
173+
/// Add explicit broadcasts for live-ins used as vectors.
174+
static void materializeBroadcasts(VPlan &Plan);
172175
};
173176

174177
} // namespace llvm

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2022
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2123
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2224
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2325
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2426
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
2527
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,14 +103,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
103103
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
104104
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
105105
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
106+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
107+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
106108
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
107109
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
108110
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
109111
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
110112
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
111113
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
112-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
113-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115115
; CHECK: vector.body:
116116
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
124124
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
125125
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
126126
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
127+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
128+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
127129
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
128130
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
129131
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
130132
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
131133
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
132134
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
133-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136136
; CHECK: [[VECTOR_BODY]]:
137137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -248,21 +248,21 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
248248
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
249249
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
250250
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
251+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
252+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251253
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252254
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253255
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254256
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
255-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
256-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
257-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
257+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
258258
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
259259
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
260260
; CHECK: [[VECTOR_BODY]]:
261261
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
262262
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
263263
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264264
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
265-
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
265+
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
266266
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
267267
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
268268
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
283283
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
284284
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
285285
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
286-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
286+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
287287
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
288288
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
289289
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)