Skip to content

Commit 2aaf89c

Browse files
committed
[VPlan] Introduce explicit broadcasts for live-ins.
Add a new VPInstruction::Broadcast opcode and use it to materialize explicit broadcasts of live-ins. The initial patch only materializes the broadcasts if the vector preheader dominates all uses that need it. Later patches will pick the best valid insert point, thus retiring implicit hoisting of broadcasts from VPTransformState::get().
1 parent 3b2b7ec commit 2aaf89c

35 files changed

+360
-271
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7648,7 +7648,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76487648
((VectorizingEpilogue && ExpandedSCEVs) ||
76497649
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
76507650
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7651-
7651+
VPlanTransforms::materializeBroadcasts(BestVPlan);
76527652
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
76537653
// cost model is complete for better cost estimates.
76547654
VPlanTransforms::unrollByUF(BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,6 +1210,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
12101210
CanonicalIVIncrementForPart,
12111211
BranchOnCount,
12121212
BranchOnCond,
1213+
Broadcast,
12131214
ComputeReductionResult,
12141215
// Takes the VPValue to extract from as first operand and the lane or part
12151216
// to extract as second operand, counting from the end starting with 1 for
@@ -1855,6 +1856,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
18551856
bool isInvariantCond() const {
18561857
return getCond()->isDefinedOutsideLoopRegions();
18571858
}
1859+
1860+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1861+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1862+
assert(is_contained(operands(), Op) &&
1863+
"Op must be an operand of the recipe");
1864+
return Op == getCond() && isInvariantCond();
1865+
}
18581866
};
18591867

18601868
/// A recipe for handling GEP instructions.
@@ -1902,6 +1910,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
19021910
void print(raw_ostream &O, const Twine &Indent,
19031911
VPSlotTracker &SlotTracker) const override;
19041912
#endif
1913+
1914+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1915+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1916+
assert(is_contained(operands(), Op) &&
1917+
"Op must be an operand of the recipe");
1918+
return Op == getOperand(0);
1919+
}
19051920
};
19061921

19071922
/// A recipe to compute the pointers for widened memory accesses of IndexTy
@@ -2217,6 +2232,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
22172232
VPValue *getLastUnrolledPartOperand() {
22182233
return getNumOperands() == 5 ? getOperand(4) : this;
22192234
}
2235+
2236+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2237+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2238+
assert(is_contained(operands(), Op) &&
2239+
"Op must be an operand of the recipe");
2240+
return Op == getStartValue();
2241+
}
22202242
};
22212243

22222244
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
@@ -2249,6 +2271,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
22492271
/// Returns true if only scalar values will be generated.
22502272
bool onlyScalarsGenerated(bool IsScalable);
22512273

2274+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2275+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2276+
assert(is_contained(operands(), Op) &&
2277+
"Op must be an operand of the recipe");
2278+
return Op == getOperand(0);
2279+
}
2280+
22522281
/// Returns the VPValue representing the value of this induction at
22532282
/// the first unrolled part, if it exists. Returns itself if unrolling did not
22542283
/// take place.
@@ -2377,6 +2406,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
23772406
void print(raw_ostream &O, const Twine &Indent,
23782407
VPSlotTracker &SlotTracker) const override;
23792408
#endif
2409+
2410+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2411+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2412+
assert(is_contained(operands(), Op) &&
2413+
"Op must be an operand of the recipe");
2414+
return Op == getStartValue();
2415+
}
23802416
};
23812417

23822418
/// A recipe for handling reduction phis. The start value is the first operand
@@ -2443,6 +2479,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
24432479

24442480
/// Returns true, if the phi is part of an in-loop reduction.
24452481
bool isInLoop() const { return IsInLoop; }
2482+
2483+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2484+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2485+
assert(is_contained(operands(), Op) &&
2486+
"Op must be an operand of the recipe");
2487+
return Op == getStartValue();
2488+
}
24462489
};
24472490

24482491
/// A recipe for forming partial reductions. In the loop, an accumulator and
@@ -4054,6 +4097,8 @@ class VPlan {
40544097
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
40554098
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
40564099

4100+
ArrayRef<VPValue *> getLiveIns() const { return VPLiveInsToFree; }
4101+
40574102
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
40584103
/// Print the live-ins of this VPlan to \p O.
40594104
void printLiveIns(raw_ostream &O) const;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
8989
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
9090
"LogicalAnd operands should be bool");
9191
return IntegerType::get(Ctx, 1);
92+
case VPInstruction::Broadcast:
9293
case VPInstruction::PtrAdd:
9394
// Return the type based on the pointer argument (i.e. first operand).
9495
return inferScalarType(R->getOperand(0));

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
575575
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
576576
return CondBr;
577577
}
578+
case VPInstruction::Broadcast: {
579+
return Builder.CreateVectorSplat(
580+
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
581+
}
578582
case VPInstruction::ComputeReductionResult: {
579583
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
580584
// and will be removed by breaking up the recipe further.
@@ -790,7 +794,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
790794
case Instruction::ICmp:
791795
case Instruction::Select:
792796
case Instruction::Or:
793-
case VPInstruction::PtrAdd:
794797
// TODO: Cover additional opcodes.
795798
return vputils::onlyFirstLaneUsed(this);
796799
case VPInstruction::ActiveLaneMask:
@@ -801,6 +804,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
801804
case VPInstruction::BranchOnCond:
802805
case VPInstruction::ResumePhi:
803806
return true;
807+
case VPInstruction::PtrAdd:
808+
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
804809
};
805810
llvm_unreachable("switch should return");
806811
}
@@ -873,6 +878,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
873878
case VPInstruction::BranchOnCount:
874879
O << "branch-on-count";
875880
break;
881+
case VPInstruction::Broadcast:
882+
O << "broadcast";
883+
break;
884+
876885
case VPInstruction::ExtractFromEnd:
877886
O << "extract-from-end";
878887
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,3 +2146,34 @@ bool VPlanTransforms::handleUncountableEarlyExit(
21462146
LatchExitingBranch->eraseFromParent();
21472147
return true;
21482148
}
2149+
2150+
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
2151+
VPDominatorTree VPDT;
2152+
VPDT.recalculate(Plan);
2153+
auto *VectorPreheader = Plan.getVectorPreheader();
2154+
VPBuilder Builder(VectorPreheader);
2155+
for (VPValue *LiveIn : Plan.getLiveIns()) {
2156+
if (all_of(LiveIn->users(),
2157+
[LiveIn](VPUser *U) {
2158+
return cast<VPRecipeBase>(U)->usesScalars(LiveIn);
2159+
}) ||
2160+
!LiveIn->getLiveInIRValue() ||
2161+
isa<Constant>(LiveIn->getLiveInIRValue()))
2162+
continue;
2163+
2164+
// Add explicit broadcast if the vector preheader dominates all users.
2165+
// TODO: Find valid insert point for all users.
2166+
if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) {
2167+
return VectorPreheader != cast<VPRecipeBase>(U)->getParent() &&
2168+
VPDT.dominates(VectorPreheader,
2169+
cast<VPRecipeBase>(U)->getParent());
2170+
})) {
2171+
auto *Broadcast =
2172+
Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
2173+
LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U,
2174+
unsigned Idx) {
2175+
return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn);
2176+
});
2177+
}
2178+
}
2179+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ struct VPlanTransforms {
144144
static void
145145
optimizeInductionExitUsers(VPlan &Plan,
146146
DenseMap<VPValue *, VPValue *> &EndValues);
147+
148+
/// Add explicit broadcasts for live-ins used as vectors.
149+
static void materializeBroadcasts(VPlan &Plan);
147150
};
148151

149152
} // namespace llvm

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2022
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2123
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2224
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2325
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2426
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
2527
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,14 +103,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
103103
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
104104
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
105105
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
106+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
107+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
106108
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
107109
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
108110
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
109111
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
110112
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
111113
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
112-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
113-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115115
; CHECK: vector.body:
116116
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
124124
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
125125
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
126126
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
127+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
128+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
127129
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
128130
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
129131
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
130132
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
131133
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
132134
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
133-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136136
; CHECK: [[VECTOR_BODY]]:
137137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -248,21 +248,21 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
248248
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
249249
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
250250
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
251+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
252+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251253
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252254
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253255
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254256
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
255-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
256-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
257-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
257+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
258258
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
259259
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
260260
; CHECK: [[VECTOR_BODY]]:
261261
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
262262
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
263263
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264264
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
265-
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
265+
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
266266
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
267267
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
268268
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
283283
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
284284
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
285285
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
286-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
286+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
287287
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
288288
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
289289
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)