Skip to content

Commit 92210ef

Browse files
committed
[VPlan] Introduce explicit broadcasts for live-ins.
Add a new VPInstruction::Broadcast opcode and use it to materialize explicit broadcasts of live-ins. The initial patch only materializes the broadcasts if the vector preheader dominates all uses that need it. Later patches will pick the best valid insert point, thus retiring implicit hoisting of broadcasts from VPTransformState::get().
1 parent ec7167b commit 92210ef

35 files changed

+360
-271
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7679,7 +7679,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76797679
((VectorizingEpilogue && ExpandedSCEVs) ||
76807680
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
76817681
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7682-
7682+
VPlanTransforms::materializeBroadcasts(BestVPlan);
76837683
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
76847684
// cost model is complete for better cost estimates.
76857685
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
866866
CanonicalIVIncrementForPart,
867867
BranchOnCount,
868868
BranchOnCond,
869+
Broadcast,
869870
ComputeReductionResult,
870871
// Takes the VPValue to extract from as first operand and the lane or part
871872
// to extract as second operand, counting from the end starting with 1 for
@@ -1511,6 +1512,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
15111512
bool isInvariantCond() const {
15121513
return getCond()->isDefinedOutsideLoopRegions();
15131514
}
1515+
1516+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1517+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1518+
assert(is_contained(operands(), Op) &&
1519+
"Op must be an operand of the recipe");
1520+
return Op == getCond() && isInvariantCond();
1521+
}
15141522
};
15151523

15161524
/// A recipe for handling GEP instructions.
@@ -1558,6 +1566,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
15581566
void print(raw_ostream &O, const Twine &Indent,
15591567
VPSlotTracker &SlotTracker) const override;
15601568
#endif
1569+
1570+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1571+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1572+
assert(is_contained(operands(), Op) &&
1573+
"Op must be an operand of the recipe");
1574+
return Op == getOperand(0);
1575+
}
15611576
};
15621577

15631578
/// A recipe to compute the pointers for widened memory accesses of IndexTy
@@ -1873,6 +1888,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18731888
VPValue *getLastUnrolledPartOperand() {
18741889
return getNumOperands() == 5 ? getOperand(4) : this;
18751890
}
1891+
1892+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1893+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1894+
assert(is_contained(operands(), Op) &&
1895+
"Op must be an operand of the recipe");
1896+
return Op == getStartValue();
1897+
}
18761898
};
18771899

18781900
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
@@ -1905,6 +1927,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
19051927
/// Returns true if only scalar values will be generated.
19061928
bool onlyScalarsGenerated(bool IsScalable);
19071929

1930+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1931+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1932+
assert(is_contained(operands(), Op) &&
1933+
"Op must be an operand of the recipe");
1934+
return Op == getOperand(0);
1935+
}
1936+
19081937
/// Returns the VPValue representing the value of this induction at
19091938
/// the first unrolled part, if it exists. Returns itself if unrolling did not
19101939
/// take place.
@@ -2033,6 +2062,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
20332062
void print(raw_ostream &O, const Twine &Indent,
20342063
VPSlotTracker &SlotTracker) const override;
20352064
#endif
2065+
2066+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2067+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2068+
assert(is_contained(operands(), Op) &&
2069+
"Op must be an operand of the recipe");
2070+
return Op == getStartValue();
2071+
}
20362072
};
20372073

20382074
/// A recipe for handling reduction phis. The start value is the first operand
@@ -2099,6 +2135,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
20992135

21002136
/// Returns true, if the phi is part of an in-loop reduction.
21012137
bool isInLoop() const { return IsInLoop; }
2138+
2139+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2140+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2141+
assert(is_contained(operands(), Op) &&
2142+
"Op must be an operand of the recipe");
2143+
return Op == getStartValue();
2144+
}
21022145
};
21032146

21042147
/// A recipe for forming partial reductions. In the loop, an accumulator and
@@ -3716,6 +3759,8 @@ class VPlan {
37163759
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
37173760
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
37183761

3762+
ArrayRef<VPValue *> getLiveIns() const { return VPLiveInsToFree; }
3763+
37193764
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
37203765
/// Print the live-ins of this VPlan to \p O.
37213766
void printLiveIns(raw_ostream &O) const;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
9090
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
9191
"LogicalAnd operands should be bool");
9292
return IntegerType::get(Ctx, 1);
93+
case VPInstruction::Broadcast:
9394
case VPInstruction::PtrAdd:
9495
// Return the type based on the pointer argument (i.e. first operand).
9596
return inferScalarType(R->getOperand(0));

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
577577
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
578578
return CondBr;
579579
}
580+
case VPInstruction::Broadcast: {
581+
return Builder.CreateVectorSplat(
582+
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
583+
}
580584
case VPInstruction::ComputeReductionResult: {
581585
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
582586
// and will be removed by breaking up the recipe further.
@@ -814,7 +818,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
814818
case Instruction::ICmp:
815819
case Instruction::Select:
816820
case Instruction::Or:
817-
case VPInstruction::PtrAdd:
818821
// TODO: Cover additional opcodes.
819822
return vputils::onlyFirstLaneUsed(this);
820823
case VPInstruction::ActiveLaneMask:
@@ -825,6 +828,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
825828
case VPInstruction::BranchOnCond:
826829
case VPInstruction::ResumePhi:
827830
return true;
831+
case VPInstruction::PtrAdd:
832+
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
828833
};
829834
llvm_unreachable("switch should return");
830835
}
@@ -897,6 +902,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
897902
case VPInstruction::BranchOnCount:
898903
O << "branch-on-count";
899904
break;
905+
case VPInstruction::Broadcast:
906+
O << "broadcast";
907+
break;
908+
900909
case VPInstruction::ExtractFromEnd:
901910
O << "extract-from-end";
902911
break;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,3 +2154,34 @@ void VPlanTransforms::handleUncountableEarlyExit(
21542154
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21552155
LatchExitingBranch->eraseFromParent();
21562156
}
2157+
2158+
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
2159+
VPDominatorTree VPDT;
2160+
VPDT.recalculate(Plan);
2161+
auto *VectorPreheader = Plan.getVectorPreheader();
2162+
VPBuilder Builder(VectorPreheader);
2163+
for (VPValue *LiveIn : Plan.getLiveIns()) {
2164+
if (all_of(LiveIn->users(),
2165+
[LiveIn](VPUser *U) {
2166+
return cast<VPRecipeBase>(U)->usesScalars(LiveIn);
2167+
}) ||
2168+
!LiveIn->getLiveInIRValue() ||
2169+
isa<Constant>(LiveIn->getLiveInIRValue()))
2170+
continue;
2171+
2172+
// Add explicit broadcast if the vector preheader dominates all users.
2173+
// TODO: Find valid insert point for all users.
2174+
if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) {
2175+
return VectorPreheader != cast<VPRecipeBase>(U)->getParent() &&
2176+
VPDT.dominates(VectorPreheader,
2177+
cast<VPRecipeBase>(U)->getParent());
2178+
})) {
2179+
auto *Broadcast =
2180+
Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
2181+
LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U,
2182+
unsigned Idx) {
2183+
return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn);
2184+
});
2185+
}
2186+
}
2187+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,9 @@ struct VPlanTransforms {
169169
static void
170170
optimizeInductionExitUsers(VPlan &Plan,
171171
DenseMap<VPValue *, VPValue *> &EndValues);
172+
173+
/// Add explicit broadcasts for live-ins used as vectors.
174+
static void materializeBroadcasts(VPlan &Plan);
172175
};
173176

174177
} // namespace llvm

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
21+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2022
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2123
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2224
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2325
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2426
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
2527
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2828
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,14 +103,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
103103
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
104104
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
105105
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
106+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
107+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
106108
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
107109
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
108110
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
109111
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
110112
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
111113
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
112-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
113-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114114
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
115115
; CHECK: vector.body:
116116
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
124124
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
125125
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
126126
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
127+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
128+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
127129
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
128130
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
129131
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
130132
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
131133
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
132134
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
133-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
134-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
135135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136136
; CHECK: [[VECTOR_BODY]]:
137137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -248,21 +248,21 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
248248
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
249249
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
250250
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
251+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
252+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251253
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252254
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253255
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254256
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
255-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
256-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
257-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
257+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
258258
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
259259
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
260260
; CHECK: [[VECTOR_BODY]]:
261261
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
262262
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
263263
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264264
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
265-
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
265+
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
266266
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
267267
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
268268
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
283283
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
284284
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
285285
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
286-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
286+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
287287
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
288288
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
289289
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)