Skip to content

Commit a5891fa

Browse files
authored
[VPlan] Initial modeling of VF * UF as VPValue. (#74761)
This patch starts initial modeling of VF * UF in VPlan. As a first step, it introduces a dedicated VFxUF VPValue, which is then populated during VPlan::prepareToExecute. For now, the VF * UF applies only to the main vector loop region. Once we extend the scope of VPlan in the future, we may want to associate different VFxUFs with different vector loop regions (e.g. the epilogue vector loop). Modeling VF * UF as a VPValue allows explicitly parameterizing recipes that rely on the VF * UF, like the canonical induction increment. At the moment, this mainly helps to avoid generating some duplicated calls to vscale with scalable vectors. It should also allow using EVL as induction increments explicitly in D99750. Referring to VF * UF is also needed in other places that we plan to migrate to VPlan, like the minimum trip count check during skeleton creation. The first version creates the value for VF * UF directly in prepareToExecute to limit the scope of the patch. A follow-on patch will model VF * UF computation explicitly in VPlan using recipes. Moved from Phabricator (https://reviews.llvm.org/D157322).
1 parent 3810342 commit a5891fa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+434
-339
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8645,7 +8645,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
86458645
// Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
86468646
// IV by VF * UF.
86478647
auto *CanonicalIVIncrement =
8648-
new VPInstruction(VPInstruction::CanonicalIVIncrement, {CanonicalIVPHI},
8648+
new VPInstruction(Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()},
86498649
{HasNUW, false}, DL, "index.next");
86508650
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
86518651

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,12 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
741741
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
742742
State.set(&VectorTripCount, VectorTripCountV, Part);
743743

744+
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
745+
// FIXME: Model VF * UF computation completely in VPlan.
746+
State.set(&VFxUF,
747+
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
748+
0);
749+
744750
// When vectorizing the epilogue loop, the canonical induction start value
745751
// needs to be changed from zero to the value after the main vector loop.
746752
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
@@ -752,7 +758,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
752758
return isa<VPScalarIVStepsRecipe>(U) ||
753759
isa<VPDerivedIVRecipe>(U) ||
754760
cast<VPInstruction>(U)->getOpcode() ==
755-
VPInstruction::CanonicalIVIncrement;
761+
Instruction::Add;
756762
}) &&
757763
"the canonical IV should only be used by its increment or "
758764
"ScalarIVSteps when resetting the start value");
@@ -845,6 +851,13 @@ void VPlan::execute(VPTransformState *State) {
845851
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
846852
void VPlan::printLiveIns(raw_ostream &O) const {
847853
VPSlotTracker SlotTracker(this);
854+
855+
if (VFxUF.getNumUsers() > 0) {
856+
O << "\nLive-in ";
857+
VFxUF.printAsOperand(O, SlotTracker);
858+
O << " = VF * UF";
859+
}
860+
848861
if (VectorTripCount.getNumUsers() > 0) {
849862
O << "\nLive-in ";
850863
VectorTripCount.printAsOperand(O, SlotTracker);
@@ -1237,6 +1250,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
12371250
}
12381251

12391252
void VPSlotTracker::assignSlots(const VPlan &Plan) {
1253+
if (Plan.VFxUF.getNumUsers() > 0)
1254+
assignSlot(&Plan.VFxUF);
12401255
assignSlot(&Plan.VectorTripCount);
12411256
if (Plan.BackedgeTakenCount)
12421257
assignSlot(Plan.BackedgeTakenCount);
@@ -1260,6 +1275,11 @@ bool vputils::onlyFirstLaneUsed(VPValue *Def) {
12601275
[Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
12611276
}
12621277

1278+
bool vputils::onlyFirstPartUsed(VPValue *Def) {
1279+
return all_of(Def->users(),
1280+
[Def](VPUser *U) { return U->onlyFirstPartUsed(Def); });
1281+
}
1282+
12631283
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
12641284
ScalarEvolution &SE) {
12651285
if (auto *Expanded = Plan.getSCEVExpansion(Expr))

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,9 +1058,7 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
10581058
SLPStore,
10591059
ActiveLaneMask,
10601060
CalculateTripCountMinusVF,
1061-
CanonicalIVIncrement,
1062-
// The next op is similar to the above, but instead increment the
1063-
// canonical IV separately for each unrolled part.
1061+
// Increment the canonical IV separately for each unrolled part.
10641062
CanonicalIVIncrementForPart,
10651063
BranchOnCount,
10661064
BranchOnCond
@@ -1168,13 +1166,27 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
11681166
return false;
11691167
case VPInstruction::ActiveLaneMask:
11701168
case VPInstruction::CalculateTripCountMinusVF:
1171-
case VPInstruction::CanonicalIVIncrement:
11721169
case VPInstruction::CanonicalIVIncrementForPart:
11731170
case VPInstruction::BranchOnCount:
11741171
return true;
11751172
};
11761173
llvm_unreachable("switch should return");
11771174
}
1175+
1176+
/// Returns true if the recipe only uses the first part of operand \p Op.
1177+
bool onlyFirstPartUsed(const VPValue *Op) const override {
1178+
assert(is_contained(operands(), Op) &&
1179+
"Op must be an operand of the recipe");
1180+
if (getOperand(0) != Op)
1181+
return false;
1182+
switch (getOpcode()) {
1183+
default:
1184+
return false;
1185+
case VPInstruction::BranchOnCount:
1186+
return true;
1187+
};
1188+
llvm_unreachable("switch should return");
1189+
}
11781190
};
11791191

11801192
/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -2126,6 +2138,13 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
21262138
return true;
21272139
}
21282140

2141+
/// Returns true if the recipe only uses the first part of operand \p Op.
2142+
bool onlyFirstPartUsed(const VPValue *Op) const override {
2143+
assert(is_contained(operands(), Op) &&
2144+
"Op must be an operand of the recipe");
2145+
return true;
2146+
}
2147+
21292148
/// Check if the induction described by \p Kind, \p Start and \p Step is
21302149
/// canonical, i.e. has the same start, step (of 1), and type as the
21312150
/// canonical IV.
@@ -2545,6 +2564,9 @@ class VPlan {
25452564
/// Represents the vector trip count.
25462565
VPValue VectorTripCount;
25472566

2567+
/// Represents the loop-invariant VF * UF of the vector loop region.
2568+
VPValue VFxUF;
2569+
25482570
/// Holds a mapping between Values and their corresponding VPValue inside
25492571
/// VPlan.
25502572
Value2VPValueTy Value2VPValue;
@@ -2624,6 +2646,9 @@ class VPlan {
26242646
/// The vector trip count.
26252647
VPValue &getVectorTripCount() { return VectorTripCount; }
26262648

2649+
/// Returns VF * UF of the vector loop region.
2650+
VPValue &getVFxUF() { return VFxUF; }
2651+
26272652
/// Mark the plan to indicate that using Value2VPValue is not safe any
26282653
/// longer, because it may be stale.
26292654
void disableValue2VPValue() { Value2VPValueEnabled = false; }
@@ -3054,6 +3079,9 @@ namespace vputils {
30543079
/// Returns true if only the first lane of \p Def is used.
30553080
bool onlyFirstLaneUsed(VPValue *Def);
30563081

3082+
/// Returns true if only the first part of \p Def is used.
3083+
bool onlyFirstPartUsed(VPValue *Def);
3084+
30573085
/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
30583086
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
30593087
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
120120
case Instruction::ICmp:
121121
case VPInstruction::Not:
122122
case VPInstruction::CalculateTripCountMinusVF:
123-
case VPInstruction::CanonicalIVIncrement:
124123
case VPInstruction::CanonicalIVIncrementForPart:
125124
return false;
126125
default:
@@ -272,9 +271,16 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
272271
Builder.SetCurrentDebugLocation(getDebugLoc());
273272

274273
if (Instruction::isBinaryOp(getOpcode())) {
274+
if (Part != 0 && vputils::onlyFirstPartUsed(this))
275+
return State.get(this, 0);
276+
275277
Value *A = State.get(getOperand(0), Part);
276278
Value *B = State.get(getOperand(1), Part);
277-
return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
279+
auto *Res =
280+
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
281+
if (auto *I = dyn_cast<Instruction>(Res))
282+
setFlags(I);
283+
return Res;
278284
}
279285

280286
switch (getOpcode()) {
@@ -335,25 +341,6 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
335341
Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
336342
return Builder.CreateSelect(Cmp, Sub, Zero);
337343
}
338-
case VPInstruction::CanonicalIVIncrement: {
339-
if (Part == 0) {
340-
auto *Phi = State.get(getOperand(0), 0);
341-
// The loop step is equal to the vectorization factor (num of SIMD
342-
// elements) times the unroll factor (num of SIMD instructions).
343-
Value *Step;
344-
{
345-
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
346-
IRBuilder<> PHBuilder(VectorPH->getTerminator());
347-
// Step is loop-invariant, calls to vscale will be placed in the
348-
// preheader.
349-
Step = createStepForVF(PHBuilder, Phi->getType(), State.VF, State.UF);
350-
}
351-
return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
352-
hasNoSignedWrap());
353-
}
354-
return State.get(this, 0);
355-
}
356-
357344
case VPInstruction::CanonicalIVIncrementForPart: {
358345
auto *IV = State.get(getOperand(0), VPIteration(0, 0));
359346
if (Part == 0)
@@ -474,9 +461,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
474461
case VPInstruction::FirstOrderRecurrenceSplice:
475462
O << "first-order splice";
476463
break;
477-
case VPInstruction::CanonicalIVIncrement:
478-
O << "VF * UF +";
479-
break;
480464
case VPInstruction::BranchOnCond:
481465
O << "branch-on-cond";
482466
break;

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,14 @@ class VPUser {
303303
"Op must be an operand of the recipe");
304304
return false;
305305
}
306+
307+
/// Returns true if the VPUser only uses the first part of operand \p Op.
308+
/// Conservatively returns false.
309+
virtual bool onlyFirstPartUsed(const VPValue *Op) const {
310+
assert(is_contained(operands(), Op) &&
311+
"Op must be an operand of the recipe");
312+
return false;
313+
}
306314
};
307315

308316
/// This class augments a recipe with a set of VPValues defined by the recipe.

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
6060
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
6161
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
6262
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
63-
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
6463
; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
6564
; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
65+
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
6666
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
6767
; TFCOMMON: vector.body:
6868
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -161,9 +161,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
161161
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
162162
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
163163
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
164-
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
165164
; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
166165
; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
166+
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
167167
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
168168
; TFCOMMON: vector.body:
169169
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -283,9 +283,9 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
283283
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
284284
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
285285
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
286-
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
287286
; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
288287
; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2
288+
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
289289
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
290290
; TFCOMMON: vector.body:
291291
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -516,9 +516,9 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
516516
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
517517
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
518518
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
519-
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
520519
; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
521520
; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
521+
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
522522
; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]]
523523
; TFALWAYS: vector.body:
524524
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -546,9 +546,9 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
546546
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
547547
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
548548
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
549-
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
550549
; TFFALLBACK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
551550
; TFFALLBACK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
551+
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
552552
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
553553
; TFFALLBACK: vector.body:
554554
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -599,10 +599,10 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
599599
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
600600
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
601601
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
602-
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
603-
; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
604602
; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
605603
; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
604+
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
605+
; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
606606
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
607607
; TFNONE: vector.body:
608608
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -651,11 +651,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
651651
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
652652
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
653653
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
654+
; TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
655+
; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
654656
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
655657
; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
656658
; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
657-
; TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
658-
; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
659659
; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]]
660660
; TFALWAYS: vector.body:
661661
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -688,11 +688,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
688688
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
689689
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
690690
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
691+
; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
692+
; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
691693
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
692694
; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
693695
; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
694-
; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
695-
; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
696696
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
697697
; TFFALLBACK: vector.body:
698698
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ define void @foo() {
1818
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
1919
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
2020
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
21+
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
22+
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
2123
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
2224
; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
2325
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
@@ -27,8 +29,6 @@ define void @foo() {
2729
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
2830
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
2931
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
30-
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
31-
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
3232
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3333
; CHECK: vector.body:
3434
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ]

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
2424
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]]
2525
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
2626
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
27+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
28+
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
2729
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
2830
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
2931
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
@@ -35,8 +37,6 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
3537
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]]
3638
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
3739
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
38-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
39-
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
4040
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4141
; CHECK: vector.body:
4242
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

0 commit comments

Comments
 (0)