Skip to content

Commit b34c7c0

Browse files
committed
[LV] Use the original trip count as the vector trip count when using predicated EVL instructions for tail-folding.
1 parent 809f857 commit b34c7c0

30 files changed

+306
-886
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2425,9 +2425,19 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
24252425
return VectorTripCount;
24262426

24272427
Value *TC = getTripCount();
2428+
Type *Ty = TC->getType();
24282429
IRBuilder<> Builder(InsertBlock->getTerminator());
24292430

2430-
Type *Ty = TC->getType();
2431+
// Use the original trip count as the vector trip count when predicated EVL
2432+
// instructions are used for tail-folding.
2433+
if (VF.isVector() &&
2434+
Cost->getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) {
2435+
assert(!Cost->requiresScalarEpilogue(true) &&
2436+
"Use predicated EVL instructions for tail-folding does not allow "
2437+
"scalar epilogue");
2438+
return TC;
2439+
}
2440+
24312441
// This is where we can make the step a runtime constant.
24322442
Value *Step = createStepForVF(Builder, Ty, VF, UF);
24332443

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2080,8 +2080,9 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
20802080
EVLPhi->insertAfter(CanonicalIVPHI);
20812081
VPBuilder Builder(Header, Header->getFirstNonPhi());
20822082
// Compute original TC - IV as the AVL (application vector length).
2083-
VPValue *AVL = Builder.createNaryOp(
2084-
Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
2083+
VPValue *AVL = Builder.createNaryOp(Instruction::Sub,
2084+
{&Plan.getVectorTripCount(), EVLPhi},
2085+
DebugLoc(), "avl");
20852086
if (MaxSafeElements) {
20862087
// Support for MaxSafeDist for correct loop emission.
20872088
VPValue *AVLSafe = Plan.getOrAddLiveIn(
@@ -2114,7 +2115,9 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
21142115
// Replace all uses of VPCanonicalIVPHIRecipe by
21152116
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
21162117
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
2118+
CanonicalIVIncrement->replaceAllUsesWith(NextEVLIV);
21172119
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
2120+
CanonicalIVPHI->setOperand(1, CanonicalIVIncrement);
21182121
// TODO: support unroll factor > 1.
21192122
Plan.setUF(1);
21202123
return true;

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,14 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
156156
errs() << "EVL is used as an operand in non-VPInstruction::Add\n";
157157
return false;
158158
}
159-
if (I->getNumUsers() != 1) {
160-
errs() << "EVL is used in VPInstruction:Add with multiple "
161-
"users\n";
162-
return false;
163-
}
164-
if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
165-
errs() << "Result of VPInstruction::Add with EVL operand is "
166-
"not used by VPEVLBasedIVPHIRecipe\n";
159+
if (!all_of(I->users(), [](VPUser *U) {
160+
if (auto *VPI = dyn_cast<VPInstruction>(U))
161+
return VPI->getOpcode() == VPInstruction::BranchOnCount;
162+
return isa<VPEVLBasedIVPHIRecipe>(U);
163+
})) {
164+
errs()
165+
<< "Result of VPInstruction::Add with EVL operand is not used "
166+
"by VPEVLBasedIVPHIRecipe or VPInstruction::BranchOnCount\n";
167167
return false;
168168
}
169169
return true;

llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
122122
; IF-EVL-OUTLOOP: for.body.preheader:
123123
; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
124124
; IF-EVL-OUTLOOP: vector.ph:
125-
; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
126-
; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
127-
; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
128-
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP2]]
129-
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
130-
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
131125
; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
132126
; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 4
133127
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -145,13 +139,13 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
145139
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
146140
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
147141
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
148-
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
142+
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
149143
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
150144
; IF-EVL-OUTLOOP: middle.block:
151145
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
152146
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
153147
; IF-EVL-OUTLOOP: scalar.ph:
154-
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
148+
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
155149
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
156150
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
157151
; IF-EVL-OUTLOOP: for.body:
@@ -178,12 +172,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
178172
; IF-EVL-INLOOP: for.body.preheader:
179173
; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
180174
; IF-EVL-INLOOP: vector.ph:
181-
; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
182-
; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8
183-
; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
184-
; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP2]]
185-
; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
186-
; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
187175
; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
188176
; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 8
189177
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -201,12 +189,12 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
201189
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
202190
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
203191
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
204-
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
192+
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
205193
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
206194
; IF-EVL-INLOOP: middle.block:
207195
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
208196
; IF-EVL-INLOOP: scalar.ph:
209-
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
197+
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
210198
; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
211199
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
212200
; IF-EVL-INLOOP: for.body:
@@ -350,12 +338,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
350338
; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
351339
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
352340
; IF-EVL-OUTLOOP: vector.ph:
353-
; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
354-
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
355-
; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
356-
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
357-
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
358-
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
359341
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
360342
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
361343
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -376,13 +358,13 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
376358
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
377359
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
378360
; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
379-
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
361+
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
380362
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
381363
; IF-EVL-OUTLOOP: middle.block:
382364
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
383365
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
384366
; IF-EVL-OUTLOOP: scalar.ph:
385-
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
367+
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
386368
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
387369
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
388370
; IF-EVL-OUTLOOP: for.body:
@@ -407,12 +389,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
407389
; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
408390
; IF-EVL-INLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
409391
; IF-EVL-INLOOP: vector.ph:
410-
; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
411-
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
412-
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
413-
; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
414-
; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
415-
; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
416392
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
417393
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
418394
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -430,12 +406,12 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
430406
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
431407
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
432408
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
433-
; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
409+
; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
434410
; IF-EVL-INLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
435411
; IF-EVL-INLOOP: middle.block:
436412
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
437413
; IF-EVL-INLOOP: scalar.ph:
438-
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
414+
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
439415
; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
440416
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
441417
; IF-EVL-INLOOP: for.body:

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,6 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
1111
; CHECK-NEXT: [[ENTRY:.*]]:
1212
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1313
; CHECK: [[VECTOR_PH]]:
14-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
15-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
16-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
17-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 9, [[TMP2]]
18-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
19-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
2014
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
2115
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
2216
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -36,12 +30,12 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
3630
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
3731
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
3832
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
39-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
33+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 9
4034
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4135
; CHECK: [[MIDDLE_BLOCK]]:
4236
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
4337
; CHECK: [[SCALAR_PH]]:
44-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
38+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
4539
; CHECK-NEXT: br label %[[LOOP:.*]]
4640
; CHECK: [[LOOP]]:
4741
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,6 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
2424
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
2525
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
2626
; CHECK: [[VECTOR_PH]]:
27-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
28-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
29-
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
30-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP8]]
31-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
32-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
3327
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
3428
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
3529
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x ptr> poison, ptr [[DSTV]], i64 0
@@ -56,12 +50,12 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
5650
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
5751
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
5852
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
59-
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
53+
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]]
6054
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
6155
; CHECK: [[MIDDLE_BLOCK]]:
6256
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
6357
; CHECK: [[SCALAR_PH]]:
64-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
58+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
6559
; CHECK-NEXT: br label %[[LOOP:.*]]
6660
; CHECK: [[LOOP]]:
6761
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

0 commit comments

Comments
 (0)