Skip to content

Commit 9bc45ca

Browse files
fhahn authored and agozillon committed
[VPlan] Update VPInst::onlyFirstLaneUsed to check users. (llvm#80269)
A VPInstruction only has its first lane used if all of its users use only its first lane. Use vputils::onlyFirstLaneUsed to continue checking the recipe's users, which handles more cases. Besides allowing scalar steps to be introduced in additional cases when interleaving, this also enables using an Add VPInstruction to model the increment in a follow-up change.
1 parent 874ad23 commit 9bc45ca

File tree

4 files changed

+128
-135
lines changed

4 files changed

+128
-135
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,22 +1257,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
12571257
}
12581258

12591259
/// Returns true if the recipe only uses the first lane of operand \p Op.
1260-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1261-
assert(is_contained(operands(), Op) &&
1262-
"Op must be an operand of the recipe");
1263-
if (getOperand(0) != Op)
1264-
return false;
1265-
switch (getOpcode()) {
1266-
default:
1267-
return false;
1268-
case VPInstruction::ActiveLaneMask:
1269-
case VPInstruction::CalculateTripCountMinusVF:
1270-
case VPInstruction::CanonicalIVIncrementForPart:
1271-
case VPInstruction::BranchOnCount:
1272-
return true;
1273-
};
1274-
llvm_unreachable("switch should return");
1275-
}
1260+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
12761261

12771262
/// Returns true if the recipe only uses the first part of operand \p Op.
12781263
bool onlyFirstPartUsed(const VPValue *Op) const override {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,26 @@ void VPInstruction::execute(VPTransformState &State) {
515515
State.set(this, GeneratedValue, Part);
516516
}
517517
}
518+
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
519+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
520+
if (Instruction::isBinaryOp(getOpcode()))
521+
return vputils::onlyFirstLaneUsed(this);
522+
523+
switch (getOpcode()) {
524+
default:
525+
return false;
526+
case Instruction::ICmp:
527+
// TODO: Cover additional opcodes.
528+
return vputils::onlyFirstLaneUsed(this);
529+
case VPInstruction::ActiveLaneMask:
530+
case VPInstruction::CalculateTripCountMinusVF:
531+
case VPInstruction::CanonicalIVIncrementForPart:
532+
case VPInstruction::BranchOnCount:
533+
// TODO: Cover additional operands.
534+
return getOperand(0) == Op;
535+
};
536+
llvm_unreachable("switch should return");
537+
}
518538

519539
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
520540
void VPInstruction::dump() const {

llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll

Lines changed: 76 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ define void @pr45679(ptr %A) optsize {
6767
; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
6868
; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
6969
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
70-
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
70+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
7171
; CHECK: exit:
7272
; CHECK-NEXT: ret void
7373
;
@@ -129,7 +129,7 @@ define void @pr45679(ptr %A) optsize {
129129
; VF2UF2-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
130130
; VF2UF2-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
131131
; VF2UF2-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
132-
; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
132+
; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
133133
; VF2UF2: exit:
134134
; VF2UF2-NEXT: ret void
135135
;
@@ -139,46 +139,42 @@ define void @pr45679(ptr %A) optsize {
139139
; VF1UF4: vector.ph:
140140
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
141141
; VF1UF4: vector.body:
142-
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
143-
; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
144-
; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i32 [[INDEX]], 1
145-
; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i32 [[INDEX]], 2
146-
; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 3
147-
; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[VEC_IV]], 13
148-
; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV4]], 13
149-
; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV5]], 13
150-
; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], 13
151-
; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
142+
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
143+
; VF1UF4-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
144+
; VF1UF4-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
145+
; VF1UF4-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
146+
; VF1UF4-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
147+
; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
148+
; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
149+
; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
150+
; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
151+
; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
152152
; VF1UF4: pred.store.if:
153-
; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
154-
; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDUCTION]]
155-
; VF1UF4-NEXT: store i32 13, ptr [[TMP4]], align 1
153+
; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
154+
; VF1UF4-NEXT: store i32 13, ptr [[TMP8]], align 1
156155
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
157156
; VF1UF4: pred.store.continue:
158-
; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
159-
; VF1UF4: pred.store.if4:
160-
; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
161-
; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION1]]
162-
; VF1UF4-NEXT: store i32 13, ptr [[TMP5]], align 1
163-
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]]
164-
; VF1UF4: pred.store.continue5:
165-
; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
166-
; VF1UF4: pred.store.if6:
167-
; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2
168-
; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION2]]
169-
; VF1UF4-NEXT: store i32 13, ptr [[TMP6]], align 1
170-
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]]
171-
; VF1UF4: pred.store.continue7:
172-
; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
173-
; VF1UF4: pred.store.if8:
174-
; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3
175-
; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION3]]
176-
; VF1UF4-NEXT: store i32 13, ptr [[TMP7]], align 1
177-
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]]
178-
; VF1UF4: pred.store.continue9:
157+
; VF1UF4-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
158+
; VF1UF4: pred.store.if1:
159+
; VF1UF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP1]]
160+
; VF1UF4-NEXT: store i32 13, ptr [[TMP9]], align 1
161+
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE2]]
162+
; VF1UF4: pred.store.continue2:
163+
; VF1UF4-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
164+
; VF1UF4: pred.store.if3:
165+
; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP2]]
166+
; VF1UF4-NEXT: store i32 13, ptr [[TMP10]], align 1
167+
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE4]]
168+
; VF1UF4: pred.store.continue4:
169+
; VF1UF4-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
170+
; VF1UF4: pred.store.if5:
171+
; VF1UF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP3]]
172+
; VF1UF4-NEXT: store i32 13, ptr [[TMP11]], align 1
173+
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]]
174+
; VF1UF4: pred.store.continue6:
179175
; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
180-
; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
181-
; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
176+
; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
177+
; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
182178
; VF1UF4: middle.block:
183179
; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
184180
; VF1UF4: scalar.ph:
@@ -190,7 +186,7 @@ define void @pr45679(ptr %A) optsize {
190186
; VF1UF4-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
191187
; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
192188
; VF1UF4-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
193-
; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
189+
; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
194190
; VF1UF4: exit:
195191
; VF1UF4-NEXT: ret void
196192
;
@@ -356,54 +352,50 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
356352
; VF1UF4: vector.ph:
357353
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
358354
; VF1UF4: vector.body:
359-
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
360-
; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
361-
; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
362-
; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
363-
; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
364-
; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 13
365-
; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 13
366-
; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 13
367-
; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 13
368-
; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
355+
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
356+
; VF1UF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
357+
; VF1UF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
358+
; VF1UF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
359+
; VF1UF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
360+
; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
361+
; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
362+
; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
363+
; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
364+
; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
369365
; VF1UF4: pred.store.if:
370-
; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
371-
; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDUCTION]]
372-
; VF1UF4-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
373-
; VF1UF4-NEXT: store i64 [[TMP5]], ptr [[B:%.*]], align 8
366+
; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
367+
; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
368+
; VF1UF4-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8
374369
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
375370
; VF1UF4: pred.store.continue:
376-
; VF1UF4-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ]
377-
; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
378-
; VF1UF4: pred.store.if4:
379-
; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
380-
; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION1]]
381-
; VF1UF4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
382-
; VF1UF4-NEXT: store i64 [[TMP8]], ptr [[B]], align 8
383-
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]]
384-
; VF1UF4: pred.store.continue5:
385-
; VF1UF4-NEXT: [[TMP9:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP8]], [[PRED_STORE_IF7]] ]
386-
; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
387-
; VF1UF4: pred.store.if6:
388-
; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
389-
; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION2]]
390-
; VF1UF4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8
391-
; VF1UF4-NEXT: store i64 [[TMP11]], ptr [[B]], align 8
392-
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]]
393-
; VF1UF4: pred.store.continue7:
394-
; VF1UF4-NEXT: [[TMP12:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE8]] ], [ [[TMP11]], [[PRED_STORE_IF9]] ]
395-
; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
396-
; VF1UF4: pred.store.if8:
397-
; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
398-
; VF1UF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION3]]
399-
; VF1UF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
400-
; VF1UF4-NEXT: store i64 [[TMP14]], ptr [[B]], align 8
401-
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]]
402-
; VF1UF4: pred.store.continue9:
403-
; VF1UF4-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE10]] ], [ [[TMP14]], [[PRED_STORE_IF11]] ]
371+
; VF1UF4-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
372+
; VF1UF4-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
373+
; VF1UF4: pred.store.if1:
374+
; VF1UF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
375+
; VF1UF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
376+
; VF1UF4-NEXT: store i64 [[TMP12]], ptr [[B]], align 8
377+
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE2]]
378+
; VF1UF4: pred.store.continue2:
379+
; VF1UF4-NEXT: [[TMP13:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP12]], [[PRED_STORE_IF1]] ]
380+
; VF1UF4-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
381+
; VF1UF4: pred.store.if3:
382+
; VF1UF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
383+
; VF1UF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8
384+
; VF1UF4-NEXT: store i64 [[TMP15]], ptr [[B]], align 8
385+
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE4]]
386+
; VF1UF4: pred.store.continue4:
387+
; VF1UF4-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
388+
; VF1UF4-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
389+
; VF1UF4: pred.store.if5:
390+
; VF1UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
391+
; VF1UF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8
392+
; VF1UF4-NEXT: store i64 [[TMP18]], ptr [[B]], align 8
393+
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]]
394+
; VF1UF4: pred.store.continue6:
395+
; VF1UF4-NEXT: [[TMP19:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ]
404396
; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
405-
; VF1UF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
406-
; VF1UF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
397+
; VF1UF4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
398+
; VF1UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
407399
; VF1UF4: middle.block:
408400
; VF1UF4-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
409401
; VF1UF4: scalar.ph:
@@ -416,7 +408,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
416408
; VF1UF4-NEXT: store i64 [[V]], ptr [[B]], align 8
417409
; VF1UF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
418410
; VF1UF4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
419-
; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
411+
; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
420412
; VF1UF4: for.end:
421413
; VF1UF4-NEXT: ret void
422414
;

0 commit comments

Comments
 (0)