Skip to content

Commit 2630805

Browse files
committed
[VPlan] Add new VPInstruction opcode for header mask.
This patch adds a new VPInstruction::HeaderMask opcode to model the abstract header-mask used for tail-folding. It will be lowered depending on target preference (either using active-lane-mask, explicit-vector-length or a wide compare of the canonical IV and the backedge-taken count). Similarly to llvm#82270, it would be good to clarify/agree on the terminology w.r.t. recipes/opcodes that cannot be code-gen'd directly (i.e. require further gradual lowering). NOTE: some tests are failing or needed updating, due to widened IVs being replaced by scalar-steps, as their only use was the earlier wide compare. This could be fixed by either adding a suitable wide canonical IV as operand to the header-mask recipe and exactly preserving the original behavior. Alternatively we could keep the current behavior of the patch and update the tests. Or introduce a wide induction PHI instead of VPWidenCanonicalIVRecipe; currently we *only* use a wide IV for VPWidenCanonicalIVRecipe, if there was a suitable IV in the original loop, *even* if the mask compare is the *only* wide use. Either never or always using a wide PHI would be more consistent (or eventually make a more informed cost-based decision).
1 parent c93f029 commit 2630805

File tree

9 files changed

+277
-300
lines changed

9 files changed

+277
-300
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8035,21 +8035,14 @@ void VPRecipeBuilder::createHeaderMask() {
80358035
return;
80368036
}
80378037

8038-
// Introduce the early-exit compare IV <= BTC to form header block mask.
8039-
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
8040-
// constructing the desired canonical IV in the header block as its first
8041-
// non-phi instructions.
8042-
8038+
// Introduce an abstract header-mask VPInstruction. This will be lowered later
8039+
// depending on target preference.
80438040
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
80448041
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
8045-
auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
8046-
HeaderVPBB->insert(IV, NewInsertionPoint);
8047-
80488042
VPBuilder::InsertPointGuard Guard(Builder);
80498043
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
8050-
VPValue *BlockMask = nullptr;
8051-
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
8052-
BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
8044+
VPValue *BlockMask =
8045+
Builder.createNaryOp(VPInstruction::HeaderMask, {Plan.getCanonicalIV()});
80538046
BlockMaskCache[Header] = BlockMask;
80548047
}
80558048

@@ -8555,6 +8548,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
85558548
// TODO: try to put it close to addActiveLaneMask().
85568549
if (CM.foldTailWithEVL())
85578550
VPlanTransforms::addExplicitVectorLength(*Plan);
8551+
VPlanTransforms::lowerRecipes(*Plan);
85588552
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
85598553
VPlans.push_back(std::move(Plan));
85608554
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,9 @@ class VPInstruction : public VPRecipeWithIRFlags {
11801180
// operand). Only generates scalar values (either for the first lane only or
11811181
// for all lanes, depending on its uses).
11821182
PtrAdd,
1183+
// An abstract representation of the vector loop's header mask, to be lowered
1184+
// later depending on target preference.
1185+
HeaderMask,
11831186
};
11841187

11851188
private:

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
132132
case VPInstruction::CalculateTripCountMinusVF:
133133
case VPInstruction::CanonicalIVIncrementForPart:
134134
case VPInstruction::PtrAdd:
135+
case VPInstruction::HeaderMask:
135136
return false;
136137
default:
137138
return true;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 72 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -434,44 +434,6 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
434434
}
435435
}
436436

437-
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
438-
/// recipe, if it exists.
439-
static void removeRedundantCanonicalIVs(VPlan &Plan) {
440-
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
441-
VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
442-
for (VPUser *U : CanonicalIV->users()) {
443-
WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
444-
if (WidenNewIV)
445-
break;
446-
}
447-
448-
if (!WidenNewIV)
449-
return;
450-
451-
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
452-
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
453-
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
454-
455-
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
456-
WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
457-
continue;
458-
459-
// Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
460-
// everything WidenNewIV's users need. That is, WidenOriginalIV will
461-
// generate a vector phi or all users of WidenNewIV demand the first lane
462-
// only.
463-
if (any_of(WidenOriginalIV->users(),
464-
[WidenOriginalIV](VPUser *U) {
465-
return !U->usesScalars(WidenOriginalIV);
466-
}) ||
467-
vputils::onlyFirstLaneUsed(WidenNewIV)) {
468-
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
469-
WidenNewIV->eraseFromParent();
470-
return;
471-
}
472-
}
473-
}
474-
475437
/// Returns true if \p R is dead and can be removed.
476438
static bool isDeadRecipe(VPRecipeBase &R) {
477439
using namespace llvm::PatternMatch;
@@ -1086,7 +1048,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
10861048
}
10871049

10881050
void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
1089-
removeRedundantCanonicalIVs(Plan);
10901051
removeRedundantInductionCasts(Plan);
10911052

10921053
simplifyRecipes(Plan, SE.getContext());
@@ -1203,52 +1164,32 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
12031164
return LaneMaskPhi;
12041165
}
12051166

1206-
/// Collect all VPValues representing a header mask through the (ICMP_ULE,
1207-
/// WideCanonicalIV, backedge-taken-count) pattern.
1208-
/// TODO: Introduce explicit recipe for header-mask instead of searching
1209-
/// for the header-mask pattern manually.
1210-
static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1211-
SmallVector<VPValue *> WideCanonicalIVs;
1212-
auto *FoundWidenCanonicalIVUser =
1213-
find_if(Plan.getCanonicalIV()->users(),
1214-
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1215-
assert(count_if(Plan.getCanonicalIV()->users(),
1216-
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1217-
1 &&
1218-
"Must have at most one VPWideCanonicalIVRecipe");
1219-
if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
1220-
auto *WideCanonicalIV =
1221-
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1222-
WideCanonicalIVs.push_back(WideCanonicalIV);
1223-
}
1224-
1225-
// Also include VPWidenIntOrFpInductionRecipes that represent a widened
1226-
// version of the canonical induction.
1167+
/// Return the header mask recipe of the VPlan, if there is one.
1168+
static VPInstruction *getHeaderMask(VPlan &Plan) {
12271169
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1228-
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
1229-
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1230-
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
1231-
WideCanonicalIVs.push_back(WidenOriginalIV);
1232-
}
1170+
auto R = find_if(*HeaderVPBB, [](VPRecipeBase &R) {
1171+
using namespace llvm::VPlanPatternMatch;
1172+
return match(&R, m_VPInstruction<VPInstruction::HeaderMask>(m_VPValue()));
1173+
});
1174+
return R == HeaderVPBB->end() ? nullptr : cast<VPInstruction>(&*R);
1175+
}
12331176

1234-
// Walk users of wide canonical IVs and collect to all compares of the form
1235-
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1236-
SmallVector<VPValue *> HeaderMasks;
1237-
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
1238-
for (auto *Wide : WideCanonicalIVs) {
1239-
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
1240-
auto *HeaderMask = dyn_cast<VPInstruction>(U);
1241-
if (!HeaderMask || HeaderMask->getOpcode() != Instruction::ICmp ||
1242-
HeaderMask->getPredicate() != CmpInst::ICMP_ULE ||
1243-
HeaderMask->getOperand(1) != BTC)
1244-
continue;
1177+
static VPValue *getOrCreateWideCanonicalIV(VPlan &Plan,
1178+
VPRecipeBase *InsertPt) {
12451179

1246-
assert(HeaderMask->getOperand(0) == Wide &&
1247-
"WidenCanonicalIV must be the first operand of the compare");
1248-
HeaderMasks.push_back(HeaderMask);
1249-
}
1180+
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1181+
for (VPRecipeBase &R : HeaderVPBB->phis()) {
1182+
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R);
1183+
if (!WideIV || !WideIV->isCanonical() ||
1184+
Plan.getCanonicalIV()->getScalarType() != WideIV->getScalarType())
1185+
continue;
1186+
return WideIV;
1187+
break;
12501188
}
1251-
return HeaderMasks;
1189+
1190+
auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
1191+
IV->insertBefore(InsertPt);
1192+
return IV;
12521193
}
12531194

12541195
void VPlanTransforms::addActiveLaneMask(
@@ -1258,30 +1199,23 @@ void VPlanTransforms::addActiveLaneMask(
12581199
UseActiveLaneMaskForControlFlow) &&
12591200
"DataAndControlFlowWithoutRuntimeCheck implies "
12601201
"UseActiveLaneMaskForControlFlow");
1261-
1262-
auto FoundWidenCanonicalIVUser =
1263-
find_if(Plan.getCanonicalIV()->users(),
1264-
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1265-
assert(FoundWidenCanonicalIVUser &&
1266-
"Must have widened canonical IV when tail folding!");
1267-
auto *WideCanonicalIV =
1268-
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1202+
VPValue *HeaderMask = getHeaderMask(Plan);
1203+
assert(HeaderMask && "Active-lane-mask not needed?");
12691204
VPSingleDefRecipe *LaneMask;
12701205
if (UseActiveLaneMaskForControlFlow) {
12711206
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
12721207
Plan, DataAndControlFlowWithoutRuntimeCheck);
12731208
} else {
1274-
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
1275-
LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
1276-
{WideCanonicalIV, Plan.getTripCount()}, nullptr,
1277-
"active.lane.mask");
1209+
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1210+
VPBuilder B;
1211+
B.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
1212+
LaneMask = B.createNaryOp(
1213+
VPInstruction::ActiveLaneMask,
1214+
{getOrCreateWideCanonicalIV(Plan, &*HeaderVPBB->getFirstNonPhi()),
1215+
Plan.getTripCount()},
1216+
nullptr, "active.lane.mask");
12781217
}
1279-
1280-
// Walk users of WideCanonicalIV and replace all compares of the form
1281-
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
1282-
// active-lane-mask.
1283-
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
1284-
HeaderMask->replaceAllUsesWith(LaneMask);
1218+
HeaderMask->replaceAllUsesWith(LaneMask);
12851219
}
12861220

12871221
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -1307,6 +1241,10 @@ void VPlanTransforms::addActiveLaneMask(
13071241
/// ...
13081242
///
13091243
void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
1244+
VPValue *HeaderMask = getHeaderMask(Plan);
1245+
if (!HeaderMask)
1246+
return;
1247+
13101248
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
13111249
auto *CanonicalIVPHI = Plan.getCanonicalIV();
13121250
VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -1336,31 +1274,30 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
13361274
NextEVLIV->insertBefore(CanonicalIVIncrement);
13371275
EVLPhi->addOperand(NextEVLIV);
13381276

1339-
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1340-
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1341-
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1342-
if (!MemR)
1343-
continue;
1344-
assert(!MemR->isReverse() &&
1345-
"Reversed memory operations not supported yet.");
1346-
VPValue *OrigMask = MemR->getMask();
1347-
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
1348-
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1349-
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1350-
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1351-
N->insertBefore(L);
1352-
L->replaceAllUsesWith(N);
1353-
L->eraseFromParent();
1354-
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1355-
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1356-
N->insertBefore(S);
1357-
S->eraseFromParent();
1358-
} else {
1359-
llvm_unreachable("unsupported recipe");
1360-
}
1277+
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1278+
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1279+
if (!MemR)
1280+
continue;
1281+
assert(!MemR->isReverse() &&
1282+
"Reversed memory operations not supported yet.");
1283+
VPValue *OrigMask = MemR->getMask();
1284+
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
1285+
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1286+
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1287+
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1288+
N->insertBefore(L);
1289+
L->replaceAllUsesWith(N);
1290+
L->eraseFromParent();
1291+
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1292+
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1293+
N->insertBefore(S);
1294+
S->eraseFromParent();
1295+
} else {
1296+
llvm_unreachable("unsupported recipe");
13611297
}
1362-
recursivelyDeleteDeadRecipes(HeaderMask);
13631298
}
1299+
recursivelyDeleteDeadRecipes(HeaderMask);
1300+
13641301
// Replace all uses of VPCanonicalIVPHIRecipe by
13651302
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
13661303
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
@@ -1465,3 +1402,16 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
14651402
}
14661403
}
14671404
}
1405+
1406+
void VPlanTransforms::lowerRecipes(VPlan &Plan) {
1407+
VPInstruction *HeaderMask = getHeaderMask(Plan);
1408+
if (!HeaderMask)
1409+
return;
1410+
1411+
VPValue *IV = getOrCreateWideCanonicalIV(Plan, HeaderMask);
1412+
VPBuilder Builder(HeaderMask);
1413+
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
1414+
VPValue *M = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
1415+
HeaderMask->replaceAllUsesWith(M);
1416+
HeaderMask->eraseFromParent();
1417+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ struct VPlanTransforms {
105105
/// VPCanonicalIVPHIRecipe is only used to control the loop after
106106
/// this transformation.
107107
static void addExplicitVectorLength(VPlan &Plan);
108+
109+
/// Lower abstract VPInstruction recipes to a concrete sequence of recipes for
110+
/// which code can be generated.
111+
static void lowerRecipes(VPlan &Plan);
108112
};
109113

110114
} // namespace llvm

llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize {
3737
; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
3838
; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
3939
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]]
40-
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
40+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
4141
; CHECK: exit:
4242
; CHECK-NEXT: ret void
4343
;
@@ -158,13 +158,15 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
158158
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
159159
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
160160
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1
161-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
162-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
161+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
162+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
163163
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
164164
; CHECK: vector.body:
165-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
166-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
167-
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
165+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
166+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
167+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
168+
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
169+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]]
168170
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
169171
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
170172
; CHECK: pred.store.if:
@@ -174,31 +176,30 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
174176
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
175177
; CHECK: pred.store.continue:
176178
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
177-
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
178-
; CHECK: pred.store.if1:
179+
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
180+
; CHECK: pred.store.if3:
179181
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
180182
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]]
181183
; CHECK-NEXT: store i32 13, ptr [[TMP6]], align 1
182-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
183-
; CHECK: pred.store.continue2:
184+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
185+
; CHECK: pred.store.continue4:
184186
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
185-
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
186-
; CHECK: pred.store.if3:
187+
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
188+
; CHECK: pred.store.if5:
187189
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 2
188190
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP8]]
189191
; CHECK-NEXT: store i32 13, ptr [[TMP9]], align 1
190-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
191-
; CHECK: pred.store.continue4:
192+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
193+
; CHECK: pred.store.continue6:
192194
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
193-
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
194-
; CHECK: pred.store.if5:
195+
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
196+
; CHECK: pred.store.if7:
195197
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 3
196198
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
197199
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
198-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
199-
; CHECK: pred.store.continue6:
200+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
201+
; CHECK: pred.store.continue8:
200202
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
201-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
202203
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
203204
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
204205
; CHECK: middle.block:

0 commit comments

Comments
 (0)