@@ -434,44 +434,6 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
434
434
}
435
435
}
436
436
437
- // / Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
438
- // / recipe, if it exists.
439
- static void removeRedundantCanonicalIVs (VPlan &Plan) {
440
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV ();
441
- VPWidenCanonicalIVRecipe *WidenNewIV = nullptr ;
442
- for (VPUser *U : CanonicalIV->users ()) {
443
- WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
444
- if (WidenNewIV)
445
- break ;
446
- }
447
-
448
- if (!WidenNewIV)
449
- return ;
450
-
451
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
452
- for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
453
- auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
454
-
455
- if (!WidenOriginalIV || !WidenOriginalIV->isCanonical () ||
456
- WidenOriginalIV->getScalarType () != WidenNewIV->getScalarType ())
457
- continue ;
458
-
459
- // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
460
- // everything WidenNewIV's users need. That is, WidenOriginalIV will
461
- // generate a vector phi or all users of WidenNewIV demand the first lane
462
- // only.
463
- if (any_of (WidenOriginalIV->users (),
464
- [WidenOriginalIV](VPUser *U) {
465
- return !U->usesScalars (WidenOriginalIV);
466
- }) ||
467
- vputils::onlyFirstLaneUsed (WidenNewIV)) {
468
- WidenNewIV->replaceAllUsesWith (WidenOriginalIV);
469
- WidenNewIV->eraseFromParent ();
470
- return ;
471
- }
472
- }
473
- }
474
-
475
437
// / Returns true if \p R is dead and can be removed.
476
438
static bool isDeadRecipe (VPRecipeBase &R) {
477
439
using namespace llvm ::PatternMatch;
@@ -1086,7 +1048,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
1086
1048
}
1087
1049
1088
1050
void VPlanTransforms::optimize (VPlan &Plan, ScalarEvolution &SE) {
1089
- removeRedundantCanonicalIVs (Plan);
1090
1051
removeRedundantInductionCasts (Plan);
1091
1052
1092
1053
simplifyRecipes (Plan, SE.getContext ());
@@ -1203,52 +1164,32 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
1203
1164
return LaneMaskPhi;
1204
1165
}
1205
1166
1206
- // / Collect all VPValues representing a header mask through the (ICMP_ULE,
1207
- // / WideCanonicalIV, backedge-taken-count) pattern.
1208
- // / TODO: Introduce explicit recipe for header-mask instead of searching
1209
- // / for the header-mask pattern manually.
1210
- static SmallVector<VPValue *> collectAllHeaderMasks (VPlan &Plan) {
1211
- SmallVector<VPValue *> WideCanonicalIVs;
1212
- auto *FoundWidenCanonicalIVUser =
1213
- find_if (Plan.getCanonicalIV ()->users (),
1214
- [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1215
- assert (count_if (Plan.getCanonicalIV ()->users (),
1216
- [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1217
- 1 &&
1218
- " Must have at most one VPWideCanonicalIVRecipe" );
1219
- if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV ()->users ().end ()) {
1220
- auto *WideCanonicalIV =
1221
- cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1222
- WideCanonicalIVs.push_back (WideCanonicalIV);
1223
- }
1224
-
1225
- // Also include VPWidenIntOrFpInductionRecipes that represent a widened
1226
- // version of the canonical induction.
1167
+ /// Return the HeaderMask VPInstruction of \p Plan, or nullptr if there is none.
1168
+ static VPInstruction *getHeaderMask (VPlan &Plan) {
1227
1169
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1228
- for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
1229
- auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1230
- if (WidenOriginalIV && WidenOriginalIV->isCanonical ())
1231
- WideCanonicalIVs.push_back (WidenOriginalIV);
1232
- }
1170
+ auto R = find_if (*HeaderVPBB, [](VPRecipeBase &R) { // Only the vector loop region's entry block is scanned.
1171
+ using namespace llvm ::VPlanPatternMatch;
1172
+ return match (&R, m_VPInstruction<VPInstruction::HeaderMask>(m_VPValue ())); // NOTE(review): m_VPValue () presumably accepts any operand - confirm.
1173
+ });
1174
+ return R == HeaderVPBB->end () ? nullptr : cast<VPInstruction>(&*R); // First match wins; end () means no header mask was found.
1175
+ }
1233
1176
1234
- // Walk users of wide canonical IVs and collect to all compares of the form
1235
- // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1236
- SmallVector<VPValue *> HeaderMasks;
1237
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount ();
1238
- for (auto *Wide : WideCanonicalIVs) {
1239
- for (VPUser *U : SmallVector<VPUser *>(Wide->users ())) {
1240
- auto *HeaderMask = dyn_cast<VPInstruction>(U);
1241
- if (!HeaderMask || HeaderMask->getOpcode () != Instruction::ICmp ||
1242
- HeaderMask->getPredicate () != CmpInst::ICMP_ULE ||
1243
- HeaderMask->getOperand (1 ) != BTC)
1244
- continue ;
1177
+ /// Return a widened canonical IV for \p Plan. A canonical
+ /// VPWidenIntOrFpInductionRecipe among the header phis is reused when its
+ /// scalar type equals the scalar type of the plan's canonical IV; otherwise a
+ /// new VPWidenCanonicalIVRecipe is created on the canonical IV and inserted
+ /// before \p InsertPt.
+ static VPValue *getOrCreateWideCanonicalIV (VPlan &Plan,
1178
+ VPRecipeBase *InsertPt) {
1245
1179
1246
- assert (HeaderMask->getOperand (0 ) == Wide &&
1247
- " WidenCanonicalIV must be the first operand of the compare" );
1248
- HeaderMasks.push_back (HeaderMask);
1249
- }
1180
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1181
+ for (VPRecipeBase &R : HeaderVPBB->phis ()) {
1182
+ auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R);
1183
+ if (!WideIV || !WideIV->isCanonical () ||
1184
+ Plan.getCanonicalIV ()->getScalarType () != WideIV->getScalarType ())
1185
+ continue ;
1186
+ return WideIV; // The first suitable header phi is reused as-is.
1250
1188
}
1251
- return HeaderMasks;
1189
+ // No reusable widened induction among the header phis: materialize one.
1190
+ auto *IV = new VPWidenCanonicalIVRecipe (Plan.getCanonicalIV ());
1191
+ IV->insertBefore (InsertPt);
1192
+ return IV;
1252
1193
}
1253
1194
1254
1195
void VPlanTransforms::addActiveLaneMask (
@@ -1258,30 +1199,23 @@ void VPlanTransforms::addActiveLaneMask(
1258
1199
UseActiveLaneMaskForControlFlow) &&
1259
1200
" DataAndControlFlowWithoutRuntimeCheck implies "
1260
1201
" UseActiveLaneMaskForControlFlow" );
1261
-
1262
- auto FoundWidenCanonicalIVUser =
1263
- find_if (Plan.getCanonicalIV ()->users (),
1264
- [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1265
- assert (FoundWidenCanonicalIVUser &&
1266
- " Must have widened canonical IV when tail folding!" );
1267
- auto *WideCanonicalIV =
1268
- cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1202
+ VPValue *HeaderMask = getHeaderMask (Plan);
1203
+ assert (HeaderMask && " Active-lane-mask not needed?" );
1269
1204
VPSingleDefRecipe *LaneMask;
1270
1205
if (UseActiveLaneMaskForControlFlow) {
1271
1206
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch (
1272
1207
Plan, DataAndControlFlowWithoutRuntimeCheck);
1273
1208
} else {
1274
- VPBuilder B = VPBuilder::getToInsertAfter (WideCanonicalIV);
1275
- LaneMask = B.createNaryOp (VPInstruction::ActiveLaneMask,
1276
- {WideCanonicalIV, Plan.getTripCount ()}, nullptr ,
1277
- " active.lane.mask" );
1209
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1210
+ VPBuilder B;
1211
+ B.setInsertPoint (HeaderVPBB, HeaderVPBB->getFirstNonPhi ());
1212
+ LaneMask = B.createNaryOp (
1213
+ VPInstruction::ActiveLaneMask,
1214
+ {getOrCreateWideCanonicalIV (Plan, &*HeaderVPBB->getFirstNonPhi ()),
1215
+ Plan.getTripCount ()},
1216
+ nullptr , " active.lane.mask" );
1278
1217
}
1279
-
1280
- // Walk users of WideCanonicalIV and replace all compares of the form
1281
- // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
1282
- // active-lane-mask.
1283
- for (VPValue *HeaderMask : collectAllHeaderMasks (Plan))
1284
- HeaderMask->replaceAllUsesWith (LaneMask);
1218
+ HeaderMask->replaceAllUsesWith (LaneMask);
1285
1219
}
1286
1220
1287
1221
// / Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -1307,6 +1241,10 @@ void VPlanTransforms::addActiveLaneMask(
1307
1241
// / ...
1308
1242
// /
1309
1243
void VPlanTransforms::addExplicitVectorLength (VPlan &Plan) {
1244
+ VPValue *HeaderMask = getHeaderMask (Plan);
1245
+ if (!HeaderMask)
1246
+ return ;
1247
+
1310
1248
VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1311
1249
auto *CanonicalIVPHI = Plan.getCanonicalIV ();
1312
1250
VPValue *StartV = CanonicalIVPHI->getStartValue ();
@@ -1336,31 +1274,30 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
1336
1274
NextEVLIV->insertBefore (CanonicalIVIncrement);
1337
1275
EVLPhi->addOperand (NextEVLIV);
1338
1276
1339
- for (VPValue *HeaderMask : collectAllHeaderMasks (Plan)) {
1340
- for (VPUser *U : collectUsersRecursively (HeaderMask)) {
1341
- auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1342
- if (!MemR)
1343
- continue ;
1344
- assert (!MemR->isReverse () &&
1345
- " Reversed memory operations not supported yet." );
1346
- VPValue *OrigMask = MemR->getMask ();
1347
- assert (OrigMask && " Unmasked widen memory recipe when folding tail" );
1348
- VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1349
- if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1350
- auto *N = new VPWidenLoadEVLRecipe (L, VPEVL, NewMask);
1351
- N->insertBefore (L);
1352
- L->replaceAllUsesWith (N);
1353
- L->eraseFromParent ();
1354
- } else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1355
- auto *N = new VPWidenStoreEVLRecipe (S, VPEVL, NewMask);
1356
- N->insertBefore (S);
1357
- S->eraseFromParent ();
1358
- } else {
1359
- llvm_unreachable (" unsupported recipe" );
1360
- }
1277
+ for (VPUser *U : collectUsersRecursively (HeaderMask)) {
1278
+ auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1279
+ if (!MemR)
1280
+ continue ;
1281
+ assert (!MemR->isReverse () &&
1282
+ " Reversed memory operations not supported yet." );
1283
+ VPValue *OrigMask = MemR->getMask ();
1284
+ assert (OrigMask && " Unmasked widen memory recipe when folding tail" );
1285
+ VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1286
+ if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1287
+ auto *N = new VPWidenLoadEVLRecipe (L, VPEVL, NewMask);
1288
+ N->insertBefore (L);
1289
+ L->replaceAllUsesWith (N);
1290
+ L->eraseFromParent ();
1291
+ } else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1292
+ auto *N = new VPWidenStoreEVLRecipe (S, VPEVL, NewMask);
1293
+ N->insertBefore (S);
1294
+ S->eraseFromParent ();
1295
+ } else {
1296
+ llvm_unreachable (" unsupported recipe" );
1361
1297
}
1362
- recursivelyDeleteDeadRecipes (HeaderMask);
1363
1298
}
1299
+ recursivelyDeleteDeadRecipes (HeaderMask);
1300
+
1364
1301
// Replace all uses of VPCanonicalIVPHIRecipe by
1365
1302
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
1366
1303
CanonicalIVPHI->replaceAllUsesWith (EVLPhi);
@@ -1465,3 +1402,16 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
1465
1402
}
1466
1403
}
1467
1404
}
1405
+ /// Replace the HeaderMask VPInstruction of \p Plan, if any, with an explicit (ICMP_ULE, wide canonical IV, backedge-taken count) compare.
1406
+ void VPlanTransforms::lowerRecipes (VPlan &Plan) {
1407
+ VPInstruction *HeaderMask = getHeaderMask (Plan);
1408
+ if (!HeaderMask)
1409
+ return ; // Nothing to lower when the plan has no header mask.
1410
+ // The wide IV is requested with HeaderMask as insertion point, so a newly created one is placed before it.
1411
+ VPValue *IV = getOrCreateWideCanonicalIV (Plan, HeaderMask);
1412
+ VPBuilder Builder (HeaderMask); // NOTE(review): presumably inserts new recipes before HeaderMask - confirm.
1413
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount ();
1414
+ VPValue *M = Builder.createICmp (CmpInst::ICMP_ULE, IV, BTC);
1415
+ HeaderMask->replaceAllUsesWith (M); // Redirect every user to the compare before erasing the old recipe.
1416
+ HeaderMask->eraseFromParent ();
1417
+ }
0 commit comments