@@ -404,6 +404,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
404
404
405
405
ProfileSummaryInfo *PSI = nullptr ;
406
406
407
+ TargetPassConfig *PassConfig = nullptr ;
408
+
407
409
/// Duplicator used to duplicate tails during placement.
408
410
///
409
411
/// Placement decisions can open up new tail duplication opportunities, but
@@ -414,6 +416,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
414
416
/// Partial tail duplication threshold.
415
417
BlockFrequency DupThreshold;
416
418
419
+ unsigned TailDupSize;
420
+
417
421
/// True: use block profile count to compute tail duplication cost.
418
422
/// False: use block frequency to compute tail duplication cost.
419
423
bool UseProfileCount = false ;
@@ -458,7 +462,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
458
462
459
463
/// Scale the DupThreshold according to basic block size.
460
464
BlockFrequency scaleThreshold (MachineBasicBlock *BB);
461
- void initDupThreshold ();
465
+ void initTailDupThreshold ();
462
466
463
467
/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
464
468
/// if the count goes to 0, add them to the appropriate work list.
@@ -2936,12 +2940,16 @@ void MachineBlockPlacement::alignBlocks() {
2936
2940
// exclusively on the loop info here so that we can align backedges in
2937
2941
// unnatural CFGs and backedges that were introduced purely because of the
2938
2942
// loop rotations done during this layout pass.
2939
- if (F->getFunction ().hasMinSize () ||
2940
- (F->getFunction ().hasOptSize () && !TLI->alignLoopsWithOptSize ()))
2941
- return ;
2943
+ if (!AlignAllBlock && !AlignAllNonFallThruBlocks) {
2944
+ if (F->getFunction ().hasMinSize () ||
2945
+ (F->getFunction ().hasOptSize () && !TLI->alignLoopsWithOptSize ()))
2946
+ return ;
2947
+ }
2948
+
2942
2949
BlockChain &FunctionChain = *BlockToChain[&F->front ()];
2950
+ // Empty chain.
2943
2951
if (FunctionChain.begin () == FunctionChain.end ())
2944
- return ; // Empty chain.
2952
+ return ;
2945
2953
2946
2954
const BranchProbability ColdProb (1 , 5 ); // 20%
2947
2955
BlockFrequency EntryFreq = MBFI->getBlockFreq (&F->front ());
@@ -3037,6 +3045,33 @@ void MachineBlockPlacement::alignBlocks() {
3037
3045
DetermineMaxAlignmentPadding ();
3038
3046
}
3039
3047
}
3048
+
3049
+ const bool HasMaxBytesOverride =
3050
+ MaxBytesForAlignmentOverride.getNumOccurrences () > 0 ;
3051
+
3052
+ if (AlignAllBlock)
3053
+ // Align all of the blocks in the function to a specific alignment.
3054
+ for (MachineBasicBlock &MBB : *F) {
3055
+ if (HasMaxBytesOverride)
3056
+ MBB.setAlignment (Align (1ULL << AlignAllBlock),
3057
+ MaxBytesForAlignmentOverride);
3058
+ else
3059
+ MBB.setAlignment (Align (1ULL << AlignAllBlock));
3060
+ }
3061
+ else if (AlignAllNonFallThruBlocks) {
3062
+ // Align all of the blocks that have no fall-through predecessors to a
3063
+ // specific alignment.
3064
+ for (auto MBI = std::next (F->begin ()), MBE = F->end (); MBI != MBE; ++MBI) {
3065
+ auto LayoutPred = std::prev (MBI);
3066
+ if (!LayoutPred->isSuccessor (&*MBI)) {
3067
+ if (HasMaxBytesOverride)
3068
+ MBI->setAlignment (Align (1ULL << AlignAllNonFallThruBlocks),
3069
+ MaxBytesForAlignmentOverride);
3070
+ else
3071
+ MBI->setAlignment (Align (1ULL << AlignAllNonFallThruBlocks));
3072
+ }
3073
+ }
3074
+ }
3040
3075
}
3041
3076
3042
3077
/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
@@ -3390,31 +3425,53 @@ void MachineBlockPlacement::findDuplicateCandidates(
3390
3425
}
3391
3426
}
3392
3427
3393
- void MachineBlockPlacement::initDupThreshold () {
3428
+ void MachineBlockPlacement::initTailDupThreshold () {
3394
3429
DupThreshold = BlockFrequency (0 );
3395
- if (!F->getFunction ().hasProfileData ())
3396
- return ;
3430
+ if (F->getFunction ().hasProfileData ()) {
3431
+ // We prefer to use profile count.
3432
+ uint64_t HotThreshold = PSI->getOrCompHotCountThreshold ();
3433
+ if (HotThreshold != UINT64_MAX) {
3434
+ UseProfileCount = true ;
3435
+ DupThreshold =
3436
+ BlockFrequency (HotThreshold * TailDupProfilePercentThreshold / 100 );
3437
+ } else {
3438
+ // Profile count is not available, we can use block frequency instead.
3439
+ BlockFrequency MaxFreq = BlockFrequency (0 );
3440
+ for (MachineBasicBlock &MBB : *F) {
3441
+ BlockFrequency Freq = MBFI->getBlockFreq (&MBB);
3442
+ if (Freq > MaxFreq)
3443
+ MaxFreq = Freq;
3444
+ }
3397
3445
3398
- // We prefer to use prifile count.
3399
- uint64_t HotThreshold = PSI->getOrCompHotCountThreshold ();
3400
- if (HotThreshold != UINT64_MAX) {
3401
- UseProfileCount = true ;
3402
- DupThreshold =
3403
- BlockFrequency (HotThreshold * TailDupProfilePercentThreshold / 100 );
3404
- return ;
3446
+ BranchProbability ThresholdProb (TailDupPlacementPenalty, 100 );
3447
+ DupThreshold = BlockFrequency (MaxFreq * ThresholdProb);
3448
+ UseProfileCount = false ;
3449
+ }
3405
3450
}
3406
3451
3407
- // Profile count is not available, we can use block frequency instead.
3408
- BlockFrequency MaxFreq = BlockFrequency (0 );
3409
- for (MachineBasicBlock &MBB : *F) {
3410
- BlockFrequency Freq = MBFI->getBlockFreq (&MBB);
3411
- if (Freq > MaxFreq)
3412
- MaxFreq = Freq;
3452
+ TailDupSize = TailDupPlacementThreshold;
3453
+ // If only the aggressive threshold is explicitly set, use it.
3454
+ if (TailDupPlacementAggressiveThreshold.getNumOccurrences () != 0 &&
3455
+ TailDupPlacementThreshold.getNumOccurrences () == 0 )
3456
+ TailDupSize = TailDupPlacementAggressiveThreshold;
3457
+
3458
+ // For aggressive optimization, we can adjust some thresholds to be less
3459
+ // conservative.
3460
+ if (PassConfig->getOptLevel () >= CodeGenOptLevel::Aggressive) {
3461
+ // At O3 we should be more willing to copy blocks for tail duplication. This
3462
+ // increases size pressure, so we only do it at O3.
3463
+ // Do this unless only the regular threshold is explicitly set.
3464
+ if (TailDupPlacementThreshold.getNumOccurrences () == 0 ||
3465
+ TailDupPlacementAggressiveThreshold.getNumOccurrences () != 0 )
3466
+ TailDupSize = TailDupPlacementAggressiveThreshold;
3413
3467
}
3414
3468
3415
- BranchProbability ThresholdProb (TailDupPlacementPenalty, 100 );
3416
- DupThreshold = BlockFrequency (MaxFreq * ThresholdProb);
3417
- UseProfileCount = false ;
3469
+ // If there's no threshold provided through options, query the target
3470
+ // information for a threshold instead.
3471
+ if (TailDupPlacementThreshold.getNumOccurrences () == 0 &&
3472
+ (PassConfig->getOptLevel () < CodeGenOptLevel::Aggressive ||
3473
+ TailDupPlacementAggressiveThreshold.getNumOccurrences () == 0 ))
3474
+ TailDupSize = TII->getTailDuplicateSize (PassConfig->getOptLevel ());
3418
3475
}
3419
3476
3420
3477
bool MachineBlockPlacement::runOnMachineFunction (MachineFunction &MF) {
@@ -3434,8 +3491,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
3434
3491
TLI = MF.getSubtarget ().getTargetLowering ();
3435
3492
MPDT = nullptr ;
3436
3493
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI ();
3437
-
3438
- initDupThreshold ();
3494
+ PassConfig = &getAnalysis<TargetPassConfig>();
3439
3495
3440
3496
// Initialize PreferredLoopExit to nullptr here since it may never be set if
3441
3497
// there are no MachineLoops.
@@ -3446,38 +3502,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
3446
3502
assert (ComputedEdges.empty () &&
3447
3503
" Computed Edge map should be empty before starting placement." );
3448
3504
3449
- unsigned TailDupSize = TailDupPlacementThreshold;
3450
- // If only the aggressive threshold is explicitly set, use it.
3451
- if (TailDupPlacementAggressiveThreshold.getNumOccurrences () != 0 &&
3452
- TailDupPlacementThreshold.getNumOccurrences () == 0 )
3453
- TailDupSize = TailDupPlacementAggressiveThreshold;
3454
-
3455
- TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
3456
- // For aggressive optimization, we can adjust some thresholds to be less
3457
- // conservative.
3458
- if (PassConfig->getOptLevel () >= CodeGenOptLevel::Aggressive) {
3459
- // At O3 we should be more willing to copy blocks for tail duplication. This
3460
- // increases size pressure, so we only do it at O3
3461
- // Do this unless only the regular threshold is explicitly set.
3462
- if (TailDupPlacementThreshold.getNumOccurrences () == 0 ||
3463
- TailDupPlacementAggressiveThreshold.getNumOccurrences () != 0 )
3464
- TailDupSize = TailDupPlacementAggressiveThreshold;
3465
- }
3466
-
3467
- // If there's no threshold provided through options, query the target
3468
- // information for a threshold instead.
3469
- if (TailDupPlacementThreshold.getNumOccurrences () == 0 &&
3470
- (PassConfig->getOptLevel () < CodeGenOptLevel::Aggressive ||
3471
- TailDupPlacementAggressiveThreshold.getNumOccurrences () == 0 ))
3472
- TailDupSize = TII->getTailDuplicateSize (PassConfig->getOptLevel ());
3505
+ // Initialize tail duplication thresholds.
3506
+ initTailDupThreshold ();
3473
3507
3508
+ // Apply tail duplication.
3474
3509
if (allowTailDupPlacement ()) {
3475
3510
MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree ();
3476
3511
bool OptForSize = MF.getFunction ().hasOptSize () ||
3477
3512
llvm::shouldOptimizeForSize (&MF, PSI, &MBFI->getMBFI ());
3478
3513
if (OptForSize)
3479
3514
TailDupSize = 1 ;
3480
- bool PreRegAlloc = false ;
3515
+ const bool PreRegAlloc = false ;
3481
3516
TailDup.initMF (MF, PreRegAlloc, MBPI, MBFI.get (), PSI,
3482
3517
/* LayoutMode */ true , TailDupSize);
3483
3518
precomputeTriangleChains ();
@@ -3488,12 +3523,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
3488
3523
// Changing the layout can create new tail merging opportunities.
3489
3524
// TailMerge can create jump into if branches that make CFG irreducible for
3490
3525
// HW that requires structured CFG.
3491
- bool EnableTailMerge = !MF.getTarget ().requiresStructuredCFG () &&
3492
- PassConfig->getEnableTailMerge () &&
3493
- BranchFoldPlacement;
3526
+ const bool EnableTailMerge = !MF.getTarget ().requiresStructuredCFG () &&
3527
+ PassConfig->getEnableTailMerge () &&
3528
+ BranchFoldPlacement && MF. size () > 3 ;
3494
3529
// No tail merging opportunities if the block number is less than four.
3495
- if (MF. size () > 3 && EnableTailMerge) {
3496
- unsigned TailMergeSize = TailDupSize + 1 ;
3530
+ if (EnableTailMerge) {
3531
+ const unsigned TailMergeSize = TailDupSize + 1 ;
3497
3532
BranchFolder BF (/* DefaultEnableTailMerge=*/ true , /* CommonHoist=*/ false ,
3498
3533
*MBFI, *MBPI, PSI, TailMergeSize);
3499
3534
@@ -3528,32 +3563,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
3528
3563
ComputedEdges.clear ();
3529
3564
ChainAllocator.DestroyAll ();
3530
3565
3531
- bool HasMaxBytesOverride =
3532
- MaxBytesForAlignmentOverride.getNumOccurrences () > 0 ;
3533
-
3534
- if (AlignAllBlock)
3535
- // Align all of the blocks in the function to a specific alignment.
3536
- for (MachineBasicBlock &MBB : MF) {
3537
- if (HasMaxBytesOverride)
3538
- MBB.setAlignment (Align (1ULL << AlignAllBlock),
3539
- MaxBytesForAlignmentOverride);
3540
- else
3541
- MBB.setAlignment (Align (1ULL << AlignAllBlock));
3542
- }
3543
- else if (AlignAllNonFallThruBlocks) {
3544
- // Align all of the blocks that have no fall-through predecessors to a
3545
- // specific alignment.
3546
- for (auto MBI = std::next (MF.begin ()), MBE = MF.end (); MBI != MBE; ++MBI) {
3547
- auto LayoutPred = std::prev (MBI);
3548
- if (!LayoutPred->isSuccessor (&*MBI)) {
3549
- if (HasMaxBytesOverride)
3550
- MBI->setAlignment (Align (1ULL << AlignAllNonFallThruBlocks),
3551
- MaxBytesForAlignmentOverride);
3552
- else
3553
- MBI->setAlignment (Align (1ULL << AlignAllNonFallThruBlocks));
3554
- }
3555
- }
3556
- }
3566
+ // View the function.
3557
3567
if (ViewBlockLayoutWithBFI != GVDT_None &&
3558
3568
(ViewBlockFreqFuncName.empty () ||
3559
3569
F->getFunction ().getName () == ViewBlockFreqFuncName)) {
0 commit comments