@@ -462,20 +462,17 @@ class MachineBlockPlacement : public MachineFunctionPass {
462
462
const MachineBasicBlock *ExitBB,
463
463
const BlockFilterSet &LoopBlockSet);
464
464
MachineBasicBlock *findBestLoopTopHelper (MachineBasicBlock *OldTop,
465
- const MachineLoop &L,
466
- const BlockFilterSet &LoopBlockSet,
467
- bool HasStaticProfileOnly = false );
468
- MachineBasicBlock *findBestLoopTop (
469
465
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
470
- MachineBasicBlock *findBestLoopTopNoProfile (
466
+ MachineBasicBlock *findBestLoopTop (
471
467
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
472
468
MachineBasicBlock *findBestLoopExit (
473
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
469
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
470
+ BlockFrequency &ExitFreq);
474
471
BlockFilterSet collectLoopBlockSet (const MachineLoop &L);
475
472
void buildLoopChains (const MachineLoop &L);
476
473
void rotateLoop (
477
474
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
478
- const BlockFilterSet &LoopBlockSet);
475
+ BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
479
476
void rotateLoopWithProfile (
480
477
BlockChain &LoopChain, const MachineLoop &L,
481
478
const BlockFilterSet &LoopBlockSet);
@@ -1950,14 +1947,11 @@ MachineBlockPlacement::FallThroughGains(
1950
1947
// / At the same time, move it before old top increases the taken branch
1951
1948
// / to loop exit block, so the reduced taken branch will be compared with
1952
1949
// / the increased taken branch to the loop exit block.
1953
- // /
1954
- // / This pattern is enabled only when HasStaticProfileOnly is false.
1955
1950
MachineBasicBlock *
1956
1951
MachineBlockPlacement::findBestLoopTopHelper (
1957
1952
MachineBasicBlock *OldTop,
1958
1953
const MachineLoop &L,
1959
- const BlockFilterSet &LoopBlockSet,
1960
- bool HasStaticProfileOnly) {
1954
+ const BlockFilterSet &LoopBlockSet) {
1961
1955
// Check that the header hasn't been fused with a preheader block due to
1962
1956
// crazy branches. If it has, we need to start with the header at the top to
1963
1957
// prevent pulling the preheader into the loop body.
@@ -1981,38 +1975,22 @@ MachineBlockPlacement::findBestLoopTopHelper(
1981
1975
if (Pred->succ_size () > 2 )
1982
1976
continue ;
1983
1977
1978
+ MachineBasicBlock *OtherBB = nullptr ;
1979
+ if (Pred->succ_size () == 2 ) {
1980
+ OtherBB = *Pred->succ_begin ();
1981
+ if (OtherBB == OldTop)
1982
+ OtherBB = *Pred->succ_rbegin ();
1983
+ }
1984
+
1984
1985
if (!canMoveBottomBlockToTop (Pred, OldTop))
1985
1986
continue ;
1986
1987
1987
- if (HasStaticProfileOnly) {
1988
- // In plain mode we consider pattern 1 only.
1989
- if (Pred->succ_size () > 1 )
1990
- continue ;
1991
-
1992
- BlockFrequency PredFreq = MBFI->getBlockFreq (Pred);
1993
- if (!BestPred || PredFreq > BestGains ||
1994
- (!(PredFreq < BestGains) &&
1995
- Pred->isLayoutSuccessor (OldTop))) {
1996
- BestPred = Pred;
1997
- BestGains = PredFreq;
1998
- }
1999
- } else {
2000
- // With profile information we also consider pattern 2.
2001
- MachineBasicBlock *OtherBB = nullptr ;
2002
- if (Pred->succ_size () == 2 ) {
2003
- OtherBB = *Pred->succ_begin ();
2004
- if (OtherBB == OldTop)
2005
- OtherBB = *Pred->succ_rbegin ();
2006
- }
2007
-
2008
- // And more sophisticated cost model.
2009
- BlockFrequency Gains = FallThroughGains (Pred, OldTop, OtherBB,
2010
- LoopBlockSet);
2011
- if ((Gains > 0 ) && (Gains > BestGains ||
2012
- ((Gains == BestGains) && Pred->isLayoutSuccessor (OldTop)))) {
2013
- BestPred = Pred;
2014
- BestGains = Gains;
2015
- }
1988
+ BlockFrequency Gains = FallThroughGains (Pred, OldTop, OtherBB,
1989
+ LoopBlockSet);
1990
+ if ((Gains > 0 ) && (Gains > BestGains ||
1991
+ ((Gains == BestGains) && Pred->isLayoutSuccessor (OldTop)))) {
1992
+ BestPred = Pred;
1993
+ BestGains = Gains;
2016
1994
}
2017
1995
}
2018
1996
@@ -2032,7 +2010,7 @@ MachineBlockPlacement::findBestLoopTopHelper(
2032
2010
return BestPred;
2033
2011
}
2034
2012
2035
- // / Find the best loop top block for layout in FDO mode .
2013
+ // / Find the best loop top block for layout.
2036
2014
// /
2037
2015
// / This function iteratively calls findBestLoopTopHelper, until no new better
2038
2016
// / BB can be found.
@@ -2060,42 +2038,15 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
2060
2038
return NewTop;
2061
2039
}
2062
2040
2063
- // / Find the best loop top block for layout in plain mode. It is less agressive
2064
- // / than findBestLoopTop.
2065
- // /
2066
- // / Look for a block which is strictly better than the loop header for laying
2067
- // / out at the top of the loop. This looks for one and only one pattern:
2068
- // / a latch block with no conditional exit. This block will cause a conditional
2069
- // / jump around it or will be the bottom of the loop if we lay it out in place,
2070
- // / but if it doesn't end up at the bottom of the loop for any reason,
2071
- // / rotation alone won't fix it. Because such a block will always result in an
2072
- // / unconditional jump (for the backedge) rotating it in front of the loop
2073
- // / header is always profitable.
2074
- MachineBasicBlock *
2075
- MachineBlockPlacement::findBestLoopTopNoProfile (
2076
- const MachineLoop &L,
2077
- const BlockFilterSet &LoopBlockSet) {
2078
- // Placing the latch block before the header may introduce an extra branch
2079
- // that skips this block the first time the loop is executed, which we want
2080
- // to avoid when optimising for size.
2081
- // FIXME: in theory there is a case that does not introduce a new branch,
2082
- // i.e. when the layout predecessor does not fallthrough to the loop header.
2083
- // In practice this never happens though: there always seems to be a preheader
2084
- // that can fallthrough and that is also placed before the header.
2085
- if (F->getFunction ().hasOptSize ())
2086
- return L.getHeader ();
2087
-
2088
- return findBestLoopTopHelper (L.getHeader (), L, LoopBlockSet, true );
2089
- }
2090
-
2091
2041
// / Find the best loop exiting block for layout.
2092
2042
// /
2093
2043
// / This routine implements the logic to analyze the loop looking for the best
2094
2044
// / block to layout at the top of the loop. Typically this is done to maximize
2095
2045
// / fallthrough opportunities.
2096
2046
MachineBasicBlock *
2097
2047
MachineBlockPlacement::findBestLoopExit (const MachineLoop &L,
2098
- const BlockFilterSet &LoopBlockSet) {
2048
+ const BlockFilterSet &LoopBlockSet,
2049
+ BlockFrequency &ExitFreq) {
2099
2050
// We don't want to layout the loop linearly in all cases. If the loop header
2100
2051
// is just a normal basic block in the loop, we want to look for what block
2101
2052
// within the loop is the best one to layout at the top. However, if the loop
@@ -2206,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
2206
2157
2207
2158
LLVM_DEBUG (dbgs () << " Best exiting block: " << getBlockName (ExitingBB)
2208
2159
<< " \n " );
2160
+ ExitFreq = BestExitEdgeFreq;
2209
2161
return ExitingBB;
2210
2162
}
2211
2163
@@ -2250,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallthrough(
2250
2202
// / of its bottom already, don't rotate it.
2251
2203
void MachineBlockPlacement::rotateLoop (BlockChain &LoopChain,
2252
2204
const MachineBasicBlock *ExitingBB,
2205
+ BlockFrequency ExitFreq,
2253
2206
const BlockFilterSet &LoopBlockSet) {
2254
2207
if (!ExitingBB)
2255
2208
return ;
@@ -2273,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2273
2226
(!SuccChain || Succ == *SuccChain->begin ()))
2274
2227
return ;
2275
2228
}
2229
+
2230
+ // Rotating will destroy the top fallthrough, so we need to ensure the new exit
2231
+ // frequency is larger than the top fallthrough frequency.
2232
+ BlockFrequency FallThrough2Top = TopFallThroughFreq (Top, LoopBlockSet);
2233
+ if (FallThrough2Top >= ExitFreq)
2234
+ return ;
2276
2235
}
2277
2236
2278
2237
BlockChain::iterator ExitIt = llvm::find (LoopChain, ExitingBB);
@@ -2524,10 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2524
2483
// loop. This will default to the header, but may end up as one of the
2525
2484
// predecessors to the header if there is one which will result in strictly
2526
2485
// fewer branches in the loop body.
2527
- MachineBasicBlock *LoopTop =
2528
- (RotateLoopWithProfile || F->getFunction ().hasProfileData ()) ?
2529
- findBestLoopTop (L, LoopBlockSet) :
2530
- findBestLoopTopNoProfile (L, LoopBlockSet);
2486
+ MachineBasicBlock *LoopTop = findBestLoopTop (L, LoopBlockSet);
2531
2487
2532
2488
// If we selected just the header for the loop top, look for a potentially
2533
2489
// profitable exit block in the event that rotating the loop can eliminate
@@ -2536,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2536
2492
// Loops are processed innermost to outermost, make sure we clear
2537
2493
// PreferredLoopExit before processing a new loop.
2538
2494
PreferredLoopExit = nullptr ;
2495
+ BlockFrequency ExitFreq;
2539
2496
if (!RotateLoopWithProfile && LoopTop == L.getHeader ())
2540
- PreferredLoopExit = findBestLoopExit (L, LoopBlockSet);
2497
+ PreferredLoopExit = findBestLoopExit (L, LoopBlockSet, ExitFreq );
2541
2498
2542
2499
BlockChain &LoopChain = *BlockToChain[LoopTop];
2543
2500
@@ -2554,11 +2511,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2554
2511
2555
2512
buildChain (LoopTop, LoopChain, &LoopBlockSet);
2556
2513
2557
- if (RotateLoopWithProfile) {
2558
- if (LoopTop == L.getHeader ())
2559
- rotateLoopWithProfile (LoopChain, L, LoopBlockSet);
2560
- } else
2561
- rotateLoop (LoopChain, PreferredLoopExit, LoopBlockSet);
2514
+ if (RotateLoopWithProfile)
2515
+ rotateLoopWithProfile (LoopChain, L, LoopBlockSet);
2516
+ else
2517
+ rotateLoop (LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2562
2518
2563
2519
LLVM_DEBUG ({
2564
2520
// Crash at the end so we get all of the debugging output first.
0 commit comments