@@ -462,17 +462,20 @@ class MachineBlockPlacement : public MachineFunctionPass {
462
462
const MachineBasicBlock *ExitBB,
463
463
const BlockFilterSet &LoopBlockSet);
464
464
MachineBasicBlock *findBestLoopTopHelper (MachineBasicBlock *OldTop,
465
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
465
+ const MachineLoop &L,
466
+ const BlockFilterSet &LoopBlockSet,
467
+ bool HasStaticProfileOnly = false );
466
468
MachineBasicBlock *findBestLoopTop (
467
469
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
470
+ MachineBasicBlock *findBestLoopTopNoProfile (
471
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
468
472
MachineBasicBlock *findBestLoopExit (
469
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
470
- BlockFrequency &ExitFreq);
473
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
471
474
BlockFilterSet collectLoopBlockSet (const MachineLoop &L);
472
475
void buildLoopChains (const MachineLoop &L);
473
476
void rotateLoop (
474
477
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
475
- BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
478
+ const BlockFilterSet &LoopBlockSet);
476
479
void rotateLoopWithProfile (
477
480
BlockChain &LoopChain, const MachineLoop &L,
478
481
const BlockFilterSet &LoopBlockSet);
@@ -1947,11 +1950,14 @@ MachineBlockPlacement::FallThroughGains(
1947
1950
// / At the same time, moving it before old top increases the taken branch
1948
1951
// / to loop exit block, so the reduced taken branch will be compared with
1949
1952
// / the increased taken branch to the loop exit block.
1953
+ // /
1954
+ // / This pattern is enabled only when HasStaticProfileOnly is false.
1950
1955
MachineBasicBlock *
1951
1956
MachineBlockPlacement::findBestLoopTopHelper (
1952
1957
MachineBasicBlock *OldTop,
1953
1958
const MachineLoop &L,
1954
- const BlockFilterSet &LoopBlockSet) {
1959
+ const BlockFilterSet &LoopBlockSet,
1960
+ bool HasStaticProfileOnly) {
1955
1961
// Check that the header hasn't been fused with a preheader block due to
1956
1962
// crazy branches. If it has, we need to start with the header at the top to
1957
1963
// prevent pulling the preheader into the loop body.
@@ -1975,22 +1981,38 @@ MachineBlockPlacement::findBestLoopTopHelper(
1975
1981
if (Pred->succ_size () > 2 )
1976
1982
continue ;
1977
1983
1978
- MachineBasicBlock *OtherBB = nullptr ;
1979
- if (Pred->succ_size () == 2 ) {
1980
- OtherBB = *Pred->succ_begin ();
1981
- if (OtherBB == OldTop)
1982
- OtherBB = *Pred->succ_rbegin ();
1983
- }
1984
-
1985
1984
if (!canMoveBottomBlockToTop (Pred, OldTop))
1986
1985
continue ;
1987
1986
1988
- BlockFrequency Gains = FallThroughGains (Pred, OldTop, OtherBB,
1989
- LoopBlockSet);
1990
- if ((Gains > 0 ) && (Gains > BestGains ||
1991
- ((Gains == BestGains) && Pred->isLayoutSuccessor (OldTop)))) {
1992
- BestPred = Pred;
1993
- BestGains = Gains;
1987
+ if (HasStaticProfileOnly) {
1988
+ // In plain mode we consider pattern 1 only.
1989
+ if (Pred->succ_size () > 1 )
1990
+ continue ;
1991
+
1992
+ BlockFrequency PredFreq = MBFI->getBlockFreq (Pred);
1993
+ if (!BestPred || PredFreq > BestGains ||
1994
+ (!(PredFreq < BestGains) &&
1995
+ Pred->isLayoutSuccessor (OldTop))) {
1996
+ BestPred = Pred;
1997
+ BestGains = PredFreq;
1998
+ }
1999
+ } else {
2000
+ // With profile information we also consider pattern 2.
2001
+ MachineBasicBlock *OtherBB = nullptr ;
2002
+ if (Pred->succ_size () == 2 ) {
2003
+ OtherBB = *Pred->succ_begin ();
2004
+ if (OtherBB == OldTop)
2005
+ OtherBB = *Pred->succ_rbegin ();
2006
+ }
2007
+
2008
+ // And more sophisticated cost model.
2009
+ BlockFrequency Gains = FallThroughGains (Pred, OldTop, OtherBB,
2010
+ LoopBlockSet);
2011
+ if ((Gains > 0 ) && (Gains > BestGains ||
2012
+ ((Gains == BestGains) && Pred->isLayoutSuccessor (OldTop)))) {
2013
+ BestPred = Pred;
2014
+ BestGains = Gains;
2015
+ }
1994
2016
}
1995
2017
}
1996
2018
@@ -2010,7 +2032,7 @@ MachineBlockPlacement::findBestLoopTopHelper(
2010
2032
return BestPred;
2011
2033
}
2012
2034
2013
- // / Find the best loop top block for layout.
2035
+ // / Find the best loop top block for layout in FDO mode.
2014
2036
// /
2015
2037
// / This function iteratively calls findBestLoopTopHelper, until no new better
2016
2038
// / BB can be found.
@@ -2038,15 +2060,42 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
2038
2060
return NewTop;
2039
2061
}
2040
2062
2063
+ // / Find the best loop top block for layout in plain mode. It is less aggressive
2064
+ // / than findBestLoopTop.
2065
+ // /
2066
+ // / Look for a block which is strictly better than the loop header for laying
2067
+ // / out at the top of the loop. This looks for one and only one pattern:
2068
+ // / a latch block with no conditional exit. This block will cause a conditional
2069
+ // / jump around it or will be the bottom of the loop if we lay it out in place,
2070
+ // / but if it doesn't end up at the bottom of the loop for any reason,
2071
+ // / rotation alone won't fix it. Because such a block will always result in an
2072
+ // / unconditional jump (for the backedge), rotating it in front of the loop
2073
+ // / header is always profitable.
2074
+ MachineBasicBlock *
2075
+ MachineBlockPlacement::findBestLoopTopNoProfile (
2076
+ const MachineLoop &L,
2077
+ const BlockFilterSet &LoopBlockSet) {
2078
+ // Placing the latch block before the header may introduce an extra branch
2079
+ // that skips this block the first time the loop is executed, which we want
2080
+ // to avoid when optimising for size.
2081
+ // FIXME: in theory there is a case that does not introduce a new branch,
2082
+ // i.e. when the layout predecessor does not fallthrough to the loop header.
2083
+ // In practice this never happens though: there always seems to be a preheader
2084
+ // that can fallthrough and that is also placed before the header.
2085
+ if (F->getFunction ().hasOptSize ())
2086
+ return L.getHeader (); // Size-optimised functions keep the header as the loop top.
2087
+
2088
+ return findBestLoopTopHelper (L.getHeader (), L, LoopBlockSet, true ); // true == HasStaticProfileOnly: the helper then skips predecessors with more than one successor.
2089
+ }
2090
+
2041
2091
// / Find the best loop exiting block for layout.
2042
2092
// /
2043
2093
// / This routine implements the logic to analyze the loop looking for the best
2044
2094
// / block to layout at the top of the loop. Typically this is done to maximize
2045
2095
// / fallthrough opportunities.
2046
2096
MachineBasicBlock *
2047
2097
MachineBlockPlacement::findBestLoopExit (const MachineLoop &L,
2048
- const BlockFilterSet &LoopBlockSet,
2049
- BlockFrequency &ExitFreq) {
2098
+ const BlockFilterSet &LoopBlockSet) {
2050
2099
// We don't want to layout the loop linearly in all cases. If the loop header
2051
2100
// is just a normal basic block in the loop, we want to look for what block
2052
2101
// within the loop is the best one to layout at the top. However, if the loop
@@ -2157,7 +2206,6 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
2157
2206
2158
2207
LLVM_DEBUG (dbgs () << " Best exiting block: " << getBlockName (ExitingBB)
2159
2208
<< " \n " );
2160
- ExitFreq = BestExitEdgeFreq;
2161
2209
return ExitingBB;
2162
2210
}
2163
2211
@@ -2202,7 +2250,6 @@ MachineBlockPlacement::hasViableTopFallthrough(
2202
2250
// / of its bottom already, don't rotate it.
2203
2251
void MachineBlockPlacement::rotateLoop (BlockChain &LoopChain,
2204
2252
const MachineBasicBlock *ExitingBB,
2205
- BlockFrequency ExitFreq,
2206
2253
const BlockFilterSet &LoopBlockSet) {
2207
2254
if (!ExitingBB)
2208
2255
return ;
@@ -2226,12 +2273,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2226
2273
(!SuccChain || Succ == *SuccChain->begin ()))
2227
2274
return ;
2228
2275
}
2229
-
2230
- // Rotate will destroy the top fallthrough, we need to ensure the new exit
2231
- // frequency is larger than top fallthrough.
2232
- BlockFrequency FallThrough2Top = TopFallThroughFreq (Top, LoopBlockSet);
2233
- if (FallThrough2Top >= ExitFreq)
2234
- return ;
2235
2276
}
2236
2277
2237
2278
BlockChain::iterator ExitIt = llvm::find (LoopChain, ExitingBB);
@@ -2483,7 +2524,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2483
2524
// loop. This will default to the header, but may end up as one of the
2484
2525
// predecessors to the header if there is one which will result in strictly
2485
2526
// fewer branches in the loop body.
2486
- MachineBasicBlock *LoopTop = findBestLoopTop (L, LoopBlockSet);
2527
+ MachineBasicBlock *LoopTop =
2528
+ (RotateLoopWithProfile || F->getFunction ().hasProfileData ()) ?
2529
+ findBestLoopTop (L, LoopBlockSet) :
2530
+ findBestLoopTopNoProfile (L, LoopBlockSet);
2487
2531
2488
2532
// If we selected just the header for the loop top, look for a potentially
2489
2533
// profitable exit block in the event that rotating the loop can eliminate
@@ -2492,9 +2536,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2492
2536
// Loops are processed innermost to outermost, make sure we clear
2493
2537
// PreferredLoopExit before processing a new loop.
2494
2538
PreferredLoopExit = nullptr ;
2495
- BlockFrequency ExitFreq;
2496
2539
if (!RotateLoopWithProfile && LoopTop == L.getHeader ())
2497
- PreferredLoopExit = findBestLoopExit (L, LoopBlockSet, ExitFreq );
2540
+ PreferredLoopExit = findBestLoopExit (L, LoopBlockSet);
2498
2541
2499
2542
BlockChain &LoopChain = *BlockToChain[LoopTop];
2500
2543
@@ -2511,10 +2554,11 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
2511
2554
2512
2555
buildChain (LoopTop, LoopChain, &LoopBlockSet);
2513
2556
2514
- if (RotateLoopWithProfile)
2515
- rotateLoopWithProfile (LoopChain, L, LoopBlockSet);
2516
- else
2517
- rotateLoop (LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2557
+ if (RotateLoopWithProfile) {
2558
+ if (LoopTop == L.getHeader ())
2559
+ rotateLoopWithProfile (LoopChain, L, LoopBlockSet);
2560
+ } else
2561
+ rotateLoop (LoopChain, PreferredLoopExit, LoopBlockSet);
2518
2562
2519
2563
LLVM_DEBUG ({
2520
2564
// Crash at the end so we get all of the debugging output first.
0 commit comments