Skip to content

Commit d1b462d

Browse files
committed
merge main into amd-staging
Change-Id: I733dacdbd8cb191d9fe70f579cf440b45f5e2af7
2 parents 4e2fa1a + 38eea57 commit d1b462d

File tree

129 files changed

+9107
-741
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

129 files changed

+9107
-741
lines changed

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,12 @@ struct SplitCacheDirected final : public SplitStrategy {
175175
void fragment(const BlockIt Start, const BlockIt End) override {
176176
BasicBlockOrder BlockOrder(Start, End);
177177
BinaryFunction &BF = *BlockOrder.front()->getFunction();
178+
// No need to re-split small functions.
179+
if (BlockOrder.size() <= 2)
180+
return;
178181

179182
size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
183+
assert(BestSplitIndex < BlockOrder.size());
180184

181185
// Assign fragments based on the computed best split index.
182186
// All basic blocks with index up to the best split index become hot.
@@ -200,10 +204,12 @@ struct SplitCacheDirected final : public SplitStrategy {
200204
};
201205

202206
struct SplitScore {
203-
size_t SplitIndex;
207+
size_t SplitIndex = size_t(-1);
204208
size_t HotSizeReduction = 0;
205209
double LocalScore = 0;
206210
double CoverCallScore = 0;
211+
212+
double sum() const { return LocalScore + CoverCallScore; }
207213
};
208214

209215
// Auxiliary variables used by the algorithm.
@@ -303,7 +309,7 @@ struct SplitCacheDirected final : public SplitStrategy {
303309
const size_t SplitIndex) {
304310
assert(SplitIndex < BlockOrder.size() && "Invalid split index");
305311

306-
// Update function layout assuming hot-warm splitting at SplitIndex
312+
// Update function layout assuming hot-warm splitting at SplitIndex.
307313
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
308314
BinaryBasicBlock *BB = BlockOrder[Index];
309315
if (BB->getFragmentNum() == FragmentNum::cold())
@@ -319,8 +325,8 @@ struct SplitCacheDirected final : public SplitStrategy {
319325
// Populate BB.OutputAddressRange with estimated new start and end addresses
320326
// and compute the old end address of the hot section and the new end
321327
// address of the hot section.
322-
size_t OldHotEndAddr;
323-
size_t NewHotEndAddr;
328+
size_t OldHotEndAddr{0};
329+
size_t NewHotEndAddr{0};
324330
size_t CurrentAddr = BBOffsets[BlockOrder[0]];
325331
for (BinaryBasicBlock *BB : BlockOrder) {
326332
// We only care about new addresses of blocks in hot/warm.
@@ -492,20 +498,15 @@ struct SplitCacheDirected final : public SplitStrategy {
492498
}
493499

494500
/// Compute the split score of splitting a function at a given index.
495-
/// The split score consists of local score and cover score. Cover call score
496-
/// is expensive to compute. As a result, we pass in a \p ReferenceScore and
497-
/// compute cover score only when the local score exceeds that in the
498-
/// ReferenceScore or that the size reduction of the hot fragment is larger
499-
/// than that achieved by the split index of the ReferenceScore. This function
500-
/// returns \p Score of SplitScore type. It contains the local score and cover
501-
/// score (if computed) of the current splitting index. For easier book
502-
/// keeping and comparison, it also stores the split index and the resulting
503-
/// reduction in hot fragment size.
501+
/// The split score consists of local score and cover score. This function
502+
/// returns \p Score of SplitScore type. It contains the local score and
503+
/// cover score of the current splitting index. For easier book keeping and
504+
/// comparison, it also stores the split index and the resulting reduction
505+
/// in hot fragment size.
504506
SplitScore computeSplitScore(const BinaryFunction &BF,
505507
const BasicBlockOrder &BlockOrder,
506508
const size_t SplitIndex,
507-
const std::vector<CallInfo> &CoverCalls,
508-
const SplitScore &ReferenceScore) {
509+
const std::vector<CallInfo> &CoverCalls) {
509510
// Populate BinaryBasicBlock::OutputAddressRange with estimated
510511
// new start and end addresses after hot-warm splitting at SplitIndex.
511512
size_t OldHotEnd;
@@ -533,47 +534,74 @@ struct SplitCacheDirected final : public SplitStrategy {
533534
// increamented in place.
534535
computeJumpScore(BlockOrder, SplitIndex, Score);
535536

536-
// There is no need to compute CoverCallScore if we have already found
537-
// another split index with a bigger LocalScore and bigger HotSizeReduction.
538-
if (Score.LocalScore <= ReferenceScore.LocalScore &&
539-
Score.HotSizeReduction <= ReferenceScore.HotSizeReduction)
540-
return Score;
541-
542537
// Compute CoverCallScore and store in Score in place.
543538
computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
544539
return Score;
545540
}
546541

542+
/// Find the most likely successor of a basic block when it has one or two
543+
/// successors. Return nullptr otherwise.
544+
const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
545+
if (BB->succ_size() == 1)
546+
return BB->getSuccessor();
547+
if (BB->succ_size() == 2) {
548+
uint64_t TakenCount = BB->getTakenBranchInfo().Count;
549+
assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
550+
uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
551+
assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
552+
if (TakenCount > NonTakenCount)
553+
return BB->getConditionalSuccessor(true);
554+
else if (TakenCount < NonTakenCount)
555+
return BB->getConditionalSuccessor(false);
556+
}
557+
return nullptr;
558+
}
559+
547560
/// Find the best index for splitting. The returned value is the index of the
548561
/// last hot basic block. Hence, "no splitting" is equivalent to returning the
549562
/// value which is one less than the size of the function.
550563
size_t findSplitIndex(const BinaryFunction &BF,
551564
const BasicBlockOrder &BlockOrder) {
565+
assert(BlockOrder.size() > 2);
552566
// Find all function calls that can be shortened if we move blocks of the
553567
// current function to warm/cold
554568
const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);
555569

556-
// Try all possible split indices (blocks with Index <= SplitIndex are in
557-
// hot) and find the one maximizing the splitting score.
570+
// Find the existing hot-cold splitting index.
571+
size_t HotColdIndex = 0;
572+
while (HotColdIndex + 1 < BlockOrder.size()) {
573+
if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
574+
break;
575+
HotColdIndex++;
576+
}
577+
assert(HotColdIndex + 1 == BlockOrder.size() ||
578+
(BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
579+
BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
580+
FragmentNum::cold()));
581+
582+
// Try all possible split indices up to HotColdIndex (blocks that have
583+
// Index <= SplitIndex are in hot) and find the one maximizing the
584+
// splitting score.
558585
SplitScore BestScore;
559-
double BestScoreSum = -1.0;
560-
SplitScore ReferenceScore;
561-
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
586+
for (size_t Index = 0; Index <= HotColdIndex; Index++) {
562587
const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
563-
// No need to keep cold blocks in the hot section.
564-
if (LastHotBB->getFragmentNum() == FragmentNum::cold())
565-
break;
588+
assert(LastHotBB->getFragmentNum() != FragmentNum::cold());
589+
590+
// Do not break jump to the most likely successor.
591+
if (Index + 1 < BlockOrder.size() &&
592+
BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
593+
continue;
594+
566595
const SplitScore Score =
567-
computeSplitScore(BF, BlockOrder, Index, CoverCalls, ReferenceScore);
568-
double ScoreSum = Score.LocalScore + Score.CoverCallScore;
569-
if (ScoreSum > BestScoreSum) {
570-
BestScoreSum = ScoreSum;
596+
computeSplitScore(BF, BlockOrder, Index, CoverCalls);
597+
if (Score.sum() > BestScore.sum())
571598
BestScore = Score;
572-
}
573-
if (Score.LocalScore > ReferenceScore.LocalScore)
574-
ReferenceScore = Score;
575599
}
576600

601+
// If we don't find a good splitting point, fallback to the original one.
602+
if (BestScore.SplitIndex == size_t(-1))
603+
return HotColdIndex;
604+
577605
return BestScore.SplitIndex;
578606
}
579607
};

bolt/test/X86/cdsplit-call-scale.s

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
# When -call-scale=0.0, the tested function is 2-way splitted.
33
# When -call-scale=1.0, the tested function is 3-way splitted with 5 blocks
44
# in warm because of the increased benefit of shortening the call edges.
5-
# When -call-scale=1000.0, the tested function is 3-way splitted with 7 blocks
6-
# in warm because of the strong benefit of shortening the call edges.
5+
# When -call-scale=1000.0, the tested function is still 3-way splitted with
6+
# 5 blocks in warm because cdsplit does not allow hot-warm splitting to break
7+
# a fall through branch from a basic block to its most likely successor.
78

89
# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
910
# RUN: link_fdata %s %t.o %t.fdata
@@ -39,12 +40,10 @@
3940
# MEDINCENTIVE: {{^\.Ltmp5}}
4041

4142
# HIGHINCENTIVE: Binary Function "chain" after split-functions
42-
# HIGHINCENTIVE: {{^\.LBB00}}
43+
# HIGHINCENTIVE: {{^\.Ltmp1}}
4344
# HIGHINCENTIVE: ------- HOT-COLD SPLIT POINT -------
4445
# HIGHINCENTIVE: {{^\.LFT1}}
4546
# HIGHINCENTIVE: ------- HOT-COLD SPLIT POINT -------
46-
# HIGHINCENTIVE: {{^\.LFT0}}
47-
# HIGHINCENTIVE: {{^\.Ltmp1}}
4847
# HIGHINCENTIVE: {{^\.Ltmp0}}
4948
# HIGHINCENTIVE: {{^\.Ltmp2}}
5049
# HIGHINCENTIVE: {{^\.Ltmp3}}

clang/docs/ControlFlowIntegrityDesign.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ address point. Note that libraries like libcxxabi do assume this property.
349349

350350
(2) virtual function entry layout property
351351

352-
For each virtual function the distance between an virtual table entry for this function and the corresponding
352+
For each virtual function the distance between a virtual table entry for this function and the corresponding
353353
address point is always the same. This property ensures that dynamic dispatch still works with the interleaving layout.
354354

355355
Note that the interleaving scheme in the CFI implementation guarantees both properties above whereas the original scheme proposed

clang/docs/LanguageExtensions.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,7 +2019,7 @@ would be +1. ``ns_returns_autoreleased`` specifies that the returned object is
20192019
autorelease pool.
20202020
20212021
**Usage**: The ``ns_consumed`` and ``cf_consumed`` attributes can be placed on
2022-
an parameter declaration; they specify that the argument is expected to have a
2022+
a parameter declaration; they specify that the argument is expected to have a
20232023
+1 retain count, which will be balanced in some way by the function or method.
20242024
The ``ns_consumes_self`` attribute can only be placed on an Objective-C
20252025
method; it specifies that the method expects its ``self`` parameter to have a
@@ -3601,7 +3601,7 @@ scalar calls of ``__builtin_isfpclass`` applied to the input elementwise.
36013601
The result of ``__builtin_isfpclass`` is a boolean value, if the first argument
36023602
is a scalar, or an integer vector with the same element count as the first
36033603
argument. The element type in this vector has the same bit length as the
3604-
element of the the first argument type.
3604+
element of the first argument type.
36053605
36063606
This function never raises floating-point exceptions and does not canonicalize
36073607
its input. The floating-point argument is not promoted, its data class is
@@ -4959,7 +4959,7 @@ Clang supports the following match rules:
49594959
- ``record(unless(is_union))``: Can be used to apply attributes only to
49604960
``struct`` and ``class`` declarations.
49614961
4962-
- ``enum``: Can be be used to apply attributes to enumeration declarations.
4962+
- ``enum``: Can be used to apply attributes to enumeration declarations.
49634963
49644964
- ``enum_constant``: Can be used to apply attributes to enumerators.
49654965

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ New Compiler Flags
311311
the preprocessed text to the output. This can greatly reduce the size of the
312312
preprocessed output, which can be helpful when trying to reduce a test case.
313313
* ``-fassume-nothrow-exception-dtor`` is added to assume that the destructor of
314-
an thrown exception object will not throw. The generated code for catch
314+
a thrown exception object will not throw. The generated code for catch
315315
handlers will be smaller. A throw expression of a type with a
316316
potentially-throwing destructor will lead to an error.
317317

clang/docs/SanitizerCoverage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ offsets in the corresponding binary/DSO that were executed during the run.
496496
Sancov Tool
497497
-----------
498498

499-
An simple ``sancov`` tool is provided to process coverage files.
499+
A simple ``sancov`` tool is provided to process coverage files.
500500
The tool is part of LLVM project and is currently supported only on Linux.
501501
It can handle symbolization tasks autonomously without any extra support
502502
from the environment. You need to pass .sancov files (named

0 commit comments

Comments
 (0)