Skip to content

Commit e17dd94

Browse files
committed
Merge remote-tracking branch 'origin/main' into fastra
2 parents 26921ec + 033ec09 commit e17dd94

File tree

485 files changed

+29965
-9063
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

485 files changed

+29965
-9063
lines changed

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,11 @@ void DIEBuilder::buildCompileUnits(const bool Init) {
266266
}
267267
void DIEBuilder::buildCompileUnits(const std::vector<DWARFUnit *> &CUs) {
268268
BuilderState.reset(new State());
269-
// Initializing to full size because there could be cross CU references with
270-
// different abbrev offsets. LLVM happens to output CUs that have cross CU
271-
// references with the same abbrev table. So destinations end up in the first
272-
// set, even if they themselves don't have src cross cu ref. We could have
273-
// cases where this is not the case. In which case this container needs to be
274-
// big enough for all.
275-
getState().CloneUnitCtxMap.resize(DwarfContext->getNumCompileUnits());
269+
// Allocating enough for current batch being processed.
270+
// In real use cases we are either processing a batch of CUs with no cross
271+
// references, or if they do have them it is due to LTO. With clang they will
272+
// share the same abbrev table. In either case this vector will not grow.
273+
getState().CloneUnitCtxMap.resize(CUs.size());
276274
getState().Type = ProcessingType::CUs;
277275
for (DWARFUnit *CU : CUs)
278276
registerUnit(*CU, false);
@@ -897,6 +895,10 @@ void DIEBuilder::registerUnit(DWARFUnit &DU, bool NeedSort) {
897895
});
898896
}
899897
getState().UnitIDMap[getHash(DU)] = getState().DUList.size();
898+
// This handles the case where we do have cross cu references, but CUs do not
899+
// share the same abbrev table.
900+
if (getState().DUList.size() == getState().CloneUnitCtxMap.size())
901+
getState().CloneUnitCtxMap.emplace_back();
900902
getState().DUList.push_back(&DU);
901903
}
902904

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,12 @@ struct SplitCacheDirected final : public SplitStrategy {
175175
void fragment(const BlockIt Start, const BlockIt End) override {
176176
BasicBlockOrder BlockOrder(Start, End);
177177
BinaryFunction &BF = *BlockOrder.front()->getFunction();
178+
// No need to re-split small functions.
179+
if (BlockOrder.size() <= 2)
180+
return;
178181

179182
size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
183+
assert(BestSplitIndex < BlockOrder.size());
180184

181185
// Assign fragments based on the computed best split index.
182186
// All basic blocks with index up to the best split index become hot.
@@ -200,10 +204,12 @@ struct SplitCacheDirected final : public SplitStrategy {
200204
};
201205

202206
struct SplitScore {
203-
size_t SplitIndex;
207+
size_t SplitIndex = size_t(-1);
204208
size_t HotSizeReduction = 0;
205209
double LocalScore = 0;
206210
double CoverCallScore = 0;
211+
212+
double sum() const { return LocalScore + CoverCallScore; }
207213
};
208214

209215
// Auxiliary variables used by the algorithm.
@@ -303,7 +309,7 @@ struct SplitCacheDirected final : public SplitStrategy {
303309
const size_t SplitIndex) {
304310
assert(SplitIndex < BlockOrder.size() && "Invalid split index");
305311

306-
// Update function layout assuming hot-warm splitting at SplitIndex
312+
// Update function layout assuming hot-warm splitting at SplitIndex.
307313
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
308314
BinaryBasicBlock *BB = BlockOrder[Index];
309315
if (BB->getFragmentNum() == FragmentNum::cold())
@@ -319,8 +325,8 @@ struct SplitCacheDirected final : public SplitStrategy {
319325
// Populate BB.OutputAddressRange with estimated new start and end addresses
320326
// and compute the old end address of the hot section and the new end
321327
// address of the hot section.
322-
size_t OldHotEndAddr;
323-
size_t NewHotEndAddr;
328+
size_t OldHotEndAddr{0};
329+
size_t NewHotEndAddr{0};
324330
size_t CurrentAddr = BBOffsets[BlockOrder[0]];
325331
for (BinaryBasicBlock *BB : BlockOrder) {
326332
// We only care about new addresses of blocks in hot/warm.
@@ -492,20 +498,15 @@ struct SplitCacheDirected final : public SplitStrategy {
492498
}
493499

494500
/// Compute the split score of splitting a function at a given index.
495-
/// The split score consists of local score and cover score. Cover call score
496-
/// is expensive to compute. As a result, we pass in a \p ReferenceScore and
497-
/// compute cover score only when the local score exceeds that in the
498-
/// ReferenceScore or that the size reduction of the hot fragment is larger
499-
/// than that achieved by the split index of the ReferenceScore. This function
500-
/// returns \p Score of SplitScore type. It contains the local score and cover
501-
/// score (if computed) of the current splitting index. For easier book
502-
/// keeping and comparison, it also stores the split index and the resulting
503-
/// reduction in hot fragment size.
501+
/// The split score consists of local score and cover score. This function
502+
/// returns \p Score of SplitScore type. It contains the local score and
503+
/// cover score of the current splitting index. For easier book keeping and
504+
/// comparison, it also stores the split index and the resulting reduction
505+
/// in hot fragment size.
504506
SplitScore computeSplitScore(const BinaryFunction &BF,
505507
const BasicBlockOrder &BlockOrder,
506508
const size_t SplitIndex,
507-
const std::vector<CallInfo> &CoverCalls,
508-
const SplitScore &ReferenceScore) {
509+
const std::vector<CallInfo> &CoverCalls) {
509510
// Populate BinaryBasicBlock::OutputAddressRange with estimated
510511
// new start and end addresses after hot-warm splitting at SplitIndex.
511512
size_t OldHotEnd;
@@ -533,47 +534,74 @@ struct SplitCacheDirected final : public SplitStrategy {
533534
// incremented in place.
534535
computeJumpScore(BlockOrder, SplitIndex, Score);
535536

536-
// There is no need to compute CoverCallScore if we have already found
537-
// another split index with a bigger LocalScore and bigger HotSizeReduction.
538-
if (Score.LocalScore <= ReferenceScore.LocalScore &&
539-
Score.HotSizeReduction <= ReferenceScore.HotSizeReduction)
540-
return Score;
541-
542537
// Compute CoverCallScore and store in Score in place.
543538
computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
544539
return Score;
545540
}
546541

542+
/// Find the most likely successor of a basic block when it has one or two
543+
/// successors. Return nullptr otherwise.
544+
const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
545+
if (BB->succ_size() == 1)
546+
return BB->getSuccessor();
547+
if (BB->succ_size() == 2) {
548+
uint64_t TakenCount = BB->getTakenBranchInfo().Count;
549+
assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
550+
uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
551+
assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
552+
if (TakenCount > NonTakenCount)
553+
return BB->getConditionalSuccessor(true);
554+
else if (TakenCount < NonTakenCount)
555+
return BB->getConditionalSuccessor(false);
556+
}
557+
return nullptr;
558+
}
559+
547560
/// Find the best index for splitting. The returned value is the index of the
548561
/// last hot basic block. Hence, "no splitting" is equivalent to returning the
549562
/// value which is one less than the size of the function.
550563
size_t findSplitIndex(const BinaryFunction &BF,
551564
const BasicBlockOrder &BlockOrder) {
565+
assert(BlockOrder.size() > 2);
552566
// Find all function calls that can be shortened if we move blocks of the
553567
// current function to warm/cold
554568
const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);
555569

556-
// Try all possible split indices (blocks with Index <= SplitIndex are in
557-
// hot) and find the one maximizing the splitting score.
570+
// Find the existing hot-cold splitting index.
571+
size_t HotColdIndex = 0;
572+
while (HotColdIndex + 1 < BlockOrder.size()) {
573+
if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
574+
break;
575+
HotColdIndex++;
576+
}
577+
assert(HotColdIndex + 1 == BlockOrder.size() ||
578+
(BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
579+
BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
580+
FragmentNum::cold()));
581+
582+
// Try all possible split indices up to HotColdIndex (blocks that have
583+
// Index <= SplitIndex are in hot) and find the one maximizing the
584+
// splitting score.
558585
SplitScore BestScore;
559-
double BestScoreSum = -1.0;
560-
SplitScore ReferenceScore;
561-
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
586+
for (size_t Index = 0; Index <= HotColdIndex; Index++) {
562587
const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
563-
// No need to keep cold blocks in the hot section.
564-
if (LastHotBB->getFragmentNum() == FragmentNum::cold())
565-
break;
588+
assert(LastHotBB->getFragmentNum() != FragmentNum::cold());
589+
590+
// Do not break jump to the most likely successor.
591+
if (Index + 1 < BlockOrder.size() &&
592+
BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
593+
continue;
594+
566595
const SplitScore Score =
567-
computeSplitScore(BF, BlockOrder, Index, CoverCalls, ReferenceScore);
568-
double ScoreSum = Score.LocalScore + Score.CoverCallScore;
569-
if (ScoreSum > BestScoreSum) {
570-
BestScoreSum = ScoreSum;
596+
computeSplitScore(BF, BlockOrder, Index, CoverCalls);
597+
if (Score.sum() > BestScore.sum())
571598
BestScore = Score;
572-
}
573-
if (Score.LocalScore > ReferenceScore.LocalScore)
574-
ReferenceScore = Score;
575599
}
576600

601+
// If we don't find a good splitting point, fallback to the original one.
602+
if (BestScore.SplitIndex == size_t(-1))
603+
return HotColdIndex;
604+
577605
return BestScore.SplitIndex;
578606
}
579607
};

bolt/test/X86/cdsplit-call-scale.s

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
# When -call-scale=0.0, the tested function is 2-way split.
33
# When -call-scale=1.0, the tested function is 3-way split with 5 blocks
44
# in warm because of the increased benefit of shortening the call edges.
5-
# When -call-scale=1000.0, the tested function is 3-way splitted with 7 blocks
6-
# in warm because of the strong benefit of shortening the call edges.
5+
# When -call-scale=1000.0, the tested function is still 3-way split with
6+
# 5 blocks in warm because cdsplit does not allow hot-warm splitting to break
7+
# a fall through branch from a basic block to its most likely successor.
78

89
# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
910
# RUN: link_fdata %s %t.o %t.fdata
@@ -39,12 +40,10 @@
3940
# MEDINCENTIVE: {{^\.Ltmp5}}
4041

4142
# HIGHINCENTIVE: Binary Function "chain" after split-functions
42-
# HIGHINCENTIVE: {{^\.LBB00}}
43+
# HIGHINCENTIVE: {{^\.Ltmp1}}
4344
# HIGHINCENTIVE: ------- HOT-COLD SPLIT POINT -------
4445
# HIGHINCENTIVE: {{^\.LFT1}}
4546
# HIGHINCENTIVE: ------- HOT-COLD SPLIT POINT -------
46-
# HIGHINCENTIVE: {{^\.LFT0}}
47-
# HIGHINCENTIVE: {{^\.Ltmp1}}
4847
# HIGHINCENTIVE: {{^\.Ltmp0}}
4948
# HIGHINCENTIVE: {{^\.Ltmp2}}
5049
# HIGHINCENTIVE: {{^\.Ltmp3}}

clang/docs/ControlFlowIntegrityDesign.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ address point. Note that libraries like libcxxabi do assume this property.
349349

350350
(2) virtual function entry layout property
351351

352-
For each virtual function the distance between an virtual table entry for this function and the corresponding
352+
For each virtual function the distance between a virtual table entry for this function and the corresponding
353353
address point is always the same. This property ensures that dynamic dispatch still works with the interleaving layout.
354354

355355
Note that the interleaving scheme in the CFI implementation guarantees both properties above whereas the original scheme proposed

clang/docs/LanguageExtensions.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,7 +2019,7 @@ would be +1. ``ns_returns_autoreleased`` specifies that the returned object is
20192019
autorelease pool.
20202020
20212021
**Usage**: The ``ns_consumed`` and ``cf_consumed`` attributes can be placed on
2022-
an parameter declaration; they specify that the argument is expected to have a
2022+
a parameter declaration; they specify that the argument is expected to have a
20232023
+1 retain count, which will be balanced in some way by the function or method.
20242024
The ``ns_consumes_self`` attribute can only be placed on an Objective-C
20252025
method; it specifies that the method expects its ``self`` parameter to have a
@@ -3601,7 +3601,7 @@ scalar calls of ``__builtin_isfpclass`` applied to the input elementwise.
36013601
The result of ``__builtin_isfpclass`` is a boolean value, if the first argument
36023602
is a scalar, or an integer vector with the same element count as the first
36033603
argument. The element type in this vector has the same bit length as the
3604-
element of the the first argument type.
3604+
element of the first argument type.
36053605
36063606
This function never raises floating-point exceptions and does not canonicalize
36073607
its input. The floating-point argument is not promoted, its data class is
@@ -4959,7 +4959,7 @@ Clang supports the following match rules:
49594959
- ``record(unless(is_union))``: Can be used to apply attributes only to
49604960
``struct`` and ``class`` declarations.
49614961
4962-
- ``enum``: Can be be used to apply attributes to enumeration declarations.
4962+
- ``enum``: Can be used to apply attributes to enumeration declarations.
49634963
49644964
- ``enum_constant``: Can be used to apply attributes to enumerators.
49654965

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ New Compiler Flags
253253
the preprocessed text to the output. This can greatly reduce the size of the
254254
preprocessed output, which can be helpful when trying to reduce a test case.
255255
* ``-fassume-nothrow-exception-dtor`` is added to assume that the destructor of
256-
an thrown exception object will not throw. The generated code for catch
256+
a thrown exception object will not throw. The generated code for catch
257257
handlers will be smaller. A throw expression of a type with a
258258
potentially-throwing destructor will lead to an error.
259259

clang/docs/SanitizerCoverage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ offsets in the corresponding binary/DSO that were executed during the run.
496496
Sancov Tool
497497
-----------
498498

499-
An simple ``sancov`` tool is provided to process coverage files.
499+
A simple ``sancov`` tool is provided to process coverage files.
500500
The tool is part of LLVM project and is currently supported only on Linux.
501501
It can handle symbolization tasks autonomously without any extra support
502502
from the environment. You need to pass .sancov files (named

clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,22 @@ class Environment {
289289
/// `E` must be a glvalue or a `BuiltinType::BuiltinFn`
290290
StorageLocation *getStorageLocation(const Expr &E) const;
291291

292+
/// Returns the result of casting `getStorageLocation(...)` to a subclass of
293+
/// `StorageLocation` (using `cast_or_null<T>`).
294+
/// This assert-fails if the result of `getStorageLocation(...)` is not of
295+
/// type `T *`; if the storage location is not guaranteed to have type `T *`,
296+
/// consider using `dyn_cast_or_null<T>(getStorageLocation(...))` instead.
297+
template <typename T>
298+
std::enable_if_t<std::is_base_of_v<StorageLocation, T>, T *>
299+
get(const ValueDecl &D) const {
300+
return cast_or_null<T>(getStorageLocation(D));
301+
}
302+
template <typename T>
303+
std::enable_if_t<std::is_base_of_v<StorageLocation, T>, T *>
304+
get(const Expr &E) const {
305+
return cast_or_null<T>(getStorageLocation(E));
306+
}
307+
292308
/// Returns the storage location assigned to the `this` pointee in the
293309
/// environment or null if the `this` pointee has no assigned storage location
294310
/// in the environment.
@@ -457,6 +473,26 @@ class Environment {
457473
/// storage location in the environment, otherwise returns null.
458474
Value *getValue(const Expr &E) const;
459475

476+
/// Returns the result of casting `getValue(...)` to a subclass of `Value`
477+
/// (using `cast_or_null<T>`).
478+
/// This assert-fails if the result of `getValue(...)` is not of type `T *`;
479+
/// if the value is not guaranteed to have type `T *`, consider using
480+
/// `dyn_cast_or_null<T>(getValue(...))` instead.
481+
template <typename T>
482+
std::enable_if_t<std::is_base_of_v<Value, T>, T *>
483+
get(const StorageLocation &Loc) const {
484+
return cast_or_null<T>(getValue(Loc));
485+
}
486+
template <typename T>
487+
std::enable_if_t<std::is_base_of_v<Value, T>, T *>
488+
get(const ValueDecl &D) const {
489+
return cast_or_null<T>(getValue(D));
490+
}
491+
template <typename T>
492+
std::enable_if_t<std::is_base_of_v<Value, T>, T *> get(const Expr &E) const {
493+
return cast_or_null<T>(getValue(E));
494+
}
495+
460496
// FIXME: should we deprecate the following & call arena().create() directly?
461497

462498
/// Creates a `T` (some subclass of `Value`), forwarding `args` to the
@@ -691,20 +727,9 @@ RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
691727
std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD);
692728

693729
/// Associates a new `RecordValue` with `Loc` and returns the new value.
694-
/// It is not defined whether the field values remain the same or not.
695-
///
696-
/// This function is primarily intended for use by checks that set custom
697-
/// properties on `RecordValue`s to model the state of these values. Such checks
698-
/// should avoid modifying the properties of an existing `RecordValue` because
699-
/// these changes would be visible to other `Environment`s that share the same
700-
/// `RecordValue`. Instead, call `refreshRecordValue()`, then set the properties
701-
/// on the new `RecordValue` that it returns. Typical usage:
702-
///
703-
/// refreshRecordValue(Loc, Env).setProperty("my_prop", MyPropValue);
704730
RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env);
705731

706732
/// Associates a new `RecordValue` with `Expr` and returns the new value.
707-
/// See also documentation for the overload above.
708733
RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env);
709734

710735
} // namespace dataflow

clang/include/clang/Analysis/FlowSensitive/RecordOps.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,13 @@ namespace dataflow {
2222
/// Copies a record (struct, class, or union) from `Src` to `Dst`.
2323
///
2424
/// This performs a deep copy, i.e. it copies every field (including synthetic
25-
/// fields) and recurses on fields of record type. It also copies properties
26-
/// from the `RecordValue` associated with `Src` to the `RecordValue` associated
27-
/// with `Dst` (if these `RecordValue`s exist).
25+
/// fields) and recurses on fields of record type.
2826
///
2927
/// If there is a `RecordValue` associated with `Dst` in the environment, this
3028
/// function creates a new `RecordValue` and associates it with `Dst`; clients
3129
/// need to be aware of this and must not assume that the `RecordValue`
3230
/// associated with `Dst` remains the same after the call.
3331
///
34-
/// We create a new `RecordValue` rather than modifying properties on the old
35-
/// `RecordValue` because the old `RecordValue` may be shared with other
36-
/// `Environment`s, and we don't want changes to properties to be visible there.
37-
///
3832
/// Requirements:
3933
///
4034
/// `Src` and `Dst` must have the same canonical unqualified type.
@@ -49,9 +43,7 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst,
4943
///
5044
/// This performs a deep comparison, i.e. it compares every field (including
5145
/// synthetic fields) and recurses on fields of record type. Fields of reference
52-
/// type compare equal if they refer to the same storage location. If
53-
/// `RecordValue`s are associated with `Loc1` and Loc2`, it also compares the
54-
/// properties on those `RecordValue`s.
46+
/// type compare equal if they refer to the same storage location.
5547
///
5648
/// Note on how to interpret the result:
5749
/// - If this returns true, the records are guaranteed to be equal at runtime.

0 commit comments

Comments
 (0)