Skip to content

Commit d7b2605

Browse files
committed
Merge remote-tracking branch 'upstream/llvmspirv_pulldown' into HEAD
Closes: #16165
2 parents 2f9db35 + 16e0fe5 commit d7b2605

File tree

9,612 files changed

+655466
-157588
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

9,612 files changed

+655466
-157588
lines changed

.github/workflows/containers/github-action-ci/stage1.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:22.04 as base
22
ENV LLVM_SYSROOT=/opt/llvm
33

44
FROM base as stage1-toolchain
5-
ENV LLVM_VERSION=18.1.8
5+
ENV LLVM_VERSION=19.1.2
66

77
RUN apt-get update && \
88
apt-get install -y \

.github/workflows/libcxx-build-and-test.yaml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ env:
4949
jobs:
5050
stage1:
5151
if: github.repository_owner == 'llvm'
52-
runs-on: libcxx-runners-8-set
52+
runs-on: libcxx-runners-set
53+
container: ghcr.io/libcxx/actions-builder:testing-2024-09-21
5354
continue-on-error: false
5455
strategy:
5556
fail-fast: false
@@ -79,12 +80,14 @@ jobs:
7980
path: |
8081
**/test-results.xml
8182
**/*.abilist
83+
**/CMakeConfigureLog.yaml
8284
**/CMakeError.log
8385
**/CMakeOutput.log
8486
**/crash_diagnostics/*
8587
stage2:
8688
if: github.repository_owner == 'llvm'
87-
runs-on: libcxx-runners-8-set
89+
runs-on: libcxx-runners-set
90+
container: ghcr.io/libcxx/actions-builder:testing-2024-09-21
8891
needs: [ stage1 ]
8992
continue-on-error: false
9093
strategy:
@@ -123,6 +126,7 @@ jobs:
123126
path: |
124127
**/test-results.xml
125128
**/*.abilist
129+
**/CMakeConfigureLog.yaml
126130
**/CMakeError.log
127131
**/CMakeOutput.log
128132
**/crash_diagnostics/*
@@ -160,20 +164,21 @@ jobs:
160164
'benchmarks',
161165
'bootstrapping-build'
162166
]
163-
machine: [ 'libcxx-runners-8-set' ]
167+
machine: [ 'libcxx-runners-set' ]
164168
include:
165169
- config: 'generic-cxx26'
166-
machine: libcxx-runners-8-set
170+
machine: libcxx-runners-set
167171
- config: 'generic-asan'
168-
machine: libcxx-runners-8-set
172+
machine: libcxx-runners-set
169173
- config: 'generic-tsan'
170-
machine: libcxx-runners-8-set
174+
machine: libcxx-runners-set
171175
- config: 'generic-ubsan'
172-
machine: libcxx-runners-8-set
176+
machine: libcxx-runners-set
173177
# Use a larger machine for MSAN to avoid timeout and memory allocation issues.
174178
- config: 'generic-msan'
175-
machine: libcxx-runners-8-set
179+
machine: libcxx-runners-set
176180
runs-on: ${{ matrix.machine }}
181+
container: ghcr.io/libcxx/actions-builder:testing-2024-09-21
177182
steps:
178183
- uses: actions/checkout@v4
179184
- name: ${{ matrix.config }}
@@ -188,6 +193,7 @@ jobs:
188193
path: |
189194
**/test-results.xml
190195
**/*.abilist
196+
**/CMakeConfigureLog.yaml
191197
**/CMakeError.log
192198
**/CMakeOutput.log
193199
**/crash_diagnostics/*
@@ -230,6 +236,7 @@ jobs:
230236
path: |
231237
**/test-results.xml
232238
**/*.abilist
239+
**/CMakeConfigureLog.yaml
233240
**/CMakeError.log
234241
**/CMakeOutput.log
235242
**/crash_diagnostics/*

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,9 @@ class BinaryFunction {
386386
/// Raw branch count for this function in the profile.
387387
uint64_t RawBranchCount{0};
388388

389+
/// Dynamically executed function bytes, used for density computation.
390+
uint64_t SampleCountInBytes{0};
391+
389392
/// Indicates the type of profile the function is using.
390393
uint16_t ProfileFlags{PF_NONE};
391394

@@ -1844,6 +1847,9 @@ class BinaryFunction {
18441847
/// to this function.
18451848
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
18461849

1850+
/// Return the number of dynamically executed bytes, from raw perf data.
1851+
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1852+
18471853
/// Return the execution count for functions with known profile.
18481854
/// Return 0 if the function has no profile.
18491855
uint64_t getKnownExecutionCount() const {

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
5555
enum ProfileFormatKind { PF_Fdata, PF_YAML };
5656

5757
extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
58+
extern llvm::cl::opt<bool> ShowDensity;
5859
extern llvm::cl::opt<bool> SplitEH;
5960
extern llvm::cl::opt<bool> StrictMode;
6061
extern llvm::cl::opt<bool> TimeOpts;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,6 +2577,7 @@ struct CFISnapshot {
25772577
case MCCFIInstruction::OpAdjustCfaOffset:
25782578
case MCCFIInstruction::OpWindowSave:
25792579
case MCCFIInstruction::OpNegateRAState:
2580+
case MCCFIInstruction::OpNegateRAStateWithPC:
25802581
case MCCFIInstruction::OpLLVMDefAspaceCfa:
25812582
case MCCFIInstruction::OpLabel:
25822583
llvm_unreachable("unsupported CFI opcode");
@@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot {
27152716
case MCCFIInstruction::OpAdjustCfaOffset:
27162717
case MCCFIInstruction::OpWindowSave:
27172718
case MCCFIInstruction::OpNegateRAState:
2719+
case MCCFIInstruction::OpNegateRAStateWithPC:
27182720
case MCCFIInstruction::OpLLVMDefAspaceCfa:
27192721
case MCCFIInstruction::OpLabel:
27202722
llvm_unreachable("unsupported CFI opcode");
@@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
28642866
case MCCFIInstruction::OpAdjustCfaOffset:
28652867
case MCCFIInstruction::OpWindowSave:
28662868
case MCCFIInstruction::OpNegateRAState:
2869+
case MCCFIInstruction::OpNegateRAStateWithPC:
28672870
case MCCFIInstruction::OpLLVMDefAspaceCfa:
28682871
case MCCFIInstruction::OpLabel:
28692872
llvm_unreachable("unsupported CFI opcode");

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ void DIEBuilder::buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter,
281281
for (auto &Row : TUIndex.getRows()) {
282282
uint64_t Signature = Row.getSignature();
283283
// manually populate TypeUnit to UnitVector
284-
DwarfContext->getTypeUnitForHash(DwarfContext->getMaxVersion(), Signature,
285-
true);
284+
DwarfContext->getTypeUnitForHash(Signature, true);
286285
}
287286
}
288287
const unsigned int CUNum = getCUNum(DwarfContext, isDWO());

bolt/lib/Core/HashUtilities.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
145145
continue;
146146
}
147147

148-
std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first;
148+
std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first;
149149
llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
150150
Opcodes.insert(Mnemonic);
151151
}

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/ParallelUtilities.h"
1616
#include "bolt/Passes/ReorderAlgorithm.h"
1717
#include "bolt/Passes/ReorderFunctions.h"
18+
#include "bolt/Utils/CommandLineOpts.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include <atomic>
2021
#include <mutex>
@@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
223224
"functions section"),
224225
cl::init(100), cl::Hidden, cl::cat(BoltCategory));
225226

227+
// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
228+
static cl::opt<int> ProfileDensityCutOffHot(
229+
"profile-density-cutoff-hot", cl::init(990000),
230+
cl::desc("Total samples cutoff for functions used to calculate "
231+
"profile density."));
232+
233+
static cl::opt<double> ProfileDensityThreshold(
234+
"profile-density-threshold", cl::init(60),
235+
cl::desc("If the profile density is below the given threshold, it "
236+
"will be suggested to increase the sampling rate."),
237+
cl::Optional);
238+
226239
} // namespace opts
227240

228241
namespace llvm {
@@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831396
uint64_t StaleSampleCount = 0;
13841397
uint64_t InferredSampleCount = 0;
13851398
std::vector<const BinaryFunction *> ProfiledFunctions;
1399+
std::vector<std::pair<double, uint64_t>> FuncDensityList;
13861400
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
13871401
for (auto &BFI : BC.getBinaryFunctions()) {
13881402
const BinaryFunction &Function = BFI.second;
@@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411455
StaleSampleCount += SampleCount;
14421456
++NumAllStaleFunctions;
14431457
}
1458+
1459+
if (opts::ShowDensity) {
1460+
uint64_t Size = Function.getSize();
1461+
// In case of BOLT split functions registered in BAT, executed traces are
1462+
// automatically attributed to the main fragment. Add up function sizes
1463+
// for all fragments.
1464+
if (IsHotParentOfBOLTSplitFunction)
1465+
for (const BinaryFunction *Fragment : Function.getFragments())
1466+
Size += Fragment->getSize();
1467+
double Density = (double)1.0 * Function.getSampleCountInBytes() / Size;
1468+
FuncDensityList.emplace_back(Density, SampleCount);
1469+
LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
1470+
<< Function.getSampleCountInBytes() << ", size (b) "
1471+
<< Size << ", density " << Density
1472+
<< ", sample count " << SampleCount << '\n');
1473+
}
14441474
}
14451475
BC.NumProfiledFuncs = ProfiledFunctions.size();
14461476
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841714
BC.outs() << ". Use -print-unknown to see the list.";
16851715
BC.outs() << '\n';
16861716
}
1717+
1718+
if (opts::ShowDensity) {
1719+
double Density = 0.0;
1720+
// Sorted by the density in descending order.
1721+
llvm::stable_sort(FuncDensityList,
1722+
[&](const std::pair<double, uint64_t> &A,
1723+
const std::pair<double, uint64_t> &B) {
1724+
if (A.first != B.first)
1725+
return A.first > B.first;
1726+
return A.second < B.second;
1727+
});
1728+
1729+
uint64_t AccumulatedSamples = 0;
1730+
uint32_t I = 0;
1731+
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
1732+
"The cutoff value is greater than 1000000(100%)");
1733+
while (AccumulatedSamples <
1734+
TotalSampleCount *
1735+
static_cast<float>(opts::ProfileDensityCutOffHot) /
1736+
1000000 &&
1737+
I < FuncDensityList.size()) {
1738+
AccumulatedSamples += FuncDensityList[I].second;
1739+
Density = FuncDensityList[I].first;
1740+
I++;
1741+
}
1742+
if (Density == 0.0) {
1743+
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
1744+
"--profile-density-cutoff-hot option is "
1745+
"set too low. Please check your command.\n";
1746+
} else if (Density < opts::ProfileDensityThreshold) {
1747+
BC.errs()
1748+
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
1749+
<< format("%.1f", opts::ProfileDensityThreshold / Density)
1750+
<< "x more samples. Please consider increasing sampling rate or "
1751+
"profiling for longer duration to get more samples.\n";
1752+
}
1753+
1754+
BC.outs() << "BOLT-INFO: Functions with density >= "
1755+
<< format("%.1f", Density) << " account for "
1756+
<< format("%.2f",
1757+
static_cast<double>(opts::ProfileDensityCutOffHot) /
1758+
10000)
1759+
<< "% total sample counts.\n";
1760+
}
16871761
return Error::success();
16881762
}
16891763

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
638638
: BinaryFunction::PF_LBR;
639639
for (auto &BFI : BC.getBinaryFunctions()) {
640640
BinaryFunction &BF = BFI.second;
641-
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
641+
FuncBranchData *FBD = getBranchData(BF);
642+
if (FBD || getFuncSampleData(BF.getNames())) {
642643
BF.markProfiled(Flags);
644+
if (FBD)
645+
BF.RawBranchCount = FBD->getNumExecutedBranches();
646+
}
643647
}
644648

645649
for (auto &FuncBranches : NamesToBranches)
@@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
845849
return false;
846850
}
847851

852+
// Set ParentFunc to BAT parent function or FromFunc itself.
853+
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
854+
if (!ParentFunc)
855+
ParentFunc = FromFunc;
856+
ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
857+
848858
std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
849859
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
850860
Second.From)
@@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
864874
<< FromFunc->getPrintName() << ":"
865875
<< Twine::utohexstr(First.To) << " to "
866876
<< Twine::utohexstr(Second.From) << ".\n");
867-
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
868877
for (auto [From, To] : *FTs) {
869878
if (BAT) {
870879
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
871880
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
872881
}
873-
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
882+
doIntraBranch(*ParentFunc, From, To, Count, false);
874883
}
875884

876885
return true;

bolt/lib/Utils/CommandLineOpts.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ cl::opt<std::string> SaveProfile("w",
175175
cl::desc("save recorded profile to a file"),
176176
cl::cat(BoltOutputCategory));
177177

178+
cl::opt<bool> ShowDensity("show-density",
179+
cl::desc("show profile density details"),
180+
cl::Optional, cl::cat(AggregatorCategory));
181+
178182
cl::opt<bool> SplitEH("split-eh", cl::desc("split C++ exception handling code"),
179183
cl::Hidden, cl::cat(BoltOptCategory));
180184

bolt/test/X86/pre-aggregated-perf.test

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,21 @@ REQUIRES: system-linux
1111

1212
RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
1313
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
14-
RUN: --profile-use-dfs | FileCheck %s
14+
RUN: --show-density \
15+
RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
16+
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
17+
18+
CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
19+
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
20+
21+
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
22+
RUN: --show-density \
23+
RUN: --profile-density-cutoff-hot=970000 \
24+
RUN: --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING
25+
26+
CHECK-WARNING: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
27+
CHECK-WARNING: BOLT-WARNING: BOLT is estimated to optimize better with 2.8x more samples.
28+
CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
1529

1630
RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
1731
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s

bolt/tools/driver/llvm-bolt.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ void perf2boltMode(int argc, char **argv) {
129129
exit(1);
130130
}
131131
opts::AggregateOnly = true;
132+
opts::ShowDensity = true;
132133
}
133134

134135
void boltDiffMode(int argc, char **argv) {

clang-tools-extra/clang-doc/Generators.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,8 @@ std::string getTagType(TagTypeKind AS);
5555
} // namespace doc
5656
} // namespace clang
5757

58+
namespace llvm {
59+
extern template class Registry<clang::doc::Generator>;
60+
} // namespace llvm
61+
5862
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_GENERATOR_H

clang-tools-extra/clang-include-fixer/InMemorySymbolIndex.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ InMemorySymbolIndex::InMemorySymbolIndex(
2121

2222
std::vector<SymbolAndSignals>
2323
InMemorySymbolIndex::search(llvm::StringRef Identifier) {
24-
auto I = LookupTable.find(std::string(Identifier));
24+
auto I = LookupTable.find(Identifier);
2525
if (I != LookupTable.end())
2626
return I->second;
2727
return {};

clang-tools-extra/clang-include-fixer/InMemorySymbolIndex.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ class InMemorySymbolIndex : public SymbolIndex {
2727
search(llvm::StringRef Identifier) override;
2828

2929
private:
30-
std::map<std::string, std::vector<find_all_symbols::SymbolAndSignals>>
30+
std::map<std::string, std::vector<find_all_symbols::SymbolAndSignals>,
31+
std::less<>>
3132
LookupTable;
3233
};
3334

0 commit comments

Comments
 (0)