Skip to content

Commit 10e747a

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-middle-block-branch
Conflicts: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp
2 parents 3a4ecfc + b2f65e8 commit 10e747a

File tree

2,195 files changed

+79265
-47676
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,195 files changed

+79265
-47676
lines changed

.ci/generate-buildkite-pipeline-premerge

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ function compute-projects-to-test() {
6868
done
6969
;;
7070
clang)
71-
for p in clang-tools-extra compiler-rt flang lldb cross-project-tests; do
71+
for p in clang-tools-extra compiler-rt lldb cross-project-tests; do
7272
echo $p
7373
done
7474
;;

.github/workflows/llvm-bugs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ubuntu-latest
1515
if: github.repository == 'llvm/llvm-project'
1616
steps:
17-
- uses: actions/setup-node@v3
17+
- uses: actions/setup-node@v4
1818
with:
1919
node-version: 18
2020
check-latest: true

bolt/docs/BAT.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,14 @@ equals output offset.
106106
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
107107
(branch or call instruction). If not set, it signifies a control flow target
108108
(basic block offset).
109+
109110
`InputAddr` is omitted for equal offsets in input and output function. In this
110111
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector.
111112

113+
Deleted basic blocks are emitted as having `OutputOffset` equal to the size of
114+
the function. They don't affect address translation and only participate in
115+
input basic block mapping.
116+
112117
### Secondary Entry Points table
113118
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
114119
offsets denoting secondary entry points, delta encoded, implicitly starting at zero.

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "bolt/Core/BinaryData.h"
1818
#include "bolt/Core/BinarySection.h"
1919
#include "bolt/Core/DebugData.h"
20+
#include "bolt/Core/DynoStats.h"
2021
#include "bolt/Core/JumpTable.h"
2122
#include "bolt/Core/MCPlusBuilder.h"
2223
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
@@ -359,7 +360,7 @@ class BinaryContext {
359360
void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
360361

361362
bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
362-
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = true; }
363+
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }
363364

364365
/// Return true if relocations against symbol with a given name
365366
/// must be created.
@@ -677,6 +678,9 @@ class BinaryContext {
677678
/// have an origin file name available.
678679
bool HasSymbolsWithFileName{false};
679680

681+
/// Does the binary have BAT section.
682+
bool HasBATSection{false};
683+
680684
/// Sum of execution count of all functions
681685
uint64_t SumExecutionCount{0};
682686

@@ -714,6 +718,9 @@ class BinaryContext {
714718
uint64_t NumStaleBlocksWithEqualIcount{0};
715719
} Stats;
716720

721+
// Original binary execution count stats.
722+
DynoStats InitialDynoStats;
723+
717724
// Address of the first allocated segment.
718725
uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
719726

bolt/include/bolt/Passes/BinaryPasses.h

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "bolt/Core/BinaryContext.h"
1717
#include "bolt/Core/BinaryFunction.h"
1818
#include "bolt/Core/DynoStats.h"
19+
#include "bolt/Profile/BoltAddressTranslation.h"
1920
#include "llvm/Support/CommandLine.h"
2021
#include <atomic>
2122
#include <set>
@@ -52,15 +53,31 @@ class BinaryFunctionPass {
5253
virtual Error runOnFunctions(BinaryContext &BC) = 0;
5354
};
5455

56+
/// A pass to set initial program-wide dynostats.
57+
class DynoStatsSetPass : public BinaryFunctionPass {
58+
public:
59+
DynoStatsSetPass() : BinaryFunctionPass(false) {}
60+
61+
const char *getName() const override {
62+
return "set dyno-stats before optimizations";
63+
}
64+
65+
bool shouldPrint(const BinaryFunction &BF) const override { return false; }
66+
67+
Error runOnFunctions(BinaryContext &BC) override {
68+
BC.InitialDynoStats = getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
69+
return Error::success();
70+
}
71+
};
72+
5573
/// A pass to print program-wide dynostats.
5674
class DynoStatsPrintPass : public BinaryFunctionPass {
5775
protected:
58-
DynoStats PrevDynoStats;
5976
std::string Title;
6077

6178
public:
62-
DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title)
63-
: BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {}
79+
DynoStatsPrintPass(const char *Title)
80+
: BinaryFunctionPass(false), Title(Title) {}
6481

6582
const char *getName() const override {
6683
return "print dyno-stats after optimizations";
@@ -69,6 +86,7 @@ class DynoStatsPrintPass : public BinaryFunctionPass {
6986
bool shouldPrint(const BinaryFunction &BF) const override { return false; }
7087

7188
Error runOnFunctions(BinaryContext &BC) override {
89+
const DynoStats PrevDynoStats = BC.InitialDynoStats;
7290
const DynoStats NewDynoStats =
7391
getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
7492
const bool Changed = (NewDynoStats != PrevDynoStats);
@@ -399,8 +417,11 @@ class PrintProfileStats : public BinaryFunctionPass {
399417
/// Prints a list of the top 100 functions sorted by a set of
400418
/// dyno stats categories.
401419
class PrintProgramStats : public BinaryFunctionPass {
420+
BoltAddressTranslation *BAT = nullptr;
421+
402422
public:
403-
explicit PrintProgramStats() : BinaryFunctionPass(false) {}
423+
explicit PrintProgramStats(BoltAddressTranslation *BAT = nullptr)
424+
: BinaryFunctionPass(false), BAT(BAT) {}
404425

405426
const char *getName() const override { return "print-stats"; }
406427
bool shouldPrint(const BinaryFunction &) const override { return false; }

bolt/include/bolt/Passes/MCF.h

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,14 @@
99
#ifndef BOLT_PASSES_MCF_H
1010
#define BOLT_PASSES_MCF_H
1111

12+
#include "bolt/Passes/BinaryPasses.h"
13+
#include "llvm/Support/CommandLine.h"
14+
1215
namespace llvm {
1316
namespace bolt {
1417

15-
class BinaryFunction;
1618
class DataflowInfoManager;
1719

18-
enum MCFCostFunction : char {
19-
MCF_DISABLE = 0,
20-
MCF_LINEAR,
21-
MCF_QUADRATIC,
22-
MCF_LOG,
23-
MCF_BLAMEFTS
24-
};
25-
2620
/// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
2721
/// without the Usability Burden" by Diego Novillo to make basic block counts
2822
/// equal if we show that A dominates B, B post-dominates A and they are in the
@@ -31,23 +25,18 @@ void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF);
3125

3226
/// Fill edge counts based on the basic block count. Used in nonLBR mode when
3327
/// we only have bb count.
34-
void estimateEdgeCounts(BinaryFunction &BF);
35-
36-
/// Entry point for computing a min-cost flow for the CFG with the goal
37-
/// of fixing the flow of the CFG edges, that is, making sure it obeys the
38-
/// flow-conservation equation SumInEdges = SumOutEdges.
39-
///
40-
/// To do this, we create an instance of the min-cost flow problem in a
41-
/// similar way as the one discussed in the work of Roy Levin "Completing
42-
/// Incomplete Edge Profile by Applying Minimum Cost Circulation Algorithms".
43-
/// We do a few things differently, though. We don't populate edge counts using
44-
/// weights coming from a static branch prediction technique and we don't
45-
/// use the same cost function.
46-
///
47-
/// If cost function BlameFTs is used, assign all remaining flow to
48-
/// fall-throughs. This is used when the sampling is based on taken branches
49-
/// that do not account for them.
50-
void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction);
28+
class EstimateEdgeCounts : public BinaryFunctionPass {
29+
void runOnFunction(BinaryFunction &BF);
30+
31+
public:
32+
explicit EstimateEdgeCounts(const cl::opt<bool> &PrintPass)
33+
: BinaryFunctionPass(PrintPass) {}
34+
35+
const char *getName() const override { return "estimate-edge-counts"; }
36+
37+
/// Pass entry point
38+
Error runOnFunctions(BinaryContext &BC) override;
39+
};
5140

5241
} // end namespace bolt
5342
} // end namespace llvm

bolt/include/bolt/Passes/StokeInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,10 @@ struct StokeFuncInfo {
8787
<< "," << NumBlocks << "," << IsLoopFree << "," << NumLoops << ","
8888
<< MaxLoopDepth << "," << HotSize << "," << TotalSize << ","
8989
<< Score << "," << HasCall << ",\"{ ";
90-
for (std::string S : DefIn)
90+
for (const std::string &S : DefIn)
9191
Outfile << "%" << S << " ";
9292
Outfile << "}\",\"{ ";
93-
for (std::string S : LiveOut)
93+
for (const std::string &S : LiveOut)
9494
Outfile << "%" << S << " ";
9595
Outfile << "}\"," << HeapOut << "," << StackOut << "," << HasRipAddr
9696
<< "," << Omitted << "\n";

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class BinaryFunction;
7070
class BoltAddressTranslation {
7171
public:
7272
// In-memory representation of the address translation table
73-
using MapTy = std::map<uint32_t, uint32_t>;
73+
using MapTy = std::multimap<uint32_t, uint32_t>;
7474

7575
// List of taken fall-throughs
7676
using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
@@ -90,7 +90,7 @@ class BoltAddressTranslation {
9090
std::error_code parse(raw_ostream &OS, StringRef Buf);
9191

9292
/// Dump the parsed address translation tables
93-
void dump(raw_ostream &OS);
93+
void dump(raw_ostream &OS) const;
9494

9595
/// If the maps are loaded in memory, perform the lookup to translate LBR
9696
/// addresses in function located at \p FuncAddress.
@@ -107,7 +107,12 @@ class BoltAddressTranslation {
107107

108108
/// If available, fetch the address of the hot part linked to the cold part
109109
/// at \p Address. Return 0 otherwise.
110-
uint64_t fetchParentAddress(uint64_t Address) const;
110+
uint64_t fetchParentAddress(uint64_t Address) const {
111+
auto Iter = ColdPartSource.find(Address);
112+
if (Iter == ColdPartSource.end())
113+
return 0;
114+
return Iter->second;
115+
}
111116

112117
/// True if the input binary has a translation table we can use to convert
113118
/// addresses when aggregating profile
@@ -132,7 +137,8 @@ class BoltAddressTranslation {
132137
/// emitted for the start of the BB. More entries may be emitted to cover
133138
/// the location of calls or any instruction that may change control flow.
134139
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
135-
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);
140+
uint64_t FuncInputAddress,
141+
uint64_t FuncOutputAddress) const;
136142

137143
/// Write the serialized address translation table for a function.
138144
template <bool Cold>
@@ -147,7 +153,7 @@ class BoltAddressTranslation {
147153

148154
/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
149155
/// entries in function address translation map.
150-
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
156+
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems) const;
151157

152158
/// Calculate the number of equal offsets (output = input - skew) in the
153159
/// beginning of the function.
@@ -178,14 +184,9 @@ class BoltAddressTranslation {
178184
public:
179185
/// Map basic block input offset to a basic block index and hash pair.
180186
class BBHashMapTy {
181-
class EntryTy {
187+
struct EntryTy {
182188
unsigned Index;
183189
size_t Hash;
184-
185-
public:
186-
unsigned getBBIndex() const { return Index; }
187-
size_t getBBHash() const { return Hash; }
188-
EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
189190
};
190191

191192
std::map<uint32_t, EntryTy> Map;
@@ -201,34 +202,30 @@ class BoltAddressTranslation {
201202
}
202203

203204
unsigned getBBIndex(uint32_t BBInputOffset) const {
204-
return getEntry(BBInputOffset).getBBIndex();
205+
return getEntry(BBInputOffset).Index;
205206
}
206207

207208
size_t getBBHash(uint32_t BBInputOffset) const {
208-
return getEntry(BBInputOffset).getBBHash();
209+
return getEntry(BBInputOffset).Hash;
209210
}
210211

211212
void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
212-
Map.emplace(BBInputOffset, EntryTy(BBIndex, BBHash));
213+
Map.emplace(BBInputOffset, EntryTy{BBIndex, BBHash});
213214
}
214215

215216
size_t getNumBasicBlocks() const { return Map.size(); }
216217

217218
auto begin() const { return Map.begin(); }
218219
auto end() const { return Map.end(); }
219220
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
221+
auto size() const { return Map.size(); }
220222
};
221223

222224
/// Map function output address to its hash and basic blocks hash map.
223225
class FuncHashesTy {
224-
class EntryTy {
226+
struct EntryTy {
225227
size_t Hash;
226228
BBHashMapTy BBHashMap;
227-
228-
public:
229-
size_t getBFHash() const { return Hash; }
230-
const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
231-
EntryTy(size_t Hash) : Hash(Hash) {}
232229
};
233230

234231
std::unordered_map<uint64_t, EntryTy> Map;
@@ -240,23 +237,23 @@ class BoltAddressTranslation {
240237

241238
public:
242239
size_t getBFHash(uint64_t FuncOutputAddress) const {
243-
return getEntry(FuncOutputAddress).getBFHash();
240+
return getEntry(FuncOutputAddress).Hash;
244241
}
245242

246243
const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
247-
return getEntry(FuncOutputAddress).getBBHashMap();
244+
return getEntry(FuncOutputAddress).BBHashMap;
248245
}
249246

250247
void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
251-
Map.emplace(FuncOutputAddress, EntryTy(BFHash));
248+
Map.emplace(FuncOutputAddress, EntryTy{BFHash, BBHashMapTy()});
252249
}
253250

254251
size_t getNumFunctions() const { return Map.size(); };
255252

256253
size_t getNumBasicBlocks() const {
257254
size_t NumBasicBlocks{0};
258255
for (auto &I : Map)
259-
NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
256+
NumBasicBlocks += I.second.BBHashMap.getNumBasicBlocks();
260257
return NumBasicBlocks;
261258
}
262259
};
@@ -278,7 +275,9 @@ class BoltAddressTranslation {
278275

279276
/// Returns the number of basic blocks in a function.
280277
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
281-
return NumBasicBlocksMap.at(OutputAddress);
278+
auto It = NumBasicBlocksMap.find(OutputAddress);
279+
assert(It != NumBasicBlocksMap.end());
280+
return It->second;
282281
}
283282

284283
private:

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define BOLT_PROFILE_DATA_AGGREGATOR_H
1616

1717
#include "bolt/Profile/DataReader.h"
18+
#include "bolt/Profile/YAMLProfileWriter.h"
1819
#include "llvm/ADT/StringRef.h"
1920
#include "llvm/Support/Error.h"
2021
#include "llvm/Support/Program.h"
@@ -248,7 +249,7 @@ class DataAggregator : public DataReader {
248249
BinaryFunction *getBATParentFunction(const BinaryFunction &Func) const;
249250

250251
/// Retrieve the location name to be used for samples recorded in \p Func.
251-
StringRef getLocationName(const BinaryFunction &Func) const;
252+
static StringRef getLocationName(const BinaryFunction &Func, bool BAT);
252253

253254
/// Semantic actions - parser hooks to interpret parsed perf samples
254255
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
@@ -490,6 +491,8 @@ class DataAggregator : public DataReader {
490491
/// Parse the output generated by "perf buildid-list" to extract build-ids
491492
/// and return a file name matching a given \p FileBuildID.
492493
std::optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);
494+
495+
friend class YAMLProfileWriter;
493496
};
494497
} // namespace bolt
495498
} // namespace llvm

0 commit comments

Comments
 (0)