Skip to content

Commit 3e1462a

Browse files
authored
Merge branch 'main' into clang-doc-ftime-trace
2 parents cbb9d73 + ebab105 commit 3e1462a

File tree

1,588 files changed

+83940
-24070
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,588 files changed

+83940
-24070
lines changed

.github/workflows/issue-write.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ jobs:
1818
permissions:
1919
pull-requests: write
2020
if: >
21-
github.event.workflow_run.event == 'pull_request'
21+
github.event.workflow_run.event == 'pull_request' &&
22+
(
23+
github.event.workflow_run.conclusion == 'success' ||
24+
github.event.workflow_run.conclusion == 'failure'
25+
)
2226
steps:
2327
- name: 'Download artifact'
2428
uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,10 @@
688688

689689
Use a modified clustering algorithm geared towards minimizing branches
690690

691+
- `--name-similarity-function-matching-threshold=<uint>`
692+
693+
Match functions using namespace and edit distance.
694+
691695
- `--no-inline`
692696

693697
Disable all inlining (overrides other inlining options)

bolt/docs/OptimizingLinux.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ $ perf2bolt -p perf.data -o perf.fdata vmlinux
4444

4545
Under a high load, `perf.data` should be several gigabytes in size and you should expect the converted `perf.fdata` not to exceed 100 MB.
4646

47+
Profiles collected from multiple workloads can be merged into a single profile using the `merge-fdata` utility:
48+
```bash
49+
$ merge-fdata perf.1.fdata perf.2.fdata ... perf.<N>.fdata > perf.merged.fdata
50+
```
51+
4752
Two changes are required for the kernel build. The first one is optional but highly recommended. It introduces a BOLT-reserved space into `vmlinux` code section:
4853

4954

bolt/include/bolt/Core/BinaryBasicBlock.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -842,15 +842,6 @@ class BinaryBasicBlock {
842842
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
843843
MCInst *&CondBranch, MCInst *&UncondBranch);
844844

845-
/// Return true if iterator \p I is pointing to the first instruction in
846-
/// a pair that could be macro-fused.
847-
bool isMacroOpFusionPair(const_iterator I) const;
848-
849-
/// If the basic block has a pair of instructions suitable for macro-fusion,
850-
/// return iterator to the first instruction of the pair.
851-
/// Otherwise return end().
852-
const_iterator getMacroOpFusionPair() const;
853-
854845
/// Printer required for printing dominator trees.
855846
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
856847
if (PrintType)

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -698,10 +698,6 @@ class BinaryContext {
698698

699699
/// Binary-wide aggregated stats.
700700
struct BinaryStats {
701-
/// Stats for macro-fusion.
702-
uint64_t MissedMacroFusionPairs{0};
703-
uint64_t MissedMacroFusionExecCount{0};
704-
705701
/// Stats for stale profile matching:
706702
/// the total number of basic blocks in the profile
707703
uint32_t NumStaleBlocks{0};

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,6 @@ class BinaryFunction {
835835
/// them.
836836
void calculateLoopInfo();
837837

838-
/// Calculate missed macro-fusion opportunities and update BinaryContext
839-
/// stats.
840-
void calculateMacroOpFusionStats();
841-
842838
/// Returns if BinaryDominatorTree has been constructed for this function.
843839
bool hasDomTree() const { return BDT != nullptr; }
844840

bolt/include/bolt/Core/DebugData.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,15 @@ class DebugRangesSectionWriter {
210210
static bool classof(const DebugRangesSectionWriter *Writer) {
211211
return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
212212
}
213+
214+
/// Append a range to the main buffer.
215+
void appendToRangeBuffer(const DebugBufferVector &CUBuffer);
216+
217+
/// Sets Unit DIE to be updated for CU.
218+
void setDie(DIE *Die) { this->Die = Die; }
219+
220+
/// Returns Unit DIE to be updated for CU.
221+
DIE *getDie() const { return Die; }
213222

214223
/// Writes out range lists for a current CU being processed.
215224
void virtual finalizeSection(){};
@@ -232,6 +241,9 @@ class DebugRangesSectionWriter {
232241
static constexpr uint64_t EmptyRangesOffset{0};
233242

234243
private:
244+
/// Stores Unit DIE to be updated for CU.
245+
DIE *Die{0};
246+
235247
RangesWriterKind Kind;
236248
};
237249

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -930,13 +930,6 @@ class MCPlusBuilder {
930930
/// Return true if the instruction is encoded using EVEX (AVX-512).
931931
virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }
932932

933-
/// Return true if a pair of instructions represented by \p Insts
934-
/// could be fused into a single uop.
935-
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
936-
llvm_unreachable("not implemented");
937-
return false;
938-
}
939-
940933
struct X86MemOperand {
941934
unsigned BaseRegNum;
942935
int64_t ScaleImm;

bolt/include/bolt/Profile/YAMLProfileReader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ class YAMLProfileReader : public ProfileReaderBase {
9393
ProfiledFunctions.emplace(&BF);
9494
}
9595

96+
/// Matches functions with similarly named profiled functions.
97+
uint64_t matchWithNameSimilarity(BinaryContext &BC);
98+
9699
/// Check if the profile uses an event with a given \p Name.
97100
bool usesEvent(StringRef Name) const;
98101
};

bolt/include/bolt/Rewrite/DWARFRewriter.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "llvm/ADT/StringRef.h"
1717
#include "llvm/CodeGen/DIE.h"
1818
#include "llvm/DWP/DWP.h"
19-
#include "llvm/MC/MCAsmLayout.h"
2019
#include "llvm/MC/MCContext.h"
2120
#include "llvm/Support/ToolOutputFile.h"
2221
#include <cstdint>
@@ -90,6 +89,10 @@ class DWARFRewriter {
9089
/// Store Rangelists writer for each DWO CU.
9190
RangeListsDWOWriers RangeListsWritersByCU;
9291

92+
/// Stores ranges writer for each DWO CU.
93+
std::unordered_map<uint64_t, std::unique_ptr<DebugRangesSectionWriter>>
94+
LegacyRangesWritersByCU;
95+
9396
std::mutex LocListDebugInfoPatchesMutex;
9497

9598
/// Maps each DWO id to its RangesBase.
@@ -183,7 +186,7 @@ class DWARFRewriter {
183186
void updateDebugInfo();
184187

185188
/// Update stmt_list for CUs based on the new .debug_line layout in \p Asm.
186-
void updateLineTableOffsets(const MCAsmLayout &Layout);
189+
void updateLineTableOffsets(const MCAssembler &Asm);
187190

188191
uint64_t getDwoRangesBase(uint64_t DWOId) { return DwoRangesBase[DWOId]; }
189192

bolt/lib/Core/BinaryBasicBlock.cpp

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
404404
CondBranch, UncondBranch);
405405
}
406406

407-
bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
408-
auto &MIB = Function->getBinaryContext().MIB;
409-
ArrayRef<MCInst> Insts = Instructions;
410-
return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
411-
}
412-
413-
BinaryBasicBlock::const_iterator
414-
BinaryBasicBlock::getMacroOpFusionPair() const {
415-
if (!Function->getBinaryContext().isX86())
416-
return end();
417-
418-
if (getNumNonPseudos() < 2 || succ_size() != 2)
419-
return end();
420-
421-
auto RI = getLastNonPseudo();
422-
assert(RI != rend() && "cannot have an empty block with 2 successors");
423-
424-
BinaryContext &BC = Function->getBinaryContext();
425-
426-
// Skip instruction if it's an unconditional branch following
427-
// a conditional one.
428-
if (BC.MIB->isUnconditionalBranch(*RI))
429-
++RI;
430-
431-
if (!BC.MIB->isConditionalBranch(*RI))
432-
return end();
433-
434-
// Start checking with instruction preceding the conditional branch.
435-
++RI;
436-
if (RI == rend())
437-
return end();
438-
439-
auto II = std::prev(RI.base()); // convert to a forward iterator
440-
if (isMacroOpFusionPair(II))
441-
return II;
442-
443-
return end();
444-
}
445-
446407
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
447408
BinaryContext &BC = Function->getBinaryContext();
448409
auto Itr = rbegin();

bolt/lib/Core/BinaryContext.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
2121
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
2222
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23-
#include "llvm/MC/MCAsmLayout.h"
2423
#include "llvm/MC/MCAssembler.h"
2524
#include "llvm/MC/MCContext.h"
2625
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -2404,32 +2403,23 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
24042403
Streamer->emitLabel(SplitStartLabel);
24052404
emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
24062405
Streamer->emitLabel(SplitEndLabel);
2407-
// To avoid calling MCObjectStreamer::flushPendingLabels() which is
2408-
// private
2409-
Streamer->emitBytes(StringRef(""));
2410-
Streamer->switchSection(Section);
24112406
}
24122407

2413-
// To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2414-
// MCStreamer::Finish(), which does more than we want
2415-
Streamer->emitBytes(StringRef(""));
2416-
24172408
MCAssembler &Assembler =
24182409
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2419-
MCAsmLayout Layout(Assembler);
2420-
Assembler.layout(Layout);
2410+
Assembler.layout();
24212411

24222412
// Obtain fragment sizes.
24232413
std::vector<uint64_t> FragmentSizes;
24242414
// Main fragment size.
2425-
const uint64_t HotSize =
2426-
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2415+
const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2416+
Assembler.getSymbolOffset(*StartLabel);
24272417
FragmentSizes.push_back(HotSize);
24282418
// Split fragment sizes.
24292419
uint64_t ColdSize = 0;
24302420
for (const auto &Labels : SplitLabels) {
2431-
uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
2432-
Layout.getSymbolOffset(*Labels.first);
2421+
uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2422+
Assembler.getSymbolOffset(*Labels.first);
24332423
FragmentSizes.push_back(Size);
24342424
ColdSize += Size;
24352425
}
@@ -2439,7 +2429,8 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
24392429
for (FunctionFragment &FF : BF.getLayout().fragments()) {
24402430
BinaryBasicBlock *PrevBB = nullptr;
24412431
for (BinaryBasicBlock *BB : FF) {
2442-
const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
2432+
const uint64_t BBStartOffset =
2433+
Assembler.getSymbolOffset(*(BB->getLabel()));
24432434
BB->setOutputStartAddress(BBStartOffset);
24442435
if (PrevBB)
24452436
PrevBB->setOutputEndAddress(BBStartOffset);

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
3838
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
3939
cl::cat(BoltOptCategory));
4040

41-
cl::opt<MacroFusionType>
42-
AlignMacroOpFusion("align-macro-fusion",
43-
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
44-
cl::init(MFT_HOT),
45-
cl::values(clEnumValN(MFT_NONE, "none",
46-
"do not insert alignment no-ops for macro-fusion"),
47-
clEnumValN(MFT_HOT, "hot",
48-
"only insert alignment no-ops on hot execution paths (default)"),
49-
clEnumValN(MFT_ALL, "all",
50-
"always align instructions to allow macro-fusion")),
51-
cl::ZeroOrMore,
52-
cl::cat(BoltRelocCategory));
53-
5441
static cl::list<std::string>
5542
BreakFunctionNames("break-funcs",
5643
cl::CommaSeparated,
@@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
453440
Streamer.emitLabel(EntrySymbol);
454441
}
455442

456-
// Check if special alignment for macro-fusion is needed.
457-
bool MayNeedMacroFusionAlignment =
458-
(opts::AlignMacroOpFusion == MFT_ALL) ||
459-
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
460-
BinaryBasicBlock::const_iterator MacroFusionPair;
461-
if (MayNeedMacroFusionAlignment) {
462-
MacroFusionPair = BB->getMacroOpFusionPair();
463-
if (MacroFusionPair == BB->end())
464-
MayNeedMacroFusionAlignment = false;
465-
}
466-
467443
SMLoc LastLocSeen;
468-
// Remember if the last instruction emitted was a prefix.
469-
bool LastIsPrefix = false;
470444
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
471445
MCInst &Instr = *I;
472446

@@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
479453
continue;
480454
}
481455

482-
// Handle macro-fusion alignment. If we emitted a prefix as
483-
// the last instruction, we should've already emitted the associated
484-
// alignment hint, so don't emit it twice.
485-
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
486-
I == MacroFusionPair) {
487-
// This assumes the second instruction in the macro-op pair will get
488-
// assigned to its own MCRelaxableFragment. Since all JCC instructions
489-
// are relaxable, we should be safe.
490-
}
491-
492456
if (!EmitCodeOnly) {
493457
// A symbol to be emitted before the instruction to mark its location.
494458
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
@@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
525489
}
526490

527491
Streamer.emitInstruction(Instr, *BC.STI);
528-
LastIsPrefix = BC.MIB->isPrefix(Instr);
529492
}
530493
}
531494

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2279,8 +2279,6 @@ void BinaryFunction::postProcessCFG() {
22792279
postProcessBranches();
22802280
}
22812281

2282-
calculateMacroOpFusionStats();
2283-
22842282
// The final cleanup of intermediate structures.
22852283
clearList(IgnoredBranches);
22862284

@@ -2297,29 +2295,6 @@ void BinaryFunction::postProcessCFG() {
22972295
"invalid CFG detected after post-processing");
22982296
}
22992297

2300-
void BinaryFunction::calculateMacroOpFusionStats() {
2301-
if (!getBinaryContext().isX86())
2302-
return;
2303-
for (const BinaryBasicBlock &BB : blocks()) {
2304-
auto II = BB.getMacroOpFusionPair();
2305-
if (II == BB.end())
2306-
continue;
2307-
2308-
// Check offset of the second instruction.
2309-
// FIXME: arch-specific.
2310-
const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
2311-
if (!Offset || (getAddress() + Offset) % 64)
2312-
continue;
2313-
2314-
LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
2315-
<< Twine::utohexstr(getAddress() + Offset)
2316-
<< " in function " << *this << "; executed "
2317-
<< BB.getKnownExecutionCount() << " times.\n");
2318-
++BC.Stats.MissedMacroFusionPairs;
2319-
BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
2320-
}
2321-
}
2322-
23232298
void BinaryFunction::removeTagsFromProfile() {
23242299
for (BinaryBasicBlock *BB : BasicBlocks) {
23252300
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)

bolt/lib/Core/DebugData.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,12 @@ uint64_t DebugRangesSectionWriter::getSectionOffset() {
177177
return SectionOffset;
178178
}
179179

180+
void DebugRangesSectionWriter::appendToRangeBuffer(
181+
const DebugBufferVector &CUBuffer) {
182+
*RangesStream << CUBuffer;
183+
SectionOffset = RangesBuffer->size();
184+
}
185+
180186
DebugAddrWriter *DebugRangeListsSectionWriter::AddrWriter = nullptr;
181187

182188
uint64_t DebugRangeListsSectionWriter::addRanges(

0 commit comments

Comments
 (0)