Skip to content

Commit ef71fb7

Browse files
committed
Merge branch 'main' into prvalue-explicit-object-member-function-call
2 parents 8e72680 + 0c274d5 commit ef71fb7

File tree

373 files changed

+10858
-2924
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

373 files changed

+10858
-2924
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
2424
#include "llvm/ADT/AddressRanges.h"
2525
#include "llvm/ADT/ArrayRef.h"
26+
#include "llvm/ADT/EquivalenceClasses.h"
2627
#include "llvm/ADT/StringMap.h"
2728
#include "llvm/ADT/iterator.h"
2829
#include "llvm/BinaryFormat/Dwarf.h"
@@ -241,6 +242,10 @@ class BinaryContext {
241242
/// Function fragments to skip.
242243
std::unordered_set<BinaryFunction *> FragmentsToSkip;
243244

245+
/// Fragment equivalence classes used to query whether functions belong to
246+
/// the same "family" in the presence of multiple fragments or parents.
247+
EquivalenceClasses<const BinaryFunction *> FragmentClasses;
248+
244249
/// The runtime library.
245250
std::unique_ptr<RuntimeLibrary> RtLibrary;
246251

@@ -1032,7 +1037,15 @@ class BinaryContext {
10321037
/// fragment_name == parent_name.cold(.\d+)?
10331038
/// True if the Function is registered, false if the check failed.
10341039
bool registerFragment(BinaryFunction &TargetFunction,
1035-
BinaryFunction &Function) const;
1040+
BinaryFunction &Function);
1041+
1042+
/// Return true if two functions belong to the same "family": are fragments
1043+
/// of one another, or fragments of the same parent, or transitively fragment-
1044+
/// related.
1045+
bool areRelatedFragments(const BinaryFunction *LHS,
1046+
const BinaryFunction *RHS) const {
1047+
return FragmentClasses.isEquivalent(LHS, RHS);
1048+
}
10361049

10371050
/// Add interprocedural reference for \p Function to \p Address
10381051
void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,11 +1793,6 @@ class BinaryFunction {
17931793
return ParentFragments.contains(&Other);
17941794
}
17951795

1796-
/// Returns if this function is a parent of \p Other function.
1797-
bool isParentOf(const BinaryFunction &Other) const {
1798-
return Fragments.contains(&Other);
1799-
}
1800-
18011796
/// Return the child fragments of this parent function.
18021797
iterator_range<FragmentsSetTy::const_iterator> getFragments() const {
18031798
return iterator_range<FragmentsSetTy::const_iterator>(Fragments.begin(),
@@ -1807,11 +1802,6 @@ class BinaryFunction {
18071802
/// Return the parent function for split function fragments.
18081803
FragmentsSetTy *getParentFragments() { return &ParentFragments; }
18091804

1810-
/// Returns if this function is a parent or child of \p Other function.
1811-
bool isParentOrChildOf(const BinaryFunction &Other) const {
1812-
return isChildOf(Other) || isParentOf(Other);
1813-
}
1814-
18151805
/// Set the profile data for the number of times the function was called.
18161806
BinaryFunction &setExecutionCount(uint64_t Count) {
18171807
ExecutionCount = Count;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
646646
const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
647647
const bool DoesBelongToFunction =
648648
BF.containsAddress(Value) ||
649-
(TargetBF && TargetBF->isParentOrChildOf(BF));
649+
(TargetBF && areRelatedFragments(TargetBF, &BF));
650650
if (!DoesBelongToFunction) {
651651
LLVM_DEBUG({
652652
if (!BF.containsAddress(Value)) {
@@ -839,9 +839,11 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
839839
assert(Address == JT->getAddress() && "unexpected non-empty jump table");
840840

841841
// Prevent associating a jump table to a specific fragment twice.
842-
// This simple check arises from the assumption: no more than 2 fragments.
843-
if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
844-
assert(JT->Parents[0]->isParentOrChildOf(Function) &&
842+
if (!llvm::is_contained(JT->Parents, &Function)) {
843+
assert(llvm::all_of(JT->Parents,
844+
[&](const BinaryFunction *BF) {
845+
return areRelatedFragments(&Function, BF);
846+
}) &&
845847
"cannot re-use jump table of a different function");
846848
// Duplicate the entry for the parent function for easy access
847849
JT->Parents.push_back(&Function);
@@ -852,8 +854,8 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
852854
JT->print(this->outs());
853855
}
854856
Function.JumpTables.emplace(Address, JT);
855-
JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
856-
JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
857+
for (BinaryFunction *Parent : JT->Parents)
858+
Parent->setHasIndirectTargetToSplitFragment(true);
857859
}
858860

859861
bool IsJumpTableParent = false;
@@ -1209,12 +1211,13 @@ void BinaryContext::generateSymbolHashes() {
12091211
}
12101212

12111213
bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1212-
BinaryFunction &Function) const {
1214+
BinaryFunction &Function) {
12131215
assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
12141216
if (TargetFunction.isChildOf(Function))
12151217
return true;
12161218
TargetFunction.addParentFragment(Function);
12171219
Function.addFragment(TargetFunction);
1220+
FragmentClasses.unionSets(&TargetFunction, &Function);
12181221
if (!HasRelocations) {
12191222
TargetFunction.setSimple(false);
12201223
Function.setSimple(false);
@@ -1336,7 +1339,7 @@ void BinaryContext::processInterproceduralReferences() {
13361339

13371340
if (TargetFunction) {
13381341
if (TargetFunction->isFragment() &&
1339-
!TargetFunction->isChildOf(Function)) {
1342+
!areRelatedFragments(TargetFunction, &Function)) {
13401343
this->errs()
13411344
<< "BOLT-WARNING: interprocedural reference between unrelated "
13421345
"fragments: "

bolt/lib/Core/Exceptions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
207207
"BOLT-ERROR: cannot find landing pad fragment");
208208
BC.addInterproceduralReference(this, Fragment->getAddress());
209209
BC.processInterproceduralReferences();
210-
assert(isParentOrChildOf(*Fragment) &&
210+
assert(BC.areRelatedFragments(this, Fragment) &&
211211
"BOLT-ERROR: cannot have landing pads in different functions");
212212
setHasIndirectTargetToSplitFragment(true);
213213
BC.addFragmentsToSkip(this);

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ MaxSamples("max-samples",
8888
cl::cat(AggregatorCategory));
8989

9090
extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
91+
extern cl::opt<bool> ProfileUsePseudoProbes;
9192
extern cl::opt<std::string> SaveProfile;
9293

9394
cl::opt<bool> ReadPreAggregated(
@@ -2298,7 +2299,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
22982299

22992300
yaml::bolt::BinaryProfile BP;
23002301

2301-
const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
2302+
const MCPseudoProbeDecoder *PseudoProbeDecoder =
2303+
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
23022304

23032305
// Fill out the header info.
23042306
BP.Header.Version = 1;

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ llvm::cl::opt<bool>
4949
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
5050
cl::desc("use DFS order for YAML profile"),
5151
cl::Hidden, cl::cat(BoltOptCategory));
52+
53+
llvm::cl::opt<bool> ProfileUsePseudoProbes(
54+
"profile-use-pseudo-probes",
55+
cl::desc("Use pseudo probes for profile generation and matching"),
56+
cl::Hidden, cl::cat(BoltOptCategory));
5257
} // namespace opts
5358

5459
namespace llvm {

bolt/lib/Profile/YAMLProfileWriter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
namespace opts {
2424
extern llvm::cl::opt<bool> ProfileUseDFS;
25+
extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
2526
} // namespace opts
2627

2728
namespace llvm {
@@ -57,7 +58,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
5758
const BoltAddressTranslation *BAT) {
5859
yaml::bolt::BinaryFunctionProfile YamlBF;
5960
const BinaryContext &BC = BF.getBinaryContext();
60-
const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
61+
const MCPseudoProbeDecoder *PseudoProbeDecoder =
62+
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
6163

6264
const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
6365

bolt/lib/Rewrite/DWARFRewriter.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -668,20 +668,12 @@ void DWARFRewriter::updateDebugInfo() {
668668
auto processSplitCU = [&](DWARFUnit &Unit, DWARFUnit &SplitCU,
669669
DIEBuilder &DIEBlder,
670670
DebugRangesSectionWriter &TempRangesSectionWriter,
671-
DebugAddrWriter &AddressWriter) {
671+
DebugAddrWriter &AddressWriter,
672+
const std::string &DWOName,
673+
const std::optional<std::string> &DwarfOutputPath) {
672674
DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable,
673675
&Unit);
674676
DWODIEBuilder.buildDWOUnit(SplitCU);
675-
std::string DWOName = "";
676-
std::optional<std::string> DwarfOutputPath =
677-
opts::DwarfOutputPath.empty()
678-
? std::nullopt
679-
: std::optional<std::string>(opts::DwarfOutputPath.c_str());
680-
{
681-
std::lock_guard<std::mutex> Lock(AccessMutex);
682-
DWOName = DIEBlder.updateDWONameCompDir(
683-
*StrOffstsWriter, *StrWriter, Unit, DwarfOutputPath, std::nullopt);
684-
}
685677
DebugStrOffsetsWriter DWOStrOffstsWriter(BC);
686678
DebugStrWriter DWOStrWriter((SplitCU).getContext(), true);
687679
DWODIEBuilder.updateDWONameCompDirForTypes(
@@ -759,8 +751,14 @@ void DWARFRewriter::updateDebugInfo() {
759751
DebugRangesSectionWriter *TempRangesSectionWriter =
760752
CU->getVersion() >= 5 ? RangeListsWritersByCU[*DWOId].get()
761753
: LegacyRangesWritersByCU[*DWOId].get();
754+
std::optional<std::string> DwarfOutputPath =
755+
opts::DwarfOutputPath.empty()
756+
? std::nullopt
757+
: std::optional<std::string>(opts::DwarfOutputPath.c_str());
758+
std::string DWOName = DIEBlder.updateDWONameCompDir(
759+
*StrOffstsWriter, *StrWriter, *CU, DwarfOutputPath, std::nullopt);
762760
processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter,
763-
AddressWriter);
761+
AddressWriter, DWOName, DwarfOutputPath);
764762
}
765763
for (DWARFUnit *CU : DIEBlder.getProcessedCUs())
766764
processMainBinaryCU(*CU, DIEBlder);

bolt/lib/Rewrite/PseudoProbeRewriter.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
4949
clEnumValN(PPP_All, "all", "enable all debugging printout")),
5050
cl::Hidden, cl::cat(BoltCategory));
5151

52+
extern cl::opt<bool> ProfileUsePseudoProbes;
5253
} // namespace opts
5354

5455
namespace {
@@ -89,12 +90,15 @@ class PseudoProbeRewriter final : public MetadataRewriter {
8990
};
9091

9192
Error PseudoProbeRewriter::preCFGInitializer() {
92-
parsePseudoProbe();
93+
if (opts::ProfileUsePseudoProbes)
94+
parsePseudoProbe();
9395

9496
return Error::success();
9597
}
9698

9799
Error PseudoProbeRewriter::postEmitFinalizer() {
100+
if (!opts::ProfileUsePseudoProbes)
101+
parsePseudoProbe();
98102
updatePseudoProbes();
99103

100104
return Error::success();

bolt/test/X86/pseudoprobe-decoding-inline.test

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
# PREAGG: B X:0 #main# 1 0
77
## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
88
# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
9-
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
9+
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
1010
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
1111
## Check pseudo-probes in BAT YAML profile (BOLTed binary)
1212
# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
13-
# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2
13+
# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
1414
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
1515
# CHECK-YAML: name: bar
1616
# CHECK-YAML: - bid: 0
@@ -29,6 +29,14 @@
2929
# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
3030
# CHECK-YAML: guid: 0xDB956436E78DD5FA
3131
# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
32+
#
33+
## Check that without --profile-use-pseudo-probes option, no pseudo probes are
34+
## generated
35+
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
36+
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
37+
# CHECK-NO-OPT-NOT: pseudo_probes
38+
# CHECK-NO-OPT-NOT: guid
39+
# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash
3240

3341
CHECK: Report of decoding input pseudo probe binaries
3442

bolt/test/X86/three-way-split-jt.s

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
## This reproduces an issue where the function is split into three fragments
2+
## and all fragments access the same jump table.
3+
4+
# REQUIRES: system-linux
5+
6+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
7+
# RUN: llvm-strip --strip-unneeded %t.o
8+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
9+
# RUN: llvm-bolt %t.exe -o %t.out -v=1 -print-only=main.warm -print-cfg 2>&1 | FileCheck %s
10+
11+
# CHECK-DAG: BOLT-INFO: marking main.warm as a fragment of main
12+
# CHECK-DAG: BOLT-INFO: marking main.cold as a fragment of main
13+
# CHECK-DAG: BOLT-INFO: processing main.warm as a sibling of non-ignored function
14+
# CHECK-DAG: BOLT-INFO: processing main.cold as a sibling of non-ignored function
15+
# CHECK-DAG: BOLT-WARNING: Ignoring main.cold
16+
# CHECK-DAG: BOLT-WARNING: Ignoring main.warm
17+
# CHECK-DAG: BOLT-WARNING: Ignoring main
18+
# CHECK: BOLT-WARNING: skipped 3 functions due to cold fragments
19+
20+
# CHECK: PIC Jump table JUMP_TABLE for function main, main.warm, main.cold
21+
# CHECK-NEXT: 0x0000 : __ENTRY_main@0x[[#]]
22+
# CHECK-NEXT: 0x0004 : __ENTRY_main@0x[[#]]
23+
# CHECK-NEXT: 0x0008 : __ENTRY_main.cold@0x[[#]]
24+
# CHECK-NEXT: 0x000c : __ENTRY_main@0x[[#]]
25+
.globl main
26+
.type main, %function
27+
.p2align 2
28+
main:
29+
LBB0:
30+
andl $0xf, %ecx
31+
cmpb $0x4, %cl
32+
## exit through ret
33+
ja LBB3
34+
35+
## jump table dispatch, jumping to label indexed by val in %ecx
36+
LBB1:
37+
leaq JUMP_TABLE(%rip), %r8
38+
movzbl %cl, %ecx
39+
movslq (%r8,%rcx,4), %rax
40+
addq %rax, %r8
41+
jmpq *%r8
42+
43+
LBB2:
44+
xorq %rax, %rax
45+
LBB3:
46+
addq $0x8, %rsp
47+
ret
48+
.size main, .-main
49+
50+
.globl main.warm
51+
.type main.warm, %function
52+
.p2align 2
53+
main.warm:
54+
LBB20:
55+
andl $0xb, %ebx
56+
cmpb $0x1, %cl
57+
## exit through ret
58+
ja LBB23
59+
60+
## jump table dispatch, jumping to label indexed by val in %ecx
61+
LBB21:
62+
leaq JUMP_TABLE(%rip), %r8
63+
movzbl %cl, %ecx
64+
movslq (%r8,%rcx,4), %rax
65+
addq %rax, %r8
66+
jmpq *%r8
67+
68+
LBB22:
69+
xorq %rax, %rax
70+
LBB23:
71+
addq $0x8, %rsp
72+
ret
73+
.size main.warm, .-main.warm
74+
75+
## cold fragment is only reachable through jump table
76+
.globl main.cold
77+
.type main.cold, %function
78+
main.cold:
79+
leaq JUMP_TABLE(%rip), %r8
80+
movzbl %cl, %ecx
81+
movslq (%r8,%rcx,4), %rax
82+
addq %rax, %r8
83+
jmpq *%r8
84+
LBB4:
85+
callq abort
86+
.size main.cold, .-main.cold
87+
88+
.rodata
89+
## jmp table, entries must be R_X86_64_PC32 relocs
90+
.globl JUMP_TABLE
91+
JUMP_TABLE:
92+
.long LBB2-JUMP_TABLE
93+
.long LBB3-JUMP_TABLE
94+
.long LBB4-JUMP_TABLE
95+
.long LBB3-JUMP_TABLE

0 commit comments

Comments
 (0)