Skip to content

Commit 686b462

Browse files
Merge branch 'main' into loop_access_analysis_partial_result
2 parents 6dc3239 + 21b4059 commit 686b462

File tree

2,763 files changed

+735966
-239421
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,763 files changed

+735966
-239421
lines changed

.github/new-prs-labeler.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ mlgo:
702702
- llvm/unittests/CodeGen/ML*
703703
- llvm/test/CodeGen/MLRegAlloc/**
704704
- llvm/utils/mlgo-utils/**
705+
- llvm/docs/MLGO.rst
705706

706707
tools:llvm-exegesis:
707708
- llvm/tools/llvm-exegesis/**

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,6 @@ class DataAggregator : public DataReader {
212212
uint64_t NumTraces{0};
213213
uint64_t NumInvalidTraces{0};
214214
uint64_t NumLongRangeTraces{0};
215-
/// Specifies how many samples were recorded in cold areas if we are dealing
216-
/// with profiling data collected in a bolted binary. For LBRs, incremented
217-
/// for the source of the branch to avoid counting cold activity twice (one
218-
/// for source and another for destination).
219-
uint64_t NumColdSamples{0};
220215
uint64_t NumTotalSamples{0};
221216

222217
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
@@ -473,7 +468,6 @@ class DataAggregator : public DataReader {
473468
void dump(const PerfMemSample &Sample) const;
474469

475470
/// Profile diagnostics print methods
476-
void printColdSamplesDiagnostic() const;
477471
void printLongRangeTracesDiagnostic() const;
478472
void printBranchSamplesDiagnostics() const;
479473
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;

bolt/include/bolt/Profile/DataReader.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,8 @@ struct FuncBranchData {
114114

115115
FuncBranchData() {}
116116

117-
FuncBranchData(StringRef Name, ContainerTy Data)
118-
: Name(Name), Data(std::move(Data)) {}
119-
120-
FuncBranchData(StringRef Name, ContainerTy Data, ContainerTy EntryData)
117+
FuncBranchData(StringRef Name, ContainerTy Data = ContainerTy(),
118+
ContainerTy EntryData = ContainerTy())
121119
: Name(Name), Data(std::move(Data)), EntryData(std::move(EntryData)) {}
122120

123121
ErrorOr<const BranchInfo &> getBranch(uint64_t From, uint64_t To) const;
@@ -205,7 +203,7 @@ struct FuncMemData {
205203

206204
FuncMemData() {}
207205

208-
FuncMemData(StringRef Name, ContainerTy Data)
206+
FuncMemData(StringRef Name, ContainerTy Data = ContainerTy())
209207
: Name(Name), Data(std::move(Data)) {}
210208
};
211209

@@ -241,7 +239,7 @@ struct FuncBasicSampleData {
241239
StringRef Name;
242240
ContainerTy Data;
243241

244-
FuncBasicSampleData(StringRef Name, ContainerTy Data)
242+
FuncBasicSampleData(StringRef Name, ContainerTy Data = ContainerTy())
245243
: Name(Name), Data(std::move(Data)) {}
246244

247245
/// Get the number of samples recorded in [Start, End)

bolt/include/bolt/Profile/Heatmap.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ class Heatmap {
5252
: BucketSize(BucketSize), MinAddress(MinAddress), MaxAddress(MaxAddress),
5353
TextSections(TextSections) {}
5454

55+
uint64_t HotStart{0};
56+
uint64_t HotEnd{0};
57+
5558
inline bool ignoreAddress(uint64_t Address) const {
5659
return (Address > MaxAddress) || (Address < MinAddress);
5760
}

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717

1818
namespace opts {
1919

20-
extern bool HeatmapMode;
20+
enum HeatmapModeKind {
21+
HM_None = 0,
22+
HM_Exclusive, // llvm-bolt-heatmap
23+
HM_Optional // perf2bolt --heatmap
24+
};
25+
26+
extern HeatmapModeKind HeatmapMode;
2127
extern bool BinaryAnalysisMode;
2228

2329
extern llvm::cl::OptionCategory BoltCategory;
@@ -45,6 +51,7 @@ extern llvm::cl::opt<unsigned> HeatmapBlock;
4551
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
4652
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
4753
extern llvm::cl::opt<bool> HeatmapPrintMappings;
54+
extern llvm::cl::opt<std::string> HeatmapOutput;
4855
extern llvm::cl::opt<bool> HotData;
4956
extern llvm::cl::opt<bool> HotFunctionsAtEnd;
5057
extern llvm::cl::opt<bool> HotText;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ extern cl::opt<bool> UpdateDebugSections;
6666
extern cl::opt<unsigned> Verbosity;
6767

6868
extern bool BinaryAnalysisMode;
69-
extern bool HeatmapMode;
69+
extern HeatmapModeKind HeatmapMode;
7070
extern bool processAllFunctions();
7171

7272
static cl::opt<bool> CheckEncoding(
@@ -3326,7 +3326,7 @@ void BinaryFunction::duplicateConstantIslands() {
33263326
static std::string constructFilename(std::string Filename,
33273327
std::string Annotation,
33283328
std::string Suffix) {
3329-
std::replace(Filename.begin(), Filename.end(), '/', '-');
3329+
llvm::replace(Filename, '/', '-');
33303330
if (!Annotation.empty())
33313331
Annotation.insert(0, "-");
33323332
if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -437,10 +437,10 @@ getUnitForOffset(DIEBuilder &Builder, DWARFContext &DWCtx,
437437
// This is a work around for XCode clang. There is a build error when we
438438
// pass DWCtx.compile_units() to llvm::upper_bound
439439
std::call_once(InitVectorFlag, initCUVector);
440-
auto CUIter = std::upper_bound(CUOffsets.begin(), CUOffsets.end(), Offset,
441-
[](uint64_t LHS, const DWARFUnit *RHS) {
442-
return LHS < RHS->getNextUnitOffset();
443-
});
440+
auto CUIter = llvm::upper_bound(CUOffsets, Offset,
441+
[](uint64_t LHS, const DWARFUnit *RHS) {
442+
return LHS < RHS->getNextUnitOffset();
443+
});
444444
CU = CUIter != CUOffsets.end() ? (*CUIter) : nullptr;
445445
}
446446
return CU;

bolt/lib/Core/DebugData.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,6 @@ static void writeDWARF5LocList(uint32_t &NumberOfEntries, DIEValue &AttrInfo,
676676
return;
677677
}
678678

679-
std::vector<uint64_t> OffsetsArray;
680679
auto writeExpression = [&](uint32_t Index) -> void {
681680
const DebugLocationEntry &Entry = LocList[Index];
682681
encodeULEB128(Entry.Expr.size(), LocBodyStream);

bolt/lib/Passes/AsmDump.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ void dumpFunction(const BinaryFunction &BF) {
109109
}
110110

111111
std::string PrintName = BF.getPrintName();
112-
std::replace(PrintName.begin(), PrintName.end(), '/', '-');
112+
llvm::replace(PrintName, '/', '-');
113113
std::string Filename =
114114
opts::AsmDump.empty()
115115
? (PrintName + ".s")

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ static const char *dynoStatsOptName(const bolt::DynoStats::Category C) {
3535

3636
OptNames[C] = bolt::DynoStats::Description(C);
3737

38-
std::replace(OptNames[C].begin(), OptNames[C].end(), ' ', '-');
38+
llvm::replace(OptNames[C], ' ', '-');
3939

4040
return OptNames[C].c_str();
4141
}

bolt/lib/Passes/FrameAnalysis.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ bool FrameAnalysis::updateArgsTouchedFor(const BinaryFunction &BF, MCInst &Inst,
320320
if (!BC.MIB->isCall(Inst))
321321
return false;
322322

323-
std::set<int64_t> Res;
324323
const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
325324
// If indirect call, we conservatively assume it accesses all stack positions
326325
if (TargetSymbol == nullptr) {

bolt/lib/Passes/HFSort.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,6 @@ std::vector<Cluster> clusterize(const CallGraph &Cg) {
239239
}
240240

241241
std::vector<Cluster> randomClusters(const CallGraph &Cg) {
242-
std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
243242
std::vector<Cluster> Clusters;
244243
Clusters.reserve(Cg.numNodes());
245244

bolt/lib/Passes/PettisAndHansen.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,10 @@ std::vector<Cluster> pettisAndHansen(const CallGraph &Cg) {
143143
// Find an arc with max weight and merge its nodes
144144

145145
while (!Carcs.empty()) {
146-
auto Maxpos =
147-
std::max_element(Carcs.begin(), Carcs.end(),
148-
[&](const ClusterArc &Carc1, const ClusterArc &Carc2) {
149-
return Carc1.Weight < Carc2.Weight;
150-
});
146+
auto Maxpos = llvm::max_element(
147+
Carcs, [&](const ClusterArc &Carc1, const ClusterArc &Carc2) {
148+
return Carc1.Weight < Carc2.Weight;
149+
});
151150

152151
ClusterArc Max = *Maxpos;
153152
Carcs.erase(Maxpos);

bolt/lib/Passes/ShrinkWrapping.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -825,14 +825,13 @@ void ShrinkWrapping::computeSaveLocations() {
825825
if (!CSA.CalleeSaved[I])
826826
continue;
827827

828-
std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
829-
[&](const MCInst *A, const MCInst *B) {
830-
const BinaryBasicBlock *BBA = InsnToBB[A];
831-
const BinaryBasicBlock *BBB = InsnToBB[B];
832-
const uint64_t CountA = BBA->getKnownExecutionCount();
833-
const uint64_t CountB = BBB->getKnownExecutionCount();
834-
return CountB < CountA;
835-
});
828+
llvm::stable_sort(BestSavePos[I], [&](const MCInst *A, const MCInst *B) {
829+
const BinaryBasicBlock *BBA = InsnToBB[A];
830+
const BinaryBasicBlock *BBB = InsnToBB[B];
831+
const uint64_t CountA = BBA->getKnownExecutionCount();
832+
const uint64_t CountB = BBB->getKnownExecutionCount();
833+
return CountB < CountA;
834+
});
836835

837836
for (MCInst *Pos : BestSavePos[I]) {
838837
const BinaryBasicBlock *BB = InsnToBB[Pos];

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 42 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ void DataAggregator::findPerfExecutable() {
164164
void DataAggregator::start() {
165165
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
166166

167+
// Turn on heatmap building if requested by --heatmap flag.
168+
if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
169+
opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
170+
167171
// Don't launch perf for pre-aggregated files or when perf input is specified
168172
// by the user.
169173
if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
@@ -502,24 +506,25 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
502506
errs() << "PERF2BOLT: failed to parse samples\n";
503507

504508
// Special handling for memory events
505-
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
506-
return Error::success();
507-
508-
if (const std::error_code EC = parseMemEvents())
509-
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
510-
<< '\n';
509+
if (!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
510+
if (const std::error_code EC = parseMemEvents())
511+
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
512+
<< '\n';
511513

512514
deleteTempFiles();
513515

514516
heatmap:
515-
if (opts::HeatmapMode) {
516-
if (std::error_code EC = printLBRHeatMap()) {
517-
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
518-
exit(1);
519-
}
520-
exit(0);
521-
}
522-
return Error::success();
517+
if (!opts::HeatmapMode)
518+
return Error::success();
519+
520+
if (std::error_code EC = printLBRHeatMap())
521+
return errorCodeToError(EC);
522+
523+
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Optional)
524+
return Error::success();
525+
526+
assert(opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive);
527+
exit(0);
523528
}
524529

525530
Error DataAggregator::readProfile(BinaryContext &BC) {
@@ -635,8 +640,6 @@ bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address,
635640

636641
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
637642
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
638-
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
639-
NumColdSamples += Count;
640643
// Attach executed bytes to parent function in case of cold fragment.
641644
Func.SampleCountInBytes += Count * BlockSize;
642645

@@ -740,15 +743,10 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
740743
if (BAT)
741744
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
742745

743-
BinaryFunction *ParentFunc = getBATParentFunction(*Func);
744-
if (IsFrom &&
745-
(ParentFunc || (BAT && !BAT->isBATFunction(Func->getAddress()))))
746-
NumColdSamples += Count;
747-
748-
if (!ParentFunc)
749-
return std::pair{Func, IsRet};
746+
if (BinaryFunction *ParentFunc = getBATParentFunction(*Func))
747+
Func = ParentFunc;
750748

751-
return std::pair{ParentFunc, IsRet};
749+
return std::pair{Func, IsRet};
752750
};
753751

754752
auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
@@ -1318,6 +1316,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
13181316
}
13191317
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
13201318
opts::HeatmapMaxAddress, getTextSections(BC));
1319+
auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t {
1320+
if (Symbol)
1321+
if (ErrorOr<uint64_t> SymValue = BC->getSymbolValue(*Symbol))
1322+
return SymValue.get();
1323+
return 0;
1324+
};
1325+
HM.HotStart = getSymbolValue(BC->getHotTextStartSymbol());
1326+
HM.HotEnd = getSymbolValue(BC->getHotTextEndSymbol());
13211327

13221328
if (!NumTotalSamples) {
13231329
if (opts::BasicAggregation) {
@@ -1351,15 +1357,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
13511357
exit(1);
13521358
}
13531359

1354-
HM.print(opts::OutputFilename);
1355-
if (opts::OutputFilename == "-")
1356-
HM.printCDF(opts::OutputFilename);
1357-
else
1358-
HM.printCDF(opts::OutputFilename + ".csv");
1359-
if (opts::OutputFilename == "-")
1360-
HM.printSectionHotness(opts::OutputFilename);
1361-
else
1362-
HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1360+
HM.print(opts::HeatmapOutput);
1361+
if (opts::HeatmapOutput == "-") {
1362+
HM.printCDF(opts::HeatmapOutput);
1363+
HM.printSectionHotness(opts::HeatmapOutput);
1364+
} else {
1365+
HM.printCDF(opts::HeatmapOutput + ".csv");
1366+
HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv");
1367+
}
13631368

13641369
return std::error_code();
13651370
}
@@ -1386,7 +1391,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
13861391
const uint64_t TraceTo = NextLBR->From;
13871392
const BinaryFunction *TraceBF =
13881393
getBinaryFunctionContainingAddress(TraceFrom);
1389-
if (opts::HeatmapMode) {
1394+
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive) {
13901395
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
13911396
++Info.InternCount;
13921397
} else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1424,7 +1429,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14241429
NextLBR = &LBR;
14251430

14261431
// Record branches outside binary functions for heatmap.
1427-
if (opts::HeatmapMode) {
1432+
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive) {
14281433
TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
14291434
++Info.TakenCount;
14301435
continue;
@@ -1439,26 +1444,13 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14391444
}
14401445
// Record LBR addresses not covered by fallthroughs (bottom-of-stack source
14411446
// and top-of-stack target) as basic samples for heatmap.
1442-
if (opts::HeatmapMode && !Sample.LBR.empty()) {
1447+
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive &&
1448+
!Sample.LBR.empty()) {
14431449
++BasicSamples[Sample.LBR.front().To];
14441450
++BasicSamples[Sample.LBR.back().From];
14451451
}
14461452
}
14471453

1448-
void DataAggregator::printColdSamplesDiagnostic() const {
1449-
if (NumColdSamples > 0) {
1450-
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1451-
outs() << "PERF2BOLT: " << NumColdSamples
1452-
<< format(" (%.1f%%)", ColdSamples)
1453-
<< " samples recorded in cold regions of split functions.\n";
1454-
if (ColdSamples > 5.0f)
1455-
outs()
1456-
<< "WARNING: The BOLT-processed binary where samples were collected "
1457-
"likely used bad data or your service observed a large shift in "
1458-
"profile. You may want to audit this\n";
1459-
}
1460-
}
1461-
14621454
void DataAggregator::printLongRangeTracesDiagnostic() const {
14631455
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
14641456
<< NumLongRangeTraces;
@@ -1499,7 +1491,6 @@ void DataAggregator::printBranchSamplesDiagnostics() const {
14991491
"collection. The generated data may be ineffective for improving "
15001492
"performance\n\n";
15011493
printLongRangeTracesDiagnostic();
1502-
printColdSamplesDiagnostic();
15031494
}
15041495

15051496
void DataAggregator::printBasicSamplesDiagnostics(
@@ -1511,7 +1502,6 @@ void DataAggregator::printBasicSamplesDiagnostics(
15111502
"binary is probably not the same binary used during profiling "
15121503
"collection. The generated data may be ineffective for improving "
15131504
"performance\n\n";
1514-
printColdSamplesDiagnostic();
15151505
}
15161506

15171507
void DataAggregator::printBranchStacksDiagnostics(

0 commit comments

Comments
 (0)