Skip to content

Commit a8a38ef

Browse files
committed
[llvm-profgen] Fix bug of loop scope mismatch
A performance issue occurred during profile generation, and the loop at line 525 turned out to be the bottleneck. Moving the code outside of the loop scope fixes the issue. The run time improves from 30+ minutes to ~30 seconds. Reviewed By: hoy, wenlei. Differential Revision: https://reviews.llvm.org/D107529
1 parent 2b89f40 commit a8a38ef

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "ProfileGenerator.h"
1010
#include "llvm/ProfileData/ProfileCommon.h"
11+
#include <unordered_set>
1112

1213
static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
1314
cl::Required,
@@ -520,7 +521,8 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
520521
// Extract the top frame probes by looking up each address among the range in
521522
// the Address2ProbeMap
522523
extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
523-
std::unordered_map<MCDecodedPseudoProbeInlineTree *, FunctionSamples *>
524+
std::unordered_map<MCDecodedPseudoProbeInlineTree *,
525+
std::unordered_set<FunctionSamples *>>
524526
FrameSamples;
525527
for (auto PI : ProbeCounter) {
526528
const MCDecodedPseudoProbe *Probe = PI.first;
@@ -530,7 +532,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
530532
// Record the current frame and FunctionProfile whenever samples are
531533
// collected for non-dangling probes. This is for reporting all of the
532534
// zero count probes of the frame later.
533-
FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
535+
FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
534536
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
535537
FunctionProfile.addTotalSamples(Count);
536538
if (Probe->isEntry()) {
@@ -559,12 +561,13 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
559561
FunctionProfile.getContext().getNameWithoutContext(), Count);
560562
}
561563
}
564+
}
562565

563-
// Assign zero count for remaining probes without sample hits to
564-
// differentiate from probes optimized away, of which the counts are unknown
565-
// and will be inferred by the compiler.
566-
for (auto &I : FrameSamples) {
567-
auto *FunctionProfile = I.second;
566+
// Assign zero count for remaining probes without sample hits to
567+
// differentiate from probes optimized away, of which the counts are unknown
568+
// and will be inferred by the compiler.
569+
for (auto &I : FrameSamples) {
570+
for (auto *FunctionProfile : I.second) {
568571
for (auto *Probe : I.first->getProbes()) {
569572
FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
570573
}

0 commit comments

Comments
 (0)