Skip to content

Commit ee09f7d

Browse files
authored
[MC][NFC] Reduce Address2ProbesMap size
Replace the map from addresses to lists of probes with a flat vector of probe references sorted by address. This reduces pseudo probe parsing time from 9.56s to 8.59s and peak RSS from 9.66 GiB to 9.08 GiB as part of perf2bolt processing a large binary. Test Plan: `bin/llvm-lit -sv test/tools/llvm-profgen` Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm Reviewed By: wlei-llvm Pull Request: #102904
1 parent 04ebd19 commit ee09f7d

File tree

6 files changed

+94
-95
lines changed

6 files changed

+94
-95
lines changed

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
24152415
Fragments.insert(BF);
24162416
for (const BinaryFunction *F : Fragments) {
24172417
const uint64_t FuncAddr = F->getAddress();
2418-
const auto &FragmentProbes =
2419-
llvm::make_range(ProbeMap.lower_bound(FuncAddr),
2420-
ProbeMap.lower_bound(FuncAddr + F->getSize()));
2421-
for (const auto &[OutputAddress, Probes] : FragmentProbes) {
2418+
for (const MCDecodedPseudoProbe &Probe :
2419+
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
2420+
const uint32_t OutputAddress = Probe.getAddress();
24222421
const uint32_t InputOffset = BAT->translate(
24232422
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
24242423
const unsigned BlockIndex = getBlock(InputOffset).second;
2425-
for (const MCDecodedPseudoProbe &Probe : Probes)
2426-
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2427-
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
2428-
Probe.getType()});
2424+
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2425+
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
2426+
Probe.getType()});
24292427
}
24302428
}
24312429
}

bolt/lib/Profile/YAMLProfileWriter.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
193193
const uint64_t FuncAddr = BF.getAddress();
194194
const std::pair<uint64_t, uint64_t> &BlockRange =
195195
BB->getInputAddressRange();
196-
const auto &BlockProbes =
197-
llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
198-
ProbeMap.lower_bound(FuncAddr + BlockRange.second));
199-
for (const auto &[_, Probes] : BlockProbes)
200-
for (const MCDecodedPseudoProbe &Probe : Probes)
201-
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
202-
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
196+
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
197+
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
198+
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
199+
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
203200
}
204201

205202
YamlBF.Blocks.emplace_back(YamlBB);

bolt/lib/Rewrite/PseudoProbeRewriter.cpp

Lines changed: 34 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -173,59 +173,50 @@ void PseudoProbeRewriter::updatePseudoProbes() {
173173
AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap();
174174
const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();
175175

176-
for (auto &AP : Address2ProbesMap) {
177-
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first);
176+
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
177+
uint64_t Address = Probe.getAddress();
178+
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address);
178179
// If F is removed, eliminate all probes inside it from inline tree
179180
// Setting probes' addresses as INT64_MAX means elimination
180181
if (!F) {
181-
for (MCDecodedPseudoProbe &Probe : AP.second)
182-
Probe.setAddress(INT64_MAX);
182+
Probe.setAddress(INT64_MAX);
183183
continue;
184184
}
185185
// If F is not emitted, the function will remain at the same address as its
186186
// input
187187
if (!F->isEmitted())
188188
continue;
189189

190-
uint64_t Offset = AP.first - F->getAddress();
190+
uint64_t Offset = Address - F->getAddress();
191191
const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
192192
uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
193193
// Check if block output address is defined.
194194
// If not, such block is removed from binary. Then remove the probes from
195195
// inline tree
196196
if (BlkOutputAddress == 0) {
197-
for (MCDecodedPseudoProbe &Probe : AP.second)
198-
Probe.setAddress(INT64_MAX);
197+
Probe.setAddress(INT64_MAX);
199198
continue;
200199
}
201200

202-
unsigned ProbeTrack = AP.second.size();
203-
auto Probe = llvm::map_iterator(
204-
AP.second.begin(),
205-
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
206-
while (ProbeTrack != 0) {
207-
if (Probe->isBlock()) {
208-
Probe->setAddress(BlkOutputAddress);
209-
} else if (Probe->isCall()) {
210-
// A call probe may be duplicated due to ICP
211-
// Go through output of InputOffsetToAddressMap to collect all related
212-
// probes
213-
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
214-
auto CallOutputAddress = CallOutputAddresses.first;
215-
if (CallOutputAddress == CallOutputAddresses.second) {
216-
Probe->setAddress(INT64_MAX);
217-
} else {
218-
Probe->setAddress(CallOutputAddress->second);
219-
CallOutputAddress = std::next(CallOutputAddress);
220-
}
221-
222-
while (CallOutputAddress != CallOutputAddresses.second) {
223-
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
224-
CallOutputAddress = std::next(CallOutputAddress);
225-
}
201+
if (Probe.isBlock()) {
202+
Probe.setAddress(BlkOutputAddress);
203+
} else if (Probe.isCall()) {
204+
// A call probe may be duplicated due to ICP
205+
// Go through output of InputOffsetToAddressMap to collect all related
206+
// probes
207+
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address);
208+
auto CallOutputAddress = CallOutputAddresses.first;
209+
if (CallOutputAddress == CallOutputAddresses.second) {
210+
Probe.setAddress(INT64_MAX);
211+
} else {
212+
Probe.setAddress(CallOutputAddress->second);
213+
CallOutputAddress = std::next(CallOutputAddress);
214+
}
215+
216+
while (CallOutputAddress != CallOutputAddresses.second) {
217+
ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second);
218+
CallOutputAddress = std::next(CallOutputAddress);
226219
}
227-
Probe = std::next(Probe);
228-
ProbeTrack--;
229220
}
230221
}
231222

@@ -241,22 +232,16 @@ void PseudoProbeRewriter::updatePseudoProbes() {
241232
BinaryBlock.getName();
242233

243234
// scan all addresses -> correlate probe to block when print out
244-
std::vector<uint64_t> Addresses;
245-
for (auto &Entry : Address2ProbesMap)
246-
Addresses.push_back(Entry.first);
247-
llvm::sort(Addresses);
248-
for (uint64_t Key : Addresses) {
249-
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
250-
if (Probe.getAddress() == INT64_MAX)
251-
outs() << "Deleted Probe: ";
252-
else
253-
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
254-
Probe.print(outs(), GUID2Func, true);
255-
// print block name only if the probe is block type and undeleted.
256-
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
257-
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
258-
<< Addr2BlockNames[Probe.getAddress()] << "\n";
259-
}
235+
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
236+
if (Probe.getAddress() == INT64_MAX)
237+
outs() << "Deleted Probe: ";
238+
else
239+
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
240+
Probe.print(outs(), GUID2Func, true);
241+
// print block name only if the probe is block type and undeleted.
242+
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
243+
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
244+
<< Addr2BlockNames[Probe.getAddress()] << "\n";
260245
}
261246
outs() << "=======================================\n";
262247
}

llvm/include/llvm/MC/MCPseudoProbe.h

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
#include "llvm/IR/PseudoProbe.h"
6464
#include "llvm/Support/ErrorOr.h"
6565
#include <functional>
66-
#include <map>
6766
#include <memory>
6867
#include <string>
6968
#include <tuple>
@@ -103,10 +102,6 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
103102
// GUID to PseudoProbeFuncDesc map
104103
using GUIDProbeFunctionMap =
105104
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
106-
// Address to pseudo probes map.
107-
using AddressProbesMap =
108-
std::map<uint64_t,
109-
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;
110105

111106
class MCDecodedPseudoProbeInlineTree;
112107

@@ -213,6 +208,31 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
213208
bool ShowName) const;
214209
};
215210

211+
// Address to pseudo probes map.
212+
class AddressProbesMap
213+
: public std::vector<std::reference_wrapper<MCDecodedPseudoProbe>> {
214+
auto getIt(uint64_t Addr) const {
215+
auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) {
216+
return Probe.getAddress() < Addr;
217+
};
218+
return llvm::lower_bound(*this, Addr, CompareProbe);
219+
}
220+
221+
public:
222+
// Returns range of probes within [\p From, \p To) address range.
223+
auto find(uint64_t From, uint64_t To) const {
224+
return llvm::make_range(getIt(From), getIt(To));
225+
}
226+
// Returns range of probes with given \p Address.
227+
auto find(uint64_t Address) const {
228+
auto FromIt = getIt(Address);
229+
if (FromIt == end() || FromIt->get().getAddress() != Address)
230+
return llvm::make_range(end(), end());
231+
auto ToIt = getIt(Address + 1);
232+
return llvm::make_range(FromIt, ToIt);
233+
}
234+
};
235+
216236
template <typename ProbesType, typename DerivedProbeInlineTreeType,
217237
typename InlinedProbeTreeMap>
218238
class MCPseudoProbeInlineTreeBase {

llvm/lib/MC/MCPseudoProbe.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,6 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
501501
if (Cur && !isSentinelProbe(Attr)) {
502502
PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr,
503503
Discriminator, Cur);
504-
Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back());
505504
++CurrentProbeCount;
506505
}
507506
LastAddr = Addr;
@@ -635,6 +634,15 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
635634
"Mismatching probe count pre- and post-parsing");
636635
assert(InlineTreeVec.size() == InlinedCount &&
637636
"Mismatching function records count pre- and post-parsing");
637+
638+
std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
639+
for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
640+
SortedA2P[I] = {Probe.getAddress(), I};
641+
llvm::sort(SortedA2P);
642+
Address2ProbesMap.reserve(ProbeCount);
643+
for (const uint32_t I : llvm::make_second_range(SortedA2P))
644+
Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
645+
SortedA2P.clear();
638646
return true;
639647
}
640648

@@ -650,36 +658,29 @@ void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
650658

651659
void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
652660
uint64_t Address) {
653-
auto It = Address2ProbesMap.find(Address);
654-
if (It != Address2ProbesMap.end()) {
655-
for (const MCDecodedPseudoProbe &Probe : It->second) {
656-
OS << " [Probe]:\t";
657-
Probe.print(OS, GUID2FuncDescMap, true);
658-
}
661+
for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
662+
OS << " [Probe]:\t";
663+
Probe.print(OS, GUID2FuncDescMap, true);
659664
}
660665
}
661666

662667
void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
663-
auto Entries = make_first_range(Address2ProbesMap);
664-
SmallVector<uint64_t, 0> Addresses(Entries.begin(), Entries.end());
665-
llvm::sort(Addresses);
666-
for (auto K : Addresses) {
667-
OS << "Address:\t";
668-
OS << K;
669-
OS << "\n";
670-
printProbeForAddress(OS, K);
668+
uint64_t PrevAddress = INT64_MAX;
669+
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
670+
uint64_t Address = Probe.getAddress();
671+
if (Address != PrevAddress) {
672+
PrevAddress = Address;
673+
OS << "Address:\t" << Address << '\n';
674+
}
675+
OS << " [Probe]:\t";
676+
Probe.print(OS, GUID2FuncDescMap, true);
671677
}
672678
}
673679

674680
const MCDecodedPseudoProbe *
675681
MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
676-
auto It = Address2ProbesMap.find(Address);
677-
if (It == Address2ProbesMap.end())
678-
return nullptr;
679-
const auto &Probes = It->second;
680-
681682
const MCDecodedPseudoProbe *CallProbe = nullptr;
682-
for (const MCDecodedPseudoProbe &Probe : Probes) {
683+
for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
683684
if (Probe.isCall()) {
684685
// Disabling the assert and returning first call probe seen so far.
685686
// Subsequent call probes, if any, are ignored. Due to the way

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,11 +1183,9 @@ void ProfileGeneratorBase::extractProbesFromRange(
11831183
do {
11841184
const AddressProbesMap &Address2ProbesMap =
11851185
Binary->getAddress2ProbesMap();
1186-
auto It = Address2ProbesMap.find(IP.Address);
1187-
if (It != Address2ProbesMap.end()) {
1188-
for (const MCDecodedPseudoProbe &Probe : It->second) {
1189-
ProbeCounter[&Probe] += Count;
1190-
}
1186+
for (const MCDecodedPseudoProbe &Probe :
1187+
Address2ProbesMap.find(IP.Address)) {
1188+
ProbeCounter[&Probe] += Count;
11911189
}
11921190
} while (IP.advance() && IP.Address <= RangeEnd);
11931191
}

0 commit comments

Comments
 (0)