Skip to content

[MC][NFC] Reduce Address2ProbesMap size #102904

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
Fragments.insert(BF);
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
const auto &FragmentProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr),
ProbeMap.lower_bound(FuncAddr + F->getSize()));
for (const auto &[OutputAddress, Probes] : FragmentProbes) {
for (const MCDecodedPseudoProbe &Probe :
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
}
}
}
Expand Down
11 changes: 4 additions & 7 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
const auto &BlockProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
ProbeMap.lower_bound(FuncAddr + BlockRange.second));
for (const auto &[_, Probes] : BlockProbes)
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
}

YamlBF.Blocks.emplace_back(YamlBB);
Expand Down
83 changes: 34 additions & 49 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,59 +173,50 @@ void PseudoProbeRewriter::updatePseudoProbes() {
AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap();
const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();

for (auto &AP : Address2ProbesMap) {
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first);
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
uint64_t Address = Probe.getAddress();
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address);
// If F is removed, eliminate all probes inside it from inline tree
// Setting probes' addresses as INT64_MAX means elimination
if (!F) {
for (MCDecodedPseudoProbe &Probe : AP.second)
Probe.setAddress(INT64_MAX);
Probe.setAddress(INT64_MAX);
continue;
}
// If F is not emitted, the function will remain in the same address as its
// input
if (!F->isEmitted())
continue;

uint64_t Offset = AP.first - F->getAddress();
uint64_t Offset = Address - F->getAddress();
const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
// Check if block output address is defined.
// If not, such block is removed from binary. Then remove the probes from
// inline tree
if (BlkOutputAddress == 0) {
for (MCDecodedPseudoProbe &Probe : AP.second)
Probe.setAddress(INT64_MAX);
Probe.setAddress(INT64_MAX);
continue;
}

unsigned ProbeTrack = AP.second.size();
auto Probe = llvm::map_iterator(
AP.second.begin(),
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
while (ProbeTrack != 0) {
if (Probe->isBlock()) {
Probe->setAddress(BlkOutputAddress);
} else if (Probe->isCall()) {
// A call probe may be duplicated due to ICP
// Go through output of InputOffsetToAddressMap to collect all related
// probes
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
auto CallOutputAddress = CallOutputAddresses.first;
if (CallOutputAddress == CallOutputAddresses.second) {
Probe->setAddress(INT64_MAX);
} else {
Probe->setAddress(CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}

while (CallOutputAddress != CallOutputAddresses.second) {
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}
if (Probe.isBlock()) {
Probe.setAddress(BlkOutputAddress);
} else if (Probe.isCall()) {
// A call probe may be duplicated due to ICP
// Go through output of InputOffsetToAddressMap to collect all related
// probes
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address);
auto CallOutputAddress = CallOutputAddresses.first;
if (CallOutputAddress == CallOutputAddresses.second) {
Probe.setAddress(INT64_MAX);
} else {
Probe.setAddress(CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}

while (CallOutputAddress != CallOutputAddresses.second) {
ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}
Probe = std::next(Probe);
ProbeTrack--;
}
}

Expand All @@ -241,22 +232,16 @@ void PseudoProbeRewriter::updatePseudoProbes() {
BinaryBlock.getName();

// scan all addresses -> correlate probe to block when print out
std::vector<uint64_t> Addresses;
for (auto &Entry : Address2ProbesMap)
Addresses.push_back(Entry.first);
llvm::sort(Addresses);
for (uint64_t Key : Addresses) {
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
if (Probe.getAddress() == INT64_MAX)
outs() << "Deleted Probe: ";
else
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
Probe.print(outs(), GUID2Func, true);
// print block name only if the probe is block type and undeleted.
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
<< Addr2BlockNames[Probe.getAddress()] << "\n";
}
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
if (Probe.getAddress() == INT64_MAX)
outs() << "Deleted Probe: ";
else
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
Probe.print(outs(), GUID2Func, true);
// print block name only if the probe is block type and undeleted.
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
<< Addr2BlockNames[Probe.getAddress()] << "\n";
}
outs() << "=======================================\n";
}
Expand Down
30 changes: 25 additions & 5 deletions llvm/include/llvm/MC/MCPseudoProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/ErrorOr.h"
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -103,10 +102,6 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
// GUID to PseudoProbeFuncDesc map
using GUIDProbeFunctionMap =
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
// Address to pseudo probes map.
using AddressProbesMap =
std::map<uint64_t,
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;

class MCDecodedPseudoProbeInlineTree;

Expand Down Expand Up @@ -213,6 +208,31 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
bool ShowName) const;
};

// Address to pseudo probes map.
class AddressProbesMap
: public std::vector<std::reference_wrapper<MCDecodedPseudoProbe>> {
auto getIt(uint64_t Addr) const {
auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) {
return Probe.getAddress() < Addr;
};
return llvm::lower_bound(*this, Addr, CompareProbe);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this guarantee it's equivalent to std::map<uint64_t, ....> ? specifically, if the address doesn't exist in the map, std::map returns end() but using lower_bound, it will points to the next element.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Initially I used equal_range in find(Addr) case, and lower_bound in find(From, To) case. Need to revert to equal_range.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I remember now why I went with lower_bound in both cases:
find(Addr) does the same as find(Addr, Addr+1), which in the case of missing Addr would return an empty range.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, thanks for the clarification. but getIt(From), getIt(To)) introduces two vector search, can we just check if the return value's address is equal to the given address, if not, then return end()? this would save one search.

}

public:
// Returns range of probes within [\p From, \p To) address range.
auto find(uint64_t From, uint64_t To) const {
return llvm::make_range(getIt(From), getIt(To));
}
// Returns range of probes with given \p Address.
auto find(uint64_t Address) const {
auto FromIt = getIt(Address);
if (FromIt == end() || FromIt->get().getAddress() != Address)
return llvm::make_range(end(), end());
auto ToIt = getIt(Address + 1);
return llvm::make_range(FromIt, ToIt);
}
};

template <typename ProbesType, typename DerivedProbeInlineTreeType,
typename InlinedProbeTreeMap>
class MCPseudoProbeInlineTreeBase {
Expand Down
43 changes: 22 additions & 21 deletions llvm/lib/MC/MCPseudoProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,6 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
if (Cur && !isSentinelProbe(Attr)) {
PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr,
Discriminator, Cur);
Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back());
++CurrentProbeCount;
}
LastAddr = Addr;
Expand Down Expand Up @@ -635,6 +634,15 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
"Mismatching probe count pre- and post-parsing");
assert(InlineTreeVec.size() == InlinedCount &&
"Mismatching function records count pre- and post-parsing");

std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
SortedA2P[I] = {Probe.getAddress(), I};
llvm::sort(SortedA2P);
Address2ProbesMap.reserve(ProbeCount);
for (const uint32_t I : llvm::make_second_range(SortedA2P))
Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
SortedA2P.clear();
return true;
}

Expand All @@ -650,36 +658,29 @@ void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {

void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
uint64_t Address) {
auto It = Address2ProbesMap.find(Address);
if (It != Address2ProbesMap.end()) {
for (const MCDecodedPseudoProbe &Probe : It->second) {
OS << " [Probe]:\t";
Probe.print(OS, GUID2FuncDescMap, true);
}
for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
OS << " [Probe]:\t";
Probe.print(OS, GUID2FuncDescMap, true);
}
}

void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
auto Entries = make_first_range(Address2ProbesMap);
SmallVector<uint64_t, 0> Addresses(Entries.begin(), Entries.end());
llvm::sort(Addresses);
for (auto K : Addresses) {
OS << "Address:\t";
OS << K;
OS << "\n";
printProbeForAddress(OS, K);
uint64_t PrevAddress = INT64_MAX;
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
uint64_t Address = Probe.getAddress();
if (Address != PrevAddress) {
PrevAddress = Address;
OS << "Address:\t" << Address << '\n';
}
OS << " [Probe]:\t";
Probe.print(OS, GUID2FuncDescMap, true);
}
}

const MCDecodedPseudoProbe *
MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
auto It = Address2ProbesMap.find(Address);
if (It == Address2ProbesMap.end())
return nullptr;
const auto &Probes = It->second;

const MCDecodedPseudoProbe *CallProbe = nullptr;
for (const MCDecodedPseudoProbe &Probe : Probes) {
for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
if (Probe.isCall()) {
// Disabling the assert and returning first call probe seen so far.
// Subsequent call probes, if any, are ignored. Due to the the way
Expand Down
8 changes: 3 additions & 5 deletions llvm/tools/llvm-profgen/ProfileGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1183,11 +1183,9 @@ void ProfileGeneratorBase::extractProbesFromRange(
do {
const AddressProbesMap &Address2ProbesMap =
Binary->getAddress2ProbesMap();
auto It = Address2ProbesMap.find(IP.Address);
if (It != Address2ProbesMap.end()) {
for (const MCDecodedPseudoProbe &Probe : It->second) {
ProbeCounter[&Probe] += Count;
}
for (const MCDecodedPseudoProbe &Probe :
Address2ProbesMap.find(IP.Address)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hot path, but now we change the O(1) unordered_map search to a binary search. Not sure if it can cause noticeable regression. I will run an end-to-end test on llvm-profgen.

ProbeCounter[&Probe] += Count;
}
} while (IP.advance() && IP.Address <= RangeEnd);
}
Expand Down
Loading