Skip to content

Commit 98c1ba4

Browse files
[InstrProf] Add vtables with type metadata into symtab (#81051)
The indirect-call-promotion pass will look up the vtable to find the virtual function [1], and add vtable-derived information to each icall candidate [2] for cost-benefit analysis. [1] https://github.com/llvm/llvm-project/pull/81442/files#diff-a95d1ac8a0da69713fcb3346135d4b219f0a73920318d2549495620ea215191bR395-R416 [2] https://github.com/llvm/llvm-project/pull/81442/files#diff-a95d1ac8a0da69713fcb3346135d4b219f0a73920318d2549495620ea215191bR195-R199
1 parent b3a835e commit 98c1ba4

File tree

3 files changed

+105
-2
lines changed

3 files changed

+105
-2
lines changed

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "llvm/ADT/ArrayRef.h"
1919
#include "llvm/ADT/BitmaskEnum.h"
20+
#include "llvm/ADT/DenseMap.h"
2021
#include "llvm/ADT/IntervalMap.h"
2122
#include "llvm/ADT/STLExtras.h"
2223
#include "llvm/ADT/StringRef.h"
@@ -470,6 +471,12 @@ class InstrProfSymtab {
470471
// A map from MD5 keys to function definitions. We only populate this map
471472
// when building the Symtab from a Module.
472473
std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
474+
// A map from MD5 to the global variable. This map is only populated when
475+
// building the symtab from a module. Use separate container instances for
476+
// `MD5FuncMap` and `MD5VTableMap`.
477+
// TODO: Unify the container type and the lambda function 'mapName' inside
478+
// add{Func,VTable}WithName.
479+
DenseMap<uint64_t, GlobalVariable *> MD5VTableMap;
473480
// A map from function runtime address to function name MD5 hash.
474481
// This map is only populated and used by raw instr profile reader.
475482
AddrHashMap AddrToMD5Map;
@@ -488,12 +495,18 @@ class InstrProfSymtab {
488495

489496
// Add the function into the symbol table, by creating the following
490497
// map entries:
491-
// name-set = {PGOFuncName} + {getCanonicalName(PGOFuncName)} if the canonical
492-
// name is different from pgo name
498+
// name-set = {PGOFuncName} union {getCanonicalName(PGOFuncName)}
493499
// - In MD5NameMap: <MD5Hash(name), name> for name in name-set
494500
// - In MD5FuncMap: <MD5Hash(name), &F> for name in name-set
495501
Error addFuncWithName(Function &F, StringRef PGOFuncName);
496502

503+
// Add the vtable into the symbol table, by creating the following
504+
// map entries:
505+
// name-set = {PGOVTableName} union {getCanonicalName(PGOVTableName)}
506+
// - In MD5NameMap: <MD5Hash(name), name> for name in name-set
507+
// - In MD5VTableMap: <MD5Hash(name), &V> for name in name-set
508+
Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
509+
497510
// If the symtab is created by a series of calls to \c addFuncName, \c
498511
// finalizeSymtab needs to be called before looking up function names.
499512
// This is required because the underlying map is a vector (for space
@@ -555,6 +568,7 @@ class InstrProfSymtab {
555568
Error create(const FuncNameIterRange &FuncIterRange,
556569
const VTableNameIterRange &VTableIterRange);
557570

571+
// Map the MD5 of the symbol name to the name.
558572
Error addSymbolName(StringRef SymbolName) {
559573
if (SymbolName.empty())
560574
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -630,6 +644,10 @@ class InstrProfSymtab {
630644
/// Return function from the name's md5 hash. Return nullptr if not found.
631645
inline Function *getFunction(uint64_t FuncMD5Hash);
632646

647+
/// Return the global variable corresponding to md5 hash. Return nullptr if
648+
/// not found.
649+
inline GlobalVariable *getGlobalVariable(uint64_t MD5Hash);
650+
633651
/// Return the name section data.
634652
inline StringRef getNameData() const { return Data; }
635653

@@ -709,6 +727,12 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
709727
return nullptr;
710728
}
711729

730+
/// Look up \p MD5Hash in MD5VTableMap and hand back the global variable stored
/// for it; yields nullptr when the map has no entry for the hash.
GlobalVariable *InstrProfSymtab::getGlobalVariable(uint64_t MD5Hash) {
  auto Pos = MD5VTableMap.find(MD5Hash);
  return Pos == MD5VTableMap.end() ? nullptr : Pos->second;
}
735+
712736
// To store the sums of profile count values, or the percentage of
713737
// the sums of the total count values.
714738
struct CountSumOrPercent {

llvm/lib/ProfileData/InstrProf.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,11 +476,43 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
476476
return E;
477477
}
478478

479+
SmallVector<MDNode *, 2> Types;
480+
for (GlobalVariable &G : M.globals()) {
481+
if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
482+
continue;
483+
if (Error E = addVTableWithName(
484+
G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
485+
return E;
486+
}
487+
479488
Sorted = false;
480489
finalizeSymtab();
481490
return Error::success();
482491
}
483492

493+
/// Register \p VTable in the symbol table under \p VTablePGOName and, when it
/// differs from that name, under its canonical name as well.
///
/// Returns the first error reported by addSymbolName, otherwise success.
Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
                                         StringRef VTablePGOName) {
  // Records a single name: adds it via addSymbolName, then associates the
  // name's GUID with the vtable in MD5VTableMap. An already-present GUID is
  // left untouched and only reported in debug output.
  auto RecordName = [&](StringRef SymName) -> Error {
    if (Error Err = addSymbolName(SymName))
      return Err;

    const bool IsNew =
        MD5VTableMap.try_emplace(GlobalValue::getGUID(SymName), &VTable)
            .second;
    if (!IsNew)
      LLVM_DEBUG(dbgs() << "GUID conflict within one module");
    return Error::success();
  };

  if (Error Err = RecordName(VTablePGOName))
    return Err;

  // Only record the canonical spelling when it is a distinct name.
  StringRef Canonical = getCanonicalName(VTablePGOName);
  if (Canonical == VTablePGOName)
    return Error::success();

  return RecordName(Canonical);
}
515+
484516
/// \c NameStrings is a string composed of one or more possibly encoded
485517
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
486518
/// method decodes the string and calls `NameCallback` for each substring.

llvm/unittests/ProfileData/InstrProfTest.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "llvm/ADT/STLExtras.h"
10+
#include "llvm/IR/DerivedTypes.h"
911
#include "llvm/IR/Function.h"
1012
#include "llvm/IR/IRBuilder.h"
1113
#include "llvm/IR/LLVMContext.h"
@@ -1730,6 +1732,34 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
17301732
Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
17311733
Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
17321734

1735+
// [ptr, ptr, ptr]
1736+
ArrayType *VTableArrayType = ArrayType::get(
1737+
PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
1738+
3);
1739+
Constant *Int32TyNull =
1740+
llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
1741+
SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
1742+
StructType *VTableType = llvm::StructType::get(Ctx, tys);
1743+
1744+
// Create two vtables in the module, one with external linkage and the other
1745+
// with local linkage.
1746+
for (auto [Name, Linkage] :
1747+
{std::pair{"ExternalGV", GlobalValue::ExternalLinkage},
1748+
{"LocalGV", GlobalValue::InternalLinkage}}) {
1749+
llvm::Twine FuncName(Name, StringRef("VFunc"));
1750+
Function *VFunc = Function::Create(FTy, Linkage, FuncName, M.get());
1751+
GlobalVariable *GV = new llvm::GlobalVariable(
1752+
*M, VTableType, /* isConstant= */ true, Linkage,
1753+
llvm::ConstantStruct::get(
1754+
VTableType,
1755+
{llvm::ConstantArray::get(VTableArrayType,
1756+
{Int32TyNull, Int32TyNull, VFunc})}),
1757+
Name);
1758+
// Add type metadata for the test data, since vtables with type metadata
1759+
// are added to symtab.
1760+
GV->addTypeMetadata(16, MDString::get(Ctx, Name));
1761+
}
1762+
17331763
InstrProfSymtab ProfSymtab;
17341764
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
17351765

@@ -1751,6 +1781,23 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
17511781
EXPECT_EQ(PGOName, PGOFuncName);
17521782
EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
17531783
}
1784+
1785+
StringRef VTables[] = {"ExternalGV", "LocalGV"};
1786+
for (auto [VTableName, PGOName] : {std::pair{"ExternalGV", "ExternalGV"},
1787+
{"LocalGV", "MyModule.cpp;LocalGV"}}) {
1788+
GlobalVariable *GV =
1789+
M->getGlobalVariable(VTableName, /* AllowInternal=*/true);
1790+
1791+
// Test that ProfSymtab returns the expected name given a hash.
1792+
std::string IRPGOName = getPGOName(*GV);
1793+
EXPECT_STREQ(IRPGOName.c_str(), PGOName);
1794+
uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
1795+
EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
1796+
EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);
1797+
1798+
// Test that ProfSymtab returns the expected global variable
1799+
EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID));
1800+
}
17541801
}
17551802

17561803
// Testing symtab serialization and creator/deserialization interface

0 commit comments

Comments
 (0)