Skip to content

Commit fe05193

Browse files
committed
[InstrProf] Encode linkage names in IRPGO counter names
Prior to this diff, names in the `__llvm_prf_names` section had the format `[<filepath>:]<function-name>`, e.g., `main.cpp:foo`, `bar`. `<filepath>` is used to discriminate between possibly identical function names when linkage is local and `<function-name>` simply comes from `F.getName()`. This has two problems: * `:` is commonly found in Objective-C functions so that names like `main.mm:-[C foo::]` and `-[C bar::]` are difficult to parse * `<function-name>` might be different from the linkage name, so it cannot be used to pass a function order to the linker via `-symbol-ordering-file` or `-order_file` (see https://discourse.llvm.org/t/rfc-temporal-profiling-extension-for-irpgo/68068) Instead, this diff changes the format to `[<filepath>;]<linkage-name>`, e.g., `main.cpp;_foo`, `_bar`. The hope is that `;` won't realistically be found in either `<filepath>` or `<linkage-name>`. To prevent invalidating all prior IRPGO profiles, we also lookup the prior name format when a record is not found (see `InstrProfSymtab::create()`, `readMemprof()`, and `getInstrProfRecord()`). It seems that Swift and Clang FE-PGO rely on the original `getPGOFuncName()`, so we cannot simply replace it. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D156569
1 parent 165f7f0 commit fe05193

File tree

11 files changed

+305
-90
lines changed

11 files changed

+305
-90
lines changed

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,15 @@ std::string getPGOFuncName(StringRef RawFuncName,
184184
StringRef FileName,
185185
uint64_t Version = INSTR_PROF_INDEX_VERSION);
186186

187+
/// \return the modified name for function \c F suitable to be
188+
/// used as the key for IRPGO profile lookup. \c InLTO indicates if this is
189+
/// called from LTO optimization passes.
190+
std::string getIRPGOFuncName(const Function &F, bool InLTO = false);
191+
192+
/// \return the filename and the function name parsed from the output of
193+
/// \c getIRPGOFuncName()
194+
std::pair<StringRef, StringRef> getParsedIRPGOFuncName(StringRef IRPGOFuncName);
195+
187196
/// Return the name of the global variable used to store a function
188197
/// name in PGO instrumentation. \c FuncName is the name of the function
189198
/// returned by the \c getPGOFuncName call.
@@ -434,6 +443,8 @@ class InstrProfSymtab {
434443
return "** External Symbol **";
435444
}
436445

446+
Error addFuncWithName(Function &F, StringRef PGOFuncName);
447+
437448
// If the symtab is created by a series of calls to \c addFuncName, \c
438449
// finalizeSymtab needs to be called before looking up function names.
439450
// This is required because the underlying map is a vector (for space
@@ -516,11 +527,6 @@ class InstrProfSymtab {
516527
/// Return function from the name's md5 hash. Return nullptr if not found.
517528
inline Function *getFunction(uint64_t FuncMD5Hash);
518529

519-
/// Return the function's original assembly name by stripping off
520-
/// the prefix attached (to symbols with priviate linkage). For
521-
/// global functions, it returns the same string as getFuncName.
522-
inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
523-
524530
/// Return the name section data.
525531
inline StringRef getNameData() const { return Data; }
526532

@@ -586,16 +592,6 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
586592
return nullptr;
587593
}
588594

589-
// See also getPGOFuncName implementation. These two need to be
590-
// matched.
591-
StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
592-
StringRef PGOName = getFuncName(FuncMD5Hash);
593-
size_t S = PGOName.find_first_of(':');
594-
if (S == StringRef::npos)
595-
return PGOName;
596-
return PGOName.drop_front(S + 1);
597-
}
598-
599595
// To store the sums of profile count values, or the percentage of
600596
// the sums of the total count values.
601597
struct CountSumOrPercent {

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -716,9 +716,12 @@ class IndexedInstrProfReader : public InstrProfReader {
716716
/// When return a hash_mismatch error and MismatchedFuncSum is not nullptr,
717717
/// the sum of all counters in the mismatched function will be set to
718718
/// MismatchedFuncSum. If there are multiple instances of mismatched
719-
/// functions, MismatchedFuncSum returns the maximum.
719+
/// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not
720+
/// found, try to lookup \c DeprecatedFuncName to handle profiles built by
721+
/// older compilers.
720722
Expected<InstrProfRecord>
721723
getInstrProfRecord(StringRef FuncName, uint64_t FuncHash,
724+
StringRef DeprecatedFuncName = "",
722725
uint64_t *MismatchedFuncSum = nullptr);
723726

724727
/// Return the memprof record for the function identified by

llvm/lib/ProfileData/InstrProf.cpp

Lines changed: 111 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/IR/Instruction.h"
2828
#include "llvm/IR/LLVMContext.h"
2929
#include "llvm/IR/MDBuilder.h"
30+
#include "llvm/IR/Mangler.h"
3031
#include "llvm/IR/Metadata.h"
3132
#include "llvm/IR/Module.h"
3233
#include "llvm/IR/Type.h"
@@ -264,6 +265,67 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
264265
return PathNameStr.substr(LastPos);
265266
}
266267

268+
static StringRef getStrippedSourceFileName(const Function &F) {
269+
StringRef FileName(F.getParent()->getSourceFileName());
270+
uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
271+
if (StripLevel < StaticFuncStripDirNamePrefix)
272+
StripLevel = StaticFuncStripDirNamePrefix;
273+
if (StripLevel)
274+
FileName = stripDirPrefix(FileName, StripLevel);
275+
return FileName;
276+
}
277+
278+
// The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is
279+
// provided if linkage is local and <linkage-name> is the mangled function
280+
// name. The filepath is used to discriminate possibly identical function names.
281+
// ; is used because it is unlikely to be found in either <filepath> or
282+
// <linkage-name>.
283+
//
284+
// Older compilers used getPGOFuncName() which has the format
285+
// [<filepath>:]<function-name>. <filepath> is used to discriminate between
286+
// possibly identical function names when linkage is local and <function-name>
287+
// simply comes from F.getName(). This caused trouble for Objective-C functions
288+
// which commonly have :'s in their names. Also, since <function-name> is not
289+
// mangled, they cannot be passed to Mach-O linkers via -order_file. We still
290+
// need to compute this name to lookup functions from profiles built by older
291+
// compilers.
292+
static std::string getIRPGOFuncName(const Function &F,
293+
GlobalValue::LinkageTypes Linkage,
294+
StringRef FileName) {
295+
SmallString<64> Name;
296+
if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
297+
Name.append(FileName.empty() ? "<unknown>" : FileName);
298+
Name.append(";");
299+
}
300+
Mangler().getNameWithPrefix(Name, &F, /*CannotUsePrivateLabel=*/true);
301+
return Name.str().str();
302+
}
303+
304+
static std::optional<std::string> lookupPGOFuncName(const Function &F) {
305+
if (MDNode *MD = getPGOFuncNameMetadata(F)) {
306+
StringRef S = cast<MDString>(MD->getOperand(0))->getString();
307+
return S.str();
308+
}
309+
return {};
310+
}
311+
312+
// See getPGOFuncName()
313+
std::string getIRPGOFuncName(const Function &F, bool InLTO) {
314+
if (!InLTO) {
315+
auto FileName = getStrippedSourceFileName(F);
316+
return getIRPGOFuncName(F, F.getLinkage(), FileName);
317+
}
318+
319+
// In LTO mode (when InLTO is true), first check if there is a meta data.
320+
if (auto IRPGOFuncName = lookupPGOFuncName(F))
321+
return *IRPGOFuncName;
322+
323+
// If there is no meta data, the function must be a global before the value
324+
// profile annotation pass. Its current linkage may be internal if it is
325+
// internalized in LTO mode.
326+
return getIRPGOFuncName(F, GlobalValue::ExternalLinkage, "");
327+
}
328+
267329
// Return the PGOFuncName. This function has some special handling when called
268330
// in LTO optimization. The following only applies when calling in LTO passes
269331
// (when \c InLTO is true): LTO's internalization privatizes many global linkage
@@ -279,27 +341,29 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
279341
// data, its original linkage must be non-internal.
280342
std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
281343
if (!InLTO) {
282-
StringRef FileName(F.getParent()->getSourceFileName());
283-
uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
284-
if (StripLevel < StaticFuncStripDirNamePrefix)
285-
StripLevel = StaticFuncStripDirNamePrefix;
286-
if (StripLevel)
287-
FileName = stripDirPrefix(FileName, StripLevel);
344+
auto FileName = getStrippedSourceFileName(F);
288345
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
289346
}
290347

291348
// In LTO mode (when InLTO is true), first check if there is a meta data.
292-
if (MDNode *MD = getPGOFuncNameMetadata(F)) {
293-
StringRef S = cast<MDString>(MD->getOperand(0))->getString();
294-
return S.str();
295-
}
349+
if (auto PGOFuncName = lookupPGOFuncName(F))
350+
return *PGOFuncName;
296351

297352
// If there is no meta data, the function must be a global before the value
298353
// profile annotation pass. Its current linkage may be internal if it is
299354
// internalized in LTO mode.
300355
return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
301356
}
302357

358+
// See getIRPGOFuncName() for a discription of the format.
359+
std::pair<StringRef, StringRef>
360+
getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
361+
auto [FileName, FuncName] = IRPGOFuncName.split(';');
362+
if (FuncName.empty())
363+
return std::make_pair(StringRef(), IRPGOFuncName);
364+
return std::make_pair(FileName, FuncName);
365+
}
366+
303367
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
304368
if (FileName.empty())
305369
return PGOFuncName;
@@ -320,7 +384,7 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
320384
return VarName;
321385

322386
// Now fix up illegal chars in local VarName that may upset the assembler.
323-
const char *InvalidChars = "-:<>/\"'";
387+
const char InvalidChars[] = "-:;<>/\"'";
324388
size_t found = VarName.find_first_of(InvalidChars);
325389
while (found != std::string::npos) {
326390
VarName[found] = '_';
@@ -366,41 +430,49 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
366430
// Ignore in this case.
367431
if (!F.hasName())
368432
continue;
369-
const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
370-
if (Error E = addFuncName(PGOFuncName))
433+
if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO)))
434+
return E;
435+
// Also use getPGOFuncName() so that we can find records from older profiles
436+
if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
371437
return E;
372-
MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
373-
// In ThinLTO, local function may have been promoted to global and have
374-
// suffix ".llvm." added to the function name. We need to add the
375-
// stripped function name to the symbol table so that we can find a match
376-
// from profile.
377-
//
378-
// We may have other suffixes similar as ".llvm." which are needed to
379-
// be stripped before the matching, but ".__uniq." suffix which is used
380-
// to differentiate internal linkage functions in different modules
381-
// should be kept. Now this is the only suffix with the pattern ".xxx"
382-
// which is kept before matching.
383-
const std::string UniqSuffix = ".__uniq.";
384-
auto pos = PGOFuncName.find(UniqSuffix);
385-
// Search '.' after ".__uniq." if ".__uniq." exists, otherwise
386-
// search '.' from the beginning.
387-
if (pos != std::string::npos)
388-
pos += UniqSuffix.length();
389-
else
390-
pos = 0;
391-
pos = PGOFuncName.find('.', pos);
392-
if (pos != std::string::npos && pos != 0) {
393-
const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
394-
if (Error E = addFuncName(OtherFuncName))
395-
return E;
396-
MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
397-
}
398438
}
399439
Sorted = false;
400440
finalizeSymtab();
401441
return Error::success();
402442
}
403443

444+
Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
445+
if (Error E = addFuncName(PGOFuncName))
446+
return E;
447+
MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
448+
// In ThinLTO, local function may have been promoted to global and have
449+
// suffix ".llvm." added to the function name. We need to add the
450+
// stripped function name to the symbol table so that we can find a match
451+
// from profile.
452+
//
453+
// We may have other suffixes similar as ".llvm." which are needed to
454+
// be stripped before the matching, but ".__uniq." suffix which is used
455+
// to differentiate internal linkage functions in different modules
456+
// should be kept. Now this is the only suffix with the pattern ".xxx"
457+
// which is kept before matching.
458+
const std::string UniqSuffix = ".__uniq.";
459+
auto pos = PGOFuncName.find(UniqSuffix);
460+
// Search '.' after ".__uniq." if ".__uniq." exists, otherwise
461+
// search '.' from the beginning.
462+
if (pos != std::string::npos)
463+
pos += UniqSuffix.length();
464+
else
465+
pos = 0;
466+
pos = PGOFuncName.find('.', pos);
467+
if (pos != std::string::npos && pos != 0) {
468+
StringRef OtherFuncName = PGOFuncName.substr(0, pos);
469+
if (Error E = addFuncName(OtherFuncName))
470+
return E;
471+
MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
472+
}
473+
return Error::success();
474+
}
475+
404476
uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
405477
finalizeSymtab();
406478
auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,12 +1214,25 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
12141214
}
12151215

12161216
Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
1217-
StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) {
1217+
StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
1218+
uint64_t *MismatchedFuncSum) {
12181219
ArrayRef<NamedInstrProfRecord> Data;
12191220
uint64_t FuncSum = 0;
1220-
Error Err = Remapper->getRecords(FuncName, Data);
1221-
if (Err)
1222-
return std::move(Err);
1221+
auto Err = Remapper->getRecords(FuncName, Data);
1222+
if (Err) {
1223+
// If we don't find FuncName, try DeprecatedFuncName to handle profiles
1224+
// built by older compilers.
1225+
auto Err2 =
1226+
handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
1227+
if (IE.get() != instrprof_error::unknown_function)
1228+
return make_error<InstrProfError>(IE);
1229+
if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data))
1230+
return Err;
1231+
return Error::success();
1232+
});
1233+
if (Err2)
1234+
return std::move(Err2);
1235+
}
12231236
// Found it. Look for counters with the right hash.
12241237

12251238
// A flag to indicate if the records are from the same type

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -679,12 +679,26 @@ static void readMemprof(Module &M, Function &F,
679679
const TargetLibraryInfo &TLI) {
680680
auto &Ctx = M.getContext();
681681

682-
auto FuncName = getPGOFuncName(F);
682+
auto FuncName = getIRPGOFuncName(F);
683683
auto FuncGUID = Function::getGUID(FuncName);
684-
Expected<memprof::MemProfRecord> MemProfResult =
685-
MemProfReader->getMemProfRecord(FuncGUID);
686-
if (Error E = MemProfResult.takeError()) {
687-
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
684+
std::optional<memprof::MemProfRecord> MemProfRec;
685+
auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
686+
if (Err) {
687+
// If we don't find getIRPGOFuncName(), try getPGOFuncName() to handle
688+
// profiles built by older compilers
689+
Err = handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
690+
if (IE.get() != instrprof_error::unknown_function)
691+
return make_error<InstrProfError>(IE);
692+
auto FuncName = getPGOFuncName(F);
693+
auto FuncGUID = Function::getGUID(FuncName);
694+
if (auto Err =
695+
MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec))
696+
return Err;
697+
return Error::success();
698+
});
699+
}
700+
if (Err) {
701+
handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
688702
auto Err = IPE.get();
689703
bool SkipWarning = false;
690704
LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
@@ -722,15 +736,14 @@ static void readMemprof(Module &M, Function &F,
722736
// the frame array (see comments below where the map entries are added).
723737
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
724738
LocHashToCallSites;
725-
const auto MemProfRec = std::move(MemProfResult.get());
726-
for (auto &AI : MemProfRec.AllocSites) {
739+
for (auto &AI : MemProfRec->AllocSites) {
727740
// Associate the allocation info with the leaf frame. The later matching
728741
// code will match any inlined call sequences in the IR with a longer prefix
729742
// of call stack frames.
730743
uint64_t StackId = computeStackId(AI.CallStack[0]);
731744
LocHashToAllocInfo[StackId].insert(&AI);
732745
}
733-
for (auto &CS : MemProfRec.CallSites) {
746+
for (auto &CS : MemProfRec->CallSites) {
734747
// Need to record all frames from leaf up to and including this function,
735748
// as any of these may or may not have been inlined at this point.
736749
unsigned Idx = 0;

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
525525
std::vector<std::vector<VPCandidateInfo>> ValueSites;
526526
SelectInstVisitor SIVisitor;
527527
std::string FuncName;
528+
std::string DeprecatedFuncName;
528529
GlobalVariable *FuncNameVar;
529530

530531
// CFG hash value for this function.
@@ -590,7 +591,8 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
590591
NumOfCSPGOBB += MST.BBInfos.size();
591592
}
592593

593-
FuncName = getPGOFuncName(F);
594+
FuncName = getIRPGOFuncName(F);
595+
DeprecatedFuncName = getPGOFuncName(F);
594596
computeCFGHash();
595597
if (!ComdatMembers.empty())
596598
renameComdatFunction();
@@ -1336,7 +1338,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
13361338
auto &Ctx = M->getContext();
13371339
uint64_t MismatchedFuncSum = 0;
13381340
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
1339-
FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
1341+
FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1342+
&MismatchedFuncSum);
13401343
if (Error E = Result.takeError()) {
13411344
handleInstrProfError(std::move(E), MismatchedFuncSum);
13421345
return false;
@@ -1381,7 +1384,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
13811384
void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
13821385
uint64_t MismatchedFuncSum = 0;
13831386
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
1384-
FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
1387+
FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1388+
&MismatchedFuncSum);
13851389
if (auto Err = Result.takeError()) {
13861390
handleInstrProfError(std::move(Err), MismatchedFuncSum);
13871391
return;

llvm/test/Transforms/PGOProfile/comdat_internal.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ $foo = comdat any
1313
; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
1414
; CHECK-NOT: __profn__stdin__foo
1515
; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
16-
; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null
16+
; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null
1717
; CHECK-NOT: @foo
1818
; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
1919
; CHECK: @__llvm_prf_nm

0 commit comments

Comments
 (0)