Skip to content

[ThinLTO] Add tail call flag to call edges in summary #74043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ enum GlobalValueSummarySymtabCodes {
FS_PERMODULE = 1,
// PERMODULE_PROFILE: [valueid, flags, instcount, numrefs,
// numrefs x valueid,
// n x (valueid, hotness)]
// n x (valueid, hotness+tailcall)]
FS_PERMODULE_PROFILE = 2,
// PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid]
FS_PERMODULE_GLOBALVAR_INIT_REFS = 3,
Expand All @@ -219,7 +219,7 @@ enum GlobalValueSummarySymtabCodes {
FS_COMBINED = 4,
// COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs,
// numrefs x valueid,
// n x (valueid, hotness)]
// n x (valueid, hotness+tailcall)]
FS_COMBINED_PROFILE = 5,
// COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid]
FS_COMBINED_GLOBALVAR_INIT_REFS = 6,
Expand Down Expand Up @@ -268,7 +268,7 @@ enum GlobalValueSummarySymtabCodes {
// Per-module summary that also adds relative block frequency to callee info.
// PERMODULE_RELBF: [valueid, flags, instcount, numrefs,
// numrefs x valueid,
// n x (valueid, relblockfreq)]
// n x (valueid, relblockfreq+tailcall)]
FS_PERMODULE_RELBF = 19,
// Index-wide flags
FS_FLAGS = 20,
Expand Down
20 changes: 15 additions & 5 deletions llvm/include/llvm/IR/ModuleSummaryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,31 @@ struct CalleeInfo {
// added to HotnessType enum.
uint32_t Hotness : 3;

// True if at least one of the calls to the callee is a tail call.
bool HasTailCall : 1;

/// The value stored in RelBlockFreq has to be interpreted as the digits of
/// a scaled number with a scale of \p -ScaleShift.
uint32_t RelBlockFreq : 29;
static constexpr unsigned RelBlockFreqBits = 28;
uint32_t RelBlockFreq : RelBlockFreqBits;
static constexpr int32_t ScaleShift = 8;
static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1;
static constexpr uint64_t MaxRelBlockFreq = (1 << RelBlockFreqBits) - 1;

CalleeInfo()
: Hotness(static_cast<uint32_t>(HotnessType::Unknown)), RelBlockFreq(0) {}
explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF)
: Hotness(static_cast<uint32_t>(Hotness)), RelBlockFreq(RelBF) {}
: Hotness(static_cast<uint32_t>(HotnessType::Unknown)),
HasTailCall(false), RelBlockFreq(0) {}
explicit CalleeInfo(HotnessType Hotness, bool HasTC, uint64_t RelBF)
: Hotness(static_cast<uint32_t>(Hotness)), HasTailCall(HasTC),
RelBlockFreq(RelBF) {}

void updateHotness(const HotnessType OtherHotness) {
Hotness = std::max(Hotness, static_cast<uint32_t>(OtherHotness));
}

bool hasTailCall() const { return HasTailCall; }

void setHasTailCall(const bool HasTC) { HasTailCall = HasTC; }

HotnessType getHotness() const { return HotnessType(Hotness); }

/// Update \p RelBlockFreq from \p BlockFreq and \p EntryFreq
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ static void computeFunctionSummary(
auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
cast<GlobalValue>(CalledValue))];
ValueInfo.updateHotness(Hotness);
if (CB->isTailCall())
ValueInfo.setHasTailCall(true);
// Add the relative block frequency to CalleeInfo if there is no profile
// information.
if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
Expand Down
33 changes: 25 additions & 8 deletions llvm/lib/AsmParser/LLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9058,7 +9058,8 @@ bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
/// OptionalCalls
/// := 'calls' ':' '(' Call [',' Call]* ')'
/// Call ::= '(' 'callee' ':' GVReference
/// [( ',' 'hotness' ':' Hotness | ',' 'relbf' ':' UInt32 )]? ')'
/// [( ',' 'hotness' ':' Hotness | ',' 'relbf' ':' UInt32 )]?
/// [ ',' 'tail' ':' Flag ]? ')'
bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
assert(Lex.getKind() == lltok::kw_calls);
Lex.Lex();
Expand All @@ -9083,23 +9084,39 @@ bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {

CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
unsigned RelBF = 0;
if (EatIfPresent(lltok::comma)) {
// Expect either hotness or relbf
if (EatIfPresent(lltok::kw_hotness)) {
unsigned HasTailCall = false;

// parse optional fields
while (EatIfPresent(lltok::comma)) {
switch (Lex.getKind()) {
case lltok::kw_hotness:
Lex.Lex();
if (parseToken(lltok::colon, "expected ':'") || parseHotness(Hotness))
return true;
} else {
if (parseToken(lltok::kw_relbf, "expected relbf") ||
parseToken(lltok::colon, "expected ':'") || parseUInt32(RelBF))
break;
case lltok::kw_relbf:
Lex.Lex();
if (parseToken(lltok::colon, "expected ':'") || parseUInt32(RelBF))
return true;
break;
case lltok::kw_tail:
Lex.Lex();
if (parseToken(lltok::colon, "expected ':'") || parseFlag(HasTailCall))
return true;
break;
default:
return error(Lex.getLoc(), "expected hotness, relbf, or tail");
}
}
if (Hotness != CalleeInfo::HotnessType::Unknown && RelBF > 0)
return tokError("Expected only one of hotness or relbf");
// Keep track of the Call array index needing a forward reference.
// We will save the location of the ValueInfo needing an update, but
// can only do so once the std::vector is finalized.
if (VI.getRef() == FwdVIRef)
IdToIndexMap[GVId].push_back(std::make_pair(Calls.size(), Loc));
Calls.push_back(FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF)});
Calls.push_back(
FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, HasTailCall, RelBF)});

if (parseToken(lltok::rparen, "expected ')' in call"))
return true;
Expand Down
34 changes: 28 additions & 6 deletions llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,22 @@ static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
(GlobalObject::VCallVisibility)(RawFlags >> 3));
}

static std::pair<CalleeInfo::HotnessType, bool>
getDecodedHotnessCallEdgeInfo(uint64_t RawFlags) {
CalleeInfo::HotnessType Hotness =
static_cast<CalleeInfo::HotnessType>(RawFlags & 0x7); // 3 bits
bool HasTailCall = (RawFlags & 0x8); // 1 bit
return {Hotness, HasTailCall};
}

/// Split the per-edge value of a relative-block-frequency call record into
/// its parts.
///
/// The low CalleeInfo::RelBlockFreqBits bits hold the relative block
/// frequency; the bit immediately above them is the "has tail call" flag.
///
/// \param RawFlags    the packed per-edge value read from the record.
/// \param RelBF       [out] receives the relative block frequency digits.
/// \param HasTailCall [out] receives the tail-call flag.
static void getDecodedRelBFCallEdgeInfo(uint64_t RawFlags, uint64_t &RelBF,
                                        bool &HasTailCall) {
  // The tail-call flag sits directly above the frequency field, so the
  // frequency mask is simply everything below that bit.
  constexpr uint64_t TailCallBit = uint64_t{1} << CalleeInfo::RelBlockFreqBits;
  constexpr uint64_t FreqMask = TailCallBit - 1;

  RelBF = RawFlags & FreqMask;
  HasTailCall = (RawFlags & TailCallBit) != 0;
}

static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
switch (Val) {
default: // Map unknown visibilities to default.
Expand Down Expand Up @@ -7007,17 +7023,20 @@ ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef<uint64_t> Record,
Ret.reserve(Record.size());
for (unsigned I = 0, E = Record.size(); I != E; ++I) {
CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
bool HasTailCall = false;
uint64_t RelBF = 0;
ValueInfo Callee = std::get<0>(getValueInfoFromValueId(Record[I]));
if (IsOldProfileFormat) {
I += 1; // Skip old callsitecount field
if (HasProfile)
I += 1; // Skip old profilecount field
} else if (HasProfile)
Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]);
std::tie(Hotness, HasTailCall) =
getDecodedHotnessCallEdgeInfo(Record[++I]);
else if (HasRelBF)
RelBF = Record[++I];
Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo(Hotness, RelBF)});
getDecodedRelBFCallEdgeInfo(Record[++I], RelBF, HasTailCall);
Ret.push_back(FunctionSummary::EdgeTy{
Callee, CalleeInfo(Hotness, HasTailCall, RelBF)});
}
return Ret;
}
Expand Down Expand Up @@ -7231,14 +7250,15 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.getOrInsertValueInfo(RefGUID), RefGUID, RefGUID);
break;
}
// FS_PERMODULE is legacy and does not have support for the tail call flag.
// FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs,
// numrefs x valueid, n x (valueid)]
// FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs,
// numrefs x valueid,
// n x (valueid, hotness)]
// n x (valueid, hotness+tailcall flags)]
// FS_PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs,
// numrefs x valueid,
// n x (valueid, relblockfreq)]
// n x (valueid, relblockfreq+tailcall)]
case bitc::FS_PERMODULE:
case bitc::FS_PERMODULE_RELBF:
case bitc::FS_PERMODULE_PROFILE: {
Expand Down Expand Up @@ -7385,10 +7405,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.addGlobalValueSummary(std::get<0>(GUID), std::move(VS));
break;
}
// FS_COMBINED is legacy and does not have support for the tail call flag.
// FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs,
// numrefs x valueid, n x (valueid)]
// FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs,
// numrefs x valueid, n x (valueid, hotness)]
// numrefs x valueid,
// n x (valueid, hotness+tailcall flags)]
case bitc::FS_COMBINED:
case bitc::FS_COMBINED_PROFILE: {
unsigned ValueID = Record[0];
Expand Down
101 changes: 44 additions & 57 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,24 @@ static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) {
return RawFlags;
}

/// Pack the hotness and tail-call flag of a call edge into one record value.
///
/// Layout: the hotness field occupies the low three bits and the tail-call
/// flag is placed at bit 3.
///
/// \param CI the call edge information to encode.
/// \returns the packed value.
static uint64_t getEncodedHotnessCallEdgeInfo(const CalleeInfo &CI) {
  const uint64_t HotnessPart = CI.Hotness;                    // 3 bits
  const uint64_t TailCallPart = CI.HasTailCall ? 0x8u : 0x0u; // 1 bit
  return HotnessPart | TailCallPart;
}

/// Pack the relative block frequency and tail-call flag of a call edge into
/// one record value.
///
/// Layout: the frequency occupies the low CalleeInfo::RelBlockFreqBits bits
/// and the tail-call flag is placed in the bit directly above them.
///
/// \param CI the call edge information to encode.
/// \returns the packed value.
static uint64_t getEncodedRelBFCallEdgeInfo(const CalleeInfo &CI) {
  const uint64_t FreqPart = CI.RelBlockFreq; // CalleeInfo::RelBlockFreqBits bits
  const uint64_t TailCallPart = static_cast<uint64_t>(CI.HasTailCall)
                                << CalleeInfo::RelBlockFreqBits; // 1 bit
  return FreqPart | TailCallPart;
}

static unsigned getEncodedVisibility(const GlobalValue &GV) {
switch (GV.getVisibility()) {
case GlobalValue::DefaultVisibility: return 0;
Expand Down Expand Up @@ -4003,8 +4021,9 @@ static void writeFunctionHeapProfileRecords(
// Helper to emit a single function summary record.
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F) {
unsigned ValueID, unsigned FSCallsRelBFAbbrev,
unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
unsigned AllocAbbrev, const Function &F) {
NameVals.push_back(ValueID);

FunctionSummary *FS = cast<FunctionSummary>(Summary);
Expand All @@ -4031,21 +4050,21 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));

bool HasProfileData =
F.hasProfileData() || ForceSummaryEdgesCold != FunctionSummary::FSHT_None;
const bool UseRelBFRecord =
WriteRelBFToSummary && !F.hasProfileData() &&
ForceSummaryEdgesCold == FunctionSummary::FSHT_None;
for (auto &ECI : FS->calls()) {
NameVals.push_back(getValueId(ECI.first));
if (HasProfileData)
NameVals.push_back(static_cast<uint8_t>(ECI.second.Hotness));
else if (WriteRelBFToSummary)
NameVals.push_back(ECI.second.RelBlockFreq);
if (UseRelBFRecord)
NameVals.push_back(getEncodedRelBFCallEdgeInfo(ECI.second));
else
NameVals.push_back(getEncodedHotnessCallEdgeInfo(ECI.second));
}

unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
unsigned FSAbbrev =
(UseRelBFRecord ? FSCallsRelBFAbbrev : FSCallsProfileAbbrev);
unsigned Code =
(HasProfileData ? bitc::FS_PERMODULE_PROFILE
: (WriteRelBFToSummary ? bitc::FS_PERMODULE_RELBF
: bitc::FS_PERMODULE));
(UseRelBFRecord ? bitc::FS_PERMODULE_RELBF : bitc::FS_PERMODULE_PROFILE);

// Emit the finished record.
Stream.EmitRecord(Code, NameVals, FSAbbrev);
Expand Down Expand Up @@ -4154,28 +4173,25 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid, hotness)
// numrefs x valueid, n x (valueid, hotness+tailcall flags)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));

// Abbrev for FS_PERMODULE or FS_PERMODULE_RELBF.
// Abbrev for FS_PERMODULE_RELBF.
Abbv = std::make_shared<BitCodeAbbrev>();
if (WriteRelBFToSummary)
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF));
else
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid [, rel_block_freq])
// numrefs x valueid, n x (valueid, rel_block_freq+tailcall)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
unsigned FSCallsRelBFAbbrev = Stream.EmitAbbrev(std::move(Abbv));

// Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS.
Abbv = std::make_shared<BitCodeAbbrev>();
Expand Down Expand Up @@ -4247,9 +4263,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
continue;
}
auto *Summary = VI.getSummaryList()[0].get();
writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
FSCallsAbbrev, FSCallsProfileAbbrev,
CallsiteAbbrev, AllocAbbrev, F);
writePerModuleFunctionSummaryRecord(
NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, F);
}

// Capture references from GlobalVariable initializers, which are outside
Expand Down Expand Up @@ -4320,25 +4336,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
}

// Abbrev for FS_COMBINED.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));

// Abbrev for FS_COMBINED_PROFILE.
Abbv = std::make_shared<BitCodeAbbrev>();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Expand All @@ -4349,7 +4348,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid, hotness)
// numrefs x valueid, n x (valueid, hotness+tailcall flags)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Expand Down Expand Up @@ -4529,31 +4528,19 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals[7] = RORefCnt;
NameVals[8] = WORefCnt;

bool HasProfileData = false;
for (auto &EI : FS->calls()) {
HasProfileData |=
EI.second.getHotness() != CalleeInfo::HotnessType::Unknown;
if (HasProfileData)
break;
}

for (auto &EI : FS->calls()) {
// If this GUID doesn't have a value id, it doesn't have a function
// summary and we don't need to record any calls to it.
std::optional<unsigned> CallValueId = GetValueId(EI.first);
if (!CallValueId)
continue;
NameVals.push_back(*CallValueId);
if (HasProfileData)
NameVals.push_back(static_cast<uint8_t>(EI.second.Hotness));
NameVals.push_back(getEncodedHotnessCallEdgeInfo(EI.second));
}

unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
unsigned Code =
(HasProfileData ? bitc::FS_COMBINED_PROFILE : bitc::FS_COMBINED);

// Emit the finished record.
Stream.EmitRecord(Code, NameVals, FSAbbrev);
Stream.EmitRecord(bitc::FS_COMBINED_PROFILE, NameVals,
FSCallsProfileAbbrev);
NameVals.clear();
MaybeEmitOriginalName(*S);
});
Expand Down
Loading