Skip to content

Commit 2357d29

Browse files
committed
[SampleFDO] Another fix to prevent repeated indirect call promotion in
sample loader pass. In https://reviews.llvm.org/rG5fb65c02ca5e91e7e1a00e0efdb8edc899f3e4b9, to prevent repeated indirect call promotion for the same indirect call and the same target, we used zero-count value profile to indicate an indirect call has been promoted for a certain target. We removed PromotedInsns cache in the same patch. However, there was a problem in that patch described below, and that problem led me to add PromotedInsns back as a mitigation in https://reviews.llvm.org/rG4ffad1fb489f691825d6c7d78e1626de142f26cf. When we get value profile from metadata by calling getValueProfDataFromInst, we need to specify the maximum possible number of values we expect to read. We uses MaxNumPromotions in the last patch so the maximum number of value information extracted from metadata is MaxNumPromotions. If we have many values including zero-count values when we write the metadata, some of them will be dropped when we read them because we only read MaxNumPromotions values. It will allow repeated indirect call promotion again. We need to make sure if there are values indicating promoted targets, those values need to be saved in metadata with higher priority than other values. The patch fixed that problem. We change to use -1 to represent the count of a promoted target instead of 0 so it is easier to sort the values. When we prepare to update the metadata in updateIDTMetaData, we will sort the values in the descending count order and extract only MaxNumPromotions values to write into metadata. Since -1 is the max uint64_t number, if we have equal to or less than MaxNumPromotions of -1 count values, they will all be kept in metadata. If we have more than MaxNumPromotions of -1 count values, we will only save MaxNumPromotions such values maximally. In such case, we have logic in place in doesHistoryAllowICP to guarantee no more promotion in sample loader pass will happen for the indirect call, because it has been promoted enough. With this change, now we can remove PromotedInsns without problem. Differential Revision: https://reviews.llvm.org/D97350
1 parent e63ddcc commit 2357d29

File tree

7 files changed

+238
-55
lines changed

7 files changed

+238
-55
lines changed

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ void annotateValueSite(Module &M, Instruction &Inst,
253253
ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
254254
InstrProfValueKind ValueKind, uint32_t MaxMDCount);
255255

256+
/// Magic number in the value profile data showing a target has been
257+
/// promoted for the instruction and shouldn't be promoted again.
258+
const uint64_t NOMORE_ICP_MAGICNUM = -1;
259+
256260
/// Extract the value profile data from \p Inst which is annotated with
257261
/// value profile meta data. Return false if there is no value data annotated,
258262
/// otherwise return true.
@@ -261,7 +265,7 @@ bool getValueProfDataFromInst(const Instruction &Inst,
261265
uint32_t MaxNumValueData,
262266
InstrProfValueData ValueData[],
263267
uint32_t &ActualNumValueData, uint64_t &TotalC,
264-
bool GetZeroCntValue = false);
268+
bool GetNoICPValue = false);
265269

266270
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
267271

llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static cl::opt<unsigned>
4545

4646
// Set the maximum number of targets to promote for a single indirect-call
4747
// callsite.
48-
cl::opt<unsigned>
48+
static cl::opt<unsigned>
4949
MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
5050
cl::desc("Max number of promotions for a single indirect "
5151
"call callsite"));

llvm/lib/ProfileData/InstrProf.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,7 @@ bool getValueProfDataFromInst(const Instruction &Inst,
988988
uint32_t MaxNumValueData,
989989
InstrProfValueData ValueData[],
990990
uint32_t &ActualNumValueData, uint64_t &TotalC,
991-
bool GetZeroCntValue) {
991+
bool GetNoICPValue) {
992992
MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
993993
if (!MD)
994994
return false;
@@ -1015,7 +1015,7 @@ bool getValueProfDataFromInst(const Instruction &Inst,
10151015

10161016
// Get total count
10171017
ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
1018-
if (!TotalCInt && !GetZeroCntValue)
1018+
if (!TotalCInt)
10191019
return false;
10201020
TotalC = TotalCInt->getZExtValue();
10211021

@@ -1027,10 +1027,13 @@ bool getValueProfDataFromInst(const Instruction &Inst,
10271027
ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
10281028
ConstantInt *Count =
10291029
mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1));
1030-
if (!Value || (!Count && !GetZeroCntValue))
1030+
if (!Value || !Count)
10311031
return false;
1032+
uint64_t CntValue = Count->getZExtValue();
1033+
if (!GetNoICPValue && (CntValue == NOMORE_ICP_MAGICNUM))
1034+
continue;
10321035
ValueData[ActualNumValueData].Value = Value->getZExtValue();
1033-
ValueData[ActualNumValueData].Count = Count->getZExtValue();
1036+
ValueData[ActualNumValueData].Count = CntValue;
10341037
ActualNumValueData++;
10351038
}
10361039
return true;

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 84 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,11 @@ static cl::opt<std::string> ProfileInlineReplayFile(
218218
"by inlining from sample profile loader."),
219219
cl::Hidden);
220220

221-
extern cl::opt<unsigned> MaxNumPromotions;
221+
static cl::opt<unsigned>
222+
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
223+
cl::ZeroOrMore,
224+
cl::desc("Max number of promotions for a single indirect "
225+
"call callsite in sample profile loader"));
222226

223227
namespace {
224228

@@ -364,8 +368,7 @@ class SampleProfileLoader final
364368
// Attempt to promote indirect call and also inline the promoted call
365369
bool tryPromoteAndInlineCandidate(
366370
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
367-
uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
368-
SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
371+
uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
369372
bool inlineHotFunctions(Function &F,
370373
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
371374
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
@@ -696,43 +699,70 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
696699
return it.first->second;
697700
}
698701

699-
/// If the profile count for the promotion candidate \p Candidate is 0,
700-
/// it means \p Candidate has already been promoted for \p Inst.
701-
static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) {
702+
/// Check whether the indirect call promotion history of \p Inst allows
703+
/// the promotion for \p Candidate.
704+
/// If the profile count for the promotion candidate \p Candidate is
705+
/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
706+
/// for \p Inst. If we already have at least MaxNumPromotions
707+
/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
708+
/// cannot promote for \p Inst anymore.
709+
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
702710
uint32_t NumVals = 0;
703711
uint64_t TotalCount = 0;
704712
std::unique_ptr<InstrProfValueData[]> ValueData =
705713
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
706714
bool Valid =
707715
getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
708716
ValueData.get(), NumVals, TotalCount, true);
709-
if (Valid) {
710-
for (uint32_t I = 0; I < NumVals; I++) {
711-
// If the promotion candidate has 0 count in the metadata, it
712-
// means the candidate has been promoted for this indirect call.
713-
if (ValueData[I].Value == Function::getGUID(Candidate))
714-
return ValueData[I].Count == 0;
715-
}
717+
// No valid value profile so no promoted targets have been recorded
718+
// before. Ok to do ICP.
719+
if (!Valid)
720+
return true;
721+
722+
unsigned NumPromoted = 0;
723+
for (uint32_t I = 0; I < NumVals; I++) {
724+
if (ValueData[I].Count != NOMORE_ICP_MAGICNUM)
725+
continue;
726+
727+
// If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
728+
// metadata, it means the candidate has been promoted for this
729+
// indirect call.
730+
if (ValueData[I].Value == Function::getGUID(Candidate))
731+
return false;
732+
NumPromoted++;
733+
// If already have MaxNumPromotions promotion, don't do it anymore.
734+
if (NumPromoted == MaxNumPromotions)
735+
return false;
716736
}
717-
return false;
737+
return true;
718738
}
719739

720-
/// Update indirect call target profile metadata for \p Inst. If \p Total
721-
/// is given, set TotalCount of call targets counts to \p Total, otherwise
722-
/// keep the original value in metadata.
740+
/// Update indirect call target profile metadata for \p Inst.
741+
/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
742+
/// If it is 0, it means updateIDTMetaData is used to mark a
743+
/// certain target to be promoted already. If it is not zero,
744+
/// we expect to use it to update the total count in the value profile.
723745
static void
724746
updateIDTMetaData(Instruction &Inst,
725747
const SmallVectorImpl<InstrProfValueData> &CallTargets,
726-
uint64_t Total = 0) {
727-
DenseMap<uint64_t, uint64_t> ValueCountMap;
748+
uint64_t Sum) {
749+
assert((Sum != 0 || (CallTargets.size() == 1 &&
750+
CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) &&
751+
"If sum is 0, assume only one element in CallTargets with count "
752+
"being NOMORE_ICP_MAGICNUM");
728753

729754
uint32_t NumVals = 0;
730-
uint64_t TotalCount = 0;
755+
// OldSum is the existing total count in the value profile data.
756+
// It will be replaced by Sum if Sum is not 0.
757+
uint64_t OldSum = 0;
731758
std::unique_ptr<InstrProfValueData[]> ValueData =
732759
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
733760
bool Valid =
734761
getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions,
735-
ValueData.get(), NumVals, TotalCount, true);
762+
ValueData.get(), NumVals, OldSum, true);
763+
764+
DenseMap<uint64_t, uint64_t> ValueCountMap;
765+
// Initialize ValueCountMap with existing value profile data.
736766
if (Valid) {
737767
for (uint32_t I = 0; I < NumVals; I++)
738768
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
@@ -742,13 +772,24 @@ updateIDTMetaData(Instruction &Inst,
742772
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
743773
if (Pair.second)
744774
continue;
745-
// Update existing profile count of the call target if it is not 0.
746-
// If it is 0, the call target has been promoted so keep it as 0.
747-
if (Pair.first->second != 0)
775+
// Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it
776+
// in the ValueCountMap. If both the count in CallTargets and the
777+
// count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the
778+
// count in CallTargets.
779+
if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
780+
Data.Count == NOMORE_ICP_MAGICNUM) {
781+
OldSum -= Pair.first->second;
782+
Pair.first->second = NOMORE_ICP_MAGICNUM;
783+
} else if (Pair.first->second == NOMORE_ICP_MAGICNUM &&
784+
Data.Count != NOMORE_ICP_MAGICNUM) {
785+
assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
786+
Sum -= Data.Count;
787+
} else if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
788+
Data.Count != NOMORE_ICP_MAGICNUM) {
789+
// Sum will be used in this case. Although the existing count
790+
// for the current value in value profile will be overriden,
791+
// no need to update OldSum.
748792
Pair.first->second = Data.Count;
749-
else {
750-
assert(Total >= Data.Count && "Total should be >= Data.Count");
751-
Total -= Data.Count;
752793
}
753794
}
754795

@@ -757,36 +798,38 @@ updateIDTMetaData(Instruction &Inst,
757798
NewCallTargets.emplace_back(
758799
InstrProfValueData{ValueCount.first, ValueCount.second});
759800
}
801+
760802
llvm::sort(NewCallTargets,
761803
[](const InstrProfValueData &L, const InstrProfValueData &R) {
762804
if (L.Count != R.Count)
763805
return L.Count > R.Count;
764806
return L.Value > R.Value;
765807
});
808+
809+
uint32_t MaxMDCount =
810+
std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
766811
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
767-
NewCallTargets, Total ? Total : TotalCount,
768-
IPVK_IndirectCallTarget, NewCallTargets.size());
812+
NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget,
813+
MaxMDCount);
769814
}
770815

771816
/// Attempt to promote indirect call and also inline the promoted call.
772817
///
773818
/// \param F Caller function.
774819
/// \param Candidate ICP and inline candidate.
775820
/// \param Sum Sum of target counts for indirect call.
776-
/// \param PromotedInsns Map to keep track of indirect call already processed.
777821
/// \param InlinedCallSite Output vector for new call sites exposed after
778822
/// inlining.
779823
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
780824
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
781-
DenseSet<Instruction *> &PromotedInsns,
782825
SmallVector<CallBase *, 8> *InlinedCallSite) {
783826
auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
784827
auto R = SymbolMap.find(CalleeFunctionName);
785828
if (R == SymbolMap.end() || !R->getValue())
786829
return false;
787830

788831
auto &CI = *Candidate.CallInstr;
789-
if (isPromotedBefore(CI, R->getValue()->getName()))
832+
if (!doesHistoryAllowICP(CI, R->getValue()->getName()))
790833
return false;
791834

792835
const char *Reason = "Callee function not available";
@@ -799,11 +842,11 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
799842
if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
800843
R->getValue()->hasFnAttribute("use-sample-profile") &&
801844
R->getValue() != &F && isLegalToPromote(CI, R->getValue(), &Reason)) {
802-
// For promoted target, save 0 count in the value profile metadata so
803-
// the target won't be promoted again.
804-
SmallVector<InstrProfValueData, 1> SortedCallTargets = {
805-
InstrProfValueData{Function::getGUID(R->getValue()->getName()), 0}};
806-
updateIDTMetaData(CI, SortedCallTargets);
845+
// For promoted target, set its value with NOMORE_ICP_MAGICNUM count
846+
// in the value profile metadata so the target won't be promoted again.
847+
SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
848+
Function::getGUID(R->getValue()->getName()), NOMORE_ICP_MAGICNUM}};
849+
updateIDTMetaData(CI, SortedCallTargets, 0);
807850

808851
auto *DI = &pgo::promoteIndirectCall(
809852
CI, R->getValue(), Candidate.CallsiteCount, Sum, false, ORE);
@@ -817,7 +860,6 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
817860
// be prorated so that the it will reflect the real callsite counts.
818861
setProbeDistributionFactor(CI, Candidate.CallsiteDistribution * Sum /
819862
SumOrigin);
820-
PromotedInsns.insert(Candidate.CallInstr);
821863
Candidate.CallInstr = DI;
822864
if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
823865
bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
@@ -890,8 +932,6 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
890932
/// \returns True if there is any inline happened.
891933
bool SampleProfileLoader::inlineHotFunctions(
892934
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
893-
DenseSet<Instruction *> PromotedInsns;
894-
895935
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
896936
// Profile symbol list is ignored when profile-sample-accurate is on.
897937
assert((!ProfAccForSymsInList ||
@@ -945,8 +985,6 @@ bool SampleProfileLoader::inlineHotFunctions(
945985
if (CalledFunction == &F)
946986
continue;
947987
if (I->isIndirectCall()) {
948-
if (PromotedInsns.count(I))
949-
continue;
950988
uint64_t Sum;
951989
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
952990
uint64_t SumOrigin = Sum;
@@ -959,8 +997,7 @@ bool SampleProfileLoader::inlineHotFunctions(
959997
continue;
960998

961999
Candidate = {I, FS, FS->getEntrySamples(), 1.0};
962-
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
963-
PromotedInsns)) {
1000+
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
9641001
LocalNotInlinedCallSites.erase(I);
9651002
LocalChanged = true;
9661003
}
@@ -1169,7 +1206,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
11691206

11701207
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
11711208
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1172-
DenseSet<Instruction *> PromotedInsns;
11731209
assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
11741210

11751211
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
@@ -1218,8 +1254,6 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
12181254
if (CalledFunction == &F)
12191255
continue;
12201256
if (I->isIndirectCall()) {
1221-
if (PromotedInsns.count(I))
1222-
continue;
12231257
uint64_t Sum;
12241258
auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
12251259
uint64_t SumOrigin = Sum;
@@ -1254,7 +1288,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
12541288
Candidate = {I, FS, EntryCountDistributed,
12551289
Candidate.CallsiteDistribution};
12561290
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1257-
PromotedInsns, &InlinedCallSites)) {
1291+
&InlinedCallSites)) {
12581292
for (auto *CB : InlinedCallSites) {
12591293
if (getInlineCandidate(&NewCandidate, CB))
12601294
CQueue.emplace(NewCandidate);
@@ -1351,6 +1385,8 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
13511385
Sum += NameFS.second.getEntrySamples();
13521386
}
13531387
}
1388+
if (!Sum)
1389+
continue;
13541390
updateIDTMetaData(I, SortedCallTargets, Sum);
13551391
} else if (!isa<IntrinsicInst>(&I)) {
13561392
I.setMetadata(LLVMContext::MD_prof,
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
_Z3goov:5860:1
2+
1: 5279 _Z3foov:2000 _Z3barv:1000
3+
2: 5279 _Z3foov:2000 _Z3barv:1000
4+
3: 5279 _Z3foov:2000 _Z3barv:1000
5+
1: _Z3hoov:5860
6+
1: 5000
7+
1: _Z3moov:5860
8+
1: 5000
9+
2: _Z3hoov:5860
10+
1: 5000
11+
2: _Z3moov:5860
12+
1: 5000
13+
3: _Z3hoov:5860
14+
1: 5000
15+
3: _Z3moov:5860
16+
1: 5000

llvm/test/Transforms/SampleProfile/indirect-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ attributes #0 = {"use-sample-profile"}
202202
; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}
203203
; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000}
204204
; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000}
205-
; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000, i64 7476224446746900038, i64 0, i64 925324185419832389, i64 0}
205+
; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 7476224446746900038, i64 -1, i64 925324185419832389, i64 -1, i64 -6391416044382067764, i64 1000}
206206
; CHECK: ![[BR3]] = !{!"branch_weights", i32 1, i32 0}
207207
!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0)
208208
!7 = !DILocation(line: 7, scope: !6)

0 commit comments

Comments
 (0)