-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[PGO] Sampled instrumentation in PGO to speed up instrumentation binary #69535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
451d818
64cbd8f
c207bfb
9c795d9
b8204b0
27d0a6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,30 @@ cl::opt<bool> SkipRetExitBlock( | |
"skip-ret-exit-block", cl::init(true), | ||
cl::desc("Suppress counter promotion if exit blocks contain ret.")); | ||
|
||
static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore, | ||
cl::init(false), | ||
cl::desc("Do PGO instrumentation sampling")); | ||
|
||
static cl::opt<unsigned> SampledInstrPeriod( | ||
"sampled-instr-period", | ||
cl::desc("Set the profile instrumentation sample period. For each sample " | ||
"period, a fixed number of consecutive samples will be recorded. " | ||
"The number is controlled by 'sampled-instr-burst-duration' flag. " | ||
"The default sample period of 65535 is optimized for generating " | ||
"efficient code that leverages unsigned integer wrapping in " | ||
"overflow."), | ||
cl::init(65535)); | ||
|
||
static cl::opt<unsigned> SampledInstrBurstDuration( | ||
"sampled-instr-burst-duration", | ||
cl::desc("Set the profile instrumentation burst duration, which can range " | ||
"from 0 to one less than the value of 'sampled-instr-period'. " | ||
"This number of samples will be recorded for each " | ||
"'sampled-instr-period' count update. Setting to 1 enables " | ||
"simple sampling, in which case it is recommended to set " | ||
"'sampled-instr-period' to a prime number."), | ||
cl::init(200)); | ||
|
||
using LoadStorePair = std::pair<Instruction *, Instruction *>; | ||
|
||
static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { | ||
|
@@ -260,6 +284,9 @@ class InstrLowerer final { | |
/// Returns true if profile counter update register promotion is enabled. | ||
bool isCounterPromotionEnabled() const; | ||
|
||
/// Return true if profile sampling is enabled. | ||
bool isSamplingEnabled() const; | ||
|
||
/// Count the number of instrumented value sites for the function. | ||
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); | ||
|
||
|
@@ -291,6 +318,9 @@ class InstrLowerer final { | |
/// acts on. | ||
Value *getCounterAddress(InstrProfCntrInstBase *I); | ||
|
||
/// Lower the incremental instructions under profile sampling predicates. | ||
void doSampling(Instruction *I); | ||
|
||
/// Get the region counters for an increment, creating them if necessary. | ||
/// | ||
/// If the counter array doesn't yet exist, the profile data variables | ||
|
@@ -635,33 +665,169 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, | |
return PreservedAnalyses::none(); | ||
} | ||
|
||
// | ||
// Perform instrumentation sampling. | ||
// | ||
// There are 3 favors of sampling: | ||
// (1) Full burst sampling: We transform: | ||
// Increment_Instruction; | ||
// to: | ||
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { | ||
// Increment_Instruction; | ||
// } | ||
// __llvm_profile_sampling__ += 1; | ||
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) { | ||
// __llvm_profile_sampling__ = 0; | ||
// } | ||
// | ||
// "__llvm_profile_sampling__" is a thread-local global shared by all PGO | ||
// counters (value-instrumentation and edge instrumentation). | ||
// | ||
// (2) Fast burst sampling: | ||
// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will | ||
// wrap around to zero when overflows. In this case, the second check is | ||
// unnecessary, so we won't generate check2 when the SampledInstrPeriod is | ||
// set to 65535 (64K - 1). The code after: | ||
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { | ||
// Increment_Instruction; | ||
// } | ||
// __llvm_profile_sampling__ += 1; | ||
// | ||
// (3) Simple sampling: | ||
// When SampledInstrBurstDuration sets to 1, we do a simple sampling: | ||
// __llvm_profile_sampling__ += 1; | ||
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) { | ||
// __llvm_profile_sampling__ = 0; | ||
// Increment_Instruction; | ||
// } | ||
// | ||
// Note that, the code snippet after the transformation can still be counter | ||
// promoted. However, with sampling enabled, counter updates are expected to | ||
// be infrequent, making the benefits of counter promotion negligible. | ||
// Moreover, counter promotion can potentially cause issues in server | ||
// applications, particularly when the counters are dumped without a clean | ||
// exit. To mitigate this risk, counter promotion is disabled by default when | ||
// sampling is enabled. This behavior can be overridden using the internal | ||
// option. | ||
void InstrLowerer::doSampling(Instruction *I) { | ||
if (!isSamplingEnabled()) | ||
return; | ||
|
||
unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue(); | ||
unsigned SampledPeriod = SampledInstrPeriod.getValue(); | ||
if (SampledBurstDuration >= SampledPeriod) { | ||
report_fatal_error( | ||
"SampledPeriod needs to be greater than SampledBurstDuration"); | ||
} | ||
bool UseShort = (SampledPeriod <= USHRT_MAX); | ||
bool IsSimpleSampling = (SampledBurstDuration == 1); | ||
// If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate | ||
// the simple sampling style code. | ||
bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding is fast means we don't need a check for period, and instead rely on overflow. In that case, value of SampledBurstDuration is unrelated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The value of SampleBurstDuration is number of samples being recored for each duration. The value will be used in the condition generated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The condition for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The condition is to handle the case where both SampledBurstDuration==1 and SampledPeriod=65535. |
||
|
||
auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) { | ||
if (UseShort) | ||
return Builder.getInt16(C); | ||
else | ||
return Builder.getInt32(C); | ||
}; | ||
|
||
IntegerType *SamplingVarTy; | ||
if (UseShort) | ||
SamplingVarTy = Type::getInt16Ty(M.getContext()); | ||
else | ||
SamplingVarTy = Type::getInt32Ty(M.getContext()); | ||
auto *SamplingVar = | ||
M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); | ||
assert(SamplingVar && "SamplingVar not set properly"); | ||
|
||
// Create the condition for checking the burst duration. | ||
Instruction *SamplingVarIncr; | ||
Value *NewSamplingVarVal; | ||
MDBuilder MDB(I->getContext()); | ||
MDNode *BranchWeight; | ||
IRBuilder<> CondBuilder(I); | ||
auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar); | ||
if (IsSimpleSampling) { | ||
// For the simple sampling, just create the load and increments. | ||
IRBuilder<> IncBuilder(I); | ||
NewSamplingVarVal = | ||
IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1)); | ||
SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar); | ||
} else { | ||
// For the bust-sampling, create the conditonal update. | ||
auto *DurationCond = CondBuilder.CreateICmpULE( | ||
LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration)); | ||
BranchWeight = MDB.createBranchWeights( | ||
SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration); | ||
Instruction *ThenTerm = SplitBlockAndInsertIfThen( | ||
DurationCond, I, /* Unreachable */ false, BranchWeight); | ||
IRBuilder<> IncBuilder(I); | ||
NewSamplingVarVal = | ||
IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1)); | ||
SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar); | ||
I->moveBefore(ThenTerm); | ||
} | ||
|
||
if (IsFastSampling) | ||
return; | ||
|
||
// Create the condtion for checking the period. | ||
Instruction *ThenTerm, *ElseTerm; | ||
IRBuilder<> PeriodCondBuilder(SamplingVarIncr); | ||
auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE( | ||
NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod)); | ||
BranchWeight = MDB.createBranchWeights(1, SampledPeriod); | ||
SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm, | ||
&ElseTerm, BranchWeight); | ||
|
||
// For the simple sampling, the counter update happens in sampling var reset. | ||
if (IsSimpleSampling) | ||
I->moveBefore(ThenTerm); | ||
|
||
IRBuilder<> ResetBuilder(ThenTerm); | ||
ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar); | ||
SamplingVarIncr->moveBefore(ElseTerm); | ||
} | ||
|
||
bool InstrLowerer::lowerIntrinsics(Function *F) { | ||
bool MadeChange = false; | ||
PromotionCandidates.clear(); | ||
SmallVector<InstrProfInstBase *, 8> InstrProfInsts; | ||
|
||
// To ensure compatibility with sampling, we save the intrinsics into | ||
// a buffer to prevent potential breakage of the iterator (as the | ||
// intrinsics will be moved to a different BB). | ||
for (BasicBlock &BB : *F) { | ||
for (Instruction &Instr : llvm::make_early_inc_range(BB)) { | ||
if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) { | ||
lowerIncrement(IPIS); | ||
MadeChange = true; | ||
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { | ||
lowerIncrement(IPI); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) { | ||
lowerTimestamp(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { | ||
lowerCover(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { | ||
lowerValueProfileInst(IPVP); | ||
MadeChange = true; | ||
} else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) { | ||
IPMP->eraseFromParent(); | ||
MadeChange = true; | ||
} else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) { | ||
lowerMCDCTestVectorBitmapUpdate(IPBU); | ||
MadeChange = true; | ||
} | ||
if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr)) | ||
InstrProfInsts.push_back(IP); | ||
xur-llvm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
for (auto *Instr : InstrProfInsts) { | ||
doSampling(Instr); | ||
if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) { | ||
lowerIncrement(IPIS); | ||
MadeChange = true; | ||
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) { | ||
lowerIncrement(IPI); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) { | ||
lowerTimestamp(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) { | ||
lowerCover(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) { | ||
lowerValueProfileInst(IPVP); | ||
MadeChange = true; | ||
} else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) { | ||
IPMP->eraseFromParent(); | ||
MadeChange = true; | ||
} else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) { | ||
lowerMCDCTestVectorBitmapUpdate(IPBU); | ||
MadeChange = true; | ||
} | ||
} | ||
|
||
|
@@ -684,6 +850,12 @@ bool InstrLowerer::isRuntimeCounterRelocationEnabled() const { | |
return TT.isOSFuchsia(); | ||
} | ||
|
||
bool InstrLowerer::isSamplingEnabled() const { | ||
if (SampledInstr.getNumOccurrences() > 0) | ||
return SampledInstr; | ||
return Options.Sampling; | ||
} | ||
|
||
bool InstrLowerer::isCounterPromotionEnabled() const { | ||
if (DoCounterPromotion.getNumOccurrences() > 0) | ||
return DoCounterPromotion; | ||
|
@@ -754,6 +926,9 @@ bool InstrLowerer::lower() { | |
if (NeedsRuntimeHook) | ||
MadeChange = emitRuntimeHook(); | ||
|
||
if (!IsCS && isSamplingEnabled()) | ||
createProfileSamplingVar(M); | ||
|
||
bool ContainsProfiling = containsProfilingIntrinsics(M); | ||
GlobalVariable *CoverageNamesVar = | ||
M.getNamedGlobal(getCoverageUnusedNamesVarName()); | ||
|
@@ -1952,3 +2127,29 @@ void InstrLowerer::emitInitialization() { | |
|
||
appendToGlobalCtors(M, F, 0); | ||
} | ||
|
||
namespace llvm { | ||
// Create the variable for profile sampling. | ||
void createProfileSamplingVar(Module &M) { | ||
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); | ||
IntegerType *SamplingVarTy; | ||
Constant *ValueZero; | ||
if (SampledInstrPeriod.getValue() <= USHRT_MAX) { | ||
SamplingVarTy = Type::getInt16Ty(M.getContext()); | ||
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0)); | ||
} else { | ||
SamplingVarTy = Type::getInt32Ty(M.getContext()); | ||
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0)); | ||
} | ||
auto SamplingVar = new GlobalVariable( | ||
M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName); | ||
SamplingVar->setVisibility(GlobalValue::DefaultVisibility); | ||
SamplingVar->setThreadLocal(true); | ||
Triple TT(M.getTargetTriple()); | ||
if (TT.supportsCOMDAT()) { | ||
SamplingVar->setLinkage(GlobalValue::ExternalLinkage); | ||
SamplingVar->setComdat(M.getOrInsertComdat(VarName)); | ||
} | ||
appendToCompilerUsed(M, SamplingVar); | ||
} | ||
} // namespace llvm |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment on why counter promotion is turned off?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason is mentioned in InstrProfling.C:400.