@@ -170,6 +170,30 @@ cl::opt<bool> SkipRetExitBlock(
170
170
" skip-ret-exit-block" , cl::init(true ),
171
171
cl::desc(" Suppress counter promotion if exit blocks contain ret." ));
172
172
173
+ static cl::opt<bool > SampledInstr (" sampled-instrumentation" , cl::ZeroOrMore,
174
+ cl::init (false ),
175
+ cl::desc(" Do PGO instrumentation sampling" ));
176
+
177
+ static cl::opt<unsigned > SampledInstrPeriod (
178
+ " sampled-instr-period" ,
179
+ cl::desc (" Set the profile instrumentation sample period. For each sample "
180
+ " period, a fixed number of consecutive samples will be recorded. "
181
+ " The number is controlled by 'sampled-instr-burst-duration' flag. "
182
+ " The default sample period of 65535 is optimized for generating "
183
+ " efficient code that leverages unsigned integer wrapping in "
184
+ " overflow." ),
185
+ cl::init(65535 ));
186
+
187
+ static cl::opt<unsigned > SampledInstrBurstDuration (
188
+ " sampled-instr-burst-duration" ,
189
+ cl::desc (" Set the profile instrumentation burst duration, which can range "
190
+ " from 0 to one less than the value of 'sampled-instr-period'. "
191
+ " This number of samples will be recorded for each "
192
+ " 'sampled-instr-period' count update. Setting to 1 enables "
193
+ " simple sampling, in which case it is recommended to set "
194
+ " 'sampled-instr-period' to a prime number." ),
195
+ cl::init(200 ));
196
+
173
197
using LoadStorePair = std::pair<Instruction *, Instruction *>;
174
198
175
199
static uint64_t getIntModuleFlagOrZero (const Module &M, StringRef Flag) {
@@ -260,6 +284,9 @@ class InstrLowerer final {
260
284
// / Returns true if profile counter update register promotion is enabled.
261
285
bool isCounterPromotionEnabled () const ;
262
286
287
+ // / Return true if profile sampling is enabled.
288
+ bool isSamplingEnabled () const ;
289
+
263
290
// / Count the number of instrumented value sites for the function.
264
291
void computeNumValueSiteCounts (InstrProfValueProfileInst *Ins);
265
292
@@ -291,6 +318,9 @@ class InstrLowerer final {
291
318
// / acts on.
292
319
Value *getCounterAddress (InstrProfCntrInstBase *I);
293
320
321
+ // / Lower the incremental instructions under profile sampling predicates.
322
+ void doSampling (Instruction *I);
323
+
294
324
// / Get the region counters for an increment, creating them if necessary.
295
325
// /
296
326
// / If the counter array doesn't yet exist, the profile data variables
@@ -635,33 +665,169 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
635
665
return PreservedAnalyses::none ();
636
666
}
637
667
668
+ //
669
+ // Perform instrumentation sampling.
670
+ //
671
+ // There are 3 favors of sampling:
672
+ // (1) Full burst sampling: We transform:
673
+ // Increment_Instruction;
674
+ // to:
675
+ // if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
676
+ // Increment_Instruction;
677
+ // }
678
+ // __llvm_profile_sampling__ += 1;
679
+ // if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
680
+ // __llvm_profile_sampling__ = 0;
681
+ // }
682
+ //
683
+ // "__llvm_profile_sampling__" is a thread-local global shared by all PGO
684
+ // counters (value-instrumentation and edge instrumentation).
685
+ //
686
+ // (2) Fast burst sampling:
687
+ // "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
688
+ // wrap around to zero when overflows. In this case, the second check is
689
+ // unnecessary, so we won't generate check2 when the SampledInstrPeriod is
690
+ // set to 65535 (64K - 1). The code after:
691
+ // if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
692
+ // Increment_Instruction;
693
+ // }
694
+ // __llvm_profile_sampling__ += 1;
695
+ //
696
+ // (3) Simple sampling:
697
+ // When SampledInstrBurstDuration sets to 1, we do a simple sampling:
698
+ // __llvm_profile_sampling__ += 1;
699
+ // if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
700
+ // __llvm_profile_sampling__ = 0;
701
+ // Increment_Instruction;
702
+ // }
703
+ //
704
+ // Note that, the code snippet after the transformation can still be counter
705
+ // promoted. However, with sampling enabled, counter updates are expected to
706
+ // be infrequent, making the benefits of counter promotion negligible.
707
+ // Moreover, counter promotion can potentially cause issues in server
708
+ // applications, particularly when the counters are dumped without a clean
709
+ // exit. To mitigate this risk, counter promotion is disabled by default when
710
+ // sampling is enabled. This behavior can be overridden using the internal
711
+ // option.
712
+ void InstrLowerer::doSampling (Instruction *I) {
713
+ if (!isSamplingEnabled ())
714
+ return ;
715
+
716
+ unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue ();
717
+ unsigned SampledPeriod = SampledInstrPeriod.getValue ();
718
+ if (SampledBurstDuration >= SampledPeriod) {
719
+ report_fatal_error (
720
+ " SampledPeriod needs to be greater than SampledBurstDuration" );
721
+ }
722
+ bool UseShort = (SampledPeriod <= USHRT_MAX);
723
+ bool IsSimpleSampling = (SampledBurstDuration == 1 );
724
+ // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate
725
+ // the simple sampling style code.
726
+ bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535 );
727
+
728
+ auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {
729
+ if (UseShort)
730
+ return Builder.getInt16 (C);
731
+ else
732
+ return Builder.getInt32 (C);
733
+ };
734
+
735
+ IntegerType *SamplingVarTy;
736
+ if (UseShort)
737
+ SamplingVarTy = Type::getInt16Ty (M.getContext ());
738
+ else
739
+ SamplingVarTy = Type::getInt32Ty (M.getContext ());
740
+ auto *SamplingVar =
741
+ M.getGlobalVariable (INSTR_PROF_QUOTE (INSTR_PROF_PROFILE_SAMPLING_VAR));
742
+ assert (SamplingVar && " SamplingVar not set properly" );
743
+
744
+ // Create the condition for checking the burst duration.
745
+ Instruction *SamplingVarIncr;
746
+ Value *NewSamplingVarVal;
747
+ MDBuilder MDB (I->getContext ());
748
+ MDNode *BranchWeight;
749
+ IRBuilder<> CondBuilder (I);
750
+ auto *LoadSamplingVar = CondBuilder.CreateLoad (SamplingVarTy, SamplingVar);
751
+ if (IsSimpleSampling) {
752
+ // For the simple sampling, just create the load and increments.
753
+ IRBuilder<> IncBuilder (I);
754
+ NewSamplingVarVal =
755
+ IncBuilder.CreateAdd (LoadSamplingVar, GetConstant (IncBuilder, 1 ));
756
+ SamplingVarIncr = IncBuilder.CreateStore (NewSamplingVarVal, SamplingVar);
757
+ } else {
758
+ // For the bust-sampling, create the conditonal update.
759
+ auto *DurationCond = CondBuilder.CreateICmpULE (
760
+ LoadSamplingVar, GetConstant (CondBuilder, SampledBurstDuration));
761
+ BranchWeight = MDB.createBranchWeights (
762
+ SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);
763
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen (
764
+ DurationCond, I, /* Unreachable */ false , BranchWeight);
765
+ IRBuilder<> IncBuilder (I);
766
+ NewSamplingVarVal =
767
+ IncBuilder.CreateAdd (LoadSamplingVar, GetConstant (IncBuilder, 1 ));
768
+ SamplingVarIncr = IncBuilder.CreateStore (NewSamplingVarVal, SamplingVar);
769
+ I->moveBefore (ThenTerm);
770
+ }
771
+
772
+ if (IsFastSampling)
773
+ return ;
774
+
775
+ // Create the condtion for checking the period.
776
+ Instruction *ThenTerm, *ElseTerm;
777
+ IRBuilder<> PeriodCondBuilder (SamplingVarIncr);
778
+ auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE (
779
+ NewSamplingVarVal, GetConstant (PeriodCondBuilder, SampledPeriod));
780
+ BranchWeight = MDB.createBranchWeights (1 , SampledPeriod);
781
+ SplitBlockAndInsertIfThenElse (PeriodCond, SamplingVarIncr, &ThenTerm,
782
+ &ElseTerm, BranchWeight);
783
+
784
+ // For the simple sampling, the counter update happens in sampling var reset.
785
+ if (IsSimpleSampling)
786
+ I->moveBefore (ThenTerm);
787
+
788
+ IRBuilder<> ResetBuilder (ThenTerm);
789
+ ResetBuilder.CreateStore (GetConstant (ResetBuilder, 0 ), SamplingVar);
790
+ SamplingVarIncr->moveBefore (ElseTerm);
791
+ }
792
+
638
793
bool InstrLowerer::lowerIntrinsics (Function *F) {
639
794
bool MadeChange = false ;
640
795
PromotionCandidates.clear ();
796
+ SmallVector<InstrProfInstBase *, 8 > InstrProfInsts;
797
+
798
+ // To ensure compatibility with sampling, we save the intrinsics into
799
+ // a buffer to prevent potential breakage of the iterator (as the
800
+ // intrinsics will be moved to a different BB).
641
801
for (BasicBlock &BB : *F) {
642
802
for (Instruction &Instr : llvm::make_early_inc_range (BB)) {
643
- if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) {
644
- lowerIncrement (IPIS);
645
- MadeChange = true ;
646
- } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
647
- lowerIncrement (IPI);
648
- MadeChange = true ;
649
- } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) {
650
- lowerTimestamp (IPC);
651
- MadeChange = true ;
652
- } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
653
- lowerCover (IPC);
654
- MadeChange = true ;
655
- } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
656
- lowerValueProfileInst (IPVP);
657
- MadeChange = true ;
658
- } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) {
659
- IPMP->eraseFromParent ();
660
- MadeChange = true ;
661
- } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) {
662
- lowerMCDCTestVectorBitmapUpdate (IPBU);
663
- MadeChange = true ;
664
- }
803
+ if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
804
+ InstrProfInsts.push_back (IP);
805
+ }
806
+ }
807
+
808
+ for (auto *Instr : InstrProfInsts) {
809
+ doSampling (Instr);
810
+ if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {
811
+ lowerIncrement (IPIS);
812
+ MadeChange = true ;
813
+ } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) {
814
+ lowerIncrement (IPI);
815
+ MadeChange = true ;
816
+ } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {
817
+ lowerTimestamp (IPC);
818
+ MadeChange = true ;
819
+ } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {
820
+ lowerCover (IPC);
821
+ MadeChange = true ;
822
+ } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {
823
+ lowerValueProfileInst (IPVP);
824
+ MadeChange = true ;
825
+ } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {
826
+ IPMP->eraseFromParent ();
827
+ MadeChange = true ;
828
+ } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {
829
+ lowerMCDCTestVectorBitmapUpdate (IPBU);
830
+ MadeChange = true ;
665
831
}
666
832
}
667
833
@@ -684,6 +850,12 @@ bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
684
850
return TT.isOSFuchsia ();
685
851
}
686
852
853
+ bool InstrLowerer::isSamplingEnabled () const {
854
+ if (SampledInstr.getNumOccurrences () > 0 )
855
+ return SampledInstr;
856
+ return Options.Sampling ;
857
+ }
858
+
687
859
bool InstrLowerer::isCounterPromotionEnabled () const {
688
860
if (DoCounterPromotion.getNumOccurrences () > 0 )
689
861
return DoCounterPromotion;
@@ -754,6 +926,9 @@ bool InstrLowerer::lower() {
754
926
if (NeedsRuntimeHook)
755
927
MadeChange = emitRuntimeHook ();
756
928
929
+ if (!IsCS && isSamplingEnabled ())
930
+ createProfileSamplingVar (M);
931
+
757
932
bool ContainsProfiling = containsProfilingIntrinsics (M);
758
933
GlobalVariable *CoverageNamesVar =
759
934
M.getNamedGlobal (getCoverageUnusedNamesVarName ());
@@ -1955,3 +2130,29 @@ void InstrLowerer::emitInitialization() {
1955
2130
1956
2131
appendToGlobalCtors (M, F, 0 );
1957
2132
}
2133
+
2134
+ namespace llvm {
2135
+ // Create the variable for profile sampling.
2136
+ void createProfileSamplingVar (Module &M) {
2137
+ const StringRef VarName (INSTR_PROF_QUOTE (INSTR_PROF_PROFILE_SAMPLING_VAR));
2138
+ IntegerType *SamplingVarTy;
2139
+ Constant *ValueZero;
2140
+ if (SampledInstrPeriod.getValue () <= USHRT_MAX) {
2141
+ SamplingVarTy = Type::getInt16Ty (M.getContext ());
2142
+ ValueZero = Constant::getIntegerValue (SamplingVarTy, APInt (16 , 0 ));
2143
+ } else {
2144
+ SamplingVarTy = Type::getInt32Ty (M.getContext ());
2145
+ ValueZero = Constant::getIntegerValue (SamplingVarTy, APInt (32 , 0 ));
2146
+ }
2147
+ auto SamplingVar = new GlobalVariable (
2148
+ M, SamplingVarTy, false , GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2149
+ SamplingVar->setVisibility (GlobalValue::DefaultVisibility);
2150
+ SamplingVar->setThreadLocal (true );
2151
+ Triple TT (M.getTargetTriple ());
2152
+ if (TT.supportsCOMDAT ()) {
2153
+ SamplingVar->setLinkage (GlobalValue::ExternalLinkage);
2154
+ SamplingVar->setComdat (M.getOrInsertComdat (VarName));
2155
+ }
2156
+ appendToCompilerUsed (M, SamplingVar);
2157
+ }
2158
+ } // namespace llvm
0 commit comments