Skip to content

Commit b4fcaa1

Browse files
authored
[PGO][SampledInstr] Correct off by 1s and allow 100% sampling (#113350)
This corrects a couple off by ones related to the sampling of **instrumented** counters, and enables setting 100% rates for burst sampling (burst duration = period). Off by ones: Prior to this change it was impossible to set a period of 65535 because this was converted to fast sampling which rollsover at USHRT_MAX + 1 (65536). Similarly the burst durations would collect burst duration + 1 counts as they used an ULE comparison. 100% sampling: Although this is not useful for a productionized use case, it does allow for more deterministic testing with the sampling checks in place. After all the off by ones are fixed, allowing for 100% sampling is a matter of letting burst duration = period.
1 parent 2e0506f commit b4fcaa1

File tree

7 files changed

+81
-57
lines changed

7 files changed

+81
-57
lines changed

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 56 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -181,24 +181,52 @@ static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore,
181181

182182
static cl::opt<unsigned> SampledInstrPeriod(
183183
"sampled-instr-period",
184-
cl::desc("Set the profile instrumentation sample period. For each sample "
185-
"period, a fixed number of consecutive samples will be recorded. "
186-
"The number is controlled by 'sampled-instr-burst-duration' flag. "
187-
"The default sample period of 65535 is optimized for generating "
188-
"efficient code that leverages unsigned integer wrapping in "
189-
"overflow."),
190-
cl::init(65535));
184+
cl::desc("Set the profile instrumentation sample period. A sample period "
185+
"of 0 is invalid. For each sample period, a fixed number of "
186+
"consecutive samples will be recorded. The number is controlled "
187+
"by 'sampled-instr-burst-duration' flag. The default sample "
188+
"period of 65536 is optimized for generating efficient code that "
189+
"leverages unsigned short integer wrapping in overflow, but this "
190+
"is disabled under simple sampling (burst duration = 1)."),
191+
cl::init(USHRT_MAX + 1));
191192

192193
static cl::opt<unsigned> SampledInstrBurstDuration(
193194
"sampled-instr-burst-duration",
194195
cl::desc("Set the profile instrumentation burst duration, which can range "
195-
"from 0 to one less than the value of 'sampled-instr-period'. "
196+
"from 1 to the value of 'sampled-instr-period' (0 is invalid). "
196197
"This number of samples will be recorded for each "
197-
"'sampled-instr-period' count update. Setting to 1 enables "
198-
"simple sampling, in which case it is recommended to set "
198+
"'sampled-instr-period' count update. Setting to 1 enables simple "
199+
"sampling, in which case it is recommended to set "
199200
"'sampled-instr-period' to a prime number."),
200201
cl::init(200));
201202

203+
struct SampledInstrumentationConfig {
204+
unsigned BurstDuration;
205+
unsigned Period;
206+
bool UseShort;
207+
bool IsSimpleSampling;
208+
bool IsFastSampling;
209+
};
210+
211+
static SampledInstrumentationConfig getSampledInstrumentationConfig() {
212+
SampledInstrumentationConfig config;
213+
config.BurstDuration = SampledInstrBurstDuration.getValue();
214+
config.Period = SampledInstrPeriod.getValue();
215+
if (config.BurstDuration > config.Period)
216+
report_fatal_error(
217+
"SampledBurstDuration must be less than or equal to SampledPeriod");
218+
if (config.Period == 0 || config.BurstDuration == 0)
219+
report_fatal_error(
220+
"SampledPeriod and SampledBurstDuration must be greater than 0");
221+
config.IsSimpleSampling = (config.BurstDuration == 1);
222+
// If (BurstDuration == 1 && Period == 65536), generate the simple sampling
223+
// style code.
224+
config.IsFastSampling =
225+
(!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
226+
config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
227+
return config;
228+
}
229+
202230
using LoadStorePair = std::pair<Instruction *, Instruction *>;
203231

204232
static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
@@ -665,7 +693,7 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
665693
// (1) Full burst sampling: We transform:
666694
// Increment_Instruction;
667695
// to:
668-
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
696+
// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
669697
// Increment_Instruction;
670698
// }
671699
// __llvm_profile_sampling__ += 1;
@@ -680,14 +708,14 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
680708
// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
681709
// wrap around to zero when overflows. In this case, the second check is
682710
// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
683-
// set to 65535 (64K - 1). The code after:
684-
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
711+
// set to 65536 (64K). The code after:
712+
// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
685713
// Increment_Instruction;
686714
// }
687715
// __llvm_profile_sampling__ += 1;
688716
//
689717
// (3) Simple sampling:
690-
// When SampledInstrBurstDuration sets to 1, we do a simple sampling:
718+
// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
691719
// __llvm_profile_sampling__ += 1;
692720
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
693721
// __llvm_profile_sampling__ = 0;
@@ -706,27 +734,16 @@ void InstrLowerer::doSampling(Instruction *I) {
706734
if (!isSamplingEnabled())
707735
return;
708736

709-
unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue();
710-
unsigned SampledPeriod = SampledInstrPeriod.getValue();
711-
if (SampledBurstDuration >= SampledPeriod) {
712-
report_fatal_error(
713-
"SampledPeriod needs to be greater than SampledBurstDuration");
714-
}
715-
bool UseShort = (SampledPeriod <= USHRT_MAX);
716-
bool IsSimpleSampling = (SampledBurstDuration == 1);
717-
// If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate
718-
// the simple sampling style code.
719-
bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535);
720-
721-
auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {
722-
if (UseShort)
737+
SampledInstrumentationConfig config = getSampledInstrumentationConfig();
738+
auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
739+
if (config.UseShort)
723740
return Builder.getInt16(C);
724741
else
725742
return Builder.getInt32(C);
726743
};
727744

728745
IntegerType *SamplingVarTy;
729-
if (UseShort)
746+
if (config.UseShort)
730747
SamplingVarTy = Type::getInt16Ty(M.getContext());
731748
else
732749
SamplingVarTy = Type::getInt32Ty(M.getContext());
@@ -741,18 +758,18 @@ void InstrLowerer::doSampling(Instruction *I) {
741758
MDNode *BranchWeight;
742759
IRBuilder<> CondBuilder(I);
743760
auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
744-
if (IsSimpleSampling) {
761+
if (config.IsSimpleSampling) {
745762
// For the simple sampling, just create the load and increments.
746763
IRBuilder<> IncBuilder(I);
747764
NewSamplingVarVal =
748765
IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
749766
SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
750767
} else {
751-
// For the bust-sampling, create the conditonal update.
768+
// For the burst-sampling, create the conditional update.
752769
auto *DurationCond = CondBuilder.CreateICmpULE(
753-
LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration));
770+
LoadSamplingVar, GetConstant(CondBuilder, config.BurstDuration - 1));
754771
BranchWeight = MDB.createBranchWeights(
755-
SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);
772+
config.BurstDuration, config.Period - config.BurstDuration);
756773
Instruction *ThenTerm = SplitBlockAndInsertIfThen(
757774
DurationCond, I, /* Unreachable */ false, BranchWeight);
758775
IRBuilder<> IncBuilder(I);
@@ -762,20 +779,20 @@ void InstrLowerer::doSampling(Instruction *I) {
762779
I->moveBefore(ThenTerm);
763780
}
764781

765-
if (IsFastSampling)
782+
if (config.IsFastSampling)
766783
return;
767784

768-
// Create the condtion for checking the period.
785+
// Create the condition for checking the period.
769786
Instruction *ThenTerm, *ElseTerm;
770787
IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
771788
auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
772-
NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod));
773-
BranchWeight = MDB.createBranchWeights(1, SampledPeriod);
789+
NewSamplingVarVal, GetConstant(PeriodCondBuilder, config.Period));
790+
BranchWeight = MDB.createBranchWeights(1, config.Period - 1);
774791
SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
775792
&ElseTerm, BranchWeight);
776793

777794
// For the simple sampling, the counter update happens in sampling var reset.
778-
if (IsSimpleSampling)
795+
if (config.IsSimpleSampling)
779796
I->moveBefore(ThenTerm);
780797

781798
IRBuilder<> ResetBuilder(ThenTerm);
@@ -2138,7 +2155,7 @@ void createProfileSamplingVar(Module &M) {
21382155
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
21392156
IntegerType *SamplingVarTy;
21402157
Constant *ValueZero;
2141-
if (SampledInstrPeriod.getValue() <= USHRT_MAX) {
2158+
if (getSampledInstrumentationConfig().UseShort) {
21422159
SamplingVarTy = Type::getInt16Ty(M.getContext());
21432160
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));
21442161
} else {

llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
define void @foo(i32 %n, i32 %N) {
77
; SAMPLING-LABEL: @foo
88
; SAMPLING: %[[VV0:[0-9]+]] = load i16, ptr @__llvm_profile_sampling, align 2
9-
; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 200
9+
; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 199
1010
; SAMPLING: br i1 %[[VV1]], label {{.*}}, label {{.*}}, !prof !0
1111
; SAMPLING: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3)
1212
; SAMPLING-NEXT: add

llvm/test/Transforms/PGOProfile/cspgo_sample.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ for.end:
5353

5454
; CSGEN-LABEL: @foo
5555
; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2
56-
; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201
56+
; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 200
5757
; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]]
5858
; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1
5959
; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2
@@ -67,7 +67,7 @@ entry:
6767
}
6868
; CSGEN-LABEL: @main
6969
; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2
70-
; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201
70+
; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 200
7171
; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]]
7272
; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1
7373
; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2

llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: opt < %s --passes=instrprof --sampled-instrumentation -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION,SAMPLE-WEIGHT
22
; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=100 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION100,SAMPLE-WEIGHT100
3+
; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=65536 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,UNSAMPLED-DURATION,UNSAMPLED-WEIGHT
34

45
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
56
target triple = "x86_64-unknown-linux-gnu"
@@ -23,8 +24,9 @@ define void @f() {
2324
; SAMPLE-CODE-LABEL: @f(
2425
; SAMPLE-CODE: entry:
2526
; SAMPLE-CODE-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2
26-
; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 200
27-
; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 100
27+
; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 199
28+
; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 99
29+
; UNSAMPLED-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], -1
2830
; SAMPLE-CODE: br i1 [[TMP1]], label %[[TMP2:.*]], label %[[TMP4:.*]], !prof !0
2931
; SAMPLE-CODE: [[TMP2]]:
3032
; SAMPLE-CODE-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f
@@ -43,5 +45,6 @@ entry:
4345

4446
; SAMPLE-WEIGHT: !0 = !{!"branch_weights", i32 200, i32 65336}
4547
; SAMPLE-WEIGHT100: !0 = !{!"branch_weights", i32 100, i32 65436}
48+
; UNSAMPLED-WEIGHT: !0 = !{!"branch_weights", i32 65536, i32 0}
4649

4750
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)

llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s
2+
; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s --check-prefixes=CHECK,CHECK-32
3+
; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=1009 -S | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAMPLED
34

45
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
56
target triple = "x86_64-unknown-linux-gnu"
@@ -13,7 +14,8 @@ define void @f() {
1314
; CHECK-LABEL: define void @f() {
1415
; CHECK-NEXT: [[ENTRY:.*:]]
1516
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2
16-
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 32
17+
; CHECK-32-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 31
18+
; CHECK-UNSAMPLED-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 1008
1719
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]]
1820
; CHECK: [[BB2]]:
1921
; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8
@@ -40,6 +42,8 @@ entry:
4042

4143
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
4244
;.
43-
; CHECK: [[PROF0]] = !{!"branch_weights", i32 32, i32 978}
44-
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1009}
45+
; CHECK-32: [[PROF0]] = !{!"branch_weights", i32 32, i32 977}
46+
; CHECK-32: [[PROF1]] = !{!"branch_weights", i32 1, i32 1008}
47+
; CHECK-UNSAMPLED: [[PROF0]] = !{!"branch_weights", i32 1009, i32 0}
48+
; CHECK-UNSAMPLED: [[PROF1]] = !{!"branch_weights", i32 1, i32 1008}
4549
;.

llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define void @f() {
1313
; CHECK-LABEL: define void @f() {
1414
; CHECK-NEXT: [[ENTRY:.*:]]
1515
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4
16-
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 3000
16+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 2999
1717
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]]
1818
; CHECK: [[BB2]]:
1919
; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8
@@ -40,6 +40,6 @@ entry:
4040

4141
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
4242
;.
43-
; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997020}
44-
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000019}
43+
; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997019}
44+
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000018}
4545
;.

llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,18 @@ define void @f() {
3131
;
3232
; DEFAULTPERIOD-LABEL: define void @f() {
3333
; DEFAULTPERIOD-NEXT: [[ENTRY:.*:]]
34-
; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2
35-
; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1
36-
; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i16 [[TMP1]], -1
34+
; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4
35+
; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1
36+
; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i32 [[TMP1]], 65536
3737
; DEFAULTPERIOD-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB5:.*]], !prof [[PROF0:![0-9]+]]
3838
; DEFAULTPERIOD: [[BB3]]:
3939
; DEFAULTPERIOD-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8
4040
; DEFAULTPERIOD-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1
4141
; DEFAULTPERIOD-NEXT: store i64 [[TMP4]], ptr @__profc_f, align 8
42-
; DEFAULTPERIOD-NEXT: store i16 0, ptr @__llvm_profile_sampling, align 2
42+
; DEFAULTPERIOD-NEXT: store i32 0, ptr @__llvm_profile_sampling, align 4
4343
; DEFAULTPERIOD-NEXT: br label %[[BB6:.*]]
4444
; DEFAULTPERIOD: [[BB5]]:
45-
; DEFAULTPERIOD-NEXT: store i16 [[TMP1]], ptr @__llvm_profile_sampling, align 2
45+
; DEFAULTPERIOD-NEXT: store i32 [[TMP1]], ptr @__llvm_profile_sampling, align 4
4646
; DEFAULTPERIOD-NEXT: br label %[[BB6]]
4747
; DEFAULTPERIOD: [[BB6]]:
4848
; DEFAULTPERIOD-NEXT: ret void
@@ -54,7 +54,7 @@ entry:
5454

5555
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
5656
;.
57-
; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1009}
57+
; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1008}
5858
;.
5959
; DEFAULTPERIOD: [[PROF0]] = !{!"branch_weights", i32 1, i32 65535}
6060
;.

0 commit comments

Comments
 (0)