Skip to content

Commit 2d64185

Browse files
[nfc][PGO]Factor out profile scaling into a standalone helper function (llvm#83780)
- Put the helper function in `ProfDataUtils.h/cpp`, which is already a dependency of `Instructions.cpp`.
- The helper function could be re-used to update profiles of `InvokeInst` (in a follow-up pull request).
1 parent 2329fb2 commit 2d64185

File tree

5 files changed

+197
-45
lines changed

5 files changed

+197
-45
lines changed

llvm/include/llvm/IR/ProfDataUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
108108
/// a `prof` metadata reference to instruction `I`.
109109
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);
110110

111+
/// Scale the profile data attached to 'I' by the ratio S/T.
112+
void scaleProfData(Instruction &I, uint64_t S, uint64_t T);
113+
111114
} // namespace llvm
112115
#endif

llvm/lib/IR/Instructions.cpp

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -833,15 +833,6 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
833833
// of S/T. The meaning of "branch_weights" meta data for call instruction is
834834
// transferred to represent call count.
835835
void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
836-
auto *ProfileData = getMetadata(LLVMContext::MD_prof);
837-
if (ProfileData == nullptr)
838-
return;
839-
840-
auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
841-
if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
842-
!ProfDataName->getString().equals("VP")))
843-
return;
844-
845836
if (T == 0) {
846837
LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
847838
"div by 0. Ignoring. Likely the function "
@@ -850,42 +841,7 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
850841
"with non-zero prof info.");
851842
return;
852843
}
853-
854-
MDBuilder MDB(getContext());
855-
SmallVector<Metadata *, 3> Vals;
856-
Vals.push_back(ProfileData->getOperand(0));
857-
APInt APS(128, S), APT(128, T);
858-
if (ProfDataName->getString().equals("branch_weights") &&
859-
ProfileData->getNumOperands() > 0) {
860-
// Using APInt::div may be expensive, but most cases should fit 64 bits.
861-
APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
862-
->getValue()
863-
.getZExtValue());
864-
Val *= APS;
865-
Vals.push_back(MDB.createConstant(
866-
ConstantInt::get(Type::getInt32Ty(getContext()),
867-
Val.udiv(APT).getLimitedValue(UINT32_MAX))));
868-
} else if (ProfDataName->getString().equals("VP"))
869-
for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
870-
// The first value is the key of the value profile, which will not change.
871-
Vals.push_back(ProfileData->getOperand(i));
872-
uint64_t Count =
873-
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
874-
->getValue()
875-
.getZExtValue();
876-
// Don't scale the magic number.
877-
if (Count == NOMORE_ICP_MAGICNUM) {
878-
Vals.push_back(ProfileData->getOperand(i + 1));
879-
continue;
880-
}
881-
// Using APInt::div may be expensive, but most cases should fit 64 bits.
882-
APInt Val(128, Count);
883-
Val *= APS;
884-
Vals.push_back(MDB.createConstant(
885-
ConstantInt::get(Type::getInt64Ty(getContext()),
886-
Val.udiv(APT).getLimitedValue())));
887-
}
888-
setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
844+
scaleProfData(*this, S, T);
889845
}
890846

891847
//===----------------------------------------------------------------------===//

llvm/lib/IR/ProfDataUtils.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,52 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
190190
I.setMetadata(LLVMContext::MD_prof, BranchWeights);
191191
}
192192

193+
/// Rescale the `!prof` metadata attached to \p I by the ratio S/T and replace
/// the attachment with the rescaled node.
///
/// Only metadata whose first operand is the string "branch_weights" or "VP"
/// is touched; anything else (or a missing `!prof` attachment) leaves \p I
/// unchanged.
///
/// \param I  Instruction whose `!prof` metadata is rewritten in place.
/// \param S  Numerator of the scaling ratio.
/// \param T  Denominator of the scaling ratio; must be non-zero (asserted).
void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
194+
// Division by T happens below, so a zero T is a caller bug.
assert(T != 0 && "Caller should guarantee");
195+
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
196+
// Nothing to scale if the instruction carries no profile metadata.
if (ProfileData == nullptr)
197+
return;
198+
199+
// Operand 0 names the kind of profile data; only the two kinds checked
// below are handled.
auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
200+
if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
201+
!ProfDataName->getString().equals("VP")))
202+
return;
203+
204+
LLVMContext &C = I.getContext();
205+
206+
MDBuilder MDB(C);
207+
// Operands of the replacement node; the name operand is carried over as-is.
SmallVector<Metadata *, 3> Vals;
208+
Vals.push_back(ProfileData->getOperand(0));
209+
// 128-bit arithmetic so that a 64-bit count multiplied by S cannot overflow
// before the division by T.
APInt APS(128, S), APT(128, T);
210+
// NOTE(review): operand 0 was already read above, so the `> 0` test below
// cannot fail here and does not guarantee that operand 1 exists -- confirm
// whether `> 1` was intended.
if (ProfDataName->getString().equals("branch_weights") &&
211+
ProfileData->getNumOperands() > 0) {
212+
// Using APInt::div may be expensive, but most cases should fit 64 bits.
// Only operand 1 is scaled and emitted; the result is written as an i32
// capped at UINT32_MAX.
// NOTE(review): the dyn_extract result is dereferenced without a null
// check -- presumably operand 1 is always a ConstantInt; verify.
213+
APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
214+
->getValue()
215+
.getZExtValue());
216+
Val *= APS;
217+
Vals.push_back(MDB.createConstant(ConstantInt::get(
218+
Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX))));
219+
} else if (ProfDataName->getString().equals("VP"))
220+
// Operands after the name are walked in pairs: the first of each pair is
// copied unchanged and the second is treated as a count and scaled.
for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
221+
// The first value is the key of the value profile, which will not change.
222+
Vals.push_back(ProfileData->getOperand(i));
223+
// NOTE(review): operand i + 1 is read without a bounds or null check --
// this assumes the operands after the name come in complete pairs of
// ConstantInt; verify against the VP metadata producers.
uint64_t Count =
224+
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
225+
->getValue()
226+
.getZExtValue();
227+
// Don't scale the magic number.
228+
if (Count == NOMORE_ICP_MAGICNUM) {
229+
Vals.push_back(ProfileData->getOperand(i + 1));
230+
continue;
231+
}
232+
// Using APInt::div may be expensive, but most cases should fit 64 bits.
// The scaled count is emitted as an i64.
233+
APInt Val(128, Count);
234+
Val *= APS;
235+
Vals.push_back(MDB.createConstant(ConstantInt::get(
236+
Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue())));
237+
}
238+
// Replace the old attachment with a node built from the rescaled operands.
I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
239+
}
240+
193241
} // namespace llvm
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; A pre-commit test to show that branch weights and value profiles associated with invoke are not updated.
2+
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
3+
4+
declare i32 @__gxx_personality_v0(...)
5+
6+
define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
7+
call void @callee(ptr %func), !prof !16
8+
ret void
9+
}
10+
11+
declare void @inner_callee(ptr %func)
12+
13+
define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
14+
invoke void %func()
15+
to label %next unwind label %lpad, !prof !18
16+
17+
next:
18+
invoke void @inner_callee(ptr %func)
19+
to label %ret unwind label %lpad, !prof !19
20+
21+
lpad:
22+
%exn = landingpad {ptr, i32}
23+
cleanup
24+
unreachable
25+
26+
ret:
27+
ret void
28+
}
29+
30+
!llvm.module.flags = !{!1}
31+
!1 = !{i32 1, !"ProfileSummary", !2}
32+
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
33+
!3 = !{!"ProfileFormat", !"SampleProfile"}
34+
!4 = !{!"TotalCount", i64 10000}
35+
!5 = !{!"MaxCount", i64 10}
36+
!6 = !{!"MaxInternalCount", i64 1}
37+
!7 = !{!"MaxFunctionCount", i64 2000}
38+
!8 = !{!"NumCounts", i64 2}
39+
!9 = !{!"NumFunctions", i64 2}
40+
!10 = !{!"DetailedSummary", !11}
41+
!11 = !{!12, !13, !14}
42+
!12 = !{i32 10000, i64 100, i32 1}
43+
!13 = !{i32 999000, i64 100, i32 1}
44+
!14 = !{i32 999999, i64 1, i32 2}
45+
!15 = !{!"function_entry_count", i64 1000}
46+
!16 = !{!"branch_weights", i64 1000}
47+
!17 = !{!"function_entry_count", i32 1500}
48+
!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
49+
!19 = !{!"branch_weights", i32 1500}
50+
51+
; CHECK-LABEL: @caller(
52+
; CHECK: invoke void %func(
53+
; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]
54+
; CHECK: invoke void @inner_callee(
55+
; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]]
56+
57+
; CHECK-LABEL: @callee(
58+
; CHECK: invoke void %func(
59+
; CHECK-NEXT: {{.*}} !prof ![[PROF1]]
60+
; CHECK: invoke void @inner_callee(
61+
; CHECK-NEXT: {{.*}} !prof ![[PROF2]]
62+
63+
; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
64+
; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1500}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
2+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
3+
target triple = "x86_64-unknown-linux-gnu"
4+
5+
; When 'callee' is inlined into caller1 and caller2, the indirect call value
6+
; profiles of the inlined copy should be scaled based on callers' profiles,
7+
; and the indirect call value profiles in 'callee' should be updated.
8+
define i32 @callee(ptr %0, i32 %1) !prof !20 {
9+
; CHECK-LABEL: define i32 @callee(
10+
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
11+
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
12+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
13+
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
14+
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]]
15+
; CHECK-NEXT: ret i32 [[TMP6]]
16+
;
17+
%3 = load ptr, ptr %0
18+
%5 = getelementptr inbounds i8, ptr %3, i64 8
19+
%6 = load ptr, ptr %5
20+
%7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
21+
ret i32 %7
22+
}
23+
24+
define i32 @caller1(i32 %0) !prof !18 {
25+
; CHECK-LABEL: define i32 @caller1(
26+
; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] {
27+
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
28+
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
29+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
30+
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
31+
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]]
32+
; CHECK-NEXT: ret i32 [[TMP6]]
33+
;
34+
%2 = tail call ptr @_Z10createTypei(i32 %0)
35+
%3 = tail call i32 @callee(ptr %2, i32 %0)
36+
ret i32 %3
37+
}
38+
39+
define i32 @caller2(i32 %0) !prof !19 {
40+
; CHECK-LABEL: define i32 @caller2(
41+
; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] {
42+
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
43+
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
44+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
45+
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
46+
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
47+
; CHECK-NEXT: ret i32 [[TMP6]]
48+
;
49+
%2 = tail call ptr @_Z10createTypei(i32 %0)
50+
%3 = tail call i32 @callee(ptr %2, i32 %0)
51+
ret i32 %3
52+
}
53+
54+
declare ptr @_Z10createTypei(i32)
55+
56+
!1 = !{i32 1, !"ProfileSummary", !2}
57+
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
58+
!3 = !{!"ProfileFormat", !"InstrProf"}
59+
!4 = !{!"TotalCount", i64 10000}
60+
!5 = !{!"MaxCount", i64 10}
61+
!6 = !{!"MaxInternalCount", i64 1}
62+
!7 = !{!"MaxFunctionCount", i64 1000}
63+
!8 = !{!"NumCounts", i64 3}
64+
!9 = !{!"NumFunctions", i64 3}
65+
!10 = !{!"DetailedSummary", !11}
66+
!11 = !{!12, !13, !14}
67+
!12 = !{i32 10000, i64 100, i32 1}
68+
!13 = !{i32 999000, i64 100, i32 1}
69+
!14 = !{i32 999999, i64 1, i32 2}
70+
!17 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600}
71+
!18 = !{!"function_entry_count", i64 1000}
72+
!19 = !{!"function_entry_count", i64 600}
73+
!20 = !{!"function_entry_count", i64 1700}
74+
;.
75+
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
76+
; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35}
77+
; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
78+
; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352}
79+
; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600}
80+
; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211}
81+
;.

0 commit comments

Comments
 (0)