-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[nfc][PGO]Factor out profile scaling into a standalone helper function #83780
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-ir Author: Mingming Liu (minglotus-6) Changes
Full diff: https://github.com/llvm/llvm-project/pull/83780.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index 255fa2ff1c7906..c0897408986fb3 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
/// a `prof` metadata reference to instruction `I`.
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);
+/// Scaling the profile data attached to 'I' using the ratio of S/T.
+void scaleProfData(Instruction &I, uint64_t S, uint64_t T);
+
} // namespace llvm
#endif
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 42cdcad78228f6..9ae71acd523c36 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -825,15 +825,6 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
// of S/T. The meaning of "branch_weights" meta data for call instruction is
// transfered to represent call count.
void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
- auto *ProfileData = getMetadata(LLVMContext::MD_prof);
- if (ProfileData == nullptr)
- return;
-
- auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
- if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
- !ProfDataName->getString().equals("VP")))
- return;
-
if (T == 0) {
LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
"div by 0. Ignoring. Likely the function "
@@ -842,42 +833,7 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
"with non-zero prof info.");
return;
}
-
- MDBuilder MDB(getContext());
- SmallVector<Metadata *, 3> Vals;
- Vals.push_back(ProfileData->getOperand(0));
- APInt APS(128, S), APT(128, T);
- if (ProfDataName->getString().equals("branch_weights") &&
- ProfileData->getNumOperands() > 0) {
- // Using APInt::div may be expensive, but most cases should fit 64 bits.
- APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
- ->getValue()
- .getZExtValue());
- Val *= APS;
- Vals.push_back(MDB.createConstant(
- ConstantInt::get(Type::getInt32Ty(getContext()),
- Val.udiv(APT).getLimitedValue(UINT32_MAX))));
- } else if (ProfDataName->getString().equals("VP"))
- for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
- // The first value is the key of the value profile, which will not change.
- Vals.push_back(ProfileData->getOperand(i));
- uint64_t Count =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
- ->getValue()
- .getZExtValue();
- // Don't scale the magic number.
- if (Count == NOMORE_ICP_MAGICNUM) {
- Vals.push_back(ProfileData->getOperand(i + 1));
- continue;
- }
- // Using APInt::div may be expensive, but most cases should fit 64 bits.
- APInt Val(128, Count);
- Val *= APS;
- Vals.push_back(MDB.createConstant(
- ConstantInt::get(Type::getInt64Ty(getContext()),
- Val.udiv(APT).getLimitedValue())));
- }
- setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
+ scaleProfData(*this, S, T);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index b1a10d0ce5a522..dc86f4204b1a1d 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -190,4 +190,52 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
I.setMetadata(LLVMContext::MD_prof, BranchWeights);
}
+void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
+ assert(T != 0 && "Caller should guarantee");
+ auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
+ if (ProfileData == nullptr)
+ return;
+
+ auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+ if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
+ !ProfDataName->getString().equals("VP")))
+ return;
+
+ LLVMContext &C = I.getContext();
+
+ MDBuilder MDB(C);
+ SmallVector<Metadata *, 3> Vals;
+ Vals.push_back(ProfileData->getOperand(0));
+ APInt APS(128, S), APT(128, T);
+ if (ProfDataName->getString().equals("branch_weights") &&
+ ProfileData->getNumOperands() > 0) {
+ // Using APInt::div may be expensive, but most cases should fit 64 bits.
+ APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
+ ->getValue()
+ .getZExtValue());
+ Val *= APS;
+ Vals.push_back(MDB.createConstant(ConstantInt::get(
+ Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX))));
+ } else if (ProfDataName->getString().equals("VP"))
+ for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
+ // The first value is the key of the value profile, which will not change.
+ Vals.push_back(ProfileData->getOperand(i));
+ uint64_t Count =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
+ ->getValue()
+ .getZExtValue();
+ // Don't scale the magic number.
+ if (Count == NOMORE_ICP_MAGICNUM) {
+ Vals.push_back(ProfileData->getOperand(i + 1));
+ continue;
+ }
+ // Using APInt::div may be expensive, but most cases should fit 64 bits.
+ APInt Val(128, Count);
+ Val *= APS;
+ Vals.push_back(MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue())));
+ }
+ I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
+}
+
} // namespace llvm
diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll
new file mode 100644
index 00000000000000..7fa8c28f89f7ce
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_value_profile.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; When 'callee' is inlined into caller1 and caller2, the indirect call value
+; profiles of the inlined copy should be scaled based on callers' profiles,
+; and the indirect call value profiles in 'callee' should be updated.
+define i32 @callee(ptr %0, i32 %1) !prof !20 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %3 = load ptr, ptr %0
+ %5 = getelementptr inbounds i8, ptr %3, i64 8
+ %6 = load ptr, ptr %5
+ %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
+ ret i32 %7
+}
+
+define i32 @caller1(i32 %0) !prof !18 {
+; CHECK-LABEL: define i32 @caller1(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %2 = tail call ptr @_Z10createTypei(i32 %0)
+ %3 = tail call i32 @callee(ptr %2, i32 %0)
+ ret i32 %3
+}
+
+define i32 @caller2(i32 %0) !prof !19 {
+; CHECK-LABEL: define i32 @caller2(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %2 = tail call ptr @_Z10createTypei(i32 %0)
+ %3 = tail call i32 @callee(ptr %2, i32 %0)
+ ret i32 %3
+}
+
+declare ptr @_Z10createTypei(i32)
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!17 = !{!"VP", i32 0, i64 1600, i64 15186643663281959480, i64 1000, i64 15101948577241817854, i64 600}
+!18 = !{!"function_entry_count", i64 1000}
+!19 = !{!"function_entry_count", i64 600}
+!20 = !{!"function_entry_count", i64 1700}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
+; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 -3260100410427592136, i64 58, i64 -3344795496467733762, i64 35}
+; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 -3260100410427592136, i64 588, i64 -3344795496467733762, i64 352}
+; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600}
+; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 -3260100410427592136, i64 352, i64 -3344795496467733762, i64 211}
+;.
|
…er for invoke with value profiles
@@ -0,0 +1,54 @@ | |||
; A pre-commit test to show that value profiles associated with invoke are not updated. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can the two test cases be merged into one? i.e., callee makes calls to inner_callee and *func .
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
@@ -0,0 +1,82 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove this line?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
!20 = !{!"function_entry_count", i64 1700} | ||
;. | ||
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100} | ||
; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 -3260100410427592136, i64 58, i64 -3344795496467733762, i64 35} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why does the md5sum change (compared with !17)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I didn't realize this; test passes (and the two numbers have the same hex representation).
I updated both to simpler values.
ProfDataUtil.h/cpp
, which is already a dependency ofInstructions.cpp
InvokeInst
(in a follow-up pull request)