Skip to content

[nfc][PGO]Factor out profile scaling into a standalone helper function #83780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/ProfDataUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
/// a `prof` metadata reference to instruction `I`.
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);

/// Scaling the profile data attached to 'I' using the ratio of S/T.
void scaleProfData(Instruction &I, uint64_t S, uint64_t T);

} // namespace llvm
#endif
46 changes: 1 addition & 45 deletions llvm/lib/IR/Instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,6 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
// of S/T. The meaning of "branch_weights" meta data for call instruction is
// transfered to represent call count.
void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
auto *ProfileData = getMetadata(LLVMContext::MD_prof);
if (ProfileData == nullptr)
return;

auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
!ProfDataName->getString().equals("VP")))
return;

if (T == 0) {
LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
"div by 0. Ignoring. Likely the function "
Expand All @@ -842,42 +833,7 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
"with non-zero prof info.");
return;
}

MDBuilder MDB(getContext());
SmallVector<Metadata *, 3> Vals;
Vals.push_back(ProfileData->getOperand(0));
APInt APS(128, S), APT(128, T);
if (ProfDataName->getString().equals("branch_weights") &&
ProfileData->getNumOperands() > 0) {
// Using APInt::div may be expensive, but most cases should fit 64 bits.
APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
->getValue()
.getZExtValue());
Val *= APS;
Vals.push_back(MDB.createConstant(
ConstantInt::get(Type::getInt32Ty(getContext()),
Val.udiv(APT).getLimitedValue(UINT32_MAX))));
} else if (ProfDataName->getString().equals("VP"))
for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
// The first value is the key of the value profile, which will not change.
Vals.push_back(ProfileData->getOperand(i));
uint64_t Count =
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
->getValue()
.getZExtValue();
// Don't scale the magic number.
if (Count == NOMORE_ICP_MAGICNUM) {
Vals.push_back(ProfileData->getOperand(i + 1));
continue;
}
// Using APInt::div may be expensive, but most cases should fit 64 bits.
APInt Val(128, Count);
Val *= APS;
Vals.push_back(MDB.createConstant(
ConstantInt::get(Type::getInt64Ty(getContext()),
Val.udiv(APT).getLimitedValue())));
}
setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
scaleProfData(*this, S, T);
}

//===----------------------------------------------------------------------===//
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/IR/ProfDataUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,52 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
I.setMetadata(LLVMContext::MD_prof, BranchWeights);
}

void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
assert(T != 0 && "Caller should guarantee");
auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
if (ProfileData == nullptr)
return;

auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
!ProfDataName->getString().equals("VP")))
return;

LLVMContext &C = I.getContext();

MDBuilder MDB(C);
SmallVector<Metadata *, 3> Vals;
Vals.push_back(ProfileData->getOperand(0));
APInt APS(128, S), APT(128, T);
if (ProfDataName->getString().equals("branch_weights") &&
ProfileData->getNumOperands() > 0) {
// Using APInt::div may be expensive, but most cases should fit 64 bits.
APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
->getValue()
.getZExtValue());
Val *= APS;
Vals.push_back(MDB.createConstant(ConstantInt::get(
Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX))));
} else if (ProfDataName->getString().equals("VP"))
for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
// The first value is the key of the value profile, which will not change.
Vals.push_back(ProfileData->getOperand(i));
uint64_t Count =
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
->getValue()
.getZExtValue();
// Don't scale the magic number.
if (Count == NOMORE_ICP_MAGICNUM) {
Vals.push_back(ProfileData->getOperand(i + 1));
continue;
}
// Using APInt::div may be expensive, but most cases should fit 64 bits.
APInt Val(128, Count);
Val *= APS;
Vals.push_back(MDB.createConstant(ConstantInt::get(
Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue())));
}
I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
}

} // namespace llvm
64 changes: 64 additions & 0 deletions llvm/test/Transforms/Inline/update_invoke_prof.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
; A pre-commit test to show that branch weights and value profiles associated with invoke are not updated.
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s

declare i32 @__gxx_personality_v0(...)

define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
call void @callee(ptr %func), !prof !16
ret void
}

declare void @inner_callee(ptr %func)

define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
invoke void %func()
to label %next unwind label %lpad, !prof !18

next:
invoke void @inner_callee(ptr %func)
to label %ret unwind label %lpad, !prof !19

lpad:
%exn = landingpad {ptr, i32}
cleanup
unreachable

ret:
ret void
}

!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 2000}
!8 = !{!"NumCounts", i64 2}
!9 = !{!"NumFunctions", i64 2}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"function_entry_count", i64 1000}
!16 = !{!"branch_weights", i64 1000}
!17 = !{!"function_entry_count", i32 1500}
!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
!19 = !{!"branch_weights", i32 1500}

; CHECK-LABEL: @caller(
; CHECK: invoke void %func(
; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]
; CHECK: invoke void @inner_callee(
; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]]

; CHECK-LABL: @callee(
; CHECK: invoke void %func(
; CHECK-NEXT: {{.*}} !prof ![[PROF1]]
; CHECK: invoke void @inner_callee(
; CHECK-NEXT: {{.*}} !prof ![[PROF2]]

; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1500}
81 changes: 81 additions & 0 deletions llvm/test/Transforms/Inline/update_value_profile.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; When 'callee' is inlined into caller1 and caller2, the indirect call value
; profiles of the inlined copy should be scaled based on callers' profiles,
; and the indirect call value profiles in 'callee' should be updated.
define i32 @callee(ptr %0, i32 %1) !prof !20 {
; CHECK-LABEL: define i32 @callee(
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%3 = load ptr, ptr %0
%5 = getelementptr inbounds i8, ptr %3, i64 8
%6 = load ptr, ptr %5
%7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
ret i32 %7
}

define i32 @caller1(i32 %0) !prof !18 {
; CHECK-LABEL: define i32 @caller1(
; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%2 = tail call ptr @_Z10createTypei(i32 %0)
%3 = tail call i32 @callee(ptr %2, i32 %0)
ret i32 %3
}

define i32 @caller2(i32 %0) !prof !19 {
; CHECK-LABEL: define i32 @caller2(
; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%2 = tail call ptr @_Z10createTypei(i32 %0)
%3 = tail call i32 @callee(ptr %2, i32 %0)
ret i32 %3
}

declare ptr @_Z10createTypei(i32)

!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!17 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600}
!18 = !{!"function_entry_count", i64 1000}
!19 = !{!"function_entry_count", i64 600}
!20 = !{!"function_entry_count", i64 1700}
;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35}
; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352}
; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600}
; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211}
;.