Skip to content

Commit 14756b7

Browse files
committed
[SampleFDO] Don't mix up the existing indirect call value profile with the new
value profile annotated after inlining. In https://reviews.llvm.org/D96806 and https://reviews.llvm.org/D97350, we use the magic number -1 in the value profile to avoid repeated indirect call promotion to the same target for an indirect call. Function updateIDTMetaData is used to mark an target as being promoted in the value profile with the magic number. updateIDTMetaData is also used to update the value profile when an indirect call is inlined and new inline instance profile should be applied. For the second case, currently updateIDTMetaData mixes up the existing value profile of the indirect call with the new profile, leading to the problematic senario that a target count is larger than the total count in the value profile. The patch fixes the problem. When updateIDTMetaData is used to update the value profile after inlining, all the values in the existing value profile will be dropped except the values with the magic number counts. Differential Revision: https://reviews.llvm.org/D98835
1 parent 92ccc6c commit 14756b7

File tree

3 files changed

+112
-32
lines changed

3 files changed

+112
-32
lines changed

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -755,14 +755,8 @@ static void
755755
updateIDTMetaData(Instruction &Inst,
756756
const SmallVectorImpl<InstrProfValueData> &CallTargets,
757757
uint64_t Sum) {
758-
assert((Sum != 0 || (CallTargets.size() == 1 &&
759-
CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) &&
760-
"If sum is 0, assume only one element in CallTargets with count "
761-
"being NOMORE_ICP_MAGICNUM");
762-
763758
uint32_t NumVals = 0;
764759
// OldSum is the existing total count in the value profile data.
765-
// It will be replaced by Sum if Sum is not 0.
766760
uint64_t OldSum = 0;
767761
std::unique_ptr<InstrProfValueData[]> ValueData =
768762
std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
@@ -771,34 +765,44 @@ updateIDTMetaData(Instruction &Inst,
771765
ValueData.get(), NumVals, OldSum, true);
772766

773767
DenseMap<uint64_t, uint64_t> ValueCountMap;
774-
// Initialize ValueCountMap with existing value profile data.
775-
if (Valid) {
776-
for (uint32_t I = 0; I < NumVals; I++)
777-
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
778-
}
779-
780-
for (const auto &Data : CallTargets) {
781-
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
782-
if (Pair.second)
783-
continue;
784-
// Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it
785-
// in the ValueCountMap. If both the count in CallTargets and the
786-
// count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the
787-
// count in CallTargets.
788-
if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
789-
Data.Count == NOMORE_ICP_MAGICNUM) {
768+
if (Sum == 0) {
769+
assert((CallTargets.size() == 1 &&
770+
CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
771+
"If sum is 0, assume only one element in CallTargets "
772+
"with count being NOMORE_ICP_MAGICNUM");
773+
// Initialize ValueCountMap with existing value profile data.
774+
if (Valid) {
775+
for (uint32_t I = 0; I < NumVals; I++)
776+
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
777+
}
778+
auto Pair =
779+
ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
780+
// If the target already exists in value profile, decrease the total
781+
// count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
782+
if (!Pair.second) {
790783
OldSum -= Pair.first->second;
791784
Pair.first->second = NOMORE_ICP_MAGICNUM;
792-
} else if (Pair.first->second == NOMORE_ICP_MAGICNUM &&
793-
Data.Count != NOMORE_ICP_MAGICNUM) {
785+
}
786+
Sum = OldSum;
787+
} else {
788+
// Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
789+
// counts in the value profile.
790+
if (Valid) {
791+
for (uint32_t I = 0; I < NumVals; I++) {
792+
if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
793+
ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
794+
}
795+
}
796+
797+
for (const auto &Data : CallTargets) {
798+
auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
799+
if (Pair.second)
800+
continue;
801+
// The target represented by Data.Value has already been promoted.
802+
// Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
803+
// Sum by Data.Count.
794804
assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
795805
Sum -= Data.Count;
796-
} else if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
797-
Data.Count != NOMORE_ICP_MAGICNUM) {
798-
// Sum will be used in this case. Although the existing count
799-
// for the current value in value profile will be overriden,
800-
// no need to update OldSum.
801-
Pair.first->second = Data.Count;
802806
}
803807
}
804808

@@ -818,8 +822,7 @@ updateIDTMetaData(Instruction &Inst,
818822
uint32_t MaxMDCount =
819823
std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
820824
annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
821-
NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget,
822-
MaxMDCount);
825+
NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
823826
}
824827

825828
/// Attempt to promote indirect call and also inline the promoted call.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
_Z3foov:225715:1
2+
2: 5553
3+
3: 5391
4+
1: _Z3goov:5860
5+
1: 5279 _Z3hoov:5860 _Z3moov:210
6+
2: 5279
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; RUN: opt < %s -passes=sample-profile -sample-profile-icp-max-prom=4 -sample-profile-file=%S/Inputs/norepeated-icp-3.prof -S | FileCheck %s
2+
3+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
@.str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1
7+
@p = dso_local global void ()* null, align 8
8+
@str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1
9+
10+
; Function Attrs: nofree nounwind
11+
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1
12+
13+
; Function Attrs: uwtable mustprogress
14+
define dso_local void @_Z3goov() #0 !dbg !11 {
15+
entry:
16+
%0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
17+
call void %0(), !dbg !17, !prof !22
18+
ret void, !dbg !18
19+
}
20+
21+
; After the indirect call in _Z3goov is inlined into _Z3foov, it will be
22+
; annotated with new inline instance profile. The existing value profile
23+
; associated with the indirect call should be dropped except those values
24+
; wth NOMORE_ICP_MAGICNUM magic number indicating promoted targets.
25+
; CHECK-LABEL: @_Z3foov(
26+
; CHECK: call void %0(), {{.*}} !prof ![[PROF_ID:[0-9]+]]
27+
; CHECK-NEXT: ret void
28+
29+
; Function Attrs: uwtable mustprogress
30+
define dso_local void @_Z3foov() #0 !dbg !19 {
31+
entry:
32+
call void @_Z3goov(), !dbg !20
33+
ret void, !dbg !21
34+
}
35+
36+
; Function Attrs: nofree nounwind
37+
declare noundef i32 @puts(i8* nocapture noundef readonly) #2
38+
39+
attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
40+
attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
41+
attributes #2 = { nofree nounwind }
42+
43+
!llvm.dbg.cu = !{!0}
44+
!llvm.module.flags = !{!3, !4, !5}
45+
!llvm.ident = !{!6}
46+
47+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
48+
!1 = !DIFile(filename: "1.cc", directory: "")
49+
!2 = !{}
50+
!3 = !{i32 7, !"Dwarf Version", i32 4}
51+
!4 = !{i32 2, !"Debug Info Version", i32 3}
52+
!5 = !{i32 1, !"wchar_size", i32 4}
53+
!6 = !{!""}
54+
!8 = !DISubroutineType(types: !2)
55+
!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
56+
!12 = !DILocation(line: 7, column: 5, scope: !11)
57+
!13 = !{!14, !14, i64 0}
58+
!14 = !{!"any pointer", !15, i64 0}
59+
!15 = !{!"omnipotent char", !16, i64 0}
60+
!16 = !{!"Simple C++ TBAA"}
61+
!17 = !DILocation(line: 7, column: 3, scope: !11)
62+
!18 = !DILocation(line: 8, column: 1, scope: !11)
63+
!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
64+
!20 = !DILocation(line: 11, column: 3, scope: !19)
65+
!21 = !DILocation(line: 12, column: 3, scope: !19)
66+
; The original value 125292384912345234234 and its count 8000 should
67+
; be dropped when the indirect call is annotated with new profile.
68+
; The original value -7383239051784516332 and its count -1 should be kept
69+
; because -1 is NOMORE_ICP_MAGICNUM.
70+
; CHECK: ![[PROF_ID]] = !{!"VP", i32 0, i64 5860, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 5860}
71+
!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}

0 commit comments

Comments
 (0)