Skip to content

Commit 54adf41

Browse files
[TypeProf][InstrFDO] Implement a more efficient comparison sequence for
indirect-call-promotion with vtable profiles. Clang's `-fwhole-program-vtables` is required for this optimization to take place; if `-fwhole-program-vtables` is not enabled, this change is a no-op. Function-comparison (before): VTable-comparison (after): Key changes: 1. Find virtual calls and the vtables they come from. - ICP relies on the type intrinsics `llvm.type.test` and `llvm.public.type.test` to find virtual calls and their compatible vtables, and relies on type metadata to find the address point (offset) for comparison. 2. The ICP pass does a cost-benefit analysis and compares vtables only when both conditions are met: 1) the function addressing and vtable load can sink to the indirect fallback, and the indirect fallback is a cold block; 2) the number of vtables for a function candidate is within the (option-specified) threshold. 3. Sink the function-addressing and vtable-load instructions to the indirect fallback. - The sink helper functions are simplified versions of `InstCombinerImpl::tryToSinkInstruction`. - The helper functions that handle debug intrinsics are copied from `InstCombinerImpl::tryToSinkInstructionDbgValues` and `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` into Transforms/Utils/Local.cpp. Ideally only one copy should exist for inst-combine, ICP and other passes. 4. Keep value profiles updated: 1) update vtable value profiles after inlining; 2) for either function-based or vtable-based comparison, update both the vtable and the indirect-call value profiles.
1 parent 03538e3 commit 54adf41

File tree

12 files changed

+950
-673
lines changed

12 files changed

+950
-673
lines changed

compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp

Lines changed: 60 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,61 @@
55
// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
66
// UNSUPPORTED: ppc && host-byteorder-big-endian
77

8-
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o %t-test
9-
// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test
8+
// RUN: rm -rf %t && mkdir %t && cd %t
9+
10+
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
11+
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test
1012

1113
// Show vtable profiles from raw profile.
12-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
14+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
1315

1416
// Generate indexed profile from raw profile and show the data.
15-
// RUN: llvm-profdata merge %t-test.profraw -o %t-test.profdata
16-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
17+
// RUN: llvm-profdata merge test.profraw -o test.profdata
18+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
1719

1820
// Generate text profile from raw and indexed profiles respectively and show the data.
19-
// RUN: llvm-profdata merge --text %t-test.profraw -o %t-raw.proftext
20-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT
21-
// RUN: llvm-profdata merge --text %t-test.profdata -o %t-indexed.proftext
22-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT
21+
// RUN: llvm-profdata merge --text test.profraw -o raw.proftext
22+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT
23+
// RUN: llvm-profdata merge --text test.profdata -o indexed.proftext
24+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT
2325

2426
// Generate indexed profile from text profiles and show the data
25-
// RUN: llvm-profdata merge --binary %t-raw.proftext -o %t-text.profraw
26-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
27-
// RUN: llvm-profdata merge --binary %t-indexed.proftext -o %t-text.profdata
28-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
27+
// RUN: llvm-profdata merge --binary raw.proftext -o text.profraw
28+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
29+
// RUN: llvm-profdata merge --binary indexed.proftext -o text.profdata
30+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
2931

3032
// COMMON: Counters:
3133
// COMMON-NEXT: main:
32-
// COMMON-NEXT: Hash: 0x0f9a16fe6d398548
33-
// COMMON-NEXT: Counters: 2
34+
// COMMON-NEXT: Hash: 0x068617320ec408a0
35+
// COMMON-NEXT: Counters: 4
3436
// COMMON-NEXT: Indirect Call Site Count: 2
3537
// COMMON-NEXT: Number of instrumented vtables: 2
3638
// RAW: Indirect Target Results:
37-
// RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
38-
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
39-
// RAW-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
40-
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
39+
// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
40+
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
41+
// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
42+
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
4143
// RAW-NEXT: VTable Results:
42-
// RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
43-
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
44+
// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
45+
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
4446
// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
4547
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
4648
// INDEXED: Indirect Target Results:
47-
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
48-
// INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
49-
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
50-
// INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
49+
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
50+
// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
51+
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
52+
// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
5153
// INDEXED-NEXT: VTable Results:
52-
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
53-
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
54+
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
55+
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
5456
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
5557
// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
5658
// COMMON: Instrumentation level: IR entry_first = 0
5759
// COMMON-NEXT: Functions shown: 1
58-
// COMMON-NEXT: Total functions: 6
60+
// COMMON-NEXT: Total functions: 7
5961
// COMMON-NEXT: Maximum function count: 1000
60-
// COMMON-NEXT: Maximum internal block count: 250
62+
// COMMON-NEXT: Maximum internal block count: 1000
6163
// COMMON-NEXT: Statistics for indirect call sites profile:
6264
// COMMON-NEXT: Total number of sites: 2
6365
// COMMON-NEXT: Total number of sites with values: 2
@@ -76,11 +78,13 @@
7678
// ICTEXT: :ir
7779
// ICTEXT: main
7880
// ICTEXT: # Func Hash:
79-
// ICTEXT: 1124236338992350536
81+
// ICTEXT: 470088714870327456
8082
// ICTEXT: # Num Counters:
81-
// ICTEXT: 2
83+
// ICTEXT: 4
8284
// ICTEXT: # Counter Values:
8385
// ICTEXT: 1000
86+
// ICTEXT: 1000
87+
// ICTEXT: 200
8488
// ICTEXT: 1
8589
// ICTEXT: # Num Value Kinds:
8690
// ICTEXT: 2
@@ -89,41 +93,50 @@
8993
// ICTEXT: # NumValueSites:
9094
// ICTEXT: 2
9195
// ICTEXT: 2
92-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750
93-
// ICTEXT: _ZN8Derived15func1Eii:250
96+
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
97+
// ICTEXT: _ZN8Derived14funcEii:50
9498
// ICTEXT: 2
95-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750
96-
// ICTEXT: _ZN8Derived15func2Eii:250
99+
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
100+
// ICTEXT: _ZN8Derived1D0Ev:250
97101
// ICTEXT: # ValueKind = IPVK_VTableTarget:
98102
// ICTEXT: 2
99103
// ICTEXT: # NumValueSites:
100104
// ICTEXT: 2
101105
// ICTEXT: 2
102-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
103-
// ICTEXT: _ZTV8Derived1:250
106+
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
107+
// ICTEXT: _ZTV8Derived1:50
104108
// ICTEXT: 2
105109
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
106110
// ICTEXT: _ZTV8Derived1:250
107111

112+
// Test indirect call promotion transformation using vtable profiles.
113+
// RUN: %clangxx -fprofile-use=test.profdata -fuse-ld=lld -flto=thin -fwhole-program-vtables -O2 -mllvm -enable-vtable-value-profiling -mllvm -icp-enable-vtable-cmp -Rpass=pgo-icall-prom %s 2>&1 | FileCheck %s --check-prefix=REMARK --implicit-check-not="!VP"
114+
115+
// REMARK: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, compare 1 vtables and sink 1 instructions
116+
// REMARK: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, compare 1 vtables and sink 1 instructions
117+
// REMARK: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, compare 1 vtables and sink 2 instructions
118+
// REMARK: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, compare 1 vtables and sink 2 instructions
119+
108120
#include <cstdio>
109121
#include <cstdlib>
110122
class Base {
111123
public:
112-
virtual int func1(int a, int b) = 0;
113-
virtual int func2(int a, int b) = 0;
124+
virtual int func(int a, int b) = 0;
125+
126+
virtual ~Base() {};
114127
};
115128
class Derived1 : public Base {
116129
public:
117-
int func1(int a, int b) override { return a + b; }
130+
int func(int a, int b) override { return a * b; }
118131

119-
int func2(int a, int b) override { return a * b; }
132+
~Derived1() {}
120133
};
121134
namespace {
122135
class Derived2 : public Base {
123136
public:
124-
int func1(int a, int b) override { return a - b; }
137+
int func(int a, int b) override { return a * (a - b); }
125138

126-
int func2(int a, int b) override { return a * (a - b); }
139+
~Derived2() {}
127140
};
128141
} // namespace
129142
__attribute__((noinline)) Base *createType(int a) {
@@ -140,7 +153,10 @@ int main(int argc, char **argv) {
140153
int a = rand();
141154
int b = rand();
142155
Base *ptr = createType(i);
143-
sum += ptr->func1(a, b) + ptr->func2(b, a);
156+
if (i % 5 == 0)
157+
sum += ptr->func(b, a);
158+
159+
delete ptr;
144160
}
145161
printf("sum is %d\n", sum);
146162
return 0;

llvm/include/llvm/Analysis/IndirectCallVisitor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
3737
// A heuristic is used to find the address feeding instructions.
3838
static Instruction *tryGetVTableInstruction(CallBase *CB) {
3939
assert(CB != nullptr && "Caller guaranteed");
40+
if (!CB->isIndirectCall())
41+
return nullptr;
42+
4043
LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
4144

4245
if (LI != nullptr) {

llvm/include/llvm/Transforms/Utils/Local.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,15 @@ void salvageDebugInfoForDbgValues(Instruction &I,
316316
ArrayRef<DbgVariableIntrinsic *> Insns,
317317
ArrayRef<DbgVariableRecord *> DPInsns);
318318

319+
void tryToSinkInstructionDbgValues(
320+
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
321+
BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers);
322+
323+
void tryToSinkInstructionDPValues(
324+
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
325+
BasicBlock *DestBlock,
326+
SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
327+
319328
/// Given an instruction \p I and DIExpression \p DIExpr operating on
320329
/// it, append the effects of \p I to the DIExpression operand list
321330
/// \p Ops, or return \p nullptr if it cannot be salvaged.

0 commit comments

Comments
 (0)