Skip to content

Commit 04c3040

Browse files
committed
[InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF
`__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but are not referenced via relocation in the translation unit. With `-z start-stop-gc` (D96914 https://sourceware.org/bugzilla/show_bug.cgi?id=27451), the linker no longer lets `__start_/__stop_` references retain them. Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` so that they are retained by the linker. This patch changes most existing `UsedVars` cases to `CompilerUsedVars` to reflect the ideal state: if the binary format properly supports section-based GC (dead stripping), `llvm.compiler.used` should be sufficient. `__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars` since we want them to be unconditionally retained by both the compiler and the linker. Behavior on the other binary formats (COFF/Mach-O) is not affected. Differential Revision: https://reviews.llvm.org/D97649
1 parent 5a9c349 commit 04c3040

File tree

6 files changed

+123
-75
lines changed

6 files changed

+123
-75
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#include <string.h>
2+
3+
void (*f0)();
4+
void (*f1)();
5+
void (*f2)();
6+
7+
char dst[200];
8+
char src[200];
9+
volatile int n;
10+
11+
__attribute__((noinline)) void foo() {}
12+
13+
__attribute__((noinline)) void bar() {
14+
f0 = foo;
15+
f1 = foo;
16+
f2 = foo;
17+
n = 4;
18+
}
19+
int main(int argc, char *argv[]) {
20+
int i;
21+
bar();
22+
if (argc == 1) {
23+
f0();
24+
for (i = 0; i < 9; i++)
25+
f1();
26+
for (i = 0; i < 99; i++)
27+
f2();
28+
} else {
29+
memcpy((void *)dst, (void *)src, n);
30+
for (i = 0; i < 6; i++)
31+
memcpy((void *)(dst + 2), (void *)src, n + 1);
32+
for (i = 0; i < 66; i++)
33+
memcpy((void *)(dst + 9), (void *)src, n + 2);
34+
}
35+
}
36+
37+
// CHECK: Counters:
38+
// CHECK: main:
39+
// CHECK: Hash: 0x0a9bd81e87ab6e87
40+
// CHECK: Counters: 6
41+
// CHECK: Indirect Call Site Count: 3
42+
// CHECK: Number of Memory Intrinsics Calls: 3
43+
// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
44+
// CHECK: Indirect Target Results:
45+
// CHECK: [ 0, foo, 3 ]
46+
// CHECK: [ 1, foo, 27 ]
47+
// CHECK: [ 2, foo, 297 ]
48+
// CHECK: Memory Intrinsic Size Results:
49+
// CHECK: [ 0, 4, 2 ]
50+
// CHECK: [ 1, 5, 12 ]
51+
// CHECK: [ 2, 6, 132 ]
52+
// CHECK: Instrumentation level: IR
53+
// CHECK: Functions shown: 1
54+
// CHECK: Total functions: 3
55+
// CHECK: Maximum function count: 327
56+
// CHECK: Maximum internal block count: 297
57+
// CHECK: Statistics for indirect call sites profile:
58+
// CHECK: Total number of sites: 3
59+
// CHECK: Total number of sites with values: 3
60+
// CHECK: Total number of profiled values: 3
61+
// CHECK: Value sites histogram:
62+
// CHECK: NumTargets, SiteCount
63+
// CHECK: 1, 3
64+
// CHECK: Statistics for memory intrinsic calls sizes profile:
65+
// CHECK: Total number of sites: 3
66+
// CHECK: Total number of sites with values: 3
67+
// CHECK: Total number of profiled values: 3
68+
// CHECK: Value sites histogram:
69+
// CHECK: NumTargets, SiteCount
70+
// CHECK: 1, 3
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// REQUIRES: lld-available
2+
3+
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc
4+
// RUN: rm -rf %t.profdir
5+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
6+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
7+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
8+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
9+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
10+
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
Lines changed: 19 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,27 @@
1-
// RUN: %clang_pgogen -o %t -O3 %s
1+
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c
22
// RUN: rm -rf %t.profdir
33
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
44
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
55
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
66
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
77
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
8-
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s
8+
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
99

10-
#include <string.h>
11-
12-
void (*f0)();
13-
void (*f1)();
14-
void (*f2)();
15-
16-
char dst[200];
17-
char src[200];
18-
volatile int n;
19-
20-
__attribute__((noinline)) void foo() {}
21-
22-
__attribute__((noinline)) void bar() {
23-
f0 = foo;
24-
f1 = foo;
25-
f2 = foo;
26-
n = 4;
27-
}
28-
int main(int argc, char *argv[]) {
29-
int i;
30-
bar();
31-
if (argc == 1) {
32-
f0();
33-
for (i = 0; i < 9; i++)
34-
f1();
35-
for (i = 0; i < 99; i++)
36-
f2();
37-
} else {
38-
memcpy((void *)dst, (void *)src, n);
39-
for (i = 0; i < 6; i++)
40-
memcpy((void *)(dst + 2), (void *)src, n + 1);
41-
for (i = 0; i < 66; i++)
42-
memcpy((void *)(dst + 9), (void *)src, n + 2);
43-
}
44-
}
10+
/// -z start-stop-gc requires binutils 2.37.
11+
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections
12+
// RUN: rm -rf %t.profdir
13+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
14+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
15+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
16+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
17+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
18+
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c
4519

46-
// CHECK: Counters:
47-
// CHECK: main:
48-
// CHECK: Hash: 0x0a9bd81e87ab6e87
49-
// CHECK: Counters: 6
50-
// CHECK: Indirect Call Site Count: 3
51-
// CHECK: Number of Memory Intrinsics Calls: 3
52-
// CHECK: Block counts: [27, 297, 12, 132, 3, 2]
53-
// CHECK: Indirect Target Results:
54-
// CHECK: [ 0, foo, 3 ]
55-
// CHECK: [ 1, foo, 27 ]
56-
// CHECK: [ 2, foo, 297 ]
57-
// CHECK: Memory Intrinsic Size Results:
58-
// CHECK: [ 0, 4, 2 ]
59-
// CHECK: [ 1, 5, 12 ]
60-
// CHECK: [ 2, 6, 132 ]
61-
// CHECK: Instrumentation level: IR
62-
// CHECK: Functions shown: 1
63-
// CHECK: Total functions: 3
64-
// CHECK: Maximum function count: 327
65-
// CHECK: Maximum internal block count: 297
66-
// CHECK: Statistics for indirect call sites profile:
67-
// CHECK: Total number of sites: 3
68-
// CHECK: Total number of sites with values: 3
69-
// CHECK: Total number of profiled values: 3
70-
// CHECK: Value sites histogram:
71-
// CHECK: NumTargets, SiteCount
72-
// CHECK: 1, 3
73-
// CHECK: Statistics for memory intrinsic calls sizes profile:
74-
// CHECK: Total number of sites: 3
75-
// CHECK: Total number of sites with values: 3
76-
// CHECK: Total number of profiled values: 3
77-
// CHECK: Value sites histogram:
78-
// CHECK: NumTargets, SiteCount
79-
// CHECK: 1, 3
20+
// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections
21+
// RUN: rm -rf %t.profdir
22+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
23+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
24+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
25+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
26+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
27+
// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c

llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
5757
}
5858
};
5959
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
60+
std::vector<GlobalValue *> CompilerUsedVars;
6061
std::vector<GlobalValue *> UsedVars;
6162
std::vector<GlobalVariable *> ReferencedNames;
6263
GlobalVariable *NamesVar;

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ bool InstrProfiling::run(
539539
NamesVar = nullptr;
540540
NamesSize = 0;
541541
ProfileDataMap.clear();
542+
CompilerUsedVars.clear();
542543
UsedVars.clear();
543544
TT = Triple(M.getTargetTriple());
544545

@@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
921922
ProfileDataMap[NamePtr] = PD;
922923

923924
// Mark the data variable as used so that it isn't stripped out.
924-
UsedVars.push_back(Data);
925+
CompilerUsedVars.push_back(Data);
925926
// Now that the linkage set by the FE has been passed to the data and counter
926927
// variables, reset Name variable's linkage and visibility to private so that
927928
// it can be removed later by the compiler.
@@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() {
976977
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
977978
VNodesVar->setSection(
978979
getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
980+
// VNodesVar is used by runtime but not referenced via relocation by other
981+
// sections. Conservatively make it linker retained.
979982
UsedVars.push_back(VNodesVar);
980983
}
981984

@@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() {
10041007
// linker from inserting padding before the start of the names section or
10051008
// between names entries.
10061009
NamesVar->setAlignment(Align(1));
1010+
// NamesVar is used by runtime but not referenced via relocation by other
1011+
// sections. Conservatively make it linker retained.
10071012
UsedVars.push_back(NamesVar);
10081013

10091014
for (auto *NamePtr : ReferencedNames)
@@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() {
10311036
getInstrProfRegFuncName(), M);
10321037

10331038
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
1039+
for (Value *Data : CompilerUsedVars)
1040+
if (Data != NamesVar && !isa<Function>(Data))
1041+
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
10341042
for (Value *Data : UsedVars)
10351043
if (Data != NamesVar && !isa<Function>(Data))
10361044
IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
@@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() {
10811089
IRB.CreateRet(Load);
10821090

10831091
// Mark the user variable as used so that it isn't stripped out.
1084-
UsedVars.push_back(User);
1092+
CompilerUsedVars.push_back(User);
10851093
return true;
10861094
}
10871095

@@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() {
10941102
// or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise,
10951103
// conservatively make all of them retained by the linker.
10961104
if (TT.isOSBinFormatELF())
1097-
appendToCompilerUsed(*M, UsedVars);
1105+
appendToCompilerUsed(*M, CompilerUsedVars);
10981106
else
1099-
appendToUsed(*M, UsedVars);
1107+
appendToUsed(*M, CompilerUsedVars);
1108+
1109+
// We do not add proper references from used metadata sections to NamesVar and
1110+
// VNodesVar, so we have to be conservative and place them in llvm.used
1111+
// regardless of the target.
1112+
appendToUsed(*M, UsedVars);
11001113
}
11011114

11021115
void InstrProfiling::emitInitialization() {

llvm/test/Instrumentation/InstrProfiling/icall.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ attributes #0 = { nounwind }
5050
; DYN-NOT: @__profvp_foo
5151
; DYN-NOT: @__llvm_prf_vnodes
5252

53+
;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections.
54+
;; We have to conservatively place them in llvm.used.
55+
; STATIC: @llvm.used = appending global
56+
; STATIC-SAME: @__llvm_prf_vnodes
57+
; STATIC-SAME: @__llvm_prf_nm
58+
5359
; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
5460
; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
5561
; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)

0 commit comments

Comments
 (0)