Skip to content

Commit 1d1b66d

Browse files
committed
[profile] Add %b LLVM_PROFILE_FILE option for binary ID
Add support for expanding `%b` in `LLVM_PROFILE_FILE` to the binary ID (build ID). It can be used with `%m` to avoid its signature collisions. This is supported on all platforms where writing binary IDs into profiles is implemented, as the `__llvm_write_binary_ids` function is used. Fixes #51560.
1 parent d80eb92 commit 1d1b66d

File tree

4 files changed

+108
-8
lines changed

4 files changed

+108
-8
lines changed

clang/docs/SourceBasedCodeCoverage.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,11 @@ directory structure will be created. Additionally, the following special
9494
not specified (i.e. the pattern is "%m"), it's assumed that ``N = 1``. The
9595
merge pool specifier can only occur once per filename pattern.
9696

97+
* "%b" expands out to the binary ID (build ID). It can be used with "%Nm" to
98+
avoid binary signature collisions. To use it, the program should be compiled
99+
with the build ID linker option (``--build-id`` for GNU ld or LLD). Linux,
100+
Windows and AIX are supported.
101+
97102
* "%c" expands out to nothing, but enables a mode in which profile counter
98103
updates are continuously synced to a file. This means that if the
99104
instrumented program crashes, or is killed by a signal, perfect coverage

clang/docs/UsersManual.rst

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2880,7 +2880,8 @@ instrumentation:
28802880
environment variable to specify an alternate file. If non-default file name
28812881
is specified by both the environment variable and the command line option,
28822882
the environment variable takes precedence. The file name pattern specified
2883-
can include different modifiers: ``%p``, ``%h``, ``%m``, ``%t``, and ``%c``.
2883+
can include different modifiers: ``%p``, ``%h``, ``%m``, ``%b``, ``%t``, and
2884+
``%c``.
28842885

28852886
Any instance of ``%p`` in that file name will be replaced by the process
28862887
ID, so that you can easily distinguish the profile output from multiple
@@ -2902,11 +2903,11 @@ instrumentation:
29022903
``%p`` is that the storage requirement for raw profile data files is greatly
29032904
increased. To avoid issues like this, the ``%m`` specifier can be used in the profile
29042905
name. When this specifier is used, the profiler runtime will substitute ``%m``
2905-
with a unique integer identifier associated with the instrumented binary. Additionally,
2906+
with an integer identifier associated with the instrumented binary. Additionally,
29062907
multiple raw profiles dumped from different processes that share a file system (can be
29072908
on different hosts) will be automatically merged by the profiler runtime during the
29082909
dumping. If the program links in multiple instrumented shared libraries, each library
2909-
will dump the profile data into its own profile data file (with its unique integer
2910+
will dump the profile data into its own profile data file (with its integer
29102911
id embedded in the profile name). Note that the merging enabled by ``%m`` is for raw
29112912
profile data generated by profiler runtime. The resulting merged "raw" profile data
29122913
file still needs to be converted to a different format expected by the compiler (
@@ -2916,6 +2917,12 @@ instrumentation:
29162917
29172918
$ LLVM_PROFILE_FILE="code-%m.profraw" ./code
29182919
2920+
Although rare, binary signatures used by the ``%m`` specifier can have
2921+
collisions. In this case, the ``%b`` specifier, which expands to the binary
2922+
ID (build ID), can be added. To use it, the program should be compiled with
2923+
the build ID linker option (``--build-id`` for GNU ld or LLD). Linux, Windows
2924+
and AIX are supported.
2925+
29192926
See `this <SourceBasedCodeCoverage.html#running-the-instrumented-program>`_ section
29202927
about the ``%t`` and ``%c`` modifiers.
29212928

compiler-rt/lib/profile/InstrProfilingFile.c

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ typedef struct lprofFilename {
7777
char Hostname[COMPILER_RT_MAX_HOSTLEN];
7878
unsigned NumPids;
7979
unsigned NumHosts;
80+
unsigned NumBinaryIds;
8081
/* When in-process merging is enabled, this parameter specifies
8182
* the total number of profile data files shared by all the processes
8283
* spawned from the same binary. By default the value is 1. If merging
@@ -88,8 +89,8 @@ typedef struct lprofFilename {
8889
ProfileNameSpecifier PNS;
8990
} lprofFilename;
9091

91-
static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL,
92-
{0}, 0, 0, 0, PNS_unknown};
92+
static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL, {0},
93+
0, 0, 0, 0, PNS_unknown};
9394

9495
static int ProfileMergeRequested = 0;
9596
static int getProfileFileSizeForMerging(FILE *ProfileFile,
@@ -790,7 +791,7 @@ static int checkBounds(int Idx, int Strlen) {
790791
* lprofCurFilename structure. */
791792
static int parseFilenamePattern(const char *FilenamePat,
792793
unsigned CopyFilenamePat) {
793-
int NumPids = 0, NumHosts = 0, I;
794+
int NumPids = 0, NumHosts = 0, NumBinaryIds = 0, I;
794795
char *PidChars = &lprofCurFilename.PidChars[0];
795796
char *Hostname = &lprofCurFilename.Hostname[0];
796797
int MergingEnabled = 0;
@@ -855,6 +856,16 @@ static int parseFilenamePattern(const char *FilenamePat,
855856
FilenamePat);
856857
return -1;
857858
}
859+
} else if (FilenamePat[I] == 'b') {
860+
if (!NumBinaryIds++) {
861+
/* Check if binary ID does not exist or if its size is 0. */
862+
if (__llvm_write_binary_ids(NULL) <= 0) {
863+
PROF_WARN("Unable to get binary ID for filename pattern %s. Using "
864+
"the default name.",
865+
FilenamePat);
866+
return -1;
867+
}
868+
}
858869
} else if (FilenamePat[I] == 'c') {
859870
if (__llvm_profile_is_continuous_mode_enabled()) {
860871
PROF_WARN("%%c specifier can only be specified once in %s.\n",
@@ -887,6 +898,7 @@ static int parseFilenamePattern(const char *FilenamePat,
887898

888899
lprofCurFilename.NumPids = NumPids;
889900
lprofCurFilename.NumHosts = NumHosts;
901+
lprofCurFilename.NumBinaryIds = NumBinaryIds;
890902
return 0;
891903
}
892904

@@ -934,24 +946,53 @@ static void parseAndSetFilename(const char *FilenamePat,
934946
* filename with PID and hostname substitutions. */
935947
/* The length to hold uint64_t followed by 3 digits pool id including '_' */
936948
#define SIGLEN 24
949+
/* The length to hold 160-bit hash in hexadecimal form */
950+
#define BINARY_ID_LEN 40
937951
/* Computes the buffer length to reserve for the current profile filename once
 * the specifiers recorded in lprofCurFilename are expanded. Returns 0 when no
 * filename pattern is set (NULL or empty), and the plain pattern length when
 * none of the substitution-related fields are set. */
static int getCurFilenameLength(void) {
938952
int Len;
939953
if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
940954
return 0;
941955

942956
/* Nothing to substitute: the pattern is used as the filename verbatim. */
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
943-
lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize))
957+
lprofCurFilename.NumBinaryIds || lprofCurFilename.TmpDir ||
958+
lprofCurFilename.MergePoolSize))
944959
return strlen(lprofCurFilename.FilenamePat);
945960

946961
/* Start from the pattern length and adjust per specifier occurrence. The
 * "- 2" terms presumably account for the two-character specifier that each
 * PID/hostname substitution replaces; each binary ID occurrence reserves
 * BINARY_ID_LEN characters; the "- 1" for TmpDir matches getCurFilename
 * writing TmpDir followed by one DIR_SEPARATOR. */
Len = strlen(lprofCurFilename.FilenamePat) +
947962
lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) +
948963
lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2) +
964+
lprofCurFilename.NumBinaryIds * BINARY_ID_LEN +
949965
(lprofCurFilename.TmpDir ? (strlen(lprofCurFilename.TmpDir) - 1) : 0);
950966
/* Merging enabled: reserve SIGLEN more characters for the signature and
 * pool id (see the SIGLEN definition). */
if (lprofCurFilename.MergePoolSize)
951967
Len += SIGLEN;
952968
return Len;
953969
}
954970

971+
/* Accumulator passed to binaryIdsStringWriter through the ProfDataWriter's
 * WriterCtx; it collects the binary ID rendered as hexadecimal text. */
typedef struct lprofBinaryIdsBuffer {
972+
/* Hexadecimal characters; sized for BINARY_ID_LEN digits plus one extra
 * byte (room for a terminating NUL). */
char String[BINARY_ID_LEN + 1];
973+
/* Running count of characters appended to String; advanced by
 * binaryIdsStringWriter and used by getCurFilename as the copy length. */
int Length;
974+
} lprofBinaryIdsBuffer;
975+
976+
/* Reads binary ID length and then its data, writes it into lprofBinaryIdsBuffer
977+
* in hexadecimal form. */
978+
static uint32_t binaryIdsStringWriter(ProfDataWriter *This,
979+
ProfDataIOVec *IOVecs,
980+
uint32_t NumIOVecs) {
981+
if (NumIOVecs < 2 || IOVecs[0].ElmSize != sizeof(uint64_t))
982+
return -1;
983+
uint64_t BinaryIdLen = *(const uint64_t *)IOVecs[0].Data;
984+
if (IOVecs[1].ElmSize != sizeof(uint8_t) || IOVecs[1].NumElm != BinaryIdLen)
985+
return -1;
986+
const uint8_t *BinaryIdData = (const uint8_t *)IOVecs[1].Data;
987+
lprofBinaryIdsBuffer *Data = (lprofBinaryIdsBuffer *)This->WriterCtx;
988+
for (uint64_t I = 0; I < BinaryIdLen; I++) {
989+
Data->Length +=
990+
snprintf(Data->String + Data->Length, BINARY_ID_LEN + 1 - Data->Length,
991+
"%02hhx", BinaryIdData[I]);
992+
}
993+
return 0;
994+
}
995+
955996
/* Return the pointer to the current profile file name (after substituting
956997
* PIDs and Hostnames in filename pattern. \p FilenameBuf is the buffer
957998
* to store the resulting filename. If no substitution is needed, the
@@ -965,7 +1006,8 @@ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
9651006
return 0;
9661007

9671008
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
968-
lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize ||
1009+
lprofCurFilename.NumBinaryIds || lprofCurFilename.TmpDir ||
1010+
lprofCurFilename.MergePoolSize ||
9691011
__llvm_profile_is_continuous_mode_enabled())) {
9701012
if (!ForceUseBuf)
9711013
return lprofCurFilename.FilenamePat;
@@ -992,6 +1034,12 @@ static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
9921034
memcpy(FilenameBuf + J, lprofCurFilename.TmpDir, TmpDirLength);
9931035
FilenameBuf[J + TmpDirLength] = DIR_SEPARATOR;
9941036
J += TmpDirLength + 1;
1037+
} else if (FilenamePat[I] == 'b') {
1038+
lprofBinaryIdsBuffer Data = {{0}, 0};
1039+
ProfDataWriter Writer = {binaryIdsStringWriter, &Data};
1040+
__llvm_write_binary_ids(&Writer);
1041+
memcpy(FilenameBuf + J, Data.String, Data.Length);
1042+
J += Data.Length;
9951043
} else {
9961044
if (!getMergePoolSize(FilenamePat, &I))
9971045
continue;
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// REQUIRES: linux
2+
// RUN: split-file %s %t.dir
3+
// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t.dir/foo %t.dir/foo.c
4+
// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t.dir/bar %t.dir/bar.c
5+
6+
// Check that foo and bar have the same signatures.
7+
// RUN: rm -rf %t.profdir
8+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.profraw %run %t.dir/foo
9+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.profraw %run %t.dir/bar 2>&1 | FileCheck %s --check-prefix=MERGE-ERROR
10+
11+
// Check that foo and bar have different binary IDs.
12+
// RUN: rm -rf %t.profdir %t.profdata
13+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/foo
14+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/bar
15+
// RUN: llvm-profdata merge -o %t.profdata %t.profdir
16+
// RUN: llvm-profdata show --binary-ids %t.profdata | FileCheck %s --check-prefix=BINARY-ID
17+
18+
// Check fallback to the default name if binary ID is missing.
19+
// RUN: %clang_profgen -Wl,--build-id=none -o %t.dir/foo %t.dir/foo.c
20+
// RUN: env LLVM_PROFILE_FILE=%t.profdir/%b.profraw %run %t.dir/foo 2>&1 | FileCheck %s --check-prefix=MISSING
21+
22+
// MERGE-ERROR: LLVM Profile Error: Profile Merging of file {{.*}}.profraw failed: File exists
23+
24+
// BINARY-ID: Instrumentation level: Front-end
25+
// BINARY-ID-NEXT: Total functions: 3
26+
// BINARY-ID-NEXT: Maximum function count: 2
27+
// BINARY-ID-NEXT: Maximum internal block count: 0
28+
// BINARY-ID-NEXT: Binary IDs:
29+
// BINARY-ID-NEXT: {{[0-9a-f]+}}
30+
// BINARY-ID-NEXT: {{[0-9a-f]+}}
31+
32+
// MISSING: Unable to get binary ID for filename pattern {{.*}}.profraw. Using the default name.
33+
34+
//--- foo.c
35+
int main(void) { return 0; }
36+
void foo(void) {}
37+
38+
//--- bar.c
39+
int main(void) { return 0; }
40+
void bar(int *a) { *a += 10; }

0 commit comments

Comments
 (0)