Skip to content

Commit 340cb19

Browse files
[MemProf] Expand optimization scope to internal linkage function (#73236)
Currently MemProf cannot correctly annotate IR in local linkage functions or in the global initialization function __cxx_global_var_init. llvm-profdata, which converts a raw memory profile into a memory profile, uses the function name from DWARF to create the GUID. But when LLVM consumes the memory profile, it uses `getIRPGOFuncName` or `getPGOFuncName`, which return the name of a local linkage function as `FileName;FunctionName` or `FileName:FunctionName`, to get the function name and create the GUID. As a result, the GUID computed by the profile creator is not the same as the one computed by the profile consumer. So I think MemProf should be used with `unique-internal-linkage-names` and should not use PGOFuncName. __cxx_global_var_init is created after the point where UniqueInternalLinkageNames runs, so I additionally add a uniq suffix to __cxx_global_var_init. Co-authored-by: lifengxiang <[email protected]>
1 parent 58199df commit 340cb19

File tree

5 files changed

+110
-16
lines changed

5 files changed

+110
-16
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -676,25 +676,17 @@ static void readMemprof(Module &M, Function &F,
676676
IndexedInstrProfReader *MemProfReader,
677677
const TargetLibraryInfo &TLI) {
678678
auto &Ctx = M.getContext();
679-
680-
auto FuncName = getIRPGOFuncName(F);
679+
// Previously we used getIRPGOFuncName() here. If F is local linkage,
680+
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
681+
// llvm-profdata uses FuncName in dwarf to create GUID which doesn't
682+
// contain FileName's prefix. As a result, local linkage functions could not
683+
// find their MemProfRecord. So we use getName() now.
684+
// 'unique-internal-linkage-names' can make MemProf work better for local
685+
// linkage functions.
686+
auto FuncName = F.getName();
681687
auto FuncGUID = Function::getGUID(FuncName);
682688
std::optional<memprof::MemProfRecord> MemProfRec;
683689
auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
684-
if (Err) {
685-
// If we don't find getIRPGOFuncName(), try getPGOFuncName() to handle
686-
// profiles built by older compilers
687-
Err = handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
688-
if (IE.get() != instrprof_error::unknown_function)
689-
return make_error<InstrProfError>(IE);
690-
auto FuncName = getPGOFuncName(F);
691-
auto FuncGUID = Function::getGUID(FuncName);
692-
if (auto Err =
693-
MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec))
694-
return Err;
695-
return Error::success();
696-
});
697-
}
698690
if (Err) {
699691
handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
700692
auto Err = IPE.get();
Binary file not shown.
Binary file not shown.

llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,21 @@ ${CLANG} ${COMMON_FLAGS} -fmemory-profile -DUSE_MUSTTAIL=1 ${OUTDIR}/memprof_mis
127127
env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_missing_leaf.exe > ${OUTDIR}/memprof_missing_leaf.memprofraw
128128

129129
rm ${OUTDIR}/memprof_missing_leaf.cc
130+
131+
cat > ${OUTDIR}/memprof_internal_linkage.cc << EOF
132+
#include <cstring>
133+
#include <unistd.h>
134+
static void foo() {
135+
int *a = new int[5];
136+
memset(a, 0, 5);
137+
}
138+
int main(int argc, char **argv) {
139+
foo();
140+
return 0;
141+
}
142+
EOF
143+
144+
${CLANG} ${COMMON_FLAGS} -fmemory-profile -funique-internal-linkage-names ${OUTDIR}/memprof_internal_linkage.cc -o ${OUTDIR}/memprof_internal_linkage.exe
145+
env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_internal_linkage.exe > ${OUTDIR}/memprof_internal_linkage.memprofraw
146+
147+
rm ${OUTDIR}/memprof_internal_linkage.cc
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
;; Tests memprof when the module contains an internal linkage function.
2+
3+
;; Avoid failures on big-endian systems that can't read the profile properly
4+
; REQUIRES: x86_64-linux
5+
6+
;; TODO: Use text profile inputs once that is available for memprof.
7+
;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
8+
;; # To generate the LLVM IR below for use in matching.
9+
;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_internal_linkage.cc -emit-llvm -funique-internal-linkage-names
10+
11+
; RUN: llvm-profdata merge %S/Inputs/memprof_internal_linkage.memprofraw --profiled-binary %S/Inputs/memprof_internal_linkage.exe -o %t.memprofdata
12+
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s
13+
14+
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
15+
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" }
16+
17+
; ModuleID = 'memprof_internal_linkage.cc'
18+
source_filename = "memprof_internal_linkage.cc"
19+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
20+
target triple = "x86_64-unknown-linux-gnu"
21+
22+
; Function Attrs: mustprogress noinline norecurse optnone uwtable
23+
define dso_local noundef i32 @main(i32 noundef %argc, ptr noundef %argv) #0 !dbg !10 {
24+
entry:
25+
%retval = alloca i32, align 4
26+
%argc.addr = alloca i32, align 4
27+
%argv.addr = alloca ptr, align 8
28+
store i32 0, ptr %retval, align 4
29+
store i32 %argc, ptr %argc.addr, align 4
30+
store ptr %argv, ptr %argv.addr, align 8
31+
call void @_ZL3foov.__uniq.231888424933890731874095357293037629092() #4, !dbg !14
32+
ret i32 0, !dbg !15
33+
}
34+
35+
; Function Attrs: mustprogress noinline optnone uwtable
36+
define internal void @_ZL3foov.__uniq.231888424933890731874095357293037629092() #1 !dbg !16 {
37+
entry:
38+
%a = alloca ptr, align 8
39+
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 20) #5, !dbg !17
40+
store ptr %call, ptr %a, align 8, !dbg !18
41+
%0 = load ptr, ptr %a, align 8, !dbg !19
42+
call void @llvm.memset.p0.i64(ptr align 4 %0, i8 0, i64 5, i1 false), !dbg !20
43+
ret void, !dbg !21
44+
}
45+
46+
; Function Attrs: nobuiltin allocsize(0)
47+
declare noundef nonnull ptr @_Znam(i64 noundef) #2
48+
49+
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
50+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
51+
52+
attributes #0 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
53+
attributes #1 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "sample-profile-suffix-elision-policy"="selected" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
54+
attributes #2 = { nobuiltin allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
55+
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
56+
attributes #4 = { "sample-profile-suffix-elision-policy"="selected" }
57+
attributes #5 = { builtin allocsize(0) }
58+
59+
!llvm.dbg.cu = !{!0}
60+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
61+
!llvm.ident = !{!9}
62+
63+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 (https://github.com/llvm/llvm-project.git a604a1112a611ea867dc4e8d164021c7b055e18a)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
64+
!1 = !DIFile(filename: "memprof_internal_linkage.cc", directory: ".", checksumkind: CSK_MD5, checksum: "de848419432086fd9ed6dda04f3bf0ac")
65+
!2 = !{i32 7, !"Dwarf Version", i32 5}
66+
!3 = !{i32 2, !"Debug Info Version", i32 3}
67+
!4 = !{i32 1, !"wchar_size", i32 4}
68+
!5 = !{i32 8, !"PIC Level", i32 2}
69+
!6 = !{i32 7, !"PIE Level", i32 2}
70+
!7 = !{i32 7, !"uwtable", i32 2}
71+
!8 = !{i32 7, !"frame-pointer", i32 2}
72+
!9 = !{!"clang version 18.0.0 (https://github.com/llvm/llvm-project.git a604a1112a611ea867dc4e8d164021c7b055e18a)"}
73+
!10 = distinct !DISubprogram(name: "main", scope: !11, file: !11, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
74+
!11 = !DIFile(filename: "memprof_internal_linkage.cc", directory: ".", checksumkind: CSK_MD5, checksum: "de848419432086fd9ed6dda04f3bf0ac")
75+
!12 = !DISubroutineType(types: !13)
76+
!13 = !{}
77+
!14 = !DILocation(line: 8, column: 3, scope: !10)
78+
!15 = !DILocation(line: 9, column: 3, scope: !10)
79+
!16 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3foov.__uniq.231888424933890731874095357293037629092", scope: !11, file: !11, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0)
80+
!17 = !DILocation(line: 4, column: 12, scope: !16)
81+
!18 = !DILocation(line: 4, column: 8, scope: !16)
82+
!19 = !DILocation(line: 5, column: 10, scope: !16)
83+
!20 = !DILocation(line: 5, column: 3, scope: !16)
84+
!21 = !DILocation(line: 6, column: 1, scope: !16)

0 commit comments

Comments
 (0)