Skip to content

Commit 1e7d587

Browse files
[MemProf] Fix when CallStackTrie has a single chain to leaf with multi alloc type (#79433)
Fix a corner case where `CallStackTrie` has a single chain to the leaf and the nodes have multiple alloc types. Previously, this case caused the stackIds in the function summary to be empty.
1 parent 274d1b0 commit 1e7d587

File tree

5 files changed

+141
-7
lines changed

5 files changed

+141
-7
lines changed

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,21 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
244244
MIBCallStack.push_back(AllocStackId);
245245
std::vector<Metadata *> MIBNodes;
246246
assert(!Alloc->Callers.empty() && "addCallStack has not been called yet");
247-
buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
248-
/*CalleeHasAmbiguousCallerContext=*/true);
249-
assert(MIBCallStack.size() == 1 &&
250-
"Should only be left with Alloc's location in stack");
251-
CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
252-
return true;
247+
// The last parameter is meant to say whether the callee of the given node
248+
// has more than one caller. Here the node being passed in is the alloc
249+
// and it has no callees. So it's false.
250+
if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes, false)) {
251+
assert(MIBCallStack.size() == 1 &&
252+
"Should only be left with Alloc's location in stack");
253+
CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
254+
return true;
255+
}
256+
// In the corner case where the CallStackTrie has a single chain to the leaf
257+
// and every node in the chain has multiple alloc types, conservatively give
258+
// the call the non-cold allocation type.
259+
// FIXME: Avoid this case before the memory profile is created.
260+
addAllocTypeAttribute(Ctx, CI, AllocationType::NotCold);
261+
return false;
253262
}
254263

255264
template <>
Binary file not shown.
Binary file not shown.

llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,4 +144,33 @@ EOF
144144
${CLANG} ${COMMON_FLAGS} -fmemory-profile -funique-internal-linkage-names ${OUTDIR}/memprof_internal_linkage.cc -o ${OUTDIR}/memprof_internal_linkage.exe
145145
env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_internal_linkage.exe > ${OUTDIR}/memprof_internal_linkage.memprofraw
146146

147-
rm ${OUTDIR}/memprof_internal_linkage.cc
147+
rm ${OUTDIR}/memprof_internal_linkage.cc
148+
149+
cat > ${OUTDIR}/memprof_loop_unroll_a.cc << EOF
150+
int* a[2];
151+
extern void foo();
152+
int main() {
153+
foo();
154+
for (int i = 0; i < 1000000; ++i) {
155+
*a[0] = 1;
156+
}
157+
return 0;
158+
}
159+
EOF
160+
cat > ${OUTDIR}/memprof_loop_unroll_b.cc << EOF
161+
#include <string>
162+
extern int* a[2];
163+
void foo() {
164+
for (int i = 0; i < 2; ++i) {
165+
a[i] = new int[1];
166+
}
167+
}
168+
EOF
169+
${CLANG} ${COMMON_FLAGS} -fmemory-profile ${OUTDIR}/memprof_loop_unroll_a.cc -O0 -o ${OUTDIR}/memprof_loop_unroll_a.o -c
170+
${CLANG} ${COMMON_FLAGS} -fmemory-profile ${OUTDIR}/memprof_loop_unroll_b.cc -O3 -o ${OUTDIR}/memprof_loop_unroll_b.o -c
171+
${CLANG} ${COMMON_FLAGS} -fmemory-profile ${OUTDIR}/memprof_loop_unroll_a.o ${OUTDIR}/memprof_loop_unroll_b.o -o ${OUTDIR}/memprof_loop_unroll.exe
172+
env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_loop_unroll.exe > ${OUTDIR}/memprof_loop_unroll.memprofraw
173+
rm ${OUTDIR}/memprof_loop_unroll_a.cc
174+
rm ${OUTDIR}/memprof_loop_unroll_a.o
175+
rm ${OUTDIR}/memprof_loop_unroll_b.cc
176+
rm ${OUTDIR}/memprof_loop_unroll_b.o
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
;; Tests memprof when the profiled code contains an unrolled loop.
2+
3+
;; Avoid failures on big-endian systems that can't read the profile properly
4+
; REQUIRES: x86_64-linux
5+
6+
;; TODO: Use text profile inputs once that is available for memprof.
7+
;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
8+
;; # To generate the LLVM IR below for use in matching:
9+
;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_loop_unroll_b.cc -emit-llvm
10+
11+
; RUN: llvm-profdata merge %S/Inputs/memprof_loop_unroll.memprofraw --profiled-binary %S/Inputs/memprof_loop_unroll.exe -o %t.memprofdata
12+
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s
13+
14+
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
15+
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" }
16+
; CHECK-NOT: stackIds: ()
17+
18+
; ModuleID = 'memprof_loop_unroll_b.cc'
19+
source_filename = "memprof_loop_unroll_b.cc"
20+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
21+
target triple = "x86_64-unknown-linux-gnu"
22+
23+
@a = external global [2 x ptr], align 16
24+
25+
; Function Attrs: mustprogress noinline optnone uwtable
26+
define dso_local void @_Z3foov() #0 !dbg !10 {
27+
entry:
28+
%i = alloca i32, align 4
29+
store i32 0, ptr %i, align 4, !dbg !13
30+
br label %for.cond, !dbg !14
31+
32+
for.cond: ; preds = %for.inc, %entry
33+
%0 = load i32, ptr %i, align 4, !dbg !15
34+
%cmp = icmp slt i32 %0, 2, !dbg !17
35+
br i1 %cmp, label %for.body, label %for.end, !dbg !18
36+
37+
for.body: ; preds = %for.cond
38+
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 4) #2, !dbg !19
39+
%1 = load i32, ptr %i, align 4, !dbg !20
40+
%idxprom = sext i32 %1 to i64, !dbg !21
41+
%arrayidx = getelementptr inbounds [2 x ptr], ptr @a, i64 0, i64 %idxprom, !dbg !21
42+
store ptr %call, ptr %arrayidx, align 8, !dbg !22
43+
br label %for.inc, !dbg !23
44+
45+
for.inc: ; preds = %for.body
46+
%2 = load i32, ptr %i, align 4, !dbg !24
47+
%inc = add nsw i32 %2, 1, !dbg !24
48+
store i32 %inc, ptr %i, align 4, !dbg !24
49+
br label %for.cond, !dbg !26, !llvm.loop !27
50+
51+
for.end: ; preds = %for.cond
52+
ret void, !dbg !30
53+
}
54+
55+
; Function Attrs: nobuiltin allocsize(0)
56+
declare noundef nonnull ptr @_Znam(i64 noundef) #1
57+
58+
attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
59+
attributes #1 = { nobuiltin allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
60+
attributes #2 = { builtin allocsize(0) }
61+
62+
!llvm.dbg.cu = !{!0}
63+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
64+
!llvm.ident = !{!9}
65+
66+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
67+
!1 = !DIFile(filename: "memprof_loop_unroll_b.cc", directory: "/", checksumkind: CSK_MD5, checksum: "00276e590d606451dc54f3ff5f4bba25")
68+
!2 = !{i32 7, !"Dwarf Version", i32 5}
69+
!3 = !{i32 2, !"Debug Info Version", i32 3}
70+
!4 = !{i32 1, !"wchar_size", i32 4}
71+
!5 = !{i32 8, !"PIC Level", i32 2}
72+
!6 = !{i32 7, !"PIE Level", i32 2}
73+
!7 = !{i32 7, !"uwtable", i32 2}
74+
!8 = !{i32 7, !"frame-pointer", i32 2}
75+
!9 = !{!"clang version 18.0.0"}
76+
!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
77+
!11 = !DISubroutineType(types: !12)
78+
!12 = !{}
79+
!13 = !DILocation(line: 5, column: 14, scope: !10)
80+
!14 = !DILocation(line: 5, column: 10, scope: !10)
81+
!15 = !DILocation(line: 5, column: 21, scope: !16)
82+
!16 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2)
83+
!17 = !DILocation(line: 5, column: 23, scope: !16)
84+
!18 = !DILocation(line: 5, column: 5, scope: !16)
85+
!19 = !DILocation(line: 6, column: 16, scope: !10)
86+
!20 = !DILocation(line: 6, column: 11, scope: !10)
87+
!21 = !DILocation(line: 6, column: 9, scope: !10)
88+
!22 = !DILocation(line: 6, column: 14, scope: !10)
89+
!23 = !DILocation(line: 7, column: 5, scope: !10)
90+
!24 = !DILocation(line: 5, column: 28, scope: !25)
91+
!25 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 4)
92+
!26 = !DILocation(line: 5, column: 5, scope: !25)
93+
!27 = distinct !{!27, !28, !23, !29}
94+
!28 = !DILocation(line: 5, column: 5, scope: !10)
95+
!29 = !{!"llvm.loop.mustprogress"}
96+
!30 = !DILocation(line: 8, column: 1, scope: !10)

0 commit comments

Comments
 (0)