Skip to content

Commit 2916352

Browse files
[MemProf] Skip unmatched callers when cloning (#120455)
Don't unnecessarily clone for a caller that wasn't matched to a call instruction. This necessitated updating a couple of tests that were either unnecessarily cloning or unnecessarily processing an allocation and hinting it not cold.
1 parent b1b60d4 commit 2916352

File tree

4 files changed

+85
-34
lines changed

4 files changed

+85
-34
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3369,6 +3369,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
33693369
if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
33703370
break;
33713371

3372+
// If the caller was not successfully matched to a call in the IR/summary,
3373+
// there is no point in trying to clone for it as we can't update that call.
3374+
if (!CallerEdge->Caller->hasCall()) {
3375+
++EI;
3376+
continue;
3377+
}
3378+
33723379
// Only need to process the ids along this edge pertaining to the given
33733380
// allocation.
33743381
auto CallerEdgeContextsForAlloc =
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
;; Test callsite context graph generation for simple call graph with
2+
;; two memprof contexts and no inlining, where one callsite required for
3+
;; cloning is missing (e.g. unmatched).
4+
;;
5+
;; Original code looks like:
6+
;;
7+
;; char *foo() {
8+
;; return new char[10];
9+
;; }
10+
;;
11+
;; int main(int argc, char **argv) {
12+
;; char *x = foo();
13+
;; char *y = foo();
14+
;; memset(x, 0, 10);
15+
;; memset(y, 0, 10);
16+
;; delete[] x;
17+
;; sleep(200);
18+
;; delete[] y;
19+
;; return 0;
20+
;; }
21+
22+
; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
23+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
24+
; RUN: -supports-hot-cold-new \
25+
; RUN: -r=%t.o,main,plx \
26+
; RUN: -r=%t.o,_Znam, \
27+
; RUN: -memprof-report-hinted-sizes \
28+
; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \
29+
; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
30+
; RUN: --check-prefix=SIZESUNHINTED
31+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not \"memprof\"=\"cold\"
32+
33+
source_filename = "memprof-missing-callsite.ll"
34+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
35+
target triple = "x86_64-unknown-linux-gnu"
36+
37+
define i32 @main() #0 {
38+
entry:
39+
;; Missing callsite metadata blocks cloning
40+
%call = call ptr @_Z3foov()
41+
%call1 = call ptr @_Z3foov()
42+
ret i32 0
43+
}
44+
45+
define internal ptr @_Z3foov() #0 {
46+
entry:
47+
%call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
48+
ret ptr null
49+
}
50+
51+
declare ptr @_Znam(i64)
52+
53+
; uselistorder directives
54+
uselistorder ptr @_Z3foov, { 1, 0 }
55+
56+
attributes #0 = { noinline optnone }
57+
58+
!2 = !{!3, !5}
59+
!3 = !{!4, !"notcold", !10}
60+
!4 = !{i64 9086428284934609951, i64 8632435727821051414}
61+
!5 = !{!6, !"cold", !11, !12}
62+
!6 = !{i64 9086428284934609951, i64 -3421689549917153178}
63+
!7 = !{i64 9086428284934609951}
64+
!10 = !{i64 123, i64 100}
65+
!11 = !{i64 456, i64 200}
66+
!12 = !{i64 789, i64 300}
67+
68+
; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning
69+
; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning
70+
; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning

llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
; RUN: -stats -debug -save-temps \
2121
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
2222

23-
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
23+
;; We should not see any type of cold attribute or cloning applied
24+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \
25+
; RUN: --implicit-check-not ".memprof."
2426

2527
;; Try again but with distributed ThinLTO
2628
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
@@ -39,26 +41,23 @@
3941
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
4042

4143
;; Run ThinLTO backend
44+
;; We should not see any type of cold attribute or cloning applied
4245
; RUN: opt -passes=memprof-context-disambiguation \
4346
; RUN: -memprof-import-summary=%t.o.thinlto.bc \
44-
; RUN: -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR
47+
; RUN: -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \
48+
; RUN: --implicit-check-not ".memprof."
4549

4650
; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains)
4751

4852
; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains
4953

50-
;; Check that all calls in the IR are to the original functions, leading to a
51-
;; non-cold operator new call.
52-
5354
source_filename = "tailcall-nonunique.cc"
5455
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5556
target triple = "x86_64-unknown-linux-gnu"
5657

5758
; Function Attrs: noinline
58-
; IR-LABEL: @_Z3barv()
5959
define dso_local ptr @_Z3barv() local_unnamed_addr #0 {
6060
entry:
61-
; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
6261
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9
6362
ret ptr %call
6463
}
@@ -67,54 +66,43 @@ entry:
6766
declare ptr @_Znam(i64) #1
6867

6968
; Function Attrs: noinline
70-
; IR-LABEL: @_Z5blah1v()
7169
define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 {
7270
entry:
73-
; IR: call ptr @_Z3barv()
7471
%call = tail call ptr @_Z3barv()
7572
ret ptr %call
7673
}
7774

7875
; Function Attrs: noinline
79-
; IR-LABEL: @_Z5blah2v()
8076
define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 {
8177
entry:
82-
; IR: call ptr @_Z3barv()
8378
%call = tail call ptr @_Z3barv()
8479
ret ptr %call
8580
}
8681

8782
; Function Attrs: noinline
88-
; IR-LABEL: @_Z4baz1v()
8983
define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 {
9084
entry:
91-
; IR: call ptr @_Z5blah1v()
9285
%call = tail call ptr @_Z5blah1v()
9386
ret ptr %call
9487
}
9588

9689
; Function Attrs: noinline
97-
; IR-LABEL: @_Z4baz2v()
9890
define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 {
9991
entry:
100-
; IR: call ptr @_Z5blah2v()
10192
%call = tail call ptr @_Z5blah2v()
10293
ret ptr %call
10394
}
10495

10596
; Function Attrs: noinline
106-
; IR-LABEL: @_Z3foob(i1 %b)
10797
define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 {
10898
entry:
10999
br i1 %b, label %if.then, label %if.else
110100

111101
if.then: ; preds = %entry
112-
; IR: call ptr @_Z4baz1v()
113102
%call = tail call ptr @_Z4baz1v()
114103
br label %return
115104

116105
if.else: ; preds = %entry
117-
; IR: call ptr @_Z4baz2v()
118106
%call1 = tail call ptr @_Z4baz2v()
119107
br label %return
120108

@@ -124,29 +112,21 @@ return: ; preds = %if.else, %if.then
124112
}
125113

126114
; Function Attrs: noinline
127-
; IR-LABEL: @xyz()
128115
define dso_local i32 @xyz() local_unnamed_addr #0 {
129116
delete.end13:
130-
; IR: call ptr @_Z3foob(i1 true)
131117
%call = tail call ptr @_Z3foob(i1 true)
132-
; IR: call ptr @_Z3foob(i1 true)
133118
%call1 = tail call ptr @_Z3foob(i1 true)
134-
; IR: call ptr @_Z3foob(i1 false)
135119
%call2 = tail call ptr @_Z3foob(i1 false)
136-
; IR: call ptr @_Z3foob(i1 false)
137120
%call3 = tail call ptr @_Z3foob(i1 false)
138121
ret i32 0
139122
}
140123

141124
define dso_local i32 @main() local_unnamed_addr #0 {
142125
delete.end13:
143-
; IR: call i32 @xyz()
144126
%call1 = tail call i32 @xyz(), !callsite !11
145127
ret i32 0
146128
}
147129

148-
; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
149-
150130
attributes #0 = { noinline }
151131
attributes #1 = { nobuiltin allocsize(0) }
152132
attributes #2 = { builtin allocsize(0) }

llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,8 @@
77
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
88
; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s
99

10-
;; Make sure we created some clones
11-
; CHECK: created clone A.memprof.1
12-
; CHECK: created clone C.memprof.1
13-
; CHECK: created clone D.memprof.1
14-
; CHECK: created clone E.memprof.1
15-
; CHECK: created clone B.memprof.1
16-
; CHECK: created clone F.memprof.1
17-
; CHECK: created clone G.memprof.1
10+
;; Make sure we successfully created at least one clone
11+
; CHECK: created clone {{.*}}.memprof.1
1812

1913
; ModuleID = '<stdin>'
2014
source_filename = "reduced.ll"

0 commit comments

Comments
 (0)