Skip to content

Commit 0ab3d6e

Browse files
Reapply "[MemProf] Reduce cloning overhead by sharing nodes when possible" (#102932) with fixes (#106623)
This reverts commit 11aa31f, restoring commit 055e431, with added fixes for linker unsats (unresolved symbol errors at link time). In some cases multiple calls to different targets may end up with the same debug information, and therefore the same callsite id. We will end up sharing the node between these calls. We don't know which one matches the callees until all nodes are matched with calls, at which point any non-matching calls should be removed from the node. The fix extends the handling in handleCallsitesWithMultipleTargets to do this, and adds tests for various permutations of this situation.
1 parent 18e35d8 commit 0ab3d6e

9 files changed

+1386
-28
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 208 additions & 28 deletions
Large diffs are not rendered by default.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
;; Test to ensure a call to a different callee but with the same debug info
2+
;; (and therefore callsite metadata) as a preceding call in the alloc context
3+
;; does not cause missing or incorrect cloning. This test is otherwise the same
4+
;; as memprof-basic.ll.
5+
6+
;; -stats requires asserts
7+
; REQUIRES: asserts
8+
9+
; RUN: opt -thinlto-bc %s >%t.o
10+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
11+
; RUN: -supports-hot-cold-new \
12+
; RUN: -r=%t.o,main,plx \
13+
; RUN: -r=%t.o,blah, \
14+
; RUN: -r=%t.o,_Znam, \
15+
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
16+
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
17+
; RUN: -o %t.out 2>&1 | FileCheck %s \
18+
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
19+
20+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
21+
22+
source_filename = "memprof-aliased-location1.ll"
23+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
24+
target triple = "x86_64-unknown-linux-gnu"
25+
26+
define i32 @main() #0 { ; entry point: calls foo twice, once per profiled allocation context
27+
entry:
28+
%call = call ptr @_Z3foov(), !callsite !0 ; !0 is the last stack id of the "notcold" context !4
29+
%call1 = call ptr @_Z3foov(), !callsite !1 ; !1 is the last stack id of the "cold" context !6
30+
ret i32 0
31+
}
32+
33+
declare void @blah()
34+
35+
define internal ptr @_Z3barv() #0 { ; holds the profiled allocation call
36+
entry:
37+
%call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 ; !2 lists one "notcold" (!3) and one "cold" (!5) context; !7 is the first stack id of both
38+
ret ptr null
39+
}
40+
41+
declare ptr @_Znam(i64)
42+
43+
define internal ptr @_Z3bazv() #0 { ; two calls to different callees that share callsite id !8
44+
entry:
45+
;; Preceding call to another callee but with the same debug location / callsite id
46+
call void @blah(), !callsite !8 ; different callee (@blah), same callsite id as the call to bar below
47+
%call = call ptr @_Z3barv(), !callsite !8 ; !8 is the second stack id of both contexts (!4 and !6)
48+
ret ptr null
49+
}
50+
51+
define internal ptr @_Z3foov() #0 { ; intermediate frame between main and baz
52+
entry:
53+
%call = call ptr @_Z3bazv(), !callsite !9 ; !9 is the third stack id of both contexts (!4 and !6)
54+
ret ptr null
55+
}
56+
57+
; uselistorder directives
58+
uselistorder ptr @_Z3foov, { 1, 0 }
59+
60+
attributes #0 = { noinline optnone }
61+
62+
!0 = !{i64 8632435727821051414}
63+
!1 = !{i64 -3421689549917153178}
64+
!2 = !{!3, !5}
65+
!3 = !{!4, !"notcold", i64 100}
66+
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
67+
!5 = !{!6, !"cold", i64 400}
68+
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
69+
!7 = !{i64 9086428284934609951}
70+
!8 = !{i64 -5964873800580613432}
71+
!9 = !{i64 2732490490862098848}
72+
73+
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
74+
; REMARKS: created clone _Z3barv.memprof.1
75+
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
76+
; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
77+
; REMARKS: created clone _Z3bazv.memprof.1
78+
; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
79+
; REMARKS: created clone _Z3foov.memprof.1
80+
; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
81+
82+
83+
; IR: define {{.*}} @main
84+
;; The first call to foo does not allocate cold memory. It should call the
85+
;; original functions, which ultimately call the original allocation decorated
86+
;; with a "notcold" attribute.
87+
; IR: call {{.*}} @_Z3foov()
88+
;; The second call to foo allocates cold memory. It should call cloned functions
89+
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
90+
; IR: call {{.*}} @_Z3foov.memprof.1()
91+
; IR: define internal {{.*}} @_Z3barv()
92+
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
93+
; IR: define internal {{.*}} @_Z3bazv()
94+
; IR: call {{.*}} @_Z3barv()
95+
; IR: define internal {{.*}} @_Z3foov()
96+
; IR: call {{.*}} @_Z3bazv()
97+
; IR: define internal {{.*}} @_Z3barv.memprof.1()
98+
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
99+
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
100+
; IR: call {{.*}} @_Z3barv.memprof.1()
101+
; IR: define internal {{.*}} @_Z3foov.memprof.1()
102+
; IR: call {{.*}} @_Z3bazv.memprof.1()
103+
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
104+
; IR: attributes #[[COLD]] = { "memprof"="cold" }
105+
106+
107+
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
108+
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
109+
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
110+
; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
111+
; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
112+
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
113+
; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
114+
; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
115+
; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
116+
; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
;; Test to ensure a call to a different callee but with the same debug info
2+
;; (and therefore callsite metadata) as a subsequent call in the alloc context
3+
;; does not cause missing or incorrect cloning. This test is otherwise the same
4+
;; as memprof-basic.ll.
5+
6+
;; -stats requires asserts
7+
; REQUIRES: asserts
8+
9+
; RUN: opt -thinlto-bc %s >%t.o
10+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
11+
; RUN: -supports-hot-cold-new \
12+
; RUN: -r=%t.o,main,plx \
13+
; RUN: -r=%t.o,blah, \
14+
; RUN: -r=%t.o,_Znam, \
15+
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
16+
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
17+
; RUN: -o %t.out 2>&1 | FileCheck %s \
18+
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
19+
20+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
21+
22+
source_filename = "memprof-aliased-location2.ll"
23+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
24+
target triple = "x86_64-unknown-linux-gnu"
25+
26+
define i32 @main() #0 { ; entry point: calls foo twice, once per profiled allocation context
27+
entry:
28+
%call = call ptr @_Z3foov(), !callsite !0 ; !0 is the last stack id of the "notcold" context !4
29+
%call1 = call ptr @_Z3foov(), !callsite !1 ; !1 is the last stack id of the "cold" context !6
30+
ret i32 0
31+
}
32+
33+
declare void @blah()
34+
35+
define internal ptr @_Z3barv() #0 { ; holds the profiled allocation call
36+
entry:
37+
%call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 ; !2 lists one "notcold" (!3) and one "cold" (!5) context; !7 is the first stack id of both
38+
ret ptr null
39+
}
40+
41+
declare ptr @_Znam(i64)
42+
43+
define internal ptr @_Z3bazv() #0 { ; two calls to different callees that share callsite id !8
44+
entry:
45+
%call = call ptr @_Z3barv(), !callsite !8 ; !8 is the second stack id of both contexts (!4 and !6)
46+
;; Subsequent call to another callee but with the same debug location / callsite id
47+
call void @blah(), !callsite !8 ; different callee (@blah), same callsite id as the call to bar above
48+
ret ptr null
49+
}
50+
51+
define internal ptr @_Z3foov() #0 { ; intermediate frame between main and baz
52+
entry:
53+
%call = call ptr @_Z3bazv(), !callsite !9 ; !9 is the third stack id of both contexts (!4 and !6)
54+
ret ptr null
55+
}
56+
57+
; uselistorder directives
58+
uselistorder ptr @_Z3foov, { 1, 0 }
59+
60+
attributes #0 = { noinline optnone }
61+
62+
!0 = !{i64 8632435727821051414}
63+
!1 = !{i64 -3421689549917153178}
64+
!2 = !{!3, !5}
65+
!3 = !{!4, !"notcold", i64 100}
66+
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
67+
!5 = !{!6, !"cold", i64 400}
68+
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
69+
!7 = !{i64 9086428284934609951}
70+
!8 = !{i64 -5964873800580613432}
71+
!9 = !{i64 2732490490862098848}
72+
73+
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
74+
; REMARKS: created clone _Z3barv.memprof.1
75+
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
76+
; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
77+
; REMARKS: created clone _Z3bazv.memprof.1
78+
; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
79+
; REMARKS: created clone _Z3foov.memprof.1
80+
; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
81+
82+
83+
; IR: define {{.*}} @main
84+
;; The first call to foo does not allocate cold memory. It should call the
85+
;; original functions, which ultimately call the original allocation decorated
86+
;; with a "notcold" attribute.
87+
; IR: call {{.*}} @_Z3foov()
88+
;; The second call to foo allocates cold memory. It should call cloned functions
89+
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
90+
; IR: call {{.*}} @_Z3foov.memprof.1()
91+
; IR: define internal {{.*}} @_Z3barv()
92+
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
93+
; IR: define internal {{.*}} @_Z3bazv()
94+
; IR: call {{.*}} @_Z3barv()
95+
; IR: define internal {{.*}} @_Z3foov()
96+
; IR: call {{.*}} @_Z3bazv()
97+
; IR: define internal {{.*}} @_Z3barv.memprof.1()
98+
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
99+
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
100+
; IR: call {{.*}} @_Z3barv.memprof.1()
101+
; IR: define internal {{.*}} @_Z3foov.memprof.1()
102+
; IR: call {{.*}} @_Z3bazv.memprof.1()
103+
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
104+
; IR: attributes #[[COLD]] = { "memprof"="cold" }
105+
106+
107+
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
108+
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
109+
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
110+
; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
111+
; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
112+
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
113+
; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
114+
; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
115+
; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
116+
; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
;; Test to ensure a call to a different callee but with the same debug info
2+
;; (and therefore callsite metadata) as a preceding tail call in the alloc
3+
;; context does not cause missing or incorrect cloning. This test is otherwise
4+
;; the same as memprof-tailcall.ll.
5+
6+
;; -stats requires asserts
7+
; REQUIRES: asserts
8+
9+
; RUN: opt -thinlto-bc %s >%t.o
10+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
11+
; RUN: -supports-hot-cold-new \
12+
; RUN: -r=%t.o,_Z3barv,plx \
13+
; RUN: -r=%t.o,_Z3bazv,plx \
14+
; RUN: -r=%t.o,_Z3foov,plx \
15+
; RUN: -r=%t.o,main,plx \
16+
; RUN: -r=%t.o,_Znam, \
17+
; RUN: -r=%t.o,blah, \
18+
; RUN: -stats -save-temps \
19+
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS
20+
21+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
22+
23+
; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
24+
; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
25+
; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
26+
27+
source_filename = "memprof-tailcall-aliased-location1.cc"
28+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
29+
target triple = "x86_64-unknown-linux-gnu"
30+
31+
; Function Attrs: noinline
32+
; IR-LABEL: @_Z3barv()
33+
define ptr @_Z3barv() local_unnamed_addr #0 { ; holds the profiled allocation call
34+
entry:
35+
; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
36+
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5 ; !0 lists one "notcold" (!1) and one "cold" (!3) context; !5 is the first stack id of both
37+
ret ptr %call
38+
}
39+
40+
; Function Attrs: nobuiltin allocsize(0)
41+
declare ptr @_Znam(i64) #1
42+
declare void @blah()
43+
44+
; Function Attrs: noinline
45+
; IR-LABEL: @_Z3bazv()
46+
define ptr @_Z3bazv() #0 { ; forwards to bar through a tail call
47+
entry:
48+
; IR: call ptr @_Z3barv()
49+
%call = tail call ptr @_Z3barv() ; tail call carrying no !callsite metadata
50+
ret ptr %call
51+
}
52+
53+
; Function Attrs: noinline
54+
; IR-LABEL: @_Z3foov()
55+
define ptr @_Z3foov() #0 { ; forwards to baz through a tail call
56+
entry:
57+
; IR: call ptr @_Z3bazv()
58+
%call = tail call ptr @_Z3bazv() ; tail call carrying no !callsite metadata
59+
ret ptr %call
60+
}
61+
62+
; Function Attrs: noinline
63+
; IR-LABEL: @main()
64+
define i32 @main() #0 { ; entry point: an unrelated call plus two calls to foo, one per profiled context
65+
;; Preceding call to another callee but with the same debug location / callsite id
66+
call void @blah(), !callsite !6 ; different callee (@blah), same callsite id as the first call to foo below
67+
;; The first call to foo is part of a notcold context, and should use the
68+
;; original functions.
69+
; IR: call ptr @_Z3foov()
70+
%call = tail call ptr @_Z3foov(), !callsite !6 ; !6 is the last stack id of the "notcold" context !2
71+
;; The second call to foo is part of a cold context, and should call the
72+
;; cloned functions.
73+
; IR: call ptr @_Z3foov.memprof.1()
74+
%call1 = tail call ptr @_Z3foov(), !callsite !7 ; !7 is the last stack id of the "cold" context !4
75+
ret i32 0
76+
}
77+
78+
; IR-LABEL: @_Z3barv.memprof.1()
79+
; IR: call {{.*}} @_Znam(i64 10) #[[COLD:[0-9]+]]
80+
; IR-LABEL: @_Z3bazv.memprof.1()
81+
; IR: call ptr @_Z3barv.memprof.1()
82+
; IR-LABEL: @_Z3foov.memprof.1()
83+
; IR: call ptr @_Z3bazv.memprof.1()
84+
85+
; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
86+
; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
87+
88+
attributes #0 = { noinline }
89+
attributes #1 = { nobuiltin allocsize(0) }
90+
attributes #2 = { builtin allocsize(0) }
91+
92+
!0 = !{!1, !3}
93+
!1 = !{!2, !"notcold"}
94+
!2 = !{i64 3186456655321080972, i64 8632435727821051414}
95+
!3 = !{!4, !"cold"}
96+
!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
97+
!5 = !{i64 3186456655321080972}
98+
!6 = !{i64 8632435727821051414}
99+
!7 = !{i64 -3421689549917153178}

0 commit comments

Comments
 (0)