Skip to content

Commit 48bc902

Browse files
[MemProf] Fix the stack updating handling of pruned contexts (#81322)
Fix a bug in the handling of cases where a callsite's stack ids partially overlap with the pruned context during matching of calls to the graph contructed from the profiled contexts. This fix makes the code match the comments.
1 parent 70e61f5 commit 48bc902

File tree

2 files changed

+194
-1
lines changed

2 files changed

+194
-1
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
13811381
// not fully matching stack contexts. To do this, subtract any context ids
13821382
// found in caller nodes of the last node found above.
13831383
if (Ids.back() != getLastStackId(Call)) {
1384-
for (const auto &PE : CurNode->CallerEdges) {
1384+
for (const auto &PE : LastNode->CallerEdges) {
13851385
set_subtract(StackSequenceContextIds, PE->getContextIds());
13861386
if (StackSequenceContextIds.empty())
13871387
break;
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
;; This test ensures that the logic which assigns calls to stack nodes
2+
;; correctly handles an inlined callsite with stack ids that partially
3+
;; overlap with a trimmed context. In particular when it also partially
4+
;; overlaps with a longer non-trimmed context that doesn't match all of
5+
;; the inlined callsite stack ids.
6+
7+
;; The profile data and call stacks were all manually added, but the code
8+
;; would be structured something like the following (fairly contrived to
9+
;; result in the type of control flow needed to test):
10+
11+
;; void A(bool b) {
12+
;; if (b)
13+
;; // cold: stack ids 6, 2, 8 (trimmed ids 10)
14+
;; // not cold: stack ids 6, 7 (trimmed ids 9, 11)
15+
;; new char[10]; // stack id 6
16+
;; else
17+
;; // not cold: stack ids 1, 2, 8, 3, 4
18+
;; // cold: stack ids 1, 2, 8, 3, 5
19+
;; new char[10]; // stack id 1
20+
;; }
21+
;;
22+
;; void XZ() {
23+
;; A(false); // stack ids 2, 8 (e.g. X inlined into Z)
24+
;; }
25+
;;
26+
;; void XZN() {
27+
;; // This is the tricky one to get right. We want to ensure it gets
28+
;; // correctly correlated with a stack node for the trimmed 6, 2, 8
29+
;; // context shown in A. It should *not* be correlated with the longer
30+
;; // untrimmed 1, 2, 8, 3, 4|5 contexts.
31+
;; A(true); // stack ids 2, 8, 9 (e.g. X inlined into Z inlined into N)
32+
;; }
33+
;;
34+
;; void Y() {
35+
;; A(true); // stack id 7
36+
;; }
37+
;;
38+
;; void M() {
39+
;; XZ(); // stack id 3
40+
;; }
41+
;;
42+
;; int main() {
43+
;; M(); // stack id 4 (leads to not cold allocation)
44+
;; M(); // stack id 5 (leads to cold allocation)
45+
;; XZN(); // stack id 11 (leads to cold allocation)
46+
;; Y(); // stack id 10 (leads to not cold allocation)
47+
;; }
48+
49+
;; -stats requires asserts
50+
; REQUIRES: asserts
51+
52+
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
53+
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
54+
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
55+
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \
56+
; RUN: --check-prefix=STATS --check-prefix=REMARKS
57+
58+
; REMARKS: created clone _Z1Ab.memprof.1
59+
; REMARKS: created clone _Z2XZv.memprof.1
60+
; REMARKS: created clone _Z1Mv.memprof.1
61+
;; Make sure the inlined context in _Z3XZNv, which partially overlaps
62+
;; trimmed cold context, and also partially overlaps completely
63+
;; unrelated contexts, correctly calls a cloned version of Z1Ab,
64+
;; which will call the cold annotated allocation.
65+
; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab.memprof.1
66+
; REMARKS: call in clone main assigned to call function clone _Z1Mv.memprof.1
67+
; REMARKS: call in clone _Z1Mv.memprof.1 assigned to call function clone _Z2XZv.memprof.1
68+
; REMARKS: call in clone _Z2XZv.memprof.1 assigned to call function clone _Z1Ab
69+
; REMARKS: call in clone main assigned to call function clone _Z1Mv
70+
; REMARKS: call in clone _Z1Mv assigned to call function clone _Z2XZv
71+
; REMARKS: call in clone _Z2XZv assigned to call function clone _Z1Ab.memprof.1
72+
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold
73+
; REMARKS: call in clone _Z1Yv assigned to call function clone _Z1Ab
74+
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold
75+
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute cold
76+
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute notcold
77+
78+
79+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
80+
target triple = "x86_64-unknown-linux-gnu"
81+
82+
define dso_local void @_Z1Ab(i1 noundef zeroext %b) {
83+
entry:
84+
br i1 %b, label %if.then, label %if.else
85+
86+
if.then:
87+
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !5, !callsite !11
88+
br label %if.end
89+
90+
if.else:
91+
%call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !10
92+
br label %if.end
93+
94+
if.end:
95+
ret void
96+
}
97+
98+
; Function Attrs: nobuiltin
99+
declare ptr @_Znam(i64) #0
100+
101+
define dso_local void @_Z2XZv() local_unnamed_addr #0 {
102+
entry:
103+
tail call void @_Z1Ab(i1 noundef zeroext false), !callsite !12
104+
ret void
105+
}
106+
107+
define dso_local void @_Z1Mv() local_unnamed_addr #0 {
108+
entry:
109+
tail call void @_Z2XZv(), !callsite !19
110+
ret void
111+
}
112+
113+
define dso_local void @_Z3XZNv() local_unnamed_addr {
114+
entry:
115+
tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !15
116+
ret void
117+
}
118+
119+
define dso_local void @_Z1Yv() local_unnamed_addr {
120+
entry:
121+
tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !17
122+
ret void
123+
}
124+
125+
define dso_local noundef i32 @main() local_unnamed_addr {
126+
entry:
127+
tail call void @_Z1Mv(), !callsite !13 ;; Not cold context
128+
tail call void @_Z1Mv(), !callsite !14 ;; Cold context
129+
tail call void @_Z3XZNv(), !callsite !16 ;; Cold context
130+
tail call void @_Z1Yv(), !callsite !18 ;; Not cold context
131+
ret i32 0
132+
}
133+
134+
attributes #0 = { nobuiltin }
135+
attributes #7 = { builtin }
136+
137+
!0 = !{!1, !3}
138+
;; Not cold context via first call to _Z1Mv in main
139+
!1 = !{!2, !"notcold"}
140+
!2 = !{i64 1, i64 2, i64 8, i64 3, i64 4}
141+
;; Cold context via second call to _Z1Mv in main
142+
!3 = !{!4, !"cold"}
143+
!4 = !{i64 1, i64 2, i64 8, i64 3, i64 5}
144+
!5 = !{!6, !8}
145+
;; Cold (trimmed) context via call to _Z3XZNv in main
146+
!6 = !{!7, !"cold"}
147+
!7 = !{i64 6, i64 2, i64 8}
148+
;; Not cold (trimmed) context via call to _Z1Yv in main
149+
!8 = !{!9, !"notcold"}
150+
!9 = !{i64 6, i64 7}
151+
!10 = !{i64 1}
152+
!11 = !{i64 6}
153+
!12 = !{i64 2, i64 8}
154+
!13 = !{i64 4}
155+
!14 = !{i64 5}
156+
;; Inlined context in _Z3XZNv, which includes part of trimmed cold context
157+
!15 = !{i64 2, i64 8, i64 9}
158+
!16 = !{i64 11}
159+
!17 = !{i64 7}
160+
!18 = !{i64 10}
161+
!19 = !{i64 3}
162+
163+
; IR: define {{.*}} @_Z1Ab(i1 noundef zeroext %b)
164+
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
165+
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
166+
; IR: define {{.*}} @_Z2XZv()
167+
; IR: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext false)
168+
; IR: define {{.*}} @_Z1Mv()
169+
; IR: call {{.*}} @_Z2XZv()
170+
;; Make sure the inlined context in _Z3XZNv, which partially overlaps
171+
;; trimmed cold context, and also partially overlaps completely
172+
;; unrelated contexts, correctly calls the cloned version of Z1Ab
173+
;; that will call the cold annotated allocation.
174+
; IR: define {{.*}} @_Z3XZNv()
175+
; IR: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext true)
176+
; IR: define {{.*}} @_Z1Yv()
177+
; IR: call {{.*}} @_Z1Ab(i1 noundef zeroext true)
178+
; IR: define {{.*}} @main()
179+
; IR: call {{.*}} @_Z1Mv()
180+
; IR: call {{.*}} @_Z1Mv.memprof.1()
181+
; IR: call {{.*}} @_Z3XZNv()
182+
; IR: call {{.*}} @_Z1Yv()
183+
; IR: define {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b)
184+
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
185+
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
186+
; IR: define {{.*}} @_Z2XZv.memprof.1()
187+
; IR: call {{.*}} @_Z1Ab(i1 noundef zeroext false)
188+
; IR: define {{.*}} @_Z1Mv.memprof.1()
189+
; IR: call {{.*}} @_Z2XZv.memprof.1()
190+
191+
; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
192+
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
193+
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis

0 commit comments

Comments
 (0)