
Commit 3b974a3

The actual fixes and a test case that provokes the original issue.
The problem is that, when assigning stack nodes, we always expected NonAllocationCallToContextNodeMap to contain an entry for the saved matching call whenever a call's saved context id set was null. However, that does not hold if the recomputation of context ids for the primary matching call determined that the saved context ids had all been assigned to a different node, which can happen with inlining. Added a test case that provokes this situation, and fixed some assertion checking in the original stack node analysis.
1 parent: ee3d9d5
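The shape of the fix, in isolation: a hard assertion that a map already contains a key is replaced by a guarded lookup that tolerates a legitimately missing entry. Below is a minimal self-contained sketch of that pattern using plain std::map and invented names (CallToMatching, MatchingToNode, assignNodes), not the actual LLVM types:

#include <map>
#include <vector>

// Hypothetical stand-ins for CallToMatchingCall and
// NonAllocationCallToContextNodeMap in the pass.
std::map<int, int> CallToMatching; // call -> its "matching" call
std::map<int, int> MatchingToNode; // matching call -> graph node id

void assignNodes(const std::vector<int> &Calls) {
  for (int Call : Calls) {
    auto MatchIt = CallToMatching.find(Call);
    if (MatchIt == CallToMatching.end())
      continue;
    // Before the fix: assert(MatchingToNode.count(MatchIt->second));
    // After the fix: the entry may be legitimately absent, because the
    // matching call's node was never created (its context ids were all
    // reassigned to a different node, e.g. due to inlining), so skip it.
    auto NodeIt = MatchingToNode.find(MatchIt->second);
    if (NodeIt == MatchingToNode.end())
      continue;
    // ... record Call on the node NodeIt->second ...
  }
}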

File tree: 2 files changed, +131 -10 lines changed


llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 29 additions & 10 deletions
@@ -1379,7 +1379,15 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
   DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
   assert(!LastNodeContextIds.empty());
 
+#ifndef NDEBUG
+  bool PrevIterCreatedNode = false;
+  bool CreatedNode = false;
+  for (unsigned I = 0; I < Calls.size();
+       I++, PrevIterCreatedNode = CreatedNode) {
+    CreatedNode = false;
+#else
   for (unsigned I = 0; I < Calls.size(); I++) {
+#endif
     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
     // Skip any for which we didn't assign any ids, these don't get a node in
     // the graph.
@@ -1391,7 +1399,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
       if (!CallToMatchingCall.contains(Call))
        continue;
       auto MatchingCall = CallToMatchingCall[Call];
-      assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
+      if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
+        // This should only happen if we had a prior iteration, and it didn't
+        // create a node because of the below recomputation of context ids
+        // finding none remaining and continuing early.
+        assert(I > 0 && !PrevIterCreatedNode);
+        continue;
+      }
       NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
           Call);
       continue;
@@ -1444,6 +1458,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     ContextNode *NewNode = NodeOwner.back().get();
     NodeToCallingFunc[NewNode] = Func;
     NonAllocationCallToContextNodeMap[Call] = NewNode;
+#ifndef NDEBUG
+    CreatedNode = true;
+#endif
     NewNode->AllocTypes = computeAllocType(SavedContextIds);
 
     ContextNode *FirstNode = getNodeForStackId(Ids[0]);
@@ -1600,15 +1617,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
     // set used to ensure they are sorted properly.
     if (I > 0 && Ids != Calls[I - 1].StackIds)
       MatchingIdsFuncSet.clear();
-    else
-      // If the prior call had the same stack ids this set would not be empty.
-      // Check if we already have a call that "matches" because it is located
-      // in the same function. If the Calls list was sorted properly we should
-      // not encounter this situation as all such entries should be adjacent
-      // and processed in bulk further below.
-      assert(!MatchingIdsFuncSet.contains(Func));
-
-    MatchingIdsFuncSet.insert(Func);
 #endif
 
     // First compute the context ids for this stack id sequence (the
@@ -1679,6 +1687,17 @@
       continue;
     }
 
+#ifndef NDEBUG
+    // If the prior call had the same stack ids this set would not be empty.
+    // Check if we already have a call that "matches" because it is located
+    // in the same function. If the Calls list was sorted properly we should
+    // not encounter this situation as all such entries should be adjacent
+    // and processed in bulk further below.
+    assert(!MatchingIdsFuncSet.contains(Func));
+
+    MatchingIdsFuncSet.insert(Func);
+#endif
+
     // Check if the next set of stack ids is the same (since the Calls vector
     // of tuples is sorted by the stack ids we can just look at the next one).
     // If so, save them in the CallToMatchingCall map so that they get
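A side detail of the patch is how the debug-only bookkeeping is threaded through the loop: a CreatedNode/PrevIterCreatedNode pair, compiled only when NDEBUG is not defined, lets the new assertion distinguish "the previous iteration bailed out before creating a node" from a real invariant violation, at zero cost in release builds. The same pattern in a self-contained sketch (processOne and its even-value condition are stand-ins, not the pass's logic):

#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in for the per-iteration work; "a node was created" is modeled
// here as the value being even, purely so the sketch is runnable.
static bool processOne(int V) { return V % 2 == 0; }

void processAll(const std::vector<int> &Items) {
#ifndef NDEBUG
  // Debug builds carry one extra bit of state across iterations.
  bool PrevIterCreatedNode = false;
  bool CreatedNode = false;
  for (std::size_t I = 0; I < Items.size();
       I++, PrevIterCreatedNode = CreatedNode) {
    CreatedNode = false;
#else
  // Release builds compile the identical loop without the bookkeeping.
  for (std::size_t I = 0; I < Items.size(); I++) {
#endif
    bool Created = processOne(Items[I]);
#ifndef NDEBUG
    CreatedNode = Created;
    // Assertions can now reason about the previous iteration's outcome:
    assert(I == 0 || PrevIterCreatedNode == (Items[I - 1] % 2 == 0));
#endif
    (void)Created;
  }
}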
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
+;; This test ensures that the logic which assigns calls to stack nodes
+;; correctly handles a case where multiple nodes have stack ids that
+;; overlap with each other but have different last nodes (can happen with
+;; inlining into various levels of a call chain). Specifically, when we
+;; have one that is duplicated (e.g. unrolling), we need to correctly
+;; handle the case where the context id has already been assigned to
+;; a different call with a different last node.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
+; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
+; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=IR \
+; RUN:  --check-prefix=STATS --check-prefix=REMARKS
+
+; REMARKS: created clone _Z1Ab.memprof.1
+; REMARKS: created clone _Z3XZNv.memprof.1
+; REMARKS: call in clone main assigned to call function clone _Z3XZNv.memprof.1
+;; Make sure the inlined context in _Z3XZNv, which partially overlaps the stack
+;; ids in the shorter inlined context of _Z2XZv, correctly calls a cloned
+;; version of _Z1Ab, which will call the cold annotated allocation.
+; REMARKS: call in clone _Z3XZNv.memprof.1 assigned to call function clone _Z1Ab.memprof.1
+; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold
+; REMARKS: call in clone main assigned to call function clone _Z3XZNv
+; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab
+; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @_Z1Ab() {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1, !memprof !0, !callsite !5
+  ret void
+}
+
+; Function Attrs: nobuiltin
+declare ptr @_Znam(i64) #0
+
+;; Inlining of stack id 2 into 3. Assume this is called from somewhere else.
+define dso_local void @_Z2XZv() local_unnamed_addr #0 {
+entry:
+  ;; Simulate duplication of the callsite (e.g. unrolling).
+  tail call void @_Z1Ab(), !callsite !6
+  tail call void @_Z1Ab(), !callsite !6
+  ret void
+}
+
+;; Inlining of stack id 2 into 3 into 4. Called by main below.
+define dso_local void @_Z3XZNv() local_unnamed_addr {
+entry:
+  tail call void @_Z1Ab(), !callsite !7
+  ret void
+}
+
+define dso_local noundef i32 @main() local_unnamed_addr {
+entry:
+  tail call void @_Z3XZNv(), !callsite !8 ;; Not cold context
+  tail call void @_Z3XZNv(), !callsite !9 ;; Cold context
+  ret i32 0
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { builtin }
+
+!0 = !{!1, !3}
+;; Not cold context via first call to _Z3XZNv in main
+!1 = !{!2, !"notcold"}
+!2 = !{i64 1, i64 2, i64 3, i64 4, i64 5}
+;; Cold context via second call to _Z3XZNv in main
+!3 = !{!4, !"cold"}
+!4 = !{i64 1, i64 2, i64 3, i64 4, i64 6}
+!5 = !{i64 1}
+!6 = !{i64 2, i64 3}
+!7 = !{i64 2, i64 3, i64 4}
+!8 = !{i64 5}
+!9 = !{i64 6}
+
+; IR: define {{.*}} @_Z1Ab()
+; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
+; IR: define {{.*}} @_Z2XZv()
+; IR: call {{.*}} @_Z1Ab()
+; IR: call {{.*}} @_Z1Ab()
+; IR: define {{.*}} @_Z3XZNv()
+; IR: call {{.*}} @_Z1Ab()
+; IR: define {{.*}} @main()
+; IR: call {{.*}} @_Z3XZNv()
+; IR: call {{.*}} @_Z3XZNv.memprof.1()
+; IR: define {{.*}} @_Z1Ab.memprof.1()
+; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
+; IR: define {{.*}} @_Z3XZNv.memprof.1()
+; IR: call {{.*}} @_Z1Ab.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { "memprof"="cold" }
+
+; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
+; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
+; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis
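For orientation, the IR above models a source program roughly like the following C++ (a loose reconstruction from the demangled names and the stack id comments, not part of the commit): stack ids 2 and 3 come from the allocation being inlined through XZ, stack id 4 from the extra inlining level in XZN, and ids 5 and 6 from the two calls in main.

// Hypothetical shape only; the actual test is hand-written IR.
void A(bool) { new char[10]; }   // allocation carrying the memprof profile
void XZ()  { A(true); A(true); } // duplicated callsite, e.g. from unrolling
void XZN() { A(true); }          // the same chain inlined one level deeper
int main() {
  XZN(); // not cold context (trailing stack id 5)
  XZN(); // cold context (trailing stack id 6)
  return 0;
}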
