Skip to content

Commit 1a3947d

Browse files
committed
[MemProf] Use MapVector to avoid non-determinism
Multiple cases of instability in the cloning behavior occurred due to iteration of maps indexed by pointers. Fix by changing the maps to MapVector. This necessitated adding DenseMapInfo specializations for the structure types used in the keys. These were found while trying to commit patch 3 of the cloning (bfe7205), but the second one turned out to be in code committed in patch 2, but just exposed by a new test added with patch 3. Specifically, the iteration in identifyClones(). Added the portion of the new test cases from patch 3 that only relied on the already committed changes and exposed the issue. Differential Revision: https://reviews.llvm.org/D149924
1 parent b18161d commit 1a3947d

File tree

3 files changed

+430
-2
lines changed

3 files changed

+430
-2
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
2424
#include "llvm/ADT/DenseMap.h"
2525
#include "llvm/ADT/DenseSet.h"
26+
#include "llvm/ADT/MapVector.h"
2627
#include "llvm/ADT/SetOperations.h"
2728
#include "llvm/ADT/SmallPtrSet.h"
2829
#include "llvm/ADT/SmallSet.h"
@@ -438,8 +439,8 @@ class CallsiteContextGraph {
438439
std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
439440

440441
/// Maps to track the calls to their corresponding nodes in the graph.
441-
std::map<const CallInfo, ContextNode *> AllocationCallToContextNodeMap;
442-
std::map<const CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
442+
MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
443+
MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
443444

444445
/// Owner of all ContextNode unique_ptrs.
445446
std::vector<std::unique_ptr<ContextNode>> NodeOwner;
@@ -493,6 +494,7 @@ struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
493494
IndexCall(std::nullptr_t) : IndexCall() {}
494495
IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
495496
IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
497+
IndexCall(PointerUnion PT) : PointerUnion(PT) {}
496498

497499
IndexCall *operator->() { return this; }
498500

@@ -537,6 +539,20 @@ class IndexCallsiteContextGraph
537539
const ModuleSummaryIndex &Index;
538540
};
539541

542+
namespace llvm {
543+
template <>
544+
struct DenseMapInfo<typename CallsiteContextGraph<
545+
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
546+
: public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
547+
template <>
548+
struct DenseMapInfo<typename CallsiteContextGraph<
549+
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
550+
: public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
551+
template <>
552+
struct DenseMapInfo<IndexCall>
553+
: public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
554+
} // end namespace llvm
555+
540556
namespace {
541557

542558
struct FieldSeparator {
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
;; Test context disambiguation for a callgraph containing multiple memprof
2+
;; contexts and no inlining, where we need to perform additional cloning
3+
;; during function assignment/cloning to handle the combination of contexts
4+
;; to 2 different allocations.
5+
;;
6+
;; void E(char **buf1, char **buf2) {
7+
;; *buf1 = new char[10];
8+
;; *buf2 = new char[10];
9+
;; }
10+
;;
11+
;; void B(char **buf1, char **buf2) {
12+
;; E(buf1, buf2);
13+
;; }
14+
;;
15+
;; void C(char **buf1, char **buf2) {
16+
;; E(buf1, buf2);
17+
;; }
18+
;;
19+
;; void D(char **buf1, char **buf2) {
20+
;; E(buf1, buf2);
21+
;; }
22+
;; int main(int argc, char **argv) {
23+
;; char *cold1, *cold2, *default1, *default2, *default3, *default4;
24+
;; B(&default1, &default2);
25+
;; C(&default3, &cold1);
26+
;; D(&cold2, &default4);
27+
;; memset(cold1, 0, 10);
28+
;; memset(cold2, 0, 10);
29+
;; memset(default1, 0, 10);
30+
;; memset(default2, 0, 10);
31+
;; memset(default3, 0, 10);
32+
;; memset(default4, 0, 10);
33+
;; delete[] default1;
34+
;; delete[] default2;
35+
;; delete[] default3;
36+
;; delete[] default4;
37+
;; sleep(10);
38+
;; delete[] cold1;
39+
;; delete[] cold2;
40+
;; return 0;
41+
;; }
42+
;;
43+
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
44+
;; memory freed after sleep(10) results in cold lifetimes.
45+
;;
46+
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
47+
48+
49+
; RUN: opt -thinlto-bc %s >%t.o
50+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
51+
; RUN: -r=%t.o,main,plx \
52+
; RUN: -r=%t.o,_ZdaPv, \
53+
; RUN: -r=%t.o,sleep, \
54+
; RUN: -r=%t.o,_Znam, \
55+
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
56+
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
57+
58+
59+
;; Try again but with distributed ThinLTO
60+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
61+
; RUN: -thinlto-distributed-indexes \
62+
; RUN: -r=%t.o,main,plx \
63+
; RUN: -r=%t.o,_ZdaPv, \
64+
; RUN: -r=%t.o,sleep, \
65+
; RUN: -r=%t.o,_Znam, \
66+
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
67+
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP
68+
69+
70+
source_filename = "funcassigncloning.ll"
71+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
72+
target triple = "x86_64-unknown-linux-gnu"
73+
74+
; Function Attrs: noinline optnone
75+
define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) {
76+
entry:
77+
%call = call ptr @_Znam(i64 noundef 10), !memprof !0, !callsite !7
78+
%call1 = call ptr @_Znam(i64 noundef 10), !memprof !8, !callsite !15
79+
ret void
80+
}
81+
82+
declare ptr @_Znam(i64)
83+
84+
define internal void @_Z1BPPcS0_() {
85+
entry:
86+
call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !16
87+
ret void
88+
}
89+
90+
define internal void @_Z1CPPcS0_() {
91+
entry:
92+
call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !17
93+
ret void
94+
}
95+
96+
define internal void @_Z1DPPcS0_() {
97+
entry:
98+
call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !18
99+
ret void
100+
}
101+
102+
; Function Attrs: noinline optnone
103+
define i32 @main() {
104+
entry:
105+
call void @_Z1BPPcS0_()
106+
call void @_Z1CPPcS0_()
107+
call void @_Z1DPPcS0_()
108+
ret i32 0
109+
}
110+
111+
declare void @_ZdaPv()
112+
113+
declare i32 @sleep()
114+
115+
; uselistorder directives
116+
uselistorder ptr @_Znam, { 1, 0 }
117+
118+
!0 = !{!1, !3, !5}
119+
!1 = !{!2, !"cold"}
120+
!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
121+
!3 = !{!4, !"notcold"}
122+
!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
123+
!5 = !{!6, !"notcold"}
124+
!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
125+
!7 = !{i64 -3461278137325233666}
126+
!8 = !{!9, !11, !13}
127+
!9 = !{!10, !"notcold"}
128+
!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
129+
!11 = !{!12, !"cold"}
130+
!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
131+
!13 = !{!14, !"notcold"}
132+
!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
133+
!15 = !{i64 -1415475215210681400}
134+
!16 = !{i64 -2441057035866683071}
135+
!17 = !{i64 -3483158674395044949}
136+
!18 = !{i64 -7799663586031895603}
137+
138+
139+
;; Originally we create a single clone of each call to new from E, since each
140+
;; allocates cold memory for a single caller.
141+
142+
; DUMP: CCG after cloning:
143+
; DUMP: Callsite Context Graph:
144+
; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
145+
; DUMP: Versions: 1 MIB:
146+
; DUMP: AllocType 2 StackIds: 0
147+
; DUMP: AllocType 1 StackIds: 1
148+
; DUMP: AllocType 1 StackIds: 2
149+
; DUMP: (clone 0)
150+
; DUMP: AllocTypes: NotCold
151+
; DUMP: ContextIds: 2 3
152+
; DUMP: CalleeEdges:
153+
; DUMP: CallerEdges:
154+
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
155+
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
156+
; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
157+
158+
; DUMP: Node [[D:0x[a-z0-9]+]]
159+
; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0)
160+
; DUMP: AllocTypes: NotColdCold
161+
; DUMP: ContextIds: 1 6
162+
; DUMP: CalleeEdges:
163+
; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
164+
; DUMP: Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
165+
; DUMP: CallerEdges:
166+
167+
; DUMP: Node [[C]]
168+
; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0)
169+
; DUMP: AllocTypes: NotColdCold
170+
; DUMP: ContextIds: 2 5
171+
; DUMP: CalleeEdges:
172+
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
173+
; DUMP: Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
174+
; DUMP: CallerEdges:
175+
176+
; DUMP: Node [[B]]
177+
; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0)
178+
; DUMP: AllocTypes: NotCold
179+
; DUMP: ContextIds: 3 4
180+
; DUMP: CalleeEdges:
181+
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
182+
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
183+
; DUMP: CallerEdges:
184+
185+
; DUMP: Node [[ENEW2ORIG]]
186+
; DUMP: Versions: 1 MIB:
187+
; DUMP: AllocType 1 StackIds: 2
188+
; DUMP: AllocType 2 StackIds: 1
189+
; DUMP: AllocType 1 StackIds: 0
190+
; DUMP: (clone 0)
191+
; DUMP: AllocTypes: NotCold
192+
; DUMP: ContextIds: 4 6
193+
; DUMP: CalleeEdges:
194+
; DUMP: CallerEdges:
195+
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
196+
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
197+
; DUMP: Clones: [[ENEW2CLONE]]
198+
199+
; DUMP: Node [[ENEW1CLONE]]
200+
; DUMP: Versions: 1 MIB:
201+
; DUMP: AllocType 2 StackIds: 0
202+
; DUMP: AllocType 1 StackIds: 1
203+
; DUMP: AllocType 1 StackIds: 2
204+
; DUMP: (clone 0)
205+
; DUMP: AllocTypes: Cold
206+
; DUMP: ContextIds: 1
207+
; DUMP: CalleeEdges:
208+
; DUMP: CallerEdges:
209+
; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
210+
; DUMP: Clone of [[ENEW1ORIG]]
211+
212+
; DUMP: Node [[ENEW2CLONE]]
213+
; DUMP: Versions: 1 MIB:
214+
; DUMP: AllocType 1 StackIds: 2
215+
; DUMP: AllocType 2 StackIds: 1
216+
; DUMP: AllocType 1 StackIds: 0
217+
; DUMP: (clone 0)
218+
; DUMP: AllocTypes: Cold
219+
; DUMP: ContextIds: 5
220+
; DUMP: CalleeEdges:
221+
; DUMP: CallerEdges:
222+
; DUMP: Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
223+
; DUMP: Clone of [[ENEW2ORIG]]

0 commit comments

Comments
 (0)