;; This test ensures that the logic which assigns calls to stack nodes
;; correctly handles an inlined callsite whose stack ids partially overlap
;; with a trimmed context, in particular when it also partially overlaps
;; with a longer non-trimmed context that doesn't match all of the inlined
;; callsite's stack ids.

;; The profile data and call stacks were all added manually, but the code
;; would be structured something like the following (fairly contrived, in
;; order to produce the control flow needed for the test):

;; void A(bool b) {
;;   if (b)
;;     // cold: stack ids 6, 2, 8 (trimmed ids 9, 11)
;;     // not cold: stack ids 6, 7 (trimmed id 10)
;;     new char[10]; // stack id 6
;;   else
;;     // not cold: stack ids 1, 2, 8, 3, 4
;;     // cold: stack ids 1, 2, 8, 3, 5
;;     new char[10]; // stack id 1
;; }
;;
;; void XZ() {
;;   A(false); // stack ids 2, 8 (e.g. X inlined into Z)
;; }
;;
;; void XZN() {
;;   // This is the tricky one to get right. We want to ensure it gets
;;   // correctly correlated with a stack node for the trimmed 6, 2, 8
;;   // context shown in A. It should *not* be correlated with the longer
;;   // untrimmed 1, 2, 8, 3, 4|5 contexts.
;;   A(true); // stack ids 2, 8, 9 (e.g. X inlined into Z inlined into N)
;; }
;;
;; void Y() {
;;   A(true); // stack id 7
;; }
;;
;; void M() {
;;   XZ(); // stack id 3
;; }
;;
;; int main() {
;;   M();   // stack id 4 (leads to not cold allocation)
;;   M();   // stack id 5 (leads to cold allocation)
;;   XZN(); // stack id 11 (leads to cold allocation)
;;   Y();   // stack id 10 (leads to not cold allocation)
;; }
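
;; The stack ids above reappear in the !callsite and !memprof metadata
;; attached to the calls in the IR below. For example, the inlined call in
;; _Z3XZNv carries !callsite !{i64 2, i64 8, i64 9}, while its cold
;; allocation context was trimmed in the profile to just !{i64 6, i64 2, i64 8}.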

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN:   -memprof-verify-ccg -memprof-verify-nodes \
; RUN:   -stats -pass-remarks=memprof-context-disambiguation \
; RUN:   %s -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN:   --check-prefix=STATS --check-prefix=REMARKS
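
;; Note on the options used: -supports-hot-cold-new tells the pass that the
;; hot/cold operator new interfaces are available, which it requires before
;; applying cloning and allocation attributes, while -memprof-verify-ccg and
;; -memprof-verify-nodes enable internal consistency checks on the callsite
;; context graph the pass builds.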

; REMARKS: created clone _Z1Ab.memprof.1
; REMARKS: created clone _Z2XZv.memprof.1
; REMARKS: created clone _Z1Mv.memprof.1
;; Make sure the inlined context in _Z3XZNv, which partially overlaps the
;; trimmed cold context and also partially overlaps completely unrelated
;; contexts, correctly calls a cloned version of _Z1Ab that will call the
;; cold-annotated allocation.
; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z1Mv.memprof.1
; REMARKS: call in clone _Z1Mv.memprof.1 assigned to call function clone _Z2XZv.memprof.1
; REMARKS: call in clone _Z2XZv.memprof.1 assigned to call function clone _Z1Ab
; REMARKS: call in clone main assigned to call function clone _Z1Mv
; REMARKS: call in clone _Z1Mv assigned to call function clone _Z2XZv
; REMARKS: call in clone _Z2XZv assigned to call function clone _Z1Ab.memprof.1
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Yv assigned to call function clone _Z1Ab
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute notcold


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

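;; _Z1Ab contains both allocations: the one in if.then carries the two
;; trimmed contexts (!memprof !5) and its own stack id 6 (!callsite !11),
;; while the one in if.else carries the two full contexts (!memprof !0)
;; and stack id 1 (!callsite !10).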
define dso_local void @_Z1Ab(i1 noundef zeroext %b) {
entry:
  br i1 %b, label %if.then, label %if.else

if.then:
  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !5, !callsite !11
  br label %if.end

if.else:
  %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !10
  br label %if.end

if.end:
  ret void
}

; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #0

define dso_local void @_Z2XZv() local_unnamed_addr #0 {
entry:
  tail call void @_Z1Ab(i1 noundef zeroext false), !callsite !12
  ret void
}

define dso_local void @_Z1Mv() local_unnamed_addr #0 {
entry:
  tail call void @_Z2XZv(), !callsite !19
  ret void
}

define dso_local void @_Z3XZNv() local_unnamed_addr {
entry:
  tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !15
  ret void
}

define dso_local void @_Z1Yv() local_unnamed_addr {
entry:
  tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !17
  ret void
}

define dso_local noundef i32 @main() local_unnamed_addr {
entry:
  tail call void @_Z1Mv(), !callsite !13 ;; Not cold context
  tail call void @_Z1Mv(), !callsite !14 ;; Cold context
  tail call void @_Z3XZNv(), !callsite !16 ;; Cold context
  tail call void @_Z1Yv(), !callsite !18 ;; Not cold context
  ret i32 0
}

attributes #0 = { nobuiltin }
attributes #7 = { builtin }

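;; A reminder of the metadata encoding used below: each !memprof attachment
;; is a list of MIB entries, and each MIB pairs a call stack (a list of i64
;; stack ids ordered from the allocation frame outward) with an allocation
;; type string. Each !callsite attachment lists the stack id(s) of the
;; annotated call, with multiple ids when the call has been inlined.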
!0 = !{!1, !3}
;; Not cold context via first call to _Z1Mv in main
!1 = !{!2, !"notcold"}
!2 = !{i64 1, i64 2, i64 8, i64 3, i64 4}
;; Cold context via second call to _Z1Mv in main
!3 = !{!4, !"cold"}
!4 = !{i64 1, i64 2, i64 8, i64 3, i64 5}
!5 = !{!6, !8}
;; Cold (trimmed) context via call to _Z3XZNv in main
!6 = !{!7, !"cold"}
!7 = !{i64 6, i64 2, i64 8}
;; Not cold (trimmed) context via call to _Z1Yv in main
!8 = !{!9, !"notcold"}
!9 = !{i64 6, i64 7}
!10 = !{i64 1}
!11 = !{i64 6}
!12 = !{i64 2, i64 8}
!13 = !{i64 4}
!14 = !{i64 5}
;; Inlined context in _Z3XZNv, which includes part of the trimmed cold context
!15 = !{i64 2, i64 8, i64 9}
!16 = !{i64 11}
!17 = !{i64 7}
!18 = !{i64 10}
!19 = !{i64 3}

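;; Summary of the cloning checked below: the original _Z1Ab keeps notcold on
;; the first (if.then) allocation and cold on the second, while
;; _Z1Ab.memprof.1 gets the opposite assignment, and callers are rewritten so
;; that each profiled context reaches the clone carrying its allocation type.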
; IR: define {{.*}} @_Z1Ab(i1 noundef zeroext %b)
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: define {{.*}} @_Z2XZv()
; IR: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext false)
; IR: define {{.*}} @_Z1Mv()
; IR: call {{.*}} @_Z2XZv()
;; Make sure the inlined context in _Z3XZNv, which partially overlaps the
;; trimmed cold context and also partially overlaps completely unrelated
;; contexts, correctly calls the cloned version of _Z1Ab that will call
;; the cold-annotated allocation.
; IR: define {{.*}} @_Z3XZNv()
; IR: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext true)
; IR: define {{.*}} @_Z1Yv()
; IR: call {{.*}} @_Z1Ab(i1 noundef zeroext true)
; IR: define {{.*}} @main()
; IR: call {{.*}} @_Z1Mv()
; IR: call {{.*}} @_Z1Mv.memprof.1()
; IR: call {{.*}} @_Z3XZNv()
; IR: call {{.*}} @_Z1Yv()
; IR: define {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b)
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define {{.*}} @_Z2XZv.memprof.1()
; IR: call {{.*}} @_Z1Ab(i1 noundef zeroext false)
; IR: define {{.*}} @_Z1Mv.memprof.1()
; IR: call {{.*}} @_Z2XZv.memprof.1()

; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
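
;; These counts line up with the remarks above: three function clones
;; (_Z1Ab.memprof.1, _Z2XZv.memprof.1 and _Z1Mv.memprof.1), and two cold plus
;; two notcold allocation calls across the original and cloned _Z1Ab.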