|
| 1 | +;; Test to make sure that the memprof ThinLTO backend finds the correct summary |
| 2 | +;; for an imported promoted local, so that we can perform the correct cloning. |
| 3 | +;; In particular, we should be able to use the thinlto_src_file metadata to |
| 4 | +;; recreate its original GUID. Specifically, this test contains promoted |
| 5 | +;; internal functions with the same original name as those that were imported, |
| 6 | +;; and we want to ensure we don't use those by mistake. |
| 7 | + |
| 8 | +;; The original code looks something like: |
| 9 | +;; |
| 10 | +;; src1.cc: |
| 11 | +;; extern void external1(); |
| 12 | +;; extern void external2(); |
| 13 | +;; static void internal1() { |
| 14 | +;; external2(); |
| 15 | +;; } |
| 16 | +;; static void internal2() { |
| 17 | +;; external2(); |
| 18 | +;; } |
| 19 | +;; int main() { |
| 20 | +;; internal1(); |
| 21 | +;; internal2(); |
| 22 | +;; external1(); |
| 23 | +;; return 0; |
| 24 | +;; } |
| 25 | +;; |
| 26 | +;; src2.cc: |
| 27 | +;; extern void external2(); |
| 28 | +;; static void internal1() { |
| 29 | +;; external2(); |
| 30 | +;; } |
| 31 | +;; static void internal2() { |
| 32 | +;; external2(); |
| 33 | +;; } |
| 34 | +;; void external1() { |
| 35 | +;; internal1(); |
| 36 | +;; internal2(); |
| 37 | +;; } |
| 38 | +;; |
| 39 | +;; The assembly for src1 shown below was dumped after function importing, with |
| 40 | +;; some hand modification to ensure we import the definitions of src2.cc's |
| 41 | +;; external1 and internal1 functions, and the declaration only for its |
| 42 | +;; internal2 function. I also hand modified it to add !callsite metadata |
| 43 | +;; to a few calls, and the distributed ThinLTO summary in src1.o.thinlto.ll to |
| 44 | +;; contain callsite metadata records with cloning results. |
| 45 | + |
| 46 | +; RUN: rm -rf %t && split-file %s %t && cd %t |
| 47 | +; RUN: llvm-as src1.ll -o src1.o |
| 48 | +; RUN: llvm-as src1.o.thinlto.ll -o src1.o.thinlto.bc |
| 49 | + |
| 50 | +; RUN: opt -passes=memprof-context-disambiguation src1.o -S -memprof-import-summary=src1.o.thinlto.bc | FileCheck %s |
| 51 | + |
| 52 | +;; Per the cloning results in the summary, none of the original functions should |
| 53 | +;; call any memprof clones. |
| 54 | +; CHECK-NOT: memprof |
| 55 | +;; We should have one clone of src1.cc's internal1 that calls a clone of |
| 56 | +;; external2. |
| 57 | +; CHECK-LABEL: define void @_ZL9internal1v.llvm.5985484347676238233.memprof.1() |
| 58 | +; CHECK: tail call void @_Z9external2v.memprof.1() |
| 59 | +; CHECK-LABEL: declare void @_Z9external2v.memprof.1() |
| 60 | +;; We should have one clone of external1 that calls a clone of internal2 from |
| 61 | +;; a synthesized callsite record (for a tail call with a missing frame). |
| 62 | +; CHECK-LABEL: define available_externally {{.*}} void @_Z9external1v.memprof.1() |
| 63 | +; CHECK: tail call void @_ZL9internal1v.llvm.3267420853450984672() |
| 64 | +; CHECK: tail call void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() |
| 65 | +; CHECK-LABEL: declare void @_ZL9internal2v.llvm.3267420853450984672.memprof.1() |
| 66 | +;; We should have 2 clones of src2.cc's internal1 function, calling a single |
| 67 | +;; clone of external2. |
| 68 | +; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.1() |
| 69 | +; CHECK: tail call void @_Z9external2v.memprof.1() |
| 70 | +; CHECK: tail call void @_Z9external2v.memprof.1() |
| 71 | +; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.2() |
| 72 | +; CHECK: tail call void @_Z9external2v.memprof.1() |
| 73 | +; CHECK: tail call void @_Z9external2v.memprof.1() |
| 74 | +; CHECK-NOT: memprof |
| 75 | + |
| 76 | +;--- src1.ll |
| 77 | +; ModuleID = 'src1.o' |
| 78 | +source_filename = "src1.cc" |
| 79 | +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" |
| 80 | +target triple = "x86_64-unknown-linux-gnu" |
| 81 | + |
| 82 | +define dso_local noundef i32 @main() { ; src1.cc's main (summary ^9) |
| 83 | +entry: |
| 84 | + tail call void @_ZL9internal1v.llvm.5985484347676238233() ; src1.cc's own promoted internal1 |
| 85 | + tail call void @_ZL9internal2v.llvm.5985484347676238233() ; src1.cc's own promoted internal2 |
| 86 | + tail call void @_Z9external1v() ; external1, whose definition was imported from src2.cc |
| 87 | + ret i32 0 |
| 88 | +} |
| 89 | + |
| 90 | +define void @_ZL9internal1v.llvm.5985484347676238233() { ; src1.cc's internal1 (summary ^5) |
| 91 | +entry: |
| 92 | + tail call void @_Z9external2v(), !callsite !8 ; stack id 12345, the same id as the callsite record in ^5 |
| 93 | + ret void |
| 94 | +} |
| 95 | + |
| 96 | +define void @_ZL9internal2v.llvm.5985484347676238233() { ; src1.cc's internal2 (summary ^7) |
| 97 | +entry: |
| 98 | + tail call void @_Z9external2v() ; no !callsite metadata here, and ^7 has no callsite records |
| 99 | + ret void |
| 100 | +} |
| 101 | + |
| 102 | +declare void @_Z9external2v() ; external2: declared extern in both src1.cc and src2.cc, never defined in this module |
| 103 | + |
| 104 | +define available_externally dso_local void @_Z9external1v() !thinlto_src_module !6 !thinlto_src_file !7 { ; imported from src2.cc (summary ^8) |
| 105 | +entry: |
| 106 | + tail call void @_ZL9internal1v.llvm.3267420853450984672() ; src2.cc's internal1, imported as a definition |
| 107 | + tail call void @_ZL9internal2v.llvm.3267420853450984672() ; src2.cc's internal2, declaration only; ^8 has a synthesized callsite record for this call |
| 108 | + ret void |
| 109 | +} |
| 110 | + |
| 111 | +define available_externally void @_ZL9internal1v.llvm.3267420853450984672() !thinlto_src_module !6 !thinlto_src_file !7 { ; src2.cc's internal1 (summary ^3) |
| 112 | +entry: |
| 113 | + ;; This one has more callsite records than the other version of internal1, |
| 114 | + ;; which would cause the code to iterate past the end of the callsite |
| 115 | + ;; records if we incorrectly got the other internal1's summary. |
| 116 | + tail call void @_Z9external2v(), !callsite !9 ; stack id 23456, first callsite record in ^3 |
| 117 | + tail call void @_Z9external2v(), !callsite !10 ; stack id 34567, second callsite record in ^3 |
| 118 | + ret void |
| 119 | +} |
| 120 | + |
| 121 | +declare void @_ZL9internal2v.llvm.3267420853450984672() ; src2.cc's internal2: only a declaration was imported (importType: declaration in ^4) |
| 122 | + |
| 123 | +!6 = !{!"src2.o"} ; !thinlto_src_module of the functions imported from src2 |
| 124 | +!7 = !{!"src2.cc"} ; !thinlto_src_file of the functions imported from src2 |
| 125 | +!8 = !{i64 12345} ; !callsite stack id used in src1.cc's internal1 |
| 126 | +!9 = !{i64 23456} ; !callsite stack id of the first call in src2.cc's internal1 |
| 127 | +!10 = !{i64 34567} ; !callsite stack id of the second call in src2.cc's internal1 |
| 128 | + |
| 129 | +;--- src1.o.thinlto.ll |
| 130 | +; ModuleID = 'src1.o.thinlto.bc' |
| 131 | +source_filename = "src1.o.thinlto.bc" |
| 132 | + |
| 133 | +^0 = module: (path: "src1.o", hash: (1393604173, 1072112025, 2857473630, 2016801496, 3238735916)) |
| 134 | +^1 = module: (path: "src2.o", hash: (760755700, 1705397472, 4198605753, 677969311, 2408738824)) |
| 135 | +;; src2.o:internal1. It specifies that we should have 3 clones total (including |
| 136 | +;; original). |
| 137 | +^3 = gv: (guid: 1143217136900127394, summaries: (function: (module: ^1, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1), (callee: ^6, tail: 1)), callsites: ((callee: ^6, clones: (0, 1, 1), stackIds: (23456)), (callee: ^6, clones: (0, 1, 1), stackIds: (34567)))))) |
| 138 | +;; src2.o:internal2. It was manually modified to have importType = declaration. |
| 139 | +^4 = gv: (guid: 3599593882704738259, summaries: (function: (module: ^1, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: declaration), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1))))) |
| 140 | +;; src1.o:internal1. |
| 141 | +^5 = gv: (guid: 6084810090198994915, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1)), callsites: ((callee: ^6, clones: (0, 1), stackIds: (12345)))))) |
| 142 | +^6 = gv: (guid: 8596367375252297795) ;; Callee of every internal1/internal2 summary above and below (presumably external2); it has no summary of its own. |
| 143 | +;; src1.o:internal2. |
| 144 | +^7 = gv: (guid: 11092151021205906565, summaries: (function: (module: ^0, flags: (linkage: internal, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^6, tail: 1))))) |
| 145 | +;; src2.o:external1. It contains a synthesized callsite record for the tail call |
| 146 | +;; to internal2 (the empty stackId list indicates it is synthesized for a |
| 147 | +;; discovered missing tail call frame). |
| 148 | +^8 = gv: (guid: 12313225385227428720, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, calls: ((callee: ^3, tail: 1), (callee: ^4, tail: 1)), callsites: ((callee: ^4, clones: (0, 1), stackIds: ()))))) |
| 149 | +;; src1.o:main. |
| 150 | +^9 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^5, tail: 1), (callee: ^7, tail: 1), (callee: ^8, tail: 1))))) |
| 151 | +^10 = flags: 97 |
| 152 | +^11 = blockcount: 0 |
0 commit comments