Skip to content

Commit 6264288

Browse files
[MemProf] Fix the option to disable memprof ICP (llvm#112917)
The -enable-memprof-indirect-call-support option, which is meant to guard the recently added memprof ICP support, was not checked in enough places. Specifically, it was not checked in mayHaveMemprofSummary, which is called from the ThinLTO backend's applyImport. This led to failures when checking the callsite records, because we incorrectly expected records for indirect calls. Fix the option so that it is checked in all necessary locations, and add testing.
1 parent 6c60ead commit 6264288

File tree

2 files changed

+51
-4
lines changed

2 files changed

+51
-4
lines changed

llvm/lib/Analysis/ModuleSummaryAnalysis.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,10 @@ static void computeFunctionSummary(
503503
if (!IsThinLTO)
504504
continue;
505505

506+
// Skip indirect calls if we haven't enabled memprof ICP.
507+
if (!CalledFunction && !EnableMemProfIndirectCallSupport)
508+
continue;
509+
506510
// Ensure we keep this analysis in sync with the handling in the ThinLTO
507511
// backend (see MemProfContextDisambiguation::applyImport). Save this call
508512
// so that we can skip it in checking the reverse case later.
@@ -561,7 +565,8 @@ static void computeFunctionSummary(
561565
auto CalleeValueInfo =
562566
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
563567
Callsites.push_back({CalleeValueInfo, StackIdIndices});
564-
} else if (EnableMemProfIndirectCallSupport) {
568+
} else {
569+
assert(EnableMemProfIndirectCallSupport);
565570
// For indirect callsites, create multiple Callsites, one per target.
566571
// This enables having a different set of clone versions per target,
567572
// and we will apply the cloning decisions while speculatively
@@ -1223,6 +1228,9 @@ bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
12231228
if (CI && CalledFunction->isIntrinsic())
12241229
return false;
12251230
} else {
1231+
// Skip indirect calls if we haven't enabled memprof ICP.
1232+
if (!EnableMemProfIndirectCallSupport)
1233+
return false;
12261234
// Skip inline assembly calls.
12271235
if (CI && CI->isInlineAsm())
12281236
return false;

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,18 @@
7676
;; for each profiled target in the VP metadata. They will have the same stackIds
7777
;; since the debug information for the callsite is the same.
7878
; RUN: llvm-dis %t/foo.o -o - | FileCheck %s --check-prefix=CALLSITES
79-
; CALLSITES: gv: (name: "_Z3fooR2B0j", {{.*}} callsites: ((callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)), (callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)))
79+
; CALLSITES: gv: (name: "_Z3fooR2B0j", {{.*}} callsites: ((callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)), (callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235))
8080

8181
;; Make sure that we don't get the synthesized callsite records if the
8282
;; -enable-memprof-indirect-call-support flag is false.
83-
; RUN: opt -thinlto-bc %t/foo.ll -enable-memprof-indirect-call-support=false -o - \
84-
; RUN: | llvm-dis -o - | FileCheck %s --implicit-check-not callsites
83+
; RUN: opt -thinlto-bc %t/foo.ll -enable-memprof-indirect-call-support=false >%t/foo.noicp.o
84+
; RUN: llvm-dis %t/foo.noicp.o -o - | FileCheck %s --implicit-check-not "stackIds: (16345663650247127235)"
8585

8686
;; First perform in-process ThinLTO
8787
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
8888
; RUN: -supports-hot-cold-new \
8989
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
90+
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
9091
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
9192
; RUN: -r=%t/main.o,_Znwm, \
9293
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -116,6 +117,7 @@
116117
; RUN: -supports-hot-cold-new \
117118
; RUN: -thinlto-distributed-indexes \
118119
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
120+
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
119121
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
120122
; RUN: -r=%t/main.o,_Znwm, \
121123
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -141,6 +143,36 @@
141143
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR \
142144
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO
143145

146+
;; Retry with the ICP-disabled object file, and make sure we disable it again
147+
;; so we don't look for the synthesized callsite records when applying imports.
148+
;; We should not get any cloning.
149+
; RUN: llvm-lto2 run %t/main.o %t/foo.noicp.o -enable-memprof-context-disambiguation \
150+
; RUN: -enable-memprof-indirect-call-support=false \
151+
; RUN: -supports-hot-cold-new \
152+
; RUN: -r=%t/foo.noicp.o,_Z3fooR2B0j,plx \
153+
; RUN: -r=%t/foo.noicp.o,_Z3xyzR2B0j, \
154+
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
155+
; RUN: -r=%t/main.o,_Znwm, \
156+
; RUN: -r=%t/main.o,_ZdlPvm, \
157+
; RUN: -r=%t/main.o,_Z8externalPi, \
158+
; RUN: -r=%t/main.o,main,plx \
159+
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
160+
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
161+
; RUN: -r=%t/main.o,_ZTV1B,plx \
162+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
163+
; RUN: -r=%t/main.o,_ZTS1B,plx \
164+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
165+
; RUN: -r=%t/main.o,_ZTS2B0,plx \
166+
; RUN: -r=%t/main.o,_ZTI2B0,plx \
167+
; RUN: -r=%t/main.o,_ZTI1B,plx \
168+
; RUN: -r=%t/main.o,_ZTV2B0,plx \
169+
; RUN: -thinlto-threads=1 \
170+
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
171+
; RUN: -pass-remarks=. -save-temps \
172+
; RUN: -o %t.noicp.out 2>&1 | FileCheck %s --implicit-check-not "created clone"
173+
174+
; RUN: llvm-dis %t.noicp.out.2.4.opt.bc -o - | FileCheck %s --implicit-check-not "_Z3fooR2B0j.memprof"
175+
144176
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
145177
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
146178
; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1
@@ -215,15 +247,22 @@
215247
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
216248
target triple = "x86_64-unknown-linux-gnu"
217249

250+
declare i32 @_Z3xyzR2B0j(ptr %b)
251+
218252
define i32 @_Z3fooR2B0j(ptr %b) {
219253
entry:
220254
%0 = load ptr, ptr %b, align 8
221255
%call = tail call i32 %0(ptr null, i32 0), !prof !0, !callsite !1
256+
;; Add a dummy call to ensure that we have some callsite metadata,
257+
;; which triggers callsite record checking in the ThinLTO backend
258+
;; even with -enable-memprof-indirect-call-support=false.
259+
%call2 = call i32 @_Z3xyzR2B0j(ptr null, i32 0), !callsite !2
222260
ret i32 0
223261
}
224262

225263
!0 = !{!"VP", i32 0, i64 4, i64 4445083295448962937, i64 2, i64 -2718743882639408571, i64 2}
226264
!1 = !{i64 -2101080423462424381}
265+
!2 = !{i64 1234}
227266

228267
;--- main.ll
229268
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

0 commit comments

Comments
 (0)