-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MemProf] Fix the option to disable memprof ICP #112917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The -enable-memprof-indirect-call-support meant to guard the recently added memprof ICP support was not used in enough places. Specifically, it was not checked in mayHaveMemprofSummary, which is called from the ThinLTO backend applyImports. This led to failures when checking the callsite records, as we incorrectly expected records for indirect calls. Fix the option to be checked in all necessary locations, and add testing.
@llvm/pr-subscribers-lto @llvm/pr-subscribers-llvm-analysis Author: Teresa Johnson (teresajohnson) ChangesThe -enable-memprof-indirect-call-support meant to guard the recently Fix the option to be checked in all necessary locations, and add testing. Full diff: https://github.com/llvm/llvm-project/pull/112917.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 1bd9ee651d2b0b..0f4e85f5123f3a 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -503,6 +503,10 @@ static void computeFunctionSummary(
if (!IsThinLTO)
continue;
+ // Skip indirect calls if we haven't enabled memprof ICP.
+ if (!CalledFunction && !EnableMemProfIndirectCallSupport)
+ continue;
+
// Ensure we keep this analysis in sync with the handling in the ThinLTO
// backend (see MemProfContextDisambiguation::applyImport). Save this call
// so that we can skip it in checking the reverse case later.
@@ -561,7 +565,8 @@ static void computeFunctionSummary(
auto CalleeValueInfo =
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
Callsites.push_back({CalleeValueInfo, StackIdIndices});
- } else if (EnableMemProfIndirectCallSupport) {
+ } else {
+ assert(EnableMemProfIndirectCallSupport);
// For indirect callsites, create multiple Callsites, one per target.
// This enables having a different set of clone versions per target,
// and we will apply the cloning decisions while speculatively
@@ -1223,6 +1228,9 @@ bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
if (CI && CalledFunction->isIntrinsic())
return false;
} else {
+ // Skip indirect calls if we haven't enabled memprof ICP.
+ if (!EnableMemProfIndirectCallSupport)
+ return false;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
return false;
diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll
index 5c6d4e383d3217..e19c56b90e6200 100644
--- a/llvm/test/ThinLTO/X86/memprof-icp.ll
+++ b/llvm/test/ThinLTO/X86/memprof-icp.ll
@@ -76,17 +76,18 @@
;; for each profiled target in the VP metadata. They will have the same stackIds
;; since the debug information for the callsite is the same.
; RUN: llvm-dis %t/foo.o -o - | FileCheck %s --check-prefix=CALLSITES
-; CALLSITES: gv: (name: "_Z3fooR2B0j", {{.*}} callsites: ((callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)), (callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)))
+; CALLSITES: gv: (name: "_Z3fooR2B0j", {{.*}} callsites: ((callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)), (callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235))
;; Make sure that we don't get the synthesized callsite records if the
;; -enable-memprof-indirect-call-support flag is false.
-; RUN: opt -thinlto-bc %t/foo.ll -enable-memprof-indirect-call-support=false -o - \
-; RUN: | llvm-dis -o - | FileCheck %s --implicit-check-not callsites
+; RUN: opt -thinlto-bc %t/foo.ll -enable-memprof-indirect-call-support=false >%t/foo.noicp.o
+; RUN: llvm-dis %t/foo.noicp.o -o - | FileCheck %s --implicit-check-not "stackIds: (16345663650247127235)"
;; First perform in-process ThinLTO
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
+; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -116,6 +117,7 @@
; RUN: -supports-hot-cold-new \
; RUN: -thinlto-distributed-indexes \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
+; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -141,6 +143,36 @@
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO
+;; Retry with the ICP-disabled object file, and make sure we disable it again
+;; so we don't look for the synthesized callsite records when applying imports.
+;; We should not get any cloning.
+; RUN: llvm-lto2 run %t/main.o %t/foo.noicp.o -enable-memprof-context-disambiguation \
+; RUN: -enable-memprof-indirect-call-support=false \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t/foo.noicp.o,_Z3fooR2B0j,plx \
+; RUN: -r=%t/foo.noicp.o,_Z3xyzR2B0j, \
+; RUN: -r=%t/main.o,_Z3fooR2B0j, \
+; RUN: -r=%t/main.o,_Znwm, \
+; RUN: -r=%t/main.o,_ZdlPvm, \
+; RUN: -r=%t/main.o,_Z8externalPi, \
+; RUN: -r=%t/main.o,main,plx \
+; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
+; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
+; RUN: -r=%t/main.o,_ZTV1B,plx \
+; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
+; RUN: -r=%t/main.o,_ZTS1B,plx \
+; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
+; RUN: -r=%t/main.o,_ZTS2B0,plx \
+; RUN: -r=%t/main.o,_ZTI2B0,plx \
+; RUN: -r=%t/main.o,_ZTI1B,plx \
+; RUN: -r=%t/main.o,_ZTV2B0,plx \
+; RUN: -thinlto-threads=1 \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -pass-remarks=. -save-temps \
+; RUN: -o %t.noicp.out 2>&1 | FileCheck %s --implicit-check-not "created clone"
+
+; RUN: llvm-dis %t.noicp.out.2.4.opt.bc -o - | FileCheck %s --implicit-check-not "_Z3fooR2B0j.memprof"
+
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1
@@ -215,15 +247,22 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+declare i32 @_Z3xyzR2B0j(ptr %b)
+
define i32 @_Z3fooR2B0j(ptr %b) {
entry:
%0 = load ptr, ptr %b, align 8
%call = tail call i32 %0(ptr null, i32 0), !prof !0, !callsite !1
+ ;; Add a dummy call to ensure that we have some callsite metadata,
+ ;; which triggers callsite record checking in the ThinLTO backend
+ ;; even with -enable-memprof-indirect-call-support=false.
+ %call2 = call i32 @_Z3xyzR2B0j(ptr null, i32 0), !callsite !2
ret i32 0
}
!0 = !{!"VP", i32 0, i64 4, i64 4445083295448962937, i64 2, i64 -2718743882639408571, i64 2}
!1 = !{i64 -2101080423462424381}
+!2 = !{i64 1234}
;--- main.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
The -enable-memprof-indirect-call-support meant to guard the recently
added memprof ICP support was not used in enough places. Specifically,
it was not checked in mayHaveMemprofSummary, which is called from the
ThinLTO backend applyImports. This led to failures when checking the
callsite records, as we incorrectly expected records for indirect calls.
Fix the option to be checked in all necessary locations, and add testing.