-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MemProf] Allow promotion if target is a declaration #115555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,6 +93,8 @@ | |
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \ | ||
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj, \ | ||
; RUN: -r=%t/foo.o,_ZN1B3barEj, \ | ||
; RUN: -r=%t/main.o,_Z3fooR2B0j, \ | ||
; RUN: -r=%t/main.o,_Znwm, \ | ||
; RUN: -r=%t/main.o,_ZdlPvm, \ | ||
|
@@ -113,9 +115,9 @@ | |
; RUN: -pass-remarks=. -save-temps \ | ||
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \ | ||
; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS-MAIN \ | ||
; RUN: --check-prefix=REMARKS-FOO | ||
; RUN: --check-prefix=REMARKS-FOO --check-prefix=REMARKS-FOO-IMPORT | ||
|
||
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR | ||
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR --check-prefix=IR-IMPORT | ||
|
||
;; Try again but with distributed ThinLTO | ||
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ | ||
|
@@ -124,6 +126,8 @@ | |
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \ | ||
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj, \ | ||
; RUN: -r=%t/foo.o,_ZN1B3barEj, \ | ||
; RUN: -r=%t/main.o,_Z3fooR2B0j, \ | ||
; RUN: -r=%t/main.o,_Znwm, \ | ||
; RUN: -r=%t/main.o,_ZdlPvm, \ | ||
|
@@ -147,8 +151,9 @@ | |
; RUN: -enable-memprof-indirect-call-support=true \ | ||
; RUN: -summary-file=%t/foo.o.thinlto.bc -memprof-import-summary=%t/foo.o.thinlto.bc \ | ||
; RUN: -enable-import-metadata -stats -pass-remarks=. \ | ||
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR \ | ||
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO | ||
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR --check-prefix=IR-IMPORT \ | ||
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO \ | ||
; RUN: --check-prefix=REMARKS-FOO-IMPORT | ||
|
||
;; Retry with the ICP-disabled object file, and make sure we disable it again | ||
;; so we don't look for the synthesized callsite records when applying imports. | ||
|
@@ -159,6 +164,8 @@ | |
; RUN: -r=%t/foo.noicp.o,_Z3fooR2B0j,plx \ | ||
; RUN: -r=%t/foo.noicp.o,_ZN2B03barEj.abc,plx \ | ||
; RUN: -r=%t/foo.noicp.o,_Z3xyzR2B0j, \ | ||
; RUN: -r=%t/foo.noicp.o,_ZN2B03barEj, \ | ||
; RUN: -r=%t/foo.noicp.o,_ZN1B3barEj, \ | ||
; RUN: -r=%t/main.o,_Z3fooR2B0j, \ | ||
; RUN: -r=%t/main.o,_Znwm, \ | ||
; RUN: -r=%t/main.o,_ZdlPvm, \ | ||
|
@@ -184,6 +191,74 @@ | |
;; metadata. | ||
; RUN: llvm-dis %t.noicp.out.2.4.opt.bc -o - | FileCheck %s --implicit-check-not "_Z3fooR2B0j.memprof" --implicit-check-not "!callsite" | ||
|
||
;; Run in-process ThinLTO again, but with importing disabled by setting the | ||
;; instruction limit to 0. Ensure that the existing declarations of B::bar | ||
;; and B0::bar are sufficient to allow for the promotion and cloning. | ||
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ | ||
; RUN: -import-instr-limit=0 \ | ||
; RUN: -enable-memprof-indirect-call-support=true \ | ||
; RUN: -supports-hot-cold-new \ | ||
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \ | ||
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj, \ | ||
; RUN: -r=%t/foo.o,_ZN1B3barEj, \ | ||
; RUN: -r=%t/main.o,_Z3fooR2B0j, \ | ||
; RUN: -r=%t/main.o,_Znwm, \ | ||
; RUN: -r=%t/main.o,_ZdlPvm, \ | ||
; RUN: -r=%t/main.o,_Z8externalPi, \ | ||
; RUN: -r=%t/main.o,main,plx \ | ||
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \ | ||
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \ | ||
; RUN: -r=%t/main.o,_ZTV1B,plx \ | ||
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \ | ||
; RUN: -r=%t/main.o,_ZTS1B,plx \ | ||
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \ | ||
; RUN: -r=%t/main.o,_ZTS2B0,plx \ | ||
; RUN: -r=%t/main.o,_ZTI2B0,plx \ | ||
; RUN: -r=%t/main.o,_ZTI1B,plx \ | ||
; RUN: -r=%t/main.o,_ZTV2B0,plx \ | ||
; RUN: -thinlto-threads=1 \ | ||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ | ||
; RUN: -pass-remarks=. -save-temps \ | ||
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \ | ||
; RUN: --check-prefix=STATS-BE-NOIMPORT --check-prefix=REMARKS-MAIN \ | ||
; RUN: --check-prefix=REMARKS-FOO | ||
|
||
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR --check-prefix=IR-NOIMPORT | ||
|
||
;; Run it again but with -memprof-require-definition-for-promotion, and confirm | ||
;; that no promotions occur. | ||
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ | ||
; RUN: -import-instr-limit=0 \ | ||
; RUN: -memprof-require-definition-for-promotion \ | ||
; RUN: -enable-memprof-indirect-call-support=true \ | ||
; RUN: -supports-hot-cold-new \ | ||
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \ | ||
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \ | ||
; RUN: -r=%t/foo.o,_ZN2B03barEj, \ | ||
; RUN: -r=%t/foo.o,_ZN1B3barEj, \ | ||
; RUN: -r=%t/main.o,_Z3fooR2B0j, \ | ||
; RUN: -r=%t/main.o,_Znwm, \ | ||
; RUN: -r=%t/main.o,_ZdlPvm, \ | ||
; RUN: -r=%t/main.o,_Z8externalPi, \ | ||
; RUN: -r=%t/main.o,main,plx \ | ||
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \ | ||
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \ | ||
; RUN: -r=%t/main.o,_ZTV1B,plx \ | ||
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \ | ||
; RUN: -r=%t/main.o,_ZTS1B,plx \ | ||
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \ | ||
; RUN: -r=%t/main.o,_ZTS2B0,plx \ | ||
; RUN: -r=%t/main.o,_ZTI2B0,plx \ | ||
; RUN: -r=%t/main.o,_ZTI1B,plx \ | ||
; RUN: -r=%t/main.o,_ZTV2B0,plx \ | ||
; RUN: -thinlto-threads=1 \ | ||
; RUN: -memprof-verify-ccg -memprof-verify-nodes \ | ||
; RUN: -pass-remarks=. \ | ||
; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not Promote | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little confused why this one has There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because the flag enforces the more conservative behavior, requiring a definition. |
||
|
||
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1 | ||
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1 | ||
; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1 | ||
|
@@ -208,51 +283,59 @@ | |
; REMARKS-FOO: call in clone _Z3fooR2B0j promoted and assigned to call function clone _ZN2B03barEj | ||
; REMARKS-FOO: Promote indirect call to _ZN2B03barEj with count 2 out of 2 | ||
; REMARKS-FOO: call in clone _Z3fooR2B0j.memprof.1 promoted and assigned to call function clone _ZN2B03barEj.memprof.1 | ||
; REMARKS-FOO: created clone _ZN2B03barEj.memprof.1 | ||
; REMARKS-FOO: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold | ||
; REMARKS-FOO: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold | ||
; REMARKS-FOO: created clone _ZN1B3barEj.memprof.1 | ||
; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold | ||
; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold | ||
; REMARKS-FOO-IMPORT: created clone _ZN2B03barEj.memprof.1 | ||
; REMARKS-FOO-IMPORT: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold | ||
; REMARKS-FOO-IMPORT: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold | ||
; REMARKS-FOO-IMPORT: created clone _ZN1B3barEj.memprof.1 | ||
; REMARKS-FOO-IMPORT: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold | ||
; REMARKS-FOO-IMPORT: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold | ||
|
||
; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis | ||
; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend | ||
; STATS-BE-NOIMPORT: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend | ||
; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis | ||
; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend | ||
; STATS-BE-NOIMPORT: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend | ||
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis | ||
; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend | ||
; STATS-BE-NOIMPORT: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend | ||
|
||
; IR-NOIMPORT: foo | ||
; IR: define {{.*}} @_Z3fooR2B0j( | ||
; IR: %1 = icmp eq ptr %0, @_ZN1B3barEj | ||
; IR: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect | ||
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj | ||
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect | ||
; IR: if.true.direct_targ: | ||
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]] | ||
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]] | ||
; IR-NOIMPORT: call {{.*}} @_ZN1B3barEj( | ||
; IR: if.false.orig_indirect: | ||
; IR: %2 = icmp eq ptr %0, @_ZN2B03barEj | ||
; IR: br i1 %2, label %if.true.direct_targ1, label %if.false.orig_indirect2 | ||
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj | ||
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2 | ||
; IR: if.true.direct_targ1: | ||
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]] | ||
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]] | ||
; IR-NOIMPORT: call {{.*}} @_ZN2B03barEj( | ||
; IR: if.false.orig_indirect2: | ||
; IR: call {{.*}} %0 | ||
|
||
; IR: define {{.*}} @_Z3fooR2B0j.memprof.1( | ||
;; We should still compare against the original versions of bar since that is | ||
;; what is in the vtable. However, we should have called the cloned versions | ||
;; that perform cold allocations, which were subsequently inlined. | ||
; IR: %1 = icmp eq ptr %0, @_ZN1B3barEj | ||
; IR: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect | ||
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj | ||
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect | ||
; IR: if.true.direct_targ: | ||
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]] | ||
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]] | ||
; IR-NOIMPORT: call {{.*}} @_ZN1B3barEj.memprof.1( | ||
; IR: if.false.orig_indirect: | ||
; IR: %2 = icmp eq ptr %0, @_ZN2B03barEj | ||
; IR: br i1 %2, label %if.true.direct_targ1, label %if.false.orig_indirect2 | ||
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj | ||
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2 | ||
; IR: if.true.direct_targ1: | ||
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]] | ||
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]] | ||
; IR-NOIMPORT: call {{.*}} @_ZN2B03barEj.memprof.1( | ||
; IR: if.false.orig_indirect2: | ||
; IR: call {{.*}} %0 | ||
|
||
; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" | ||
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" | ||
; IR-IMPORT: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" | ||
; IR-IMPORT: attributes #[[COLD]] = {{.*}} "memprof"="cold" | ||
|
||
; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend | ||
; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend | ||
|
@@ -272,6 +355,9 @@ define i32 @_ZN2B03barEj.abc(ptr %this, i32 %s) { | |
ret i32 0 | ||
} | ||
|
||
declare i32 @_ZN2B03barEj(ptr %this, i32 %s) | ||
declare i32 @_ZN1B3barEj(ptr %this, i32 %s) | ||
|
||
define i32 @_Z3fooR2B0j(ptr %b) { | ||
entry: | ||
%0 = load ptr, ptr %b, align 8 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this flag default to true to preserve the current behaviour (when the flag is not set on the cmd line)?
The commit message also mentioned enabled by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant to change it to be more aggressive by default and enable promotion when the target is a declaration (that's what I meant by "enabled" - promotion enabled, not the flag).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I updated the description to be clearer about what I mean here; I reread it and it was definitely confusing.