-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Reapply "[MemProf] Streamline and avoid unnecessary context id duplication (#107918)" #110036
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
teresajohnson
merged 4 commits into
llvm:main
from
teresajohnson:memprof_reapply_107918_fixed
Sep 26, 2024
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
ee3d9d5
Reapply "[MemProf] Streamline and avoid unnecessary context id duplic…
teresajohnson 3b974a3
The actual fixes and a test case that provokes the original issue.
teresajohnson 93e319e
Fix assert from recent #109857 exposed by this test case
teresajohnson d1b5d9e
Address feedback
teresajohnson File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
llvm/test/Transforms/MemProfContextDisambiguation/inlined4.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
;; This test ensures that the logic which assigns calls to stack nodes | ||
;; correctly handles a case where multiple nodes have stack ids that | ||
;; overlap with each other but have different last nodes (can happen with | ||
;; inlining into various levels of a call chain). Specifically, when we | ||
;; have one that is duplicated (e.g. unrolling), we need to correctly | ||
;; handle the case where the context id has already been assigned to | ||
;; a different call with a different last node. | ||
|
||
;; -stats requires asserts | ||
; REQUIRES: asserts | ||
|
||
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ | ||
; RUN: -memprof-verify-ccg -memprof-verify-nodes \ | ||
; RUN: -stats -pass-remarks=memprof-context-disambiguation \ | ||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \ | ||
; RUN: --check-prefix=STATS --check-prefix=REMARKS | ||
|
||
; REMARKS: created clone _Z1Ab.memprof.1 | ||
; REMARKS: created clone _Z3XZNv.memprof.1 | ||
; REMARKS: call in clone main assigned to call function clone _Z3XZNv.memprof.1 | ||
;; Make sure the inlined context in _Z3XZNv, which partially overlaps the stack | ||
;; ids in the shorter inlined context of Z2XZv, correctly calls a cloned | ||
;; version of Z1Ab, which will call the cold annotated allocation. | ||
; REMARKS: call in clone _Z3XZNv.memprof.1 assigned to call function clone _Z1Ab.memprof.1 | ||
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold | ||
; REMARKS: call in clone main assigned to call function clone _Z3XZNv | ||
; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab | ||
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold | ||
|
||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
define dso_local void @_Z1Ab() { | ||
entry: | ||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1, !memprof !0, !callsite !5 | ||
ret void | ||
} | ||
|
||
; Function Attrs: nobuiltin | ||
declare ptr @_Znam(i64) #0 | ||
|
||
;; Inlining of stack id 2 into 3. Assume this is called from somewhere else. | ||
define dso_local void @_Z2XZv() local_unnamed_addr #0 { | ||
entry: | ||
;; Simulate duplication of the callsite (e.g. unrolling). | ||
call void @_Z1Ab(), !callsite !6 | ||
call void @_Z1Ab(), !callsite !6 | ||
ret void | ||
} | ||
|
||
;; Inlining of stack id 2 into 3 into 4. Called by main below. | ||
define dso_local void @_Z3XZNv() local_unnamed_addr { | ||
entry: | ||
call void @_Z1Ab(), !callsite !7 | ||
ret void | ||
} | ||
|
||
define dso_local noundef i32 @main() local_unnamed_addr { | ||
entry: | ||
call void @_Z3XZNv(), !callsite !8 ;; Not cold context | ||
call void @_Z3XZNv(), !callsite !9 ;; Cold context | ||
ret i32 0 | ||
} | ||
|
||
attributes #0 = { nobuiltin } | ||
attributes #7 = { builtin } | ||
|
||
!0 = !{!1, !3} | ||
;; Not cold context via first call to _Z3XZNv in main | ||
!1 = !{!2, !"notcold"} | ||
!2 = !{i64 1, i64 2, i64 3, i64 4, i64 5} | ||
;; Cold context via second call to _Z3XZNv in main | ||
!3 = !{!4, !"cold"} | ||
!4 = !{i64 1, i64 2, i64 3, i64 4, i64 6} | ||
!5 = !{i64 1} | ||
!6 = !{i64 2, i64 3} | ||
!7 = !{i64 2, i64 3, i64 4} | ||
!8 = !{i64 5} | ||
!9 = !{i64 6} | ||
|
||
; IR: define {{.*}} @_Z1Ab() | ||
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] | ||
; IR: define {{.*}} @_Z2XZv() | ||
; IR: call {{.*}} @_Z1Ab() | ||
; IR: call {{.*}} @_Z1Ab() | ||
; IR: define {{.*}} @_Z3XZNv() | ||
; IR: call {{.*}} @_Z1Ab() | ||
; IR: define {{.*}} @main() | ||
; IR: call {{.*}} @_Z3XZNv() | ||
; IR: call {{.*}} @_Z3XZNv.memprof.1() | ||
; IR: define {{.*}} @_Z1Ab.memprof.1() | ||
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] | ||
; IR: define {{.*}} @_Z3XZNv.memprof.1() | ||
; IR: call {{.*}} @_Z1Ab.memprof.1() | ||
|
||
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } | ||
; IR: attributes #[[COLD]] = { "memprof"="cold" } | ||
|
||
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) | ||
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) | ||
; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had rebased my client right before mailing this PR and a retest showed this new code from #109857 asserted with the test case from this PR. The assert is too strict at this point because we may have already reassigned the context ids on this last node to a different stack node (the same reason why we need to recompute the stack nodes for each call below).