-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Metadata] Handle memprof, callsite merging when one is missing. #132106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Metadata] Handle memprof, callsite merging when one is missing. #132106
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-transforms Author: Snehasish Kumar (snehasish) ChangesFor memprof and callsite metadata we want to pick one deterministically Full diff: https://github.com/llvm/llvm-project/pull/132106.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 95f0d099aacb5..161c7c875e0eb 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3355,8 +3355,14 @@ static void combineMetadata(Instruction *K, const Instruction *J,
case LLVMContext::MD_invariant_group:
// Preserve !invariant.group in K.
break;
+ // Keep empty cases for mmra, memprof, and callsite to prevent them from
+ // being removed as unknown metadata. The actual merging is handled
+ // separately below.
case LLVMContext::MD_mmra:
- // Combine MMRAs
+ [[fallthrough]];
+ case LLVMContext::MD_memprof:
+ [[fallthrough]];
+ case LLVMContext::MD_callsite:
break;
case LLVMContext::MD_align:
if (!AAOnly && (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef)))
@@ -3369,14 +3375,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
- case LLVMContext::MD_memprof:
- if (!AAOnly)
- K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
- break;
- case LLVMContext::MD_callsite:
- if (!AAOnly)
- K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
- break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
break;
@@ -3420,6 +3418,26 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_mmra,
MMRAMetadata::combine(K->getContext(), JMMRA, KMMRA));
}
+
+ // Merge memprof metadata.
+ // Handle separately to support cases where only one instruction has the
+ // metadata.
+ auto JMemProf = J->getMetadata(LLVMContext::MD_memprof);
+ auto KMemProf = K->getMetadata(LLVMContext::MD_memprof);
+ if (!AAOnly && (JMemProf || KMemProf)) {
+ K->setMetadata(LLVMContext::MD_memprof,
+ MDNode::getMergedMemProfMetadata(KMemProf, JMemProf));
+ }
+
+ // Merge callsite metadata.
+ // Handle separately to support cases where only one instruction has the
+ // metadata.
+ auto JCallSite = J->getMetadata(LLVMContext::MD_callsite);
+ auto KCallSite = K->getMetadata(LLVMContext::MD_callsite);
+ if (!AAOnly && (JCallSite || KCallSite)) {
+ K->setMetadata(LLVMContext::MD_callsite,
+ MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite));
+ }
}
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
index 10c6aeb26ba76..d15eeb7b69fee 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
@@ -1,5 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-
;; Test to ensure that memprof related metadata is not dropped when
;; instructions are combined. Currently the metadata from the first instruction
;; is kept, which prevents full loss of profile context information.
@@ -32,6 +30,51 @@ if.end: ; preds = %if.else, %if.then
ret ptr %x.0
}
+define dso_local noundef nonnull ptr @_Z9test_leftb(i1 noundef zeroext %b) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z9test_leftb(
+; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
+; CHECK-NEXT: ret ptr [[CALL]]
+;
+entry:
+ br i1 %b, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+ ret ptr %x.0
+}
+
+define dso_local noundef nonnull ptr @_Z10test_rightb(i1 noundef zeroext %b) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z10test_rightb(
+; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
+; CHECK-NEXT: ret ptr [[CALL]]
+;
+entry:
+ br i1 %b, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4)
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+ ret ptr %x.0
+}
declare ptr @_Znwm(i64) nounwind readonly
@@ -43,9 +86,13 @@ declare ptr @_Znwm(i64) nounwind readonly
!5 = !{!6, !"cold"}
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
!7 = !{i64 123}
-;.
+
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
; CHECK: [[META3]] = !{i64 -852997907418798798}
-;.
+; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
+; CHECK: [[META5]] = !{[[META6:![0-9]+]], !"cold"}
+; CHECK: [[META6]] = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
+; CHECK: [[META7]] = !{i64 123}
+
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
@@ -1,5 +1,3 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why did this get dropped?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Teresa mentioned that when this test was introduced in #121359 it was copied over and not actually created using update_test_checks.py. Since I didn't use the tool to generate the checks for the new tests I dropped it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is it not possible to use generated checks in this test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's probably possible to do so after splitting out the additional test cases I added into their own files. Is it a requirement for SimplifyCFG tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Teresa mentioned that when this test was introduced in #121359 it was copied over and not actually created using update_test_checks.py. Since I didn't use the tool to generate the checks for the new tests I dropped it.
IIRC I copied the test from somewhere else so did run update_test_checks.py as in the original test. Does it not work to use that for the new tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for clarifying Teresa, I misunderstood your comment earlier. I've reverted the changes to the original test and added two new ones. The new tests also use update_test_checks to generate CHECK prefixes.
Ptal, @nikic
24e5573
to
a55c4a2
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm but the tests can be cleaned up a little as noted
a55c4a2
to
b2601e0
Compare
I'll wait a day and merge the stack on Wednesday morning unless there are additional comments. |
For memprof and callsite metadata we want to pick one deterministically and keep that even if one of them may be missing.
b2601e0
to
7772c93
Compare
; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s | ||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please no include data layout, triple, attributes etc in tests if they are not necessary for the test.
For memprof and callsite metadata we want to pick one deterministically
and keep that even if one of them may be missing.