Skip to content

Commit e39a9e2

Browse files
committed
[Metadata] Handle memprof, callsite merging when one is missing.
For memprof and callsite metadata, we want to pick one deterministically and keep it, even if one of the two instructions is missing the metadata.
1 parent bc37fea commit e39a9e2

File tree

2 files changed

+78
-13
lines changed

2 files changed

+78
-13
lines changed

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3355,8 +3355,14 @@ static void combineMetadata(Instruction *K, const Instruction *J,
33553355
case LLVMContext::MD_invariant_group:
33563356
// Preserve !invariant.group in K.
33573357
break;
3358+
// Keep empty cases for mmra, memprof, and callsite to prevent them from
3359+
// being removed as unknown metadata. The actual merging is handled
3360+
// separately below.
33583361
case LLVMContext::MD_mmra:
3359-
// Combine MMRAs
3362+
[[fallthrough]];
3363+
case LLVMContext::MD_memprof:
3364+
[[fallthrough]];
3365+
case LLVMContext::MD_callsite:
33603366
break;
33613367
case LLVMContext::MD_align:
33623368
if (!AAOnly && (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef)))
@@ -3369,14 +3375,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
33693375
K->setMetadata(Kind,
33703376
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
33713377
break;
3372-
case LLVMContext::MD_memprof:
3373-
if (!AAOnly)
3374-
K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
3375-
break;
3376-
case LLVMContext::MD_callsite:
3377-
if (!AAOnly)
3378-
K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
3379-
break;
33803378
case LLVMContext::MD_preserve_access_index:
33813379
// Preserve !preserve.access.index in K.
33823380
break;
@@ -3420,6 +3418,26 @@ static void combineMetadata(Instruction *K, const Instruction *J,
34203418
K->setMetadata(LLVMContext::MD_mmra,
34213419
MMRAMetadata::combine(K->getContext(), JMMRA, KMMRA));
34223420
}
3421+
3422+
// Merge memprof metadata.
3423+
// Handle separately to support cases where only one instruction has the
3424+
// metadata.
3425+
auto JMemProf = J->getMetadata(LLVMContext::MD_memprof);
3426+
auto KMemProf = K->getMetadata(LLVMContext::MD_memprof);
3427+
if (!AAOnly && (JMemProf || KMemProf)) {
3428+
K->setMetadata(LLVMContext::MD_memprof,
3429+
MDNode::getMergedMemProfMetadata(KMemProf, JMemProf));
3430+
}
3431+
3432+
// Merge callsite metadata.
3433+
// Handle separately to support cases where only one instruction has the
3434+
// metadata.
3435+
auto JCallSite = J->getMetadata(LLVMContext::MD_callsite);
3436+
auto KCallSite = K->getMetadata(LLVMContext::MD_callsite);
3437+
if (!AAOnly && (JCallSite || KCallSite)) {
3438+
K->setMetadata(LLVMContext::MD_callsite,
3439+
MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite));
3440+
}
34233441
}
34243442

34253443
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,

llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
31
;; Test to ensure that memprof related metadata is not dropped when
42
;; instructions are combined. Currently the metadata from the first instruction
53
;; is kept, which prevents full loss of profile context information.
@@ -32,6 +30,51 @@ if.end: ; preds = %if.else, %if.then
3230
ret ptr %x.0
3331
}
3432

33+
define dso_local noundef nonnull ptr @_Z9test_leftb(i1 noundef zeroext %b) local_unnamed_addr #0 {
34+
; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z9test_leftb(
35+
; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
36+
; CHECK-NEXT: [[ENTRY:.*:]]
37+
; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
38+
; CHECK-NEXT: ret ptr [[CALL]]
39+
;
40+
entry:
41+
br i1 %b, label %if.then, label %if.else
42+
43+
if.then: ; preds = %entry
44+
%call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
45+
br label %if.end
46+
47+
if.else: ; preds = %entry
48+
%call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4)
49+
br label %if.end
50+
51+
if.end: ; preds = %if.else, %if.then
52+
%x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
53+
ret ptr %x.0
54+
}
55+
56+
define dso_local noundef nonnull ptr @_Z10test_rightb(i1 noundef zeroext %b) local_unnamed_addr #0 {
57+
; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z10test_rightb(
58+
; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
59+
; CHECK-NEXT: [[ENTRY:.*:]]
60+
; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
61+
; CHECK-NEXT: ret ptr [[CALL]]
62+
;
63+
entry:
64+
br i1 %b, label %if.then, label %if.else
65+
66+
if.then: ; preds = %entry
67+
%call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4)
68+
br label %if.end
69+
70+
if.else: ; preds = %entry
71+
%call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
72+
br label %if.end
73+
74+
if.end: ; preds = %if.else, %if.then
75+
%x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
76+
ret ptr %x.0
77+
}
3578

3679
declare ptr @_Znwm(i64) nounwind readonly
3780

@@ -43,9 +86,13 @@ declare ptr @_Znwm(i64) nounwind readonly
4386
!5 = !{!6, !"cold"}
4487
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
4588
!7 = !{i64 123}
46-
;.
89+
4790
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
4891
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
4992
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
5093
; CHECK: [[META3]] = !{i64 -852997907418798798}
51-
;.
94+
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
95+
; CHECK: [[META5]] = !{[[META6:![0-9]+]], !"cold"}
96+
; CHECK: [[META6]] = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
97+
; CHECK: [[META7]] = !{i64 123}
98+

0 commit comments

Comments
 (0)