Skip to content

Commit 3a423a1

Browse files
[MemProf][PGO] Prevent dropping of profile metadata during optimization (#121359)
This patch fixes a couple of places where memprof-related metadata (!memprof and !callsite) were being dropped, and one place where PGO metadata (!prof) was being dropped. All were due to instances of combineMetadata() being invoked. That function drops all metadata not in the list provided by the client, and also drops any not in its switch statement. Memprof metadata needed a case in the combineMetadata switch statement. For now we simply keep the metadata of the instruction being kept, which doesn't retain all the profile information when two calls with memprof metadata are being combined, but at least retains some. For the memprof metadata being dropped during call CSE, add memprof and callsite metadata to the list of known ids in combineMetadataForCSE. Neither memprof nor regular prof metadata were in the list of known ids for the callsite in MemCpyOptimizer, which was added to combine AA metadata after optimization of byval arguments fed by memcpy instructions, and similar types of optimizations of memcpy uses. There is one other callsite of combineMetadata, but it is only invoked on load instructions, which do not carry these types of metadata.
1 parent 5f5792a commit 3a423a1

File tree

8 files changed

+120
-5
lines changed

8 files changed

+120
-5
lines changed

llvm/include/llvm/IR/Metadata.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,6 +1464,8 @@ class MDNode : public Metadata {
14641464
static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B,
14651465
const Instruction *AInstr,
14661466
const Instruction *BInstr);
1467+
static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
1468+
static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
14671469
};
14681470

14691471
/// Tuple of metadata.

llvm/include/llvm/Transforms/Utils/Local.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,11 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
412412
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
413413
MemorySSAUpdater *MSSAU = nullptr);
414414

415+
/// DO NOT CALL EXTERNALLY.
416+
/// FIXME: https://github.com/llvm/llvm-project/issues/121495
417+
/// Once external callers of this function are removed, either inline into
418+
/// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
419+
///
415420
/// Combine the metadata of two instructions so that K can replace J. Some
416421
/// metadata kinds can only be kept if K does not move, meaning it dominated
417422
/// J in the original IR.

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,3 +347,20 @@ template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
347347
return mdconst::dyn_extract<ConstantInt>(N->operands().back())
348348
->getZExtValue();
349349
}
350+
351+
MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
352+
// TODO: Support more sophisticated merging, such as selecting the one with
353+
// more bytes allocated, or implement support for carrying multiple allocation
354+
// leaf contexts. For now, keep the first one.
355+
if (A)
356+
return A;
357+
return B;
358+
}
359+
360+
MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
361+
// TODO: Support more sophisticated merging, which will require support for
362+
// carrying multiple contexts. For now, keep the first one.
363+
if (A)
364+
return A;
365+
return B;
366+
}

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
788788
BasicBlock *BB = std::get<0>(Incoming);
789789
Value *V = std::get<1>(Incoming);
790790
LoadInst *LI = cast<LoadInst>(V);
791+
// FIXME: https://github.com/llvm/llvm-project/issues/121495
792+
// Call combineMetadataForCSE instead, so that an explicit set of KnownIDs
793+
// doesn't need to be maintained here.
791794
combineMetadata(NewLI, LI, KnownIDs, true);
792795
Value *NewInVal = LI->getOperand(0);
793796
if (NewInVal != InVal)

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,14 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
345345
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
346346
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
347347
// handled here, but combineMetadata doesn't support them yet
348-
unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
349-
LLVMContext::MD_noalias,
350-
LLVMContext::MD_invariant_group,
351-
LLVMContext::MD_access_group};
348+
unsigned KnownIDs[] = {
349+
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
350+
LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
351+
LLVMContext::MD_access_group, LLVMContext::MD_prof,
352+
LLVMContext::MD_memprof, LLVMContext::MD_callsite};
353+
// FIXME: https://github.com/llvm/llvm-project/issues/121495
354+
// Use custom AA metadata combining handling instead of combineMetadata, which
355+
// is meant for CSE and will drop any metadata not in the KnownIDs list.
352356
combineMetadata(ReplInst, I, KnownIDs, true);
353357
}
354358

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3308,6 +3308,9 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
33083308
return Changed;
33093309
}
33103310

3311+
// FIXME: https://github.com/llvm/llvm-project/issues/121495
3312+
// Once external callers of this function are removed, either inline into
3313+
// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
33113314
void llvm::combineMetadata(Instruction *K, const Instruction *J,
33123315
ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
33133316
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
@@ -3320,6 +3323,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
33203323

33213324
switch (Kind) {
33223325
default:
3326+
// FIXME: https://github.com/llvm/llvm-project/issues/121495
3327+
// Change to removing only explicitly listed other metadata, and assert
3328+
// on unknown metadata, to avoid inadvertently dropping newly added
3329+
// metadata types.
33233330
K->setMetadata(Kind, nullptr); // Remove unknown metadata
33243331
break;
33253332
case LLVMContext::MD_dbg:
@@ -3379,6 +3386,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
33793386
K->setMetadata(Kind,
33803387
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
33813388
break;
3389+
case LLVMContext::MD_memprof:
3390+
K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
3391+
break;
3392+
case LLVMContext::MD_callsite:
3393+
K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
3394+
break;
33823395
case LLVMContext::MD_preserve_access_index:
33833396
// Preserve !preserve.access.index in K.
33843397
break;
@@ -3442,7 +3455,9 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
34423455
LLVMContext::MD_nontemporal,
34433456
LLVMContext::MD_noundef,
34443457
LLVMContext::MD_mmra,
3445-
LLVMContext::MD_noalias_addrspace};
3458+
LLVMContext::MD_noalias_addrspace,
3459+
LLVMContext::MD_memprof,
3460+
LLVMContext::MD_callsite};
34463461
combineMetadata(K, J, KnownIDs, KDominatesJ);
34473462
}
34483463

llvm/test/Transforms/MemCpyOpt/memcpy.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,19 @@ define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
803803
ret void
804804
}
805805

806+
define void @byval_param_profile_metadata(ptr align 4 byval(i32) %ptr) {
807+
; CHECK-LABEL: @byval_param_profile_metadata(
808+
; CHECK-NEXT: store i32 1, ptr [[PTR2:%.*]], align 4
809+
; CHECK-NEXT: call void @f_byval(ptr byval(i32) align 4 [[PTR2]]), !prof [[PROF3:![0-9]+]], !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
810+
; CHECK-NEXT: ret void
811+
;
812+
%tmp = alloca i32, align 4
813+
store i32 1, ptr %ptr
814+
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp, ptr align 4 %ptr, i64 4, i1 false)
815+
call void @f_byval(ptr align 4 byval(i32) %tmp), !memprof !3, !callsite !6, !prof !7
816+
ret void
817+
}
818+
806819
define void @memcpy_memory_none(ptr %p, ptr %p2, i64 %size) {
807820
; CHECK-LABEL: @memcpy_memory_none(
808821
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[SIZE:%.*]], i1 false) #[[ATTR7:[0-9]+]]
@@ -897,3 +910,8 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) {
897910
!0 = !{!0}
898911
!1 = !{!1, !0}
899912
!2 = !{!1}
913+
!3 = !{!4}
914+
!4 = !{!5, !"cold"}
915+
!5 = !{i64 123, i64 456}
916+
!6 = !{i64 123}
917+
!7 = !{!"branch_weights", i32 10}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
3+
;; Test to ensure that memprof related metadata is not dropped when
4+
;; instructions are combined. Currently the metadata from the first instruction
5+
;; is kept, which prevents full loss of profile context information.
6+
7+
; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
8+
9+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
10+
target triple = "x86_64-unknown-linux-gnu"
11+
12+
define dso_local noundef nonnull ptr @_Z4testb(i1 noundef zeroext %b) local_unnamed_addr #0 {
13+
; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z4testb(
14+
; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
15+
; CHECK-NEXT: [[ENTRY:.*:]]
16+
; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
17+
; CHECK-NEXT: ret ptr [[CALL]]
18+
;
19+
entry:
20+
br i1 %b, label %if.then, label %if.else
21+
22+
if.then: ; preds = %entry
23+
%call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
24+
br label %if.end
25+
26+
if.else: ; preds = %entry
27+
%call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
28+
br label %if.end
29+
30+
if.end: ; preds = %if.else, %if.then
31+
%x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
32+
ret ptr %x.0
33+
}
34+
35+
36+
declare ptr @_Znwm(i64) nounwind readonly
37+
38+
!0 = !{!1}
39+
!1 = !{!2, !"notcold"}
40+
!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
41+
!3 = !{i64 -852997907418798798}
42+
!4 = !{!5}
43+
!5 = !{!6, !"cold"}
44+
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
45+
!7 = !{i64 123}
46+
;.
47+
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
48+
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
49+
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
50+
; CHECK: [[META3]] = !{i64 -852997907418798798}
51+
;.

0 commit comments

Comments
 (0)