Skip to content

[MemProf][PGO] Prevent dropping of profile metadata during optimization #121359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/Metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,8 @@ class MDNode : public Metadata {
static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B,
const Instruction *AInstr,
const Instruction *BInstr);
static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
};

/// Tuple of metadata.
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Utils/Local.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,11 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
MemorySSAUpdater *MSSAU = nullptr);

/// DO NOT CALL EXTERNALLY.
/// FIXME: https://github.com/llvm/llvm-project/issues/121495
/// Once external callers of this function are removed, either inline into
/// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
///
/// Combine the metadata of two instructions so that K can replace J. Some
/// metadata kinds can only be kept if K does not move, meaning it dominated
/// J in the original IR.
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Analysis/MemoryProfileInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,20 @@ template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
return mdconst::dyn_extract<ConstantInt>(N->operands().back())
->getZExtValue();
}

MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
// TODO: Support more sophisticated merging, such as selecting the one with
// more bytes allocated, or implement support for carrying multiple allocation
// leaf contexts. For now, keep the first one.
if (A)
return A;
return B;
}

MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
// TODO: Support more sophisticated merging, which will require support for
// carrying multiple contexts. For now, keep the first one.
if (A)
return A;
return B;
}
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
BasicBlock *BB = std::get<0>(Incoming);
Value *V = std::get<1>(Incoming);
LoadInst *LI = cast<LoadInst>(V);
// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Call combineMetadataForCSE instead, so that an explicit set of KnownIDs
// doesn't need to be maintained here.
combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
Expand Down
12 changes: 8 additions & 4 deletions llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,14 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group};
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group, LLVMContext::MD_prof,
LLVMContext::MD_memprof, LLVMContext::MD_callsite};
// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Use custom AA metadata combining handling instead of combineMetadata, which
// is meant for CSE and will drop any metadata not in the KnownIDs list.
combineMetadata(ReplInst, I, KnownIDs, true);
}

Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3308,6 +3308,9 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return Changed;
}

// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Once external callers of this function are removed, either inline into
// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
void llvm::combineMetadata(Instruction *K, const Instruction *J,
ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
Expand All @@ -3320,6 +3323,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,

switch (Kind) {
default:
// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Change to removing only explicitly listed other metadata, and assert
// on unknown metadata, to avoid inadvertently dropping newly added
// metadata types.
K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
Expand Down Expand Up @@ -3379,6 +3386,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_memprof:
K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
break;
case LLVMContext::MD_callsite:
K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
break;
Expand Down Expand Up @@ -3442,7 +3455,9 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
LLVMContext::MD_nontemporal,
LLVMContext::MD_noundef,
LLVMContext::MD_mmra,
LLVMContext::MD_noalias_addrspace};
LLVMContext::MD_noalias_addrspace,
LLVMContext::MD_memprof,
LLVMContext::MD_callsite};
combineMetadata(K, J, KnownIDs, KDominatesJ);
}

Expand Down
18 changes: 18 additions & 0 deletions llvm/test/Transforms/MemCpyOpt/memcpy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,19 @@ define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
ret void
}

define void @byval_param_profile_metadata(ptr align 4 byval(i32) %ptr) {
; CHECK-LABEL: @byval_param_profile_metadata(
; CHECK-NEXT: store i32 1, ptr [[PTR2:%.*]], align 4
; CHECK-NEXT: call void @f_byval(ptr byval(i32) align 4 [[PTR2]]), !prof [[PROF3:![0-9]+]], !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
; CHECK-NEXT: ret void
;
%tmp = alloca i32, align 4
store i32 1, ptr %ptr
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp, ptr align 4 %ptr, i64 4, i1 false)
call void @f_byval(ptr align 4 byval(i32) %tmp), !memprof !3, !callsite !6, !prof !7
ret void
}

define void @memcpy_memory_none(ptr %p, ptr %p2, i64 %size) {
; CHECK-LABEL: @memcpy_memory_none(
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[SIZE:%.*]], i1 false) #[[ATTR7:[0-9]+]]
Expand Down Expand Up @@ -897,3 +910,8 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) {
!0 = !{!0}
!1 = !{!1, !0}
!2 = !{!1}
!3 = !{!4}
!4 = !{!5, !"cold"}
!5 = !{i64 123, i64 456}
!6 = !{i64 123}
!7 = !{!"branch_weights", i32 10}
51 changes: 51 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

;; Test to ensure that memprof related metadata is not dropped when
;; instructions are combined. Currently the metadata from the first instruction
;; is kept, which prevents full loss of profile context information.

; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define dso_local noundef nonnull ptr @_Z4testb(i1 noundef zeroext %b) local_unnamed_addr #0 {
; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z4testb(
; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
; CHECK-NEXT: ret ptr [[CALL]]
;
entry:
br i1 %b, label %if.then, label %if.else

if.then: ; preds = %entry
%call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
br label %if.end

if.else: ; preds = %entry
%call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
br label %if.end

if.end: ; preds = %if.else, %if.then
%x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
ret ptr %x.0
}


declare ptr @_Znwm(i64) nounwind readonly

!0 = !{!1}
!1 = !{!2, !"notcold"}
!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
!3 = !{i64 -852997907418798798}
!4 = !{!5}
!5 = !{!6, !"cold"}
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
!7 = !{i64 123}
;.
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
; CHECK: [[META3]] = !{i64 -852997907418798798}
;.
Loading