Skip to content

[MemProf] Skip unmatched callers when cloning #120455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3369,6 +3369,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
break;

// If the caller was not successfully matched to a call in the IR/summary,
// there is no point in trying to clone for it as we can't update that call.
if (!CallerEdge->Caller->hasCall()) {
++EI;
continue;
}

// Only need to process the ids along this edge pertaining to the given
// allocation.
auto CallerEdgeContextsForAlloc =
Expand Down
70 changes: 70 additions & 0 deletions llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
;; Test callsite context graph generation for simple call graph with
;; two memprof contexts and no inlining, where one callsite required for
;; cloning is missing (e.g. unmatched).
;;
;; Original code looks like:
;;
;; char *foo() {
;; return new char[10];
;; }
;;
;; int main(int argc, char **argv) {
;; char *x = foo();
;; char *y = foo();
;; memset(x, 0, 10);
;; memset(y, 0, 10);
;; delete[] x;
;; sleep(200);
;; delete[] y;
;; return 0;
;; }

; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-report-hinted-sizes \
; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
; RUN: --check-prefix=SIZESUNHINTED
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not \"memprof\"=\"cold\"

source_filename = "memprof-missing-callsite.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i32 @main() #0 {
entry:
;; Missing callsite metadata blocks cloning
%call = call ptr @_Z3foov()
%call1 = call ptr @_Z3foov()
ret i32 0
}

define internal ptr @_Z3foov() #0 {
entry:
%call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
ret ptr null
}

declare ptr @_Znam(i64)

; uselistorder directives
uselistorder ptr @_Z3foov, { 1, 0 }

attributes #0 = { noinline optnone }

!2 = !{!3, !5}
!3 = !{!4, !"notcold", !10}
!4 = !{i64 9086428284934609951, i64 8632435727821051414}
!5 = !{!6, !"cold", !11, !12}
!6 = !{i64 9086428284934609951, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!10 = !{i64 123, i64 100}
!11 = !{i64 456, i64 200}
!12 = !{i64 789, i64 300}

; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning
; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning
; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning
32 changes: 6 additions & 26 deletions llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
; RUN: -stats -debug -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
;; We should not see any type of cold attribute or cloning applied
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \
; RUN: --implicit-check-not ".memprof."

;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
Expand All @@ -39,26 +41,23 @@
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG

;; Run ThinLTO backend
;; We should not see any type of cold attribute or cloning applied
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-import-summary=%t.o.thinlto.bc \
; RUN: -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR
; RUN: -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \
; RUN: --implicit-check-not ".memprof."

; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains)

; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains

;; Check that all calls in the IR are to the original functions, leading to a
;; non-cold operator new call.

source_filename = "tailcall-nonunique.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline
; IR-LABEL: @_Z3barv()
define dso_local ptr @_Z3barv() local_unnamed_addr #0 {
entry:
; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9
ret ptr %call
}
Expand All @@ -67,54 +66,43 @@ entry:
declare ptr @_Znam(i64) #1

; Function Attrs: noinline
; IR-LABEL: @_Z5blah1v()
define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 {
entry:
; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}

; Function Attrs: noinline
; IR-LABEL: @_Z5blah2v()
define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 {
entry:
; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}

; Function Attrs: noinline
; IR-LABEL: @_Z4baz1v()
define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 {
entry:
; IR: call ptr @_Z5blah1v()
%call = tail call ptr @_Z5blah1v()
ret ptr %call
}

; Function Attrs: noinline
; IR-LABEL: @_Z4baz2v()
define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 {
entry:
; IR: call ptr @_Z5blah2v()
%call = tail call ptr @_Z5blah2v()
ret ptr %call
}

; Function Attrs: noinline
; IR-LABEL: @_Z3foob(i1 %b)
define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 {
entry:
br i1 %b, label %if.then, label %if.else

if.then: ; preds = %entry
; IR: call ptr @_Z4baz1v()
%call = tail call ptr @_Z4baz1v()
br label %return

if.else: ; preds = %entry
; IR: call ptr @_Z4baz2v()
%call1 = tail call ptr @_Z4baz2v()
br label %return

Expand All @@ -124,29 +112,21 @@ return: ; preds = %if.else, %if.then
}

; Function Attrs: noinline
; IR-LABEL: @xyz()
define dso_local i32 @xyz() local_unnamed_addr #0 {
delete.end13:
; IR: call ptr @_Z3foob(i1 true)
%call = tail call ptr @_Z3foob(i1 true)
; IR: call ptr @_Z3foob(i1 true)
%call1 = tail call ptr @_Z3foob(i1 true)
; IR: call ptr @_Z3foob(i1 false)
%call2 = tail call ptr @_Z3foob(i1 false)
; IR: call ptr @_Z3foob(i1 false)
%call3 = tail call ptr @_Z3foob(i1 false)
ret i32 0
}

define dso_local i32 @main() local_unnamed_addr #0 {
delete.end13:
; IR: call i32 @xyz()
%call1 = tail call i32 @xyz(), !callsite !11
ret i32 0
}

; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }

attributes #0 = { noinline }
attributes #1 = { nobuiltin allocsize(0) }
attributes #2 = { builtin allocsize(0) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,8 @@
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s

;; Make sure we created some clones
; CHECK: created clone A.memprof.1
; CHECK: created clone C.memprof.1
; CHECK: created clone D.memprof.1
; CHECK: created clone E.memprof.1
; CHECK: created clone B.memprof.1
; CHECK: created clone F.memprof.1
; CHECK: created clone G.memprof.1
;; Make sure we successfully created at least one clone
; CHECK: created clone {{.*}}.memprof.1

; ModuleID = '<stdin>'
source_filename = "reduced.ll"
Expand Down
Loading