Skip to content

[MemProf] Tolerate missing leaf debug frames #71233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/include/llvm/Passes/PassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -628,11 +628,16 @@ class PassBuilder {
Error parseModulePassPipeline(ModulePassManager &MPM,
ArrayRef<PipelineElement> Pipeline);

// Adds passes to do pre-inlining and related cleanup passes before
// profile instrumentation/matching (to enable better context sensitivity),
// and for memprof to enable better matching with missing debug frames.
void addPreInlinerPasses(ModulePassManager &MPM, OptimizationLevel Level,
ThinOrFullLTOPhase LTOPhase);

void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level,
bool RunProfileGen, bool IsCS,
bool AtomicCounterUpdate, std::string ProfileFile,
std::string ProfileRemappingFile,
ThinOrFullLTOPhase LTOPhase,
IntrusiveRefCntPtr<vfs::FileSystem> FS);

// Extension Point callbacks
Expand Down
89 changes: 50 additions & 39 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -733,47 +733,52 @@ void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
MPM.addPass(NameAnonGlobalPass());
}

void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
OptimizationLevel Level, bool RunProfileGen,
bool IsCS, bool AtomicCounterUpdate,
std::string ProfileFile,
std::string ProfileRemappingFile,
ThinOrFullLTOPhase LTOPhase,
IntrusiveRefCntPtr<vfs::FileSystem> FS) {
void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
OptimizationLevel Level,
ThinOrFullLTOPhase LTOPhase) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!IsCS && !DisablePreInliner) {
InlineParams IP;
if (DisablePreInliner)
return;
InlineParams IP;

IP.DefaultThreshold = PreInlineThreshold;
IP.DefaultThreshold = PreInlineThreshold;

// FIXME: The hint threshold has the same value used by the regular inliner
// when not optimzing for size. This should probably be lowered after
// performance testing.
// FIXME: this comment is cargo culted from the old pass manager, revisit).
IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
ModuleInlinerWrapperPass MIWP(
IP, /* MandatoryFirst */ true,
InlineContext{LTOPhase, InlinePass::EarlyInliner});
CGSCCPassManager &CGPipeline = MIWP.getPM();
// FIXME: The hint threshold has the same value used by the regular inliner
// when not optimzing for size. This should probably be lowered after
// performance testing.
// FIXME: this comment is cargo culted from the old pass manager, revisit).
IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
ModuleInlinerWrapperPass MIWP(
IP, /* MandatoryFirst */ true,
InlineContext{LTOPhase, InlinePass::EarlyInliner});
CGSCCPassManager &CGPipeline = MIWP.getPM();

FunctionPassManager FPM;
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
true))); // Merge & remove basic blocks.
FPM.addPass(InstCombinePass()); // Combine silly sequences.
invokePeepholeEPCallbacks(FPM, Level);
FunctionPassManager FPM;
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
true))); // Merge & remove basic blocks.
FPM.addPass(InstCombinePass()); // Combine silly sequences.
invokePeepholeEPCallbacks(FPM, Level);

CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
std::move(FPM), PTO.EagerlyInvalidateAnalyses));
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
std::move(FPM), PTO.EagerlyInvalidateAnalyses));

MPM.addPass(std::move(MIWP));
MPM.addPass(std::move(MIWP));

// Delete anything that is now dead to make sure that we don't instrument
// dead code. Instrumentation can end up keeping dead code around and
// dramatically increase code size.
MPM.addPass(GlobalDCEPass());
}
// Delete anything that is now dead to make sure that we don't instrument
// dead code. Instrumentation can end up keeping dead code around and
// dramatically increase code size.
MPM.addPass(GlobalDCEPass());
}

void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
OptimizationLevel Level, bool RunProfileGen,
bool IsCS, bool AtomicCounterUpdate,
std::string ProfileFile,
std::string ProfileRemappingFile,
IntrusiveRefCntPtr<vfs::FileSystem> FS) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");

if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
Expand Down Expand Up @@ -1104,14 +1109,20 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
PTO.EagerlyInvalidateAnalyses));

// Invoke the pre-inliner passes for instrumentation PGO or MemProf.
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
addPreInlinerPasses(MPM, Level, Phase);

// Add all the requested passes for instrumentation PGO, if requested.
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse)) {
addPGOInstrPasses(MPM, Level,
/*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
/*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
PGOOpt->FS);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
Expand Down Expand Up @@ -1332,12 +1343,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
LTOPhase, PGOOpt->FS);
PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
LTOPhase, PGOOpt->FS);
PGOOpt->FS);
}

// Re-compute GlobalsAA here prior to function passes. This is particularly
Expand Down Expand Up @@ -1831,12 +1842,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
PGOOpt->FS);
}

// Break up allocas
Expand Down
18 changes: 9 additions & 9 deletions llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -808,19 +808,19 @@ static void readMemprof(Module &M, Function &F,
auto CalleeGUID = Function::getGUID(Name);
auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
ProfileHasColumns ? DIL->getColumn() : 0);
// LeafFound will only be false on the first iteration, since we either
// set it true or break out of the loop below.
// Check if we have found the profile's leaf frame. If yes, collect
// the rest of the call's inlined context starting here. If not, see if
// we find a match further up the inlined context (in case the profile
// was missing debug frames at the leaf).
if (!LeafFound) {
AllocInfoIter = LocHashToAllocInfo.find(StackId);
CallSitesIter = LocHashToCallSites.find(StackId);
// Check if the leaf is in one of the maps. If not, no need to look
// further at this call.
if (AllocInfoIter == LocHashToAllocInfo.end() &&
CallSitesIter == LocHashToCallSites.end())
break;
LeafFound = true;
if (AllocInfoIter != LocHashToAllocInfo.end() ||
CallSitesIter != LocHashToCallSites.end())
LeafFound = true;
}
InlinedCallStack.push_back(StackId);
if (LeafFound)
InlinedCallStack.push_back(StackId);
}
// If leaf not in either of the maps, skip inst.
if (!LeafFound)
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/Other/new-pm-memprof.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
;; Ensure we invoke the preinliner when feeding back a memprof profile.

;; The opt invocation will fail as the profdata file is empty, which is fine
;; since we are simply testing the pass pipeline below.
; RUN: not opt -debug-pass-manager -passes='default<O2>' -memory-profile-file=/dev/null %s 2>&1 | FileCheck %s

; CHECK: Running pass: InlinerPass on (foo)
; CHECK: Running pass: MemProfUsePass

define void @foo() {
ret void
}
Binary file not shown.
Binary file not shown.
33 changes: 33 additions & 0 deletions llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,36 @@ ${LLVMPROFDATA} merge --text ${OUTDIR}/memprof_pgo.profraw -o ${OUTDIR}/memprof_
rm ${OUTDIR}/memprof.cc
rm ${OUTDIR}/pgo.exe
rm ${OUTDIR}/memprof_pgo.profraw

# Use musttail to simulate a missing leaf debug frame in the profiled binary.
# Note we don't currently match onto explicit ::operator new calls, which is
# why the non-musttail case uses implicit new (which doesn't support musttail).
# Note that changes in the code below which affect relative line number
# offsets of calls from their parent function can affect callsite matching in
# the LLVM IR.
cat > ${OUTDIR}/memprof_missing_leaf.cc << EOF
#include <new>
#ifndef USE_MUSTTAIL
#define USE_MUSTTAIL 0
#endif

// clang::musttail requires that the argument signature matches that of the caller.
void *bar(std::size_t s) {
#if USE_MUSTTAIL
[[clang::musttail]] return ::operator new (s);
#else
return new char[s];
#endif
}

int main() {
char *a = (char *)bar(1);
delete a;
return 0;
}
EOF

${CLANG} ${COMMON_FLAGS} -fmemory-profile -DUSE_MUSTTAIL=1 ${OUTDIR}/memprof_missing_leaf.cc -o ${OUTDIR}/memprof_missing_leaf.exe
env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_missing_leaf.exe > ${OUTDIR}/memprof_missing_leaf.memprofraw

rm ${OUTDIR}/memprof_missing_leaf.cc
88 changes: 88 additions & 0 deletions llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
;; Tests memprof profile matching when the leaf frame is missing in the
;; profile. In this case the call to operator new was inlined before
;; matching and we are able to match the next call frame up the inlined
;; context.

;; Avoid failures on big-endian systems that can't read the profile properly
; REQUIRES: x86_64-linux

;; TODO: Use text profile inputs once that is available for memprof.
;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
;; # To generate below LLVM IR for use in matching.
;; $ clang++ -gmlt -fdebug-info-for-profiling -S memprof_missing_leaf.cc \
;; -O2 -emit-llvm

; RUN: llvm-profdata merge %S/Inputs/memprof_missing_leaf.memprofraw --profiled-binary %S/Inputs/memprof_missing_leaf.exe -o %t.memprofdata
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s

; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
; CHECK: attributes #[[ATTR]] = {{.*}} "memprof"="notcold"

; ModuleID = '<stdin>'
source_filename = "memprof_missing_leaf.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nobuiltin allocsize(0)
declare noundef nonnull ptr @_Znam(i64 noundef) #0

; Function Attrs: mustprogress norecurse uwtable
define dso_local noundef i32 @main() #1 !dbg !8 {
entry:
%s.addr.i = alloca i64, align 8
%retval = alloca i32, align 4
%a = alloca ptr, align 8
store i32 0, ptr %retval, align 4
store i64 1, ptr %s.addr.i, align 8, !tbaa !11
%0 = load i64, ptr %s.addr.i, align 8, !dbg !15, !tbaa !11
%call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef %0) #3, !dbg !18
store ptr %call.i, ptr %a, align 8, !dbg !19, !tbaa !20
%1 = load ptr, ptr %a, align 8, !dbg !22, !tbaa !20
%isnull = icmp eq ptr %1, null, !dbg !23
br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !23

delete.notnull: ; preds = %entry
call void @_ZdlPv(ptr noundef %1) #4, !dbg !23
br label %delete.end, !dbg !23

delete.end: ; preds = %delete.notnull, %entry
ret i32 0, !dbg !24
}

; Function Attrs: nobuiltin nounwind
declare void @_ZdlPv(ptr noundef) #2

attributes #0 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { nobuiltin nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { builtin allocsize(0) }
attributes #4 = { builtin nounwind }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 ([email protected]:llvm/llvm-project.git 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "memprof_missing_leaf.cc", directory: ".", checksumkind: CSK_MD5, checksum: "f1445a8699406a6b826128704d257677")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 8, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 2}
!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 15, type: !9, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!9 = !DISubroutineType(types: !10)
!10 = !{}
!11 = !{!12, !12, i64 0}
!12 = !{!"long", !13, i64 0}
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C++ TBAA"}
!15 = !DILocation(line: 11, column: 19, scope: !16, inlinedAt: !17)
!16 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barm", scope: !1, file: !1, line: 7, type: !9, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!17 = distinct !DILocation(line: 16, column: 21, scope: !8)
!18 = !DILocation(line: 11, column: 10, scope: !16, inlinedAt: !17)
!19 = !DILocation(line: 16, column: 9, scope: !8)
!20 = !{!21, !21, i64 0}
!21 = !{!"any pointer", !13, i64 0}
!22 = !DILocation(line: 17, column: 10, scope: !8)
!23 = !DILocation(line: 17, column: 3, scope: !8)
!24 = !DILocation(line: 18, column: 3, scope: !8)