Skip to content

[BOLT] Set call to continuation count in pre-aggregated profile #109486

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 8, 2024
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,10 @@ class BinaryFunction {
return BB && BB->getOffset() == Offset ? BB : nullptr;
}

/// Const overload: forwards to the non-const getBasicBlockAtOffset() and
/// hands the result back as a pointer-to-const.
const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const {
  auto *MutableThis = const_cast<BinaryFunction *>(this);
  return MutableThis->getBasicBlockAtOffset(Offset);
}

/// Retrieve the landing pad BB associated with invoke instruction \p Invoke
/// that is in \p BB. Return nullptr if none exists
BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB,
Expand Down
3 changes: 2 additions & 1 deletion bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ class DataAggregator : public DataReader {
uint64_t Mispreds);

/// Register a \p Branch.
bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds,
bool IsPreagg);

/// Register a trace between two LBR entries supplied in execution order.
bool doTrace(const LBREntry &First, const LBREntry &Second,
Expand Down
111 changes: 77 additions & 34 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,42 +778,75 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
bool IsReturn = false;
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
Addr -= Func->getAddress();
if (IsFrom) {
auto checkReturn = [&](auto MaybeInst) {
IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
};
if (Func->hasInstructions())
checkReturn(Func->getInstructionAtOffset(Addr));
else
checkReturn(Func->disassembleInstructionAtOffset(Addr));
}
uint64_t Mispreds, bool IsPreagg) {
// Tells whether the instruction found at \p Offset in \p Func is a return.
// The instruction is taken from the function when it already has
// instructions, and is disassembled at that offset otherwise.
auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
  auto isReturn = [&](auto MaybeInst) {
    return MaybeInst && BC->MIB->isReturn(*MaybeInst);
  };
  if (Func.hasInstructions())
    return isReturn(Func.getInstructionAtOffset(Offset));
  return isReturn(Func.disassembleInstructionAtOffset(Offset));
};

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
// Tells whether \p Offset in \p Func may be a call continuation. Function
// starts, entry points and landing pads are never reported as one.
auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
  // Offset zero is the function start, which cannot follow a call.
  //
  // FIXME: support BAT case where the function might be in empty state
  // (split fragments declared non-simple).
  if (Offset == 0 || !Func.hasCFG())
    return false;

  // A block must begin exactly here, and it must be neither an entry point
  // nor a landing pad.
  const BinaryBasicBlock *Block = Func.getBasicBlockAtOffset(Offset);
  if (!Block)
    return false;
  return !(Block->isEntryPoint() || Block->isLandingPad());
};

if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
Func = ParentFunc;
if (IsFrom)
NumColdSamples += Count;
}
// Mutates \p Addr to an offset into the containing function, performing BAT
// offset translation and parent lookup.
//
// Returns the containing function (or BAT parent) and whether the address
// corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
if (!Func)
return std::pair{Func, false};

return Func;
}
return nullptr;
Addr -= Func->getAddress();

bool IsRetOrCallCont =
IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

BinaryFunction *ParentFunc = getBATParentFunction(*Func);
if (!ParentFunc)
return std::pair{Func, IsRetOrCallCont};

if (IsFrom)
NumColdSamples += Count;

return std::pair{ParentFunc, IsRetOrCallCont};
};

BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
uint64_t ToOrig = To;
auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false);
if (!FromFunc && !ToFunc)
return false;

// Record call to continuation trace.
if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
LBREntry First{ToOrig - 1, ToOrig - 1, false};
LBREntry Second{ToOrig, ToOrig, false};
return doTrace(First, Second, Count);
}
// Ignore returns.
if (IsReturn)
return true;
BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
if (!FromFunc && !ToFunc)
return false;

// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && To != 0) {
Expand All @@ -830,10 +863,19 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
if (!FromFunc || !ToFunc) {
LLVM_DEBUG({
dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
<< formatv(" @ {0:x}", First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName()
<< formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
dbgs() << "Out of range trace starting in ";
if (FromFunc)
dbgs() << formatv("{0} @ {1:x}", *FromFunc,
First.To - FromFunc->getAddress());
else
dbgs() << Twine::utohexstr(First.To);
dbgs() << " and ending in ";
if (ToFunc)
dbgs() << formatv("{0} @ {1:x}", *ToFunc,
Second.From - ToFunc->getAddress());
else
dbgs() << Twine::utohexstr(Second.From);
dbgs() << '\n';
});
NumLongRangeTraces += Count;
return false;
Expand Down Expand Up @@ -1620,7 +1662,8 @@ void DataAggregator::processBranchEvents() {
for (const auto &AggrLBR : BranchLBRs) {
const Trace &Loc = AggrLBR.first;
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
/*IsPreagg*/ false);
}
}

Expand Down Expand Up @@ -1781,7 +1824,7 @@ void DataAggregator::processPreAggregated() {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds);
AggrEntry.Mispreds, /*IsPreagg*/ true);
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
Expand Down
132 changes: 132 additions & 0 deletions bolt/test/X86/callcont-fallthru.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
## Ensures that a call continuation fallthrough count is set when using
## pre-aggregated perf data.

# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib
# RUN: link_fdata %s %t %t.pa1 PREAGG
# RUN: link_fdata %s %t %t.pa2 PREAGG2
# RUN: link_fdata %s %t %t.pa3 PREAGG3
# RUN: link_fdata %s %t %t.pa4 PREAGG4

## Check normal case: fallthrough is not LP or secondary entry.
# RUN: llvm-strip --strip-unneeded %t -o %t.exe
# RUN: llvm-bolt %t.exe --pa -p %t.pa1 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s

## Check that getFallthroughsInTrace correctly handles a trace starting at a
## PLT call continuation.
# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2

## Check that we don't treat secondary entry points as call continuation sites.
# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3

## Check fallthrough to a landing pad case.
# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4

.globl foo
.type foo, %function
foo:
pushq %rbp
movq %rsp, %rbp
popq %rbp
Lfoo_ret:
retq
.size foo, .-foo

.globl main
.type main, %function
main:
.Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, DW.ref.__gxx_personality_v0
.cfi_lsda 27, .Lexception0
pushq %rbp
movq %rsp, %rbp
subq $0x20, %rsp
movl $0x0, -0x4(%rbp)
movl %edi, -0x8(%rbp)
movq %rsi, -0x10(%rbp)
callq puts@PLT
## Target is a call continuation
# PREAGG: B X:0 #Ltmp1# 2 0
# CHECK: callq puts@PLT
# CHECK-NEXT: count: 2

Ltmp1:
movq -0x10(%rbp), %rax
movq 0x8(%rax), %rdi
movl %eax, -0x14(%rbp)

Ltmp4:
cmpl $0x0, -0x14(%rbp)
je Ltmp0
# CHECK2: je .Ltmp0
# CHECK2-NEXT: count: 3

movl $0xa, -0x18(%rbp)
callq foo
## Target is a call continuation
# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0
# CHECK: callq foo
# CHECK-NEXT: count: 1

## PLT call continuation fallthrough spanning the call
# PREAGG2: F #Ltmp1# #Ltmp3_br# 3
# CHECK2: callq foo
# CHECK2-NEXT: count: 3

## Target is a secondary entry point
# PREAGG3: B X:0 #Ltmp3# 2 0
# CHECK3: callq foo
# CHECK3-NEXT: count: 0

## Target is a landing pad
# PREAGG4: B X:0 #Ltmp3# 2 0
# CHECK4: callq puts@PLT
# CHECK4-NEXT: count: 0

Ltmp3:
cmpl $0x0, -0x18(%rbp)
Ltmp3_br:
jmp Ltmp2

Ltmp2:
movl -0x18(%rbp), %eax
addl $-0x1, %eax
movl %eax, -0x18(%rbp)
jmp Ltmp3
jmp Ltmp4
jmp Ltmp1

Ltmp0:
xorl %eax, %eax
addq $0x20, %rsp
popq %rbp
retq
.Lfunc_end0:
.cfi_endproc
.size main, .-main

.section .gcc_except_table,"a",@progbits
.p2align 2, 0x0
GCC_except_table0:
.Lexception0:
.byte 255 # @LPStart Encoding = omit
.byte 255 # @TType Encoding = omit
.byte 1 # Call site Encoding = uleb128
.uleb128 .Lcst_end0-.Lcst_begin0
.Lcst_begin0:
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.uleb128 .Lfunc_end0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Lfunc_end0
.uleb128 Ltmp3-.Lfunc_begin0 # jumps to Ltmp3
.byte 0 # has no landing pad
.byte 0 # On action: cleanup
.Lcst_end0:
.p2align 2, 0x0
.hidden DW.ref.__gxx_personality_v0
.weak DW.ref.__gxx_personality_v0
.section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
.p2align 3, 0x0
.type DW.ref.__gxx_personality_v0,@object