Skip to content

[PseudoProbe] Extend to skip instrumenting probe into the dests of invoke #79919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion llvm/include/llvm/Analysis/EHUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ static void computeEHOnlyBlocks(FunctionT &F, DenseSet<BlockT *> &EHBlocks) {
}
}

EHBlocks.clear();
for (auto Entry : Statuses) {
if (Entry.second == EH)
EHBlocks.insert(Entry.first);
Expand Down
13 changes: 11 additions & 2 deletions llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,17 @@ class SampleProfileProber {
uint64_t getFunctionHash() const { return FunctionHash; }
uint32_t getBlockId(const BasicBlock *BB) const;
uint32_t getCallsiteId(const Instruction *Call) const;
void computeCFGHash();
void computeProbeIdForBlocks();
void findUnreachableBlocks(DenseSet<BasicBlock *> &BlocksToIgnore);
void findInvokeNormalDests(DenseSet<BasicBlock *> &InvokeNormalDests);
void computeBlocksToIgnore(DenseSet<BasicBlock *> &BlocksToIgnore,
DenseSet<BasicBlock *> &BlocksAndCallsToIgnore);
void computeProbeIdForCallsites(
const DenseSet<BasicBlock *> &BlocksAndCallsToIgnore);
const Instruction *
getOriginalTerminator(const BasicBlock *Head,
const DenseSet<BasicBlock *> &BlocksToIgnore);
void computeCFGHash(const DenseSet<BasicBlock *> &BlocksToIgnore);
void computeProbeIdForBlocks(const DenseSet<BasicBlock *> &BlocksToIgnore);
void computeProbeIdForCallsites();

Function *F;
Expand Down
121 changes: 107 additions & 14 deletions llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,114 @@ SampleProfileProber::SampleProfileProber(Function &Func,
BlockProbeIds.clear();
CallProbeIds.clear();
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
computeProbeIdForBlocks();
computeProbeIdForCallsites();
computeCFGHash();

DenseSet<BasicBlock *> BlocksToIgnore;
DenseSet<BasicBlock *> BlocksAndCallsToIgnore;
computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore);

computeProbeIdForBlocks(BlocksToIgnore);
computeProbeIdForCallsites(BlocksAndCallsToIgnore);
computeCFGHash(BlocksToIgnore);
}

// Collects the blocks that instrumentation should skip. Blocks are skipped for
// two reasons:
//   1. To reduce the IR size.
//   2. To keep the instrumentation (checksum) stable. For example, the
//      frontend may generate unstable IR while optimizing the nounwind
//      attribute: some versions are optimized with the call-to-invoke
//      conversion while others are not, and the resulting discrepancy in
//      probe IDs could cause profile mismatching issues.
// The skipped blocks are either cold blocks or new split blocks whose original
// blocks are instrumented, so skipping them shouldn't degrade the profile
// quality.
//
// \p BlocksAndCallsToIgnore receives the EH-only blocks and the unreachable
// blocks. \p BlocksToIgnore receives those same blocks plus the normal
// destinations of invokes.
void SampleProfileProber::computeBlocksToIgnore(
    DenseSet<BasicBlock *> &BlocksToIgnore,
    DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) {
  // Cold EH blocks and unreachable blocks are ignored together with the calls
  // they contain.
  computeEHOnlyBlocks(*F, BlocksAndCallsToIgnore);
  findUnreachableBlocks(BlocksAndCallsToIgnore);

  // Every block ignored for callsites is ignored for block probes as well.
  for (BasicBlock *Ignored : BlocksAndCallsToIgnore)
    BlocksToIgnore.insert(Ignored);

  // Call-to-invoke conversion: the probe id and callsite id must be the same
  // before and after the block split. For block probes only the head block
  // keeps its id, so the normal dests are ignored. Callsite probes differ:
  // the split adds no callsite to the normal dests, so their callsites are
  // not ignored and the normal dests go into BlocksToIgnore only.
  findInvokeNormalDests(BlocksToIgnore);
}

// Unreachable blocks (and the calls inside them) are always cold, so they are
// ignored. A block is added to \p BlocksToIgnore when it is not the entry
// block and has no predecessors.
void SampleProfileProber::findUnreachableBlocks(
    DenseSet<BasicBlock *> &BlocksToIgnore) {
  BasicBlock *Entry = &F->getEntryBlock();
  for (BasicBlock &Block : *F) {
    // The entry block never has predecessors but is always reachable.
    if (&Block == Entry)
      continue;
    if (pred_size(&Block) == 0)
      BlocksToIgnore.insert(&Block);
  }
}

// The call-to-invoke conversion can split one basic block into several. Only
// the head block gets a probe; the normal destination of every invoke is
// recorded in \p InvokeNormalDests so that it can be ignored.
void SampleProfileProber::findInvokeNormalDests(
    DenseSet<BasicBlock *> &InvokeNormalDests) {
  for (BasicBlock &Block : *F) {
    auto *Invoke = dyn_cast<InvokeInst>(Block.getTerminator());
    if (!Invoke)
      continue;

    BasicBlock *Cur = Invoke->getNormalDest();
    InvokeNormalDests.insert(Cur);

    // The normal dest and the try/catch block are connected by an
    // unconditional branch: while the current block has exactly one
    // predecessor and that predecessor has exactly one successor, record the
    // predecessor too and continue from it.
    while (pred_size(Cur) == 1) {
      BasicBlock *OnlyPred = *pred_begin(Cur);
      if (succ_size(OnlyPred) != 1)
        break;
      InvokeNormalDests.insert(OnlyPred);
      Cur = OnlyPred;
    }
  }
}

// The call-to-invoke conversion splits the original block into a list of
// blocks. To keep the CFG hash consistent, the hash must be computed from the
// original block's successors. Starting at \p Head, keep following the
// successor (the invoke's normal dest, or the dest of an unconditional branch
// into an ignored block) until the tail block is found; the tail block's
// terminator carries the original block's successors and is returned.
const Instruction *SampleProfileProber::getOriginalTerminator(
    const BasicBlock *Head, const DenseSet<BasicBlock *> &BlocksToIgnore) {
  const BasicBlock *Cur = Head;
  while (true) {
    auto *Term = Cur->getTerminator();

    // An invoke continues the original block in its normal dest.
    if (auto *Invoke = dyn_cast<InvokeInst>(Term)) {
      Cur = Invoke->getNormalDest();
      continue;
    }

    // A single successor that is itself ignored is the next piece of the
    // split block: go to the unconditional branch dest.
    if (succ_size(Cur) == 1 && BlocksToIgnore.contains(*succ_begin(Cur))) {
      Cur = *succ_begin(Cur);
      continue;
    }

    return Term;
  }
}

// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
// value of each BB in the CFG. The higher 32 bits record the number of edges
// preceded by the number of indirect calls.
// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
void SampleProfileProber::computeCFGHash() {
void SampleProfileProber::computeCFGHash(
const DenseSet<BasicBlock *> &BlocksToIgnore) {
std::vector<uint8_t> Indexes;
JamCRC JC;
for (auto &BB : *F) {
for (BasicBlock *Succ : successors(&BB)) {
if (BlocksToIgnore.contains(&BB))
continue;

auto *TI = getOriginalTerminator(&BB, BlocksToIgnore);
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
auto *Succ = TI->getSuccessor(I);
auto Index = getBlockId(Succ);
// Ignore ignored blocks (zero ID) to avoid an unstable checksum.
if (Index == 0)
continue;
for (int J = 0; J < 4; J++)
Indexes.push_back((uint8_t)(Index >> (J * 8)));
}
Expand All @@ -207,23 +300,23 @@ void SampleProfileProber::computeCFGHash() {
<< ", Hash = " << FunctionHash << "\n");
}

void SampleProfileProber::computeProbeIdForBlocks() {
DenseSet<BasicBlock *> KnownColdBlocks;
computeEHOnlyBlocks(*F, KnownColdBlocks);
// Insert pseudo probe to non-cold blocks only. This will reduce IR size as
// well as the binary size while retaining the profile quality.
void SampleProfileProber::computeProbeIdForBlocks(
const DenseSet<BasicBlock *> &BlocksToIgnore) {
for (auto &BB : *F) {
++LastProbeId;
if (!KnownColdBlocks.contains(&BB))
BlockProbeIds[&BB] = LastProbeId;
if (BlocksToIgnore.contains(&BB))
continue;
BlockProbeIds[&BB] = ++LastProbeId;
}
}

void SampleProfileProber::computeProbeIdForCallsites() {
void SampleProfileProber::computeProbeIdForCallsites(
const DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) {
LLVMContext &Ctx = F->getContext();
Module *M = F->getParent();

for (auto &BB : *F) {
if (BlocksAndCallsToIgnore.contains(&BB))
continue;
for (auto &I : BB) {
if (!isa<CallBase>(I))
continue;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
; RUN: llvm-lto -thinlto-action=import %t3.bc -thinlto-index=%t3.index.bc -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN


; CHECK-NOT: {i64 6699318081062747564, i64 4294967295, !"foo"
; CHECK: !{i64 -2624081020897602054, i64 281479271677951, !"main"
; CHECK-NOT: {i64 6699318081062747564, i64 [[#]], !"foo"
; CHECK: !{i64 -2624081020897602054, i64 [[#]], !"main"

; WARN: warning: Pseudo-probe ignored: source module '{{.*}}' is compiled with -fpseudo-probe-for-profiling while destination module '{{.*}}' is not

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ entry:
to label %ret unwind label %lpad

ret:
; CHECK: call void @llvm.pseudoprobe
; CHECK-NOT: call void @llvm.pseudoprobe
ret void

lpad: ; preds = %entry
Expand Down
155 changes: 155 additions & 0 deletions llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
; REQUIRES: x86_64-linux
; RUN: opt < %s -passes=pseudo-probe -S -o - | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

$__clang_call_terminate = comdat any

@x = dso_local global i32 0, align 4, !dbg !0

; Function under test. Its calls to @_Z3foov and @_Z3bazv are all invokes that
; unwind to the shared %terminate.lpad landing pad. The FileCheck directives
; inside the body expect block probes 1 through 6 in entry, if.then, if.else,
; if.end, if.then5 and if.end6, and expect no block probe in the invoke normal
; destinations (invoke.cont, invoke.cont1, invoke.cont2, invoke.cont3) or in
; the landing pad.
; Function Attrs: mustprogress noinline nounwind uwtable
define dso_local void @_Z3barv() #0 personality ptr @__gxx_personality_v0 !dbg !14 {
entry:
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 1
%0 = load volatile i32, ptr @x, align 4, !dbg !17, !tbaa !19
%tobool = icmp ne i32 %0, 0, !dbg !17
br i1 %tobool, label %if.then, label %if.else, !dbg !23

if.then: ; preds = %entry
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2
invoke void @_Z3foov()
to label %invoke.cont unwind label %terminate.lpad, !dbg !24

invoke.cont: ; preds = %if.then
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
invoke void @_Z3bazv()
to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26

invoke.cont1: ; preds = %invoke.cont
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
br label %if.end, !dbg !27

if.else: ; preds = %entry
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3
invoke void @_Z3foov()
to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28

invoke.cont2: ; preds = %if.else
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
br label %if.end

if.end: ; preds = %invoke.cont2, %invoke.cont1
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4
invoke void @_Z3foov()
to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29

invoke.cont3: ; preds = %if.end
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
%1 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !19
%tobool4 = icmp ne i32 %1, 0, !dbg !30
br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32

if.then5: ; preds = %invoke.cont3
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5
%2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19
%inc = add nsw i32 %2, 1, !dbg !33
store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19
br label %if.end6, !dbg !35

if.end6: ; preds = %if.then5, %invoke.cont3
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 6
ret void, !dbg !36

terminate.lpad: ; preds = %if.end, %if.else, %invoke.cont, %if.then
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
%3 = landingpad { ptr, i32 }
catch ptr null, !dbg !24
%4 = extractvalue { ptr, i32 } %3, 0, !dbg !24
call void @__clang_call_terminate(ptr %4) #3, !dbg !24
unreachable, !dbg !24
}

; Empty callee: returns immediately. It is the target of the invokes in the
; if.then, if.else and if.end blocks of @_Z3barv.
; Function Attrs: mustprogress noinline nounwind uwtable
define dso_local void @_Z3foov() #0 !dbg !37 {
entry:
ret void, !dbg !38
}

declare i32 @__gxx_personality_v0(...)

; Helper called from the terminate.lpad landing pad of @_Z3barv with the
; exception pointer: it calls @__cxa_begin_catch on that pointer, then calls
; @_ZSt9terminatev, and never returns.
; Function Attrs: noinline noreturn nounwind uwtable
define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) #1 comdat {
%2 = call ptr @__cxa_begin_catch(ptr %0) #4
call void @_ZSt9terminatev() #3
unreachable
}

declare ptr @__cxa_begin_catch(ptr)

declare void @_ZSt9terminatev()

; Empty callee: returns immediately. It is the target of the invoke in the
; invoke.cont block of @_Z3barv.
; Function Attrs: mustprogress noinline nounwind uwtable
define dso_local void @_Z3bazv() #0 !dbg !39 {
entry:
ret void, !dbg !40
}

; CHECK: ![[#]] = !{i64 -3270123626113159616, i64 4294967295, !"_Z3bazv"}

attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { mustprogress noinline norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { noreturn nounwind }
attributes #4 = { nounwind }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12}
!llvm.ident = !{!13}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "a4c7b0392f3fd9c8ebb85065159dbb02")
!4 = !{!0}
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!7 = !{i32 7, !"Dwarf Version", i32 5}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}
!10 = !{i32 8, !"PIC Level", i32 2}
!11 = !{i32 7, !"PIE Level", i32 2}
!12 = !{i32 7, !"uwtable", i32 2}
!13 = !{!"clang version 19.0.0"}
!14 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!15 = !DISubroutineType(types: !16)
!16 = !{null}
!17 = !DILocation(line: 5, column: 6, scope: !18)
!18 = distinct !DILexicalBlock(scope: !14, file: !3, line: 5, column: 6)
!19 = !{!20, !20, i64 0}
!20 = !{!"int", !21, i64 0}
!21 = !{!"omnipotent char", !22, i64 0}
!22 = !{!"Simple C++ TBAA"}
!23 = !DILocation(line: 5, column: 6, scope: !14)
!24 = !DILocation(line: 6, column: 5, scope: !25)
!25 = distinct !DILexicalBlock(scope: !18, file: !3, line: 5, column: 9)
!26 = !DILocation(line: 7, column: 5, scope: !25)
!27 = !DILocation(line: 8, column: 3, scope: !25)
!28 = !DILocation(line: 9, column: 5, scope: !18)
!29 = !DILocation(line: 11, column: 3, scope: !14)
!30 = !DILocation(line: 12, column: 6, scope: !31)
!31 = distinct !DILexicalBlock(scope: !14, file: !3, line: 12, column: 6)
!32 = !DILocation(line: 12, column: 6, scope: !14)
!33 = !DILocation(line: 13, column: 5, scope: !34)
!34 = distinct !DILexicalBlock(scope: !31, file: !3, line: 12, column: 9)
!35 = !DILocation(line: 14, column: 5, scope: !34)
!36 = !DILocation(line: 17, column: 1, scope: !14)
!37 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 19, type: !15, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!38 = !DILocation(line: 19, column: 13, scope: !37)
!39 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !3, file: !3, line: 18, type: !15, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!40 = !DILocation(line: 18, column: 13, scope: !39)
!41 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 22, type: !42, scopeLine: 22, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!42 = !DISubroutineType(types: !43)
!43 = !{!6}
!44 = !DILocation(line: 23, column: 3, scope: !41)
!45 = !DILocation(line: 24, column: 1, scope: !41)