Skip to content

Commit 994eb5a

Browse files
committed
[CodeGen] Fix unconditional branch duplication issue in bbsections
If an end-of-section basic block ends in an unconditional branch to its fallthrough, BasicBlockSections will duplicate the unconditional branch. The duplicate branch doesn't break x86, but removing it is a (slight) size optimization and, more importantly, prevents AArch64 builds from breaking.

Ex:
```
bb1 (bbsections Hot):
  jmp bb2

bb2 (bbsections Cold):
  /* do work... */
```

After running sortBasicBlocksAndUpdateBranches():
```
bb1 (bbsections Hot):
  jmp bb2
  jmp bb2

bb2 (bbsections Cold):
  /* do work... */
```

Differential Revision: https://reviews.llvm.org/D158674
1 parent 4ad8913 commit 994eb5a

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

llvm/lib/CodeGen/BasicBlockSections.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,8 @@ void llvm::sortBasicBlocksAndUpdateBranches(
258258
[[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front();
259259
SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs());
260260
for (auto &MBB : MF)
261-
PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
261+
PreLayoutFallThroughs[MBB.getNumber()] =
262+
MBB.getFallThrough(/*JumpToFallThrough=*/false);
262263

263264
MF.sort(MBBCmp);
264265
assert(&MF.front() == EntryBlock &&

llvm/test/CodeGen/Generic/machine-function-splitter.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86
77
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86
88
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86
9+
; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86
910

1011
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
1112
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
1213
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
1314
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64
15+
; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -O0 -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s --dump-input=always -check-prefixes=MFS-O0,MFS-O0-AARCH64
1416

1517
; COM: Machine function splitting with AFDO profiles
1618
; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll
@@ -462,6 +464,29 @@ define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 {
462464
ret void
463465
}
464466

467+
define void @foo16(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
468+
;; Check that an unconditional branch is only appended to a block
469+
;; if it would fall through to the wrong block otherwise.
470+
; MFS-O0-LABEL: foo16
471+
; MFS-O0-X86: jmp
472+
; MFS-O0-X86-NOT: jmp
473+
; MFS-O0-AARCH64: br
474+
; MFS-O0-AARCH64: br
475+
; MFS-O0-AARCH64-NOT: br
476+
; MFS-O0: .section .text.split.foo16
477+
; MFS-O0-NEXT: foo16.cold
478+
%2 = call i32 @baz()
479+
br i1 false, label %3, label %5, !prof !25
480+
481+
3: ; preds = %1
482+
%4 = call i32 @bar()
483+
unreachable
484+
485+
5: ; preds = %1
486+
%6 = tail call i32 @qux()
487+
ret void
488+
}
489+
465490
declare i32 @bar()
466491
declare i32 @baz()
467492
declare i32 @bam()

0 commit comments

Comments
 (0)