-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Implement tail call optimization in machine outliner #115297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Mark Goncharov (mga-sc) ChangesFollowing up issue #89822, this patch adds opportunity to use tail call in machine outliner pass. Patch is 24.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115297.diff 7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 04bb964bfc48cf..f6425d47fd81bb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2929,6 +2929,7 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
// Enum values indicating how an outlined call should be constructed.
enum MachineOutlinerConstructionID {
+ MachineOutlinerTailCall,
MachineOutlinerDefault
};
@@ -2937,19 +2938,47 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
return MF.getFunction().hasMinSize();
}
+static bool IsCandidatePatchable(const MachineInstr &MI) {
+ const MachineBasicBlock *MBB = MI.getParent();
+ const MachineFunction *MF = MBB->getParent();
+ const Function &F = MF->getFunction();
+ return F.getFnAttribute("fentry-call").getValueAsBool() ||
+ F.hasFnAttribute("patchable-function-entry");
+}
+
+static bool CannotInsertTailCall(const MachineInstr &MI) {
+ if (MI.isTerminator())
+ return IsCandidatePatchable(MI);
+ return true;
+}
+
+static bool MIUseX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+ return MI.modifiesRegister(RISCV::X5, TRI) ||
+ MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5);
+}
+
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
RISCVInstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
unsigned MinRepeats) const {
- // First we need to filter out candidates where the X5 register (IE t0) can't
- // be used to setup the function call.
- auto CannotInsertCall = [](outliner::Candidate &C) {
+ auto CandidateUseX5 = [](outliner::Candidate &C) {
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
+ for (const MachineInstr &MI : C)
+ if (MIUseX5(MI, TRI))
+ return true;
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
};
+ auto CannotInsertCall = [CandidateUseX5](outliner::Candidate &C) {
+ if (!CandidateUseX5(C))
+ return false;
+ if (!CannotInsertTailCall(C.back()))
+ return false;
+ return true;
+ };
+
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
// If the sequence doesn't have enough candidates left, then we're done.
@@ -2961,6 +2990,17 @@ RISCVInstrInfo::getOutliningCandidateInfo(
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
+ if (!CannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
+ // tail function = 8 bytes. Can't be compressed
+ for (auto &C : RepeatedSequenceLocs)
+ C.setCallInfo(MachineOutlinerTailCall, 8);
+
+ // Using tail call we move ret instrunction from caller to calee.
+ // So, FrameOverhead for this is 0
+ return std::make_unique<outliner::OutlinedFunction>(
+ RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall);
+ }
+
// call t0, function = 8 bytes.
unsigned CallOverhead = 8;
for (auto &C : RepeatedSequenceLocs)
@@ -2997,15 +3037,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
: outliner::InstrType::Invisible;
- // We need support for tail calls to outlined functions before return
- // statements can be allowed.
- if (MI.isReturn())
- return outliner::InstrType::Illegal;
-
- // Don't allow modifying the X5 register which we use for return addresses for
- // these outlined functions.
- if (MI.modifiesRegister(RISCV::X5, TRI) ||
- MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
+ if (CannotInsertTailCall(MBB->back()) && MIUseX5(MI, TRI))
return outliner::InstrType::Illegal;
// Make sure the operands don't reference something unsafe.
@@ -3041,19 +3073,29 @@ void RISCVInstrInfo::buildOutlinedFrame(
}
}
+ if (OF.FrameConstructionID == MachineOutlinerTailCall)
+ return;
+
MBB.addLiveIn(RISCV::X5);
// Add in a return instruction to the end of the outlined frame.
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
- .addReg(RISCV::X0, RegState::Define)
- .addReg(RISCV::X5)
- .addImm(0));
+ .addReg(RISCV::X0, RegState::Define)
+ .addReg(RISCV::X5)
+ .addImm(0));
}
MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, outliner::Candidate &C) const {
+ if (C.CallConstructionID == MachineOutlinerTailCall) {
+ It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
+ .addGlobalAddress(M.getNamedValue(MF.getName()),
+ /*Offset=*/0, RISCVII::MO_CALL));
+ return It;
+ }
+
// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-call.ll b/llvm/test/CodeGen/RISCV/machine-outliner-call.ll
new file mode 100644
index 00000000000000..b019cfe74864b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-call.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -verify-machineinstrs -enable-machine-outliner | FileCheck %s
+
+target triple = "riscv64-unknown-linux-gnu"
+
+declare void @foo(i32, i32, i32, i32) minsize
+
+define void @fentry0(i1 %a) nounwind {
+; CHECK-LABEL: fentry0:
+; CHECK: # %bb.1:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB0_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+entry:
+ br i1 %a, label %if.then, label %if.end
+if.then:
+ call void @foo(i32 1, i32 2, i32 3, i32 4)
+ br label %if.end
+if.end:
+ call void @foo(i32 5, i32 6, i32 7, i32 8)
+ ret void
+}
+
+define void @fentry1(i1 %a) nounwind {
+; CHECK-LABEL: fentry1:
+; CHECK: # %bb.1:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB1_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+entry:
+ br i1 %a, label %if.then, label %if.end
+if.then:
+ call void @foo(i32 1, i32 2, i32 3, i32 4)
+ br label %if.end
+if.end:
+ call void @foo(i32 5, i32 6, i32 7, i32 8)
+ ret void
+}
+
+define void @fentry2(i1 %a) nounwind {
+; CHECK-LABEL: fentry2:
+; CHECK: # %bb.1:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB2_2:
+; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+entry:
+ br i1 %a, label %if.then, label %if.end
+if.then:
+ call void @foo(i32 1, i32 2, i32 3, i32 4)
+ br label %if.end
+if.end:
+ call void @foo(i32 5, i32 6, i32 7, i32 8)
+ ret void
+}
+
+; CHECK: OUTLINED_FUNCTION_[[BB2]]:
+; CHECK: li a0, 5
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: li a2, 7
+; CHECK-NEXT: li a3, 8
+; CHECK-NEXT: call foo
+
+; CHECK: OUTLINED_FUNCTION_[[BB1]]:
+; CHECK: li a0, 1
+; CHECK-NEXT: li a1, 2
+; CHECK-NEXT: li a2, 3
+; CHECK-NEXT: li a3, 4
+; CHECK-NEXT: jr t0
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
index 6ecca6a1b18ef8..2acb1d43e01eaf 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
@@ -22,13 +22,11 @@ body: |
; RV32I-MO-LABEL: name: func1
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func1
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
@@ -49,13 +47,11 @@ body: |
; RV32I-MO-LABEL: name: func2
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func2
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
@@ -76,13 +72,11 @@ body: |
; RV32I-MO-LABEL: name: func3
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func3
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, -12
$x11 = ORI $x11, 1023
@@ -96,11 +90,11 @@ body: |
# OUTLINED-LABEL: name: OUTLINED_FUNCTION_0
-# OUTLINED: liveins: $x11, $x10, $x5
+# OUTLINED: liveins: $x11, $x10
# OUTLINED-NEXT: {{ $}}
# OUTLINED-NEXT: $x10 = ORI $x10, 1023
# OUTLINED-NEXT: $x11 = ORI $x11, 1023
# OUTLINED-NEXT: $x12 = ADDI $x10, 17
# OUTLINED-NEXT: $x11 = AND $x12, $x11
# OUTLINED-NEXT: $x10 = SUB $x10, $x11
-# OUTLINED-NEXT: $x0 = JALR $x5, 0
+# OUTLINED-NEXT: PseudoRET
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
index 4ef3abd241577f..4a54a7289ddf27 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
@@ -11,7 +11,11 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: # FEntry call
; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB0_2:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
@@ -27,7 +31,11 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: # FEntry call
; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB1_2:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
@@ -47,7 +55,11 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" {
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB2_2:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
@@ -65,7 +77,11 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" {
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK: # %bb.1:
-; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
+; CHECK-NEXT: call foo
+; CHECK-LABEL: .LBB3_2:
+; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
+; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
index 715e212eecabb3..f2f43f7a1dcd9d 100644
--- a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
+++ b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
# RUN: | FileCheck -check-prefixes=RV32I-MO %s
@@ -25,15 +26,14 @@ body: |
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
; RV32I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
+ ;
; RV64I-MO-LABEL: name: func1
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
; RV64I-MO-NEXT: EH_LABEL <mcsymbol .Ltmp0>
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
EH_LABEL <mcsymbol .Ltmp0>
$x11 = ORI $x11, 1023
@@ -53,15 +53,14 @@ body: |
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
; RV32I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
+ ;
; RV64I-MO-LABEL: name: func2
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
; RV64I-MO-NEXT: GC_LABEL <mcsymbol .Ltmp1>
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
GC_LABEL <mcsymbol .Ltmp1>
$x11 = ORI $x11, 1023
@@ -81,15 +80,14 @@ body: |
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x10 = ORI $x10, 1023
; RV32I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
- ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV32I-MO-NEXT: PseudoRET
+ ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
+ ;
; RV64I-MO-LABEL: name: func3
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x10 = ORI $x10, 1023
; RV64I-MO-NEXT: ANNOTATION_LABEL <mcsymbol .Ltmp2>
- ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
- ; RV64I-MO-NEXT: PseudoRET
+ ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
ANNOTATION_LABEL <mcsymbol .Ltmp2>
$x11 = ORI $x11, 1023
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir b/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
new file mode 100644
index 00000000000000..b01cda582e19b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machineoutliner-x5.mir
@@ -0,0 +1,58 @@
+# Check that modifying X5 register is not a problem for machine outliner
+
+# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK,RV32I-MO %s
+# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK,RV64I-MO %s
+
+--- |
+ define i32 @outline_0(i32 %a, i32 %b) { ret i32 0 }
+
+ define i32 @outline_1(i32 %a, i32 %b) { ret i32 0 }
+
+ define i32 @outline_2(i32 %a, i32 %b) { ret i32 0 }
+
+...
+---
+name: outline_0
+tracksRegLiveness: true
+isOutlined: false
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x5
+ ; RV32I-MO-LABEL: name: outline_0
+ ; RV32I-MO: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+ ;
+ ; RV64I-MO-LABEL: name: outline_0
+ ; RV64I-MO: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+ $x11 = ORI $x11, 1023
+ $x12 = ADDI $x10, 17
+ $x10 = ADD $x10, $x5
+ $x11 = AND $x12, $x11
+ $x10 = SUB $x10, $x11
+ PseudoRET implicit $x10
+
+...
+---
+name: outline_1
+tracksRegLiveness: true
+isOutlined: false
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x5
+ ; RV32I-MO-LABEL: name: outline_1
+ ; RV32I-MO: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5, implicit $x10, implicit $x11
+ ;
+ ; RV64I-MO-LABEL: name: outline_1
+ ; RV64I-MO: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x5,...
[truncated]
|
d4ee651
to
1677bf4
Compare
Following up issue llvm#89822, this patch adds opportunity to use tail call in machine outliner pass. Also it enables outline patterns with X5(T0) register.
1677bf4
to
dd8a3d3
Compare
// First we need to filter out candidates where the X5 register (IE t0) can't | ||
// be used to setup the function call. | ||
auto CannotInsertCall = [](outliner::Candidate &C) { | ||
auto CandidateUseX5 = [](outliner::Candidate &C) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use -> Uses
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other than @topperc's comments this seems good, modulo a few small nits w.r.t. formatting. Thanks for working on this! 😄
.addReg(RISCV::X0, RegState::Define) | ||
.addReg(RISCV::X5) | ||
.addImm(0)); | ||
.addReg(RISCV::X0, RegState::Define) | ||
.addReg(RISCV::X5) | ||
.addImm(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, get changes back.
It was changed by git clang-format
...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM in general, but I'd like to know if we have passed all tests in llvm-test-suite with -Oz
.
@wangpc-pp , llvm-test-suite with |
It's OK as long as you have run llvm-test-suite on qemu and all tests are passed. The LLVM Github CI is not so complete for RISC-V. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
@topperc , ping. Could we merge this MR? I don't have write access. |
@@ -2961,6 +2990,17 @@ RISCVInstrInfo::getOutliningCandidateInfo( | |||
for (auto &MI : RepeatedSequenceLocs[0]) | |||
SequenceSize += getInstSizeInBytes(MI); | |||
|
|||
if (!cannotInsertTailCall(RepeatedSequenceLocs[0].back())) { | |||
// tail function = 8 bytes. Can't be compressed |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why can't it be compressed? Or do you just mean it can't be compressed in the assembler. It should be compressible in the linker.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rewrote it in a more precise form
// tail call = auipc + jalr in the worst case without linker relaxation.
CallOverhead = 4 + InstrSizeCExt;
for (auto &C : RepeatedSequenceLocs) | ||
C.setCallInfo(MachineOutlinerTailCall, 8); | ||
|
||
// Using tail call we move ret instruction from caller to calle. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
callee*
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
@@ -3054,6 +3089,13 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( | |||
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, | |||
MachineFunction &MF, outliner::Candidate &C) const { | |||
|
|||
if (C.CallConstructionID == MachineOutlinerTailCall) { | |||
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PseudoTAIL expands to AUIPC+JALR using X6 or X7 as a temporary. If the linker doesn't relax the AUIPC+JALR sequence to JAL, then X6 or X7 will be overwritten. How do we know it is ok to overwrite X6 or X7?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the comment, I did not consider the case of the influence of registers x6 and x7 before relaxation. Apparently, even llvm-test-suite didn't get such case.
I rewrote verification algorithm for tail call. If we modify X6/X7 (Specific register get from enabled extensions info) earlier than reads from X6/X7, than my optimization can be applied.
7c57f39
to
00ebb2a
Compare
@topperc , ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@topperc , could you please merge this PR? |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/9302 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/9183 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/9184 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/13099 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/10099 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/15815 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/13711 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/9622 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/7217 Here is the relevant piece of the build log for the reference
|
This test has been failed after commit [RISCV] Implement tail call optimization in machine outliner (PR llvm#115297). Changes to the same file were merged today earlier [TTI][RISCV] Unconditionally break critical edges to sink ADDI (PR llvm#108889).
Following up issue #89822, this patch adds opportunity to use tail call in machine outliner pass.
Also it enables outline patterns with X5(T0) register.