-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Implement trampolines for rv64 #96309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Roger Ferrer Ibáñez (rofirrim) Changes: This implementation is heavily based on what the X86 target does, but emits the instructions that GCC emits for rv64. Full diff: https://github.com/llvm/llvm-project/pull/96309.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a02fd5bd1b65e..6b63f500abe7b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -637,6 +637,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+ }
+
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit())
@@ -7155,6 +7160,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2), Flags, DL);
}
+ case ISD::INIT_TRAMPOLINE:
+ return lowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE:
+ return lowerADJUST_TRAMPOLINE(Op, DAG);
}
}
@@ -7170,6 +7179,123 @@ SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
return CallResult.second;
}
+// Custom lowering for ISD::INIT_TRAMPOLINE on RV64.
+//
+// Fills the 32-byte trampoline buffer with four hand-encoded 32-bit
+// instruction words followed by the static chain and the target function
+// address, then emits an ISD::CLEAR_CACHE over the buffer.
+//
+// Operands read from Op:
+//   0: incoming chain
+//   1: address of the trampoline buffer
+//   2: address of the target function
+//   3: static chain value
+//   4: SrcValue node for the trampoline buffer (used for pointer info)
+//
+// Returns the chain produced by the CLEAR_CACHE node. Aborts via
+// report_fatal_error when the subtarget is not 64-bit.
+SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!Subtarget.is64Bit())
+ llvm::report_fatal_error("Trampolines only implemented for RV64");
+
+ SDValue Root = Op.getOperand(0); // incoming chain; every store below uses it
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDLoc dl(Op);
+
+ // IR-level value for the buffer; only used to build MachinePointerInfo.
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // We store in the trampoline buffer the following instructions and data.
+ // Offset:
+ // 0: auipc t2, 0
+ // 4: ld t0, 24(t2)
+ // 8: ld t2, 16(t2)
+ // 12: jalr t0
+ // 16: <static chain value, at StaticChainOffset>
+ // 24: <function address, at FunctionAddressOffset>
+ // 32: <end of trampoline>
+
+ // Constants shamelessly taken from GCC.
+ // Opcode_LD already includes the funct3 bits (0x3000) on top of the base
+ // load opcode (0x03). The ShiftField_* values are the bit positions of the
+ // rd, rs1 and I-type immediate fields.
+ constexpr unsigned Opcode_AUIPC = 0x17;
+ constexpr unsigned Opcode_LD = 0x3003;
+ constexpr unsigned Opcode_JALR = 0x67;
+ constexpr unsigned ShiftField_RD = 7;
+ constexpr unsigned ShiftField_RS1 = 15;
+ constexpr unsigned ShiftField_IMM = 20;
+ constexpr unsigned Reg_X5 = 0x5; // x5/t0 (holds the address to the function)
+ constexpr unsigned Reg_X7 = 0x7; // x7/t2 (holds the static chain)
+
+ constexpr unsigned StaticChainOffset = 16;
+ constexpr unsigned FunctionAddressOffset = 24;
+
+ // One chain result per store: four instruction words plus the two data slots.
+ SDValue OutChains[6];
+ SDValue Addr = Trmp;
+
+ // auipc t2, 0
+ // Loads the current PC into t2.
+ // The word is materialised as an i64 constant and written with a store
+ // truncated to i32, as are the other three instruction words below.
+ constexpr uint32_t AUIPC_X7_0 =
+ Opcode_AUIPC | (Reg_X7 << ShiftField_RD);
+ OutChains[0] =
+ DAG.getTruncStore(Root, dl, DAG.getConstant(AUIPC_X7_0, dl, MVT::i64),
+ Addr, MachinePointerInfo(TrmpAddr), MVT::i32);
+
+ // ld t0, 24(t2)
+ // Loads the function address into t0. Note that we are using offsets
+ // pc-relative to the first instruction of the trampoline.
+ const uint32_t LD_X5_TargetFunctionOffset =
+ Opcode_LD | (Reg_X5 << ShiftField_RD) |
+ (Reg_X7 << ShiftField_RS1) | (FunctionAddressOffset << ShiftField_IMM);
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(4, dl, MVT::i64));
+ OutChains[1] = DAG.getTruncStore(
+ Root, dl,
+ DAG.getConstant(LD_X5_TargetFunctionOffset, dl, MVT::i64), Addr,
+ MachinePointerInfo(TrmpAddr, 4), MVT::i32);
+
+ // ld t2, 16(t2)
+ // Load the value of the static chain.
+ // t2 is both base and destination, so this has to come after the load of t0.
+ const uint32_t LD_X7_StaticChainOffset =
+ Opcode_LD | (Reg_X7 << ShiftField_RD) |
+ (Reg_X7 << ShiftField_RS1) | (StaticChainOffset << ShiftField_IMM);
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(8, dl, MVT::i64));
+ OutChains[2] = DAG.getTruncStore(
+ Root, dl, DAG.getConstant(LD_X7_StaticChainOffset, dl, MVT::i64),
+ Addr, MachinePointerInfo(TrmpAddr, 8), MVT::i32);
+
+ // jalr t0
+ // Jump to the function.
+ // Only the opcode and rs1 fields are set; the rd and immediate fields of the
+ // encoded word are left as zero.
+ const uint32_t JALR_X5 =
+ Opcode_JALR | (Reg_X5 << ShiftField_RS1);
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, dl, MVT::i64));
+ OutChains[3] =
+ DAG.getTruncStore(Root, dl, DAG.getConstant(JALR_X5, dl, MVT::i64), Addr,
+ MachinePointerInfo(TrmpAddr, 12), MVT::i32);
+
+ // Now store the variable part of the trampoline.
+ SDValue FunctionAddress = Op.getOperand(2);
+ SDValue StaticChain = Op.getOperand(3);
+
+ // Store the given static chain in the trampoline buffer.
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(StaticChainOffset, dl, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, StaticChain, Addr,
+ MachinePointerInfo(TrmpAddr, StaticChainOffset));
+
+ // Store the given function address in the trampoline buffer.
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(FunctionAddressOffset, dl, MVT::i64));
+ OutChains[5] =
+ DAG.getStore(Root, dl, FunctionAddress, Addr,
+ MachinePointerInfo(TrmpAddr, FunctionAddressOffset));
+
+ // Merge the six store chains, all rooted at Root, into a single token that
+ // the cache clear below depends on.
+ SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+
+ // Compute end of trampoline.
+ SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(32, dl, MVT::i64));
+
+ // Call clear cache on the trampoline buffer.
+ // The range passed is Trmp up to Trmp + 32, i.e. the whole buffer.
+ SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
+ Trmp, EndOfTrmp);
+
+ return Chain;
+}
+
+// Custom lowering for ISD::ADJUST_TRAMPOLINE on RV64.
+//
+// Returns operand 0 of Op unchanged, so no adjustment is applied to it.
+// Aborts via report_fatal_error when the subtarget is not 64-bit.
+SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!Subtarget.is64Bit())
+ llvm::report_fatal_error("Trampolines only implemented for RV64");
+
+ // NOTE(review): operand 0 is presumably the trampoline address; it is
+ // forwarded as-is -- confirm against the node's operand layout.
+ return Op.getOperand(0);
+}
+
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7d8bceb5cb341..7f8f1f4e42c31 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -993,6 +993,9 @@ class RISCVTargetLowering : public TargetLowering {
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
diff --git a/llvm/test/CodeGen/RISCV/rv64-trampoline.ll b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll
new file mode 100644
index 0000000000000..4a7a50fc09bf8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64 %s
+; RUN: llc -mtriple=riscv64-unknown-linux-gnu -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64-LINUX %s
+
+declare void @llvm.init.trampoline(ptr, ptr, ptr)
+declare ptr @llvm.adjust.trampoline(ptr)
+declare i64 @f(ptr nest, i64)
+
+; test0: initialises a trampoline for @f in a 32-byte, 8-byte-aligned stack
+; buffer with %p as the nest (static chain) argument, obtains the callable
+; pointer with llvm.adjust.trampoline, and calls it with %n. The two check
+; prefixes differ only in the cache-flush call that is emitted.
+define i64 @test0(i64 %n, ptr %p) nounwind {
+; RV64-LABEL: test0:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: mv s0, a0
+; RV64-NEXT: lui a0, %hi(.LCPI0_0)
+; RV64-NEXT: ld a0, %lo(.LCPI0_0)(a0)
+; RV64-NEXT: lui a2, %hi(f)
+; RV64-NEXT: addi a2, a2, %lo(f)
+; RV64-NEXT: sd a2, 32(sp)
+; RV64-NEXT: sd a1, 24(sp)
+; RV64-NEXT: sd a0, 16(sp)
+; RV64-NEXT: lui a0, 6203
+; RV64-NEXT: addi a0, a0, 643
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: addi a0, a0, 919
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: addi a1, sp, 40
+; RV64-NEXT: addi a0, sp, 8
+; RV64-NEXT: addi s1, sp, 8
+; RV64-NEXT: call __clear_cache
+; RV64-NEXT: mv a0, s0
+; RV64-NEXT: jalr s1
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+;
+; RV64-LINUX-LABEL: test0:
+; RV64-LINUX: # %bb.0:
+; RV64-LINUX-NEXT: addi sp, sp, -64
+; RV64-LINUX-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-LINUX-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-LINUX-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-LINUX-NEXT: mv s0, a0
+; RV64-LINUX-NEXT: lui a0, %hi(.LCPI0_0)
+; RV64-LINUX-NEXT: ld a0, %lo(.LCPI0_0)(a0)
+; RV64-LINUX-NEXT: lui a2, %hi(f)
+; RV64-LINUX-NEXT: addi a2, a2, %lo(f)
+; RV64-LINUX-NEXT: sd a2, 32(sp)
+; RV64-LINUX-NEXT: sd a1, 24(sp)
+; RV64-LINUX-NEXT: sd a0, 16(sp)
+; RV64-LINUX-NEXT: lui a0, 6203
+; RV64-LINUX-NEXT: addi a0, a0, 643
+; RV64-LINUX-NEXT: slli a0, a0, 32
+; RV64-LINUX-NEXT: addi a0, a0, 919
+; RV64-LINUX-NEXT: sd a0, 8(sp)
+; RV64-LINUX-NEXT: addi a1, sp, 40
+; RV64-LINUX-NEXT: addi a0, sp, 8
+; RV64-LINUX-NEXT: addi s1, sp, 8
+; RV64-LINUX-NEXT: li a2, 0
+; RV64-LINUX-NEXT: call __riscv_flush_icache
+; RV64-LINUX-NEXT: mv a0, s0
+; RV64-LINUX-NEXT: jalr s1
+; RV64-LINUX-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-LINUX-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-LINUX-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-LINUX-NEXT: addi sp, sp, 64
+; RV64-LINUX-NEXT: ret
+ %alloca = alloca [32 x i8], align 8
+ call void @llvm.init.trampoline(ptr %alloca, ptr @f, ptr %p)
+ %tramp = call ptr @llvm.adjust.trampoline(ptr %alloca)
+ %ret = call i64 %tramp(i64 %n)
+ ret i64 %ret
+
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
12c9bd7
to
ebaf74f
Compare
Ok, now using MCCodeEmitter to encode the instructions rather than using hardcoded constants. Hope this is reasonable. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
96de857
to
0675cae
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. @jrtc27 Any more comments?
9cc30f6
to
d129338
Compare
Any more comments on this? |
This implementation is heavily based on what the X86 target does, but emits the instructions that GCC emits for rv64.
Co-authored-by: Pengcheng Wang <[email protected]>
Codegen used to coalesce the stores into a single sd. Now it emits two sw.
d129338
to
acb1e6b
Compare
The test has changed because we used to coalesce the two |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM as well
This implementation is heavily based on what the X86 target does, but emits the instructions that GCC emits for rv64.