[RISCV] Implement trampolines for rv64 #96309


Merged: 14 commits merged into llvm:main from the riscv-trampolines branch on Oct 18, 2024

Conversation

rofirrim (Collaborator)

This implementation is heavily based on what the X86 target does, but it emits the instructions that GCC emits for rv64.

rofirrim requested review from asb and topperc on June 21, 2024
llvmbot (Member) commented Jun 21, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Roger Ferrer Ibáñez (rofirrim)

Changes

This implementation is heavily based on what the X86 target does, but it emits the instructions that GCC emits for rv64.


Full diff: https://github.com/llvm/llvm-project/pull/96309.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+126)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+3)
  • (added) llvm/test/CodeGen/RISCV/rv64-trampoline.ll (+80)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a02fd5bd1b65e..6b63f500abe7b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -637,6 +637,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
                      Subtarget.is64Bit() ? Legal : Custom);
 
+  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+    setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+  }
+
   setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   if (Subtarget.is64Bit())
@@ -7155,6 +7160,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
                            Op.getOperand(2), Flags, DL);
   }
+  case ISD::INIT_TRAMPOLINE:
+    return lowerINIT_TRAMPOLINE(Op, DAG);
+  case ISD::ADJUST_TRAMPOLINE:
+    return lowerADJUST_TRAMPOLINE(Op, DAG);
   }
 }
 
@@ -7170,6 +7179,123 @@ SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
   return CallResult.second;
 }
 
+SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  if (!Subtarget.is64Bit())
+    llvm::report_fatal_error("Trampolines only implemented for RV64");
+
+  SDValue Root = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDLoc dl(Op);
+
+  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+  // We store in the trampoline buffer the following instructions and data.
+  // Offset:
+  //      0: auipc   t2, 0
+  //      4: ld      t0, 24(t2)
+  //      8: ld      t2, 16(t2)
+  //     12: jalr    t0
+  //     16: <StaticChainOffset>
+  //     24: <FunctionAddressOffset>
+  //     32:
+
+  // Constants shamelessly taken from GCC.
+  constexpr unsigned Opcode_AUIPC = 0x17;
+  constexpr unsigned Opcode_LD = 0x3003;
+  constexpr unsigned Opcode_JALR = 0x67;
+  constexpr unsigned ShiftField_RD = 7;
+  constexpr unsigned ShiftField_RS1 = 15;
+  constexpr unsigned ShiftField_IMM = 20;
+  constexpr unsigned Reg_X5 = 0x5; // x5/t0 (holds the address to the function)
+  constexpr unsigned Reg_X7 = 0x7; // x7/t2 (holds the static chain)
+
+  constexpr unsigned StaticChainOffset = 16;
+  constexpr unsigned FunctionAddressOffset = 24;
+
+  SDValue OutChains[6];
+  SDValue Addr = Trmp;
+
+  // auipc t2, 0
+  // Loads the current PC into t2.
+  constexpr uint32_t AUIPC_X7_0 =
+      Opcode_AUIPC | (Reg_X7 << ShiftField_RD);
+  OutChains[0] =
+      DAG.getTruncStore(Root, dl, DAG.getConstant(AUIPC_X7_0, dl, MVT::i64),
+                        Addr, MachinePointerInfo(TrmpAddr), MVT::i32);
+
+  // ld t0, 24(t2)
+  // Loads the function address into t0. Note that we are using offsets
+  // pc-relative to the first instruction of the trampoline.
+  const uint32_t LD_X5_TargetFunctionOffset =
+      Opcode_LD | (Reg_X5 << ShiftField_RD) |
+      (Reg_X7 << ShiftField_RS1) | (FunctionAddressOffset << ShiftField_IMM);
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(4, dl, MVT::i64));
+  OutChains[1] = DAG.getTruncStore(
+      Root, dl,
+      DAG.getConstant(LD_X5_TargetFunctionOffset, dl, MVT::i64), Addr,
+      MachinePointerInfo(TrmpAddr, 4), MVT::i32);
+
+  // ld t2, 16(t2)
+  // Load the value of the static chain.
+  const uint32_t LD_X7_StaticChainOffset =
+      Opcode_LD | (Reg_X7 << ShiftField_RD) |
+      (Reg_X7 << ShiftField_RS1) | (StaticChainOffset << ShiftField_IMM);
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(8, dl, MVT::i64));
+  OutChains[2] = DAG.getTruncStore(
+      Root, dl, DAG.getConstant(LD_X7_StaticChainOffset, dl, MVT::i64),
+      Addr, MachinePointerInfo(TrmpAddr, 8), MVT::i32);
+
+  // jalr t0
+  // Jump to the function.
+  const uint32_t JALR_X5 =
+      Opcode_JALR | (Reg_X5 << ShiftField_RS1);
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(12, dl, MVT::i64));
+  OutChains[3] =
+      DAG.getTruncStore(Root, dl, DAG.getConstant(JALR_X5, dl, MVT::i64), Addr,
+                        MachinePointerInfo(TrmpAddr, 12), MVT::i32);
+
+  // Now store the variable part of the trampoline.
+  SDValue FunctionAddress = Op.getOperand(2);
+  SDValue StaticChain = Op.getOperand(3);
+
+  // Store the given static chain in the trampoline buffer.
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(StaticChainOffset, dl, MVT::i64));
+  OutChains[4] = DAG.getStore(Root, dl, StaticChain, Addr,
+                              MachinePointerInfo(TrmpAddr, StaticChainOffset));
+
+  // Store the given function address in the trampoline buffer.
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(FunctionAddressOffset, dl, MVT::i64));
+  OutChains[5] =
+      DAG.getStore(Root, dl, FunctionAddress, Addr,
+                   MachinePointerInfo(TrmpAddr, FunctionAddressOffset));
+
+  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+
+  // Compute end of trampoline.
+  SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                                  DAG.getConstant(32, dl, MVT::i64));
+
+  // Call clear cache on the trampoline buffer.
+  SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
+                              Trmp, EndOfTrmp);
+
+  return Chain;
+}
+
+SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  if (!Subtarget.is64Bit())
+    llvm::report_fatal_error("Trampolines only implemented for RV64");
+
+  return Op.getOperand(0);
+}
+
 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                              SelectionDAG &DAG, unsigned Flags) {
   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7d8bceb5cb341..7f8f1f4e42c31 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -993,6 +993,9 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVector<CCValAssign, 16> &ArgLocs) const;
diff --git a/llvm/test/CodeGen/RISCV/rv64-trampoline.ll b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll
new file mode 100644
index 0000000000000..4a7a50fc09bf8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64 %s
+; RUN: llc -mtriple=riscv64-unknown-linux-gnu -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64-LINUX %s
+
+declare void @llvm.init.trampoline(ptr, ptr, ptr)
+declare ptr @llvm.adjust.trampoline(ptr)
+declare i64 @f(ptr nest, i64)
+
+define i64 @test0(i64 %n, ptr %p) nounwind {
+; RV64-LABEL: test0:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -64
+; RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    mv s0, a0
+; RV64-NEXT:    lui a0, %hi(.LCPI0_0)
+; RV64-NEXT:    ld a0, %lo(.LCPI0_0)(a0)
+; RV64-NEXT:    lui a2, %hi(f)
+; RV64-NEXT:    addi a2, a2, %lo(f)
+; RV64-NEXT:    sd a2, 32(sp)
+; RV64-NEXT:    sd a1, 24(sp)
+; RV64-NEXT:    sd a0, 16(sp)
+; RV64-NEXT:    lui a0, 6203
+; RV64-NEXT:    addi a0, a0, 643
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    addi a0, a0, 919
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    addi a1, sp, 40
+; RV64-NEXT:    addi a0, sp, 8
+; RV64-NEXT:    addi s1, sp, 8
+; RV64-NEXT:    call __clear_cache
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:    jalr s1
+; RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 64
+; RV64-NEXT:    ret
+;
+; RV64-LINUX-LABEL: test0:
+; RV64-LINUX:       # %bb.0:
+; RV64-LINUX-NEXT:    addi sp, sp, -64
+; RV64-LINUX-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-LINUX-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-LINUX-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-LINUX-NEXT:    mv s0, a0
+; RV64-LINUX-NEXT:    lui a0, %hi(.LCPI0_0)
+; RV64-LINUX-NEXT:    ld a0, %lo(.LCPI0_0)(a0)
+; RV64-LINUX-NEXT:    lui a2, %hi(f)
+; RV64-LINUX-NEXT:    addi a2, a2, %lo(f)
+; RV64-LINUX-NEXT:    sd a2, 32(sp)
+; RV64-LINUX-NEXT:    sd a1, 24(sp)
+; RV64-LINUX-NEXT:    sd a0, 16(sp)
+; RV64-LINUX-NEXT:    lui a0, 6203
+; RV64-LINUX-NEXT:    addi a0, a0, 643
+; RV64-LINUX-NEXT:    slli a0, a0, 32
+; RV64-LINUX-NEXT:    addi a0, a0, 919
+; RV64-LINUX-NEXT:    sd a0, 8(sp)
+; RV64-LINUX-NEXT:    addi a1, sp, 40
+; RV64-LINUX-NEXT:    addi a0, sp, 8
+; RV64-LINUX-NEXT:    addi s1, sp, 8
+; RV64-LINUX-NEXT:    li a2, 0
+; RV64-LINUX-NEXT:    call __riscv_flush_icache
+; RV64-LINUX-NEXT:    mv a0, s0
+; RV64-LINUX-NEXT:    jalr s1
+; RV64-LINUX-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-LINUX-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-LINUX-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-LINUX-NEXT:    addi sp, sp, 64
+; RV64-LINUX-NEXT:    ret
+  %alloca = alloca [32 x i8], align 8
+  call void @llvm.init.trampoline(ptr %alloca, ptr @f, ptr %p)
+  %tramp = call ptr @llvm.adjust.trampoline(ptr %alloca)
+  %ret = call i64 %tramp(i64 %n)
+  ret i64 %ret
+
+}

topperc (Collaborator) left a comment

LGTM

rofirrim force-pushed the riscv-trampolines branch from 12c9bd7 to ebaf74f on July 19, 2024
rofirrim (Collaborator, Author)

OK, now using MCCodeEmitter to encode the instructions rather than hardcoded constants. Hope this is reasonable.
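
For context, a minimal sketch of what encoding one trampoline instruction through MCCodeEmitter can look like, assuming a recent LLVM where encodeInstruction appends to a SmallVectorImpl<char> (older releases write to a raw_ostream); the helper name and setup are illustrative, not the code from this PR:

```cpp
// Hedged sketch (not the actual patch): obtain the 32-bit encoding of
// "auipc t2, 0" from MCCodeEmitter instead of hand-assembling opcode and
// register fields.
#include <cstdint>
#include "MCTargetDesc/RISCVMCTargetDesc.h" // backend-internal; provides RISCV::AUIPC, RISCV::X7
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"

using namespace llvm;

static uint32_t encodeAuipcT2(const MCCodeEmitter &Emitter,
                              const MCSubtargetInfo &STI) {
  SmallVector<char, 4> Bytes;     // raw instruction bytes
  SmallVector<MCFixup, 1> Fixups; // no fixups expected for "auipc t2, 0"
  Emitter.encodeInstruction(
      MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0), Bytes, Fixups,
      STI);
  // RISC-V encodings are little-endian; reassemble the 32-bit word.
  return support::endian::read32le(Bytes.data());
}
```

The lowering would then store each resulting word into the trampoline buffer with getTruncStore, as in the diff above, instead of building it from the Opcode_*/ShiftField_* constants.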

github-actions bot commented Jul 19, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

topperc (Collaborator) left a comment

LGTM

rofirrim force-pushed the riscv-trampolines branch from 96de857 to 0675cae on July 30, 2024
wangpc-pp (Contributor) left a comment

LGTM. Thanks!

wangpc-pp (Contributor) left a comment

LGTM. @jrtc27 Any more comments?

rofirrim (Collaborator, Author) commented Sep 2, 2024

Any more comments on this?

rofirrim (Collaborator, Author)

The test has changed because we used to coalesce the two sw into a single sd. I assume this is still reasonable, though.

kito-cheng (Member) left a comment

LGTM as well

rofirrim merged commit 9d469b5 into llvm:main on Oct 18, 2024
8 checks passed