Skip to content

Commit ea76b2d

Browse files
[XRay][RISCV] RISCV support for XRay (#117368)
Add RISC-V support for XRay. The RV64 implementation has been tested both in QEMU and in our hardware environment. Currently this requires the D and C extensions, but since both RV64GC and RVA22/RVA23 are becoming mainstream, I don't think this requirement will be a big problem. Based on the previous work by @a-poduval: https://reviews.llvm.org/D117929 --------- Co-authored-by: Ashwin Poduval <[email protected]>
1 parent 7987f47 commit ea76b2d

15 files changed

+696
-5
lines changed

clang/lib/Driver/XRayArgs.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
5151
case llvm::Triple::mips64:
5252
case llvm::Triple::mips64el:
5353
case llvm::Triple::systemz:
54+
case llvm::Triple::riscv32:
55+
case llvm::Triple::riscv64:
5456
break;
5557
default:
5658
D.Diag(diag::err_drv_unsupported_opt_for_target)

compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ if(APPLE)
102102
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
103103
else()
104104
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
105-
powerpc64le ${HEXAGON} ${LOONGARCH64})
105+
powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64})
106106
endif()
107107
set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64})
108108
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})

compiler-rt/lib/xray/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,16 @@ set(hexagon_SOURCES
9696
xray_trampoline_hexagon.S
9797
)
9898

99+
set(riscv32_SOURCES
100+
xray_riscv.cpp
101+
xray_trampoline_riscv32.S
102+
)
103+
104+
set(riscv64_SOURCES
105+
xray_riscv.cpp
106+
xray_trampoline_riscv64.S
107+
)
108+
99109
set(XRAY_SOURCE_ARCHS
100110
arm
101111
armhf
@@ -156,6 +166,8 @@ set(XRAY_ALL_SOURCE_FILES
156166
${mips64_SOURCES}
157167
${mips64el_SOURCES}
158168
${powerpc64le_SOURCES}
169+
${riscv32_SOURCES}
170+
${riscv64_SOURCES}
159171
${XRAY_IMPL_HEADERS}
160172
)
161173
list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)

compiler-rt/lib/xray/xray_interface.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ static const int16_t cSledLength = 64;
5757
static const int16_t cSledLength = 8;
5858
#elif defined(__hexagon__)
5959
static const int16_t cSledLength = 20;
60+
#elif defined(__riscv) && (__riscv_xlen == 64)
61+
static const int16_t cSledLength = 68;
62+
#elif defined(__riscv) && (__riscv_xlen == 32)
63+
static const int16_t cSledLength = 52;
6064
#else
6165
#error "Unsupported CPU Architecture"
6266
#endif /* CPU architecture */

compiler-rt/lib/xray/xray_riscv.cpp

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
//===-- xray_riscv.cpp ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file is a part of XRay, a dynamic runtime instrumentation system.
10+
//
11+
// Implementation of RISC-V specific routines (32- and 64-bit).
12+
//
13+
//===----------------------------------------------------------------------===//
14+
#include "sanitizer_common/sanitizer_common.h"
15+
#include "xray_defs.h"
16+
#include "xray_interface_internal.h"
17+
#include <atomic>
18+
19+
namespace __xray {
20+
21+
// The machine codes for some instructions used in runtime patching.
22+
// Base instruction words used when patching sleds at runtime. Each value
// carries only the fixed opcode/funct bits of the instruction; register and
// immediate fields are OR'd in by the encode*Instruction helpers.
enum PatchOpcodes : uint32_t {
  // Integer register/immediate arithmetic.
  PO_ADDI = 0x00000013, // addi rd, rs1, imm
  PO_SLLI = 0x00001013, // slli rd, rs1, shamt
  PO_LUI = 0x00000037,  // lui rd, imm

  // Integer register/register arithmetic.
  PO_ADD = 0x00000033, // add rd, rs1, rs2
  PO_OR = 0x00006033,  // or rd, rs1, rs2

  // Stores.
  PO_SW = 0x00002023, // sw rs2, imm(rs1)
  PO_SD = 0x00003023, // sd rs2, imm(rs1)

  // Loads.
  PO_LW = 0x00002003, // lw rd, imm(rs1)
  PO_LD = 0x00003003, // ld rd, imm(rs1)

  // Control transfer.
  PO_JALR = 0x00000067, // jalr rd, rs1
  PO_J = 0x0000006f,    // jal imm

  // The canonical no-op is an ADDI with all fields zero.
  PO_NOP = PO_ADDI, // addi x0, x0, 0
};
36+
37+
// Integer register indices (the N in xN) for the registers referenced by the
// patched sled, named after their ABI mnemonics.
enum RegNum : uint32_t {
  RN_X0 = 0,  // x0
  RN_RA = 1,  // x1
  RN_SP = 2,  // x2
  RN_T1 = 6,  // x6
  RN_A0 = 10, // x10
};
44+
45+
// Assembles an R-type instruction word by OR-ing the register numbers into
// Opcode: Rd at bit 7, Rs1 at bit 15 and Rs2 at bit 20. The register numbers
// are not masked, so callers must pass values that fit their fields.
static inline uint32_t encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1,
                                              uint32_t Rs2, uint32_t Rd) {
  uint32_t Insn = Opcode;
  Insn |= Rd << 7;
  Insn |= Rs1 << 15;
  Insn |= Rs2 << 20;
  return Insn;
}
49+
50+
// Assembles an I-type instruction word by OR-ing into Opcode: Rd at bit 7,
// Rs1 at bit 15 and Imm at bit 20. Shifting the 32-bit Imm left by 20 keeps
// only its low 12 bits, so a negative immediate passed as a wrapped unsigned
// value is encoded as its 12-bit two's-complement form.
static inline uint32_t encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1,
                                              uint32_t Rd, uint32_t Imm) {
  uint32_t Insn = Opcode;
  Insn |= Rd << 7;
  Insn |= Rs1 << 15;
  Insn |= Imm << 20;
  return Insn;
}
54+
55+
// Assembles an S-type (store) instruction word. The 12-bit immediate is split
// in two: Imm[4:0] is placed at bit 7 and Imm[11:5] at bit 25. Rs1 goes to
// bit 15 and Rs2 to bit 20; everything is OR'd into Opcode.
static inline uint32_t encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1,
                                              uint32_t Rs2, uint32_t Imm) {
  const uint32_t UpperImm = (Imm & 0xfe0) << 20; // Imm[11:5] -> bits 31:25
  const uint32_t LowerImm = (Imm & 0x01f) << 7;  // Imm[4:0]  -> bits 11:7
  const uint32_t Regs = Rs2 << 20 | Rs1 << 15;
  return UpperImm | Regs | LowerImm | Opcode;
}
61+
62+
// Assembles a U-type instruction word by OR-ing into Opcode: Rd at bit 7 and
// the immediate at bit 12 (bits of Imm above the low 20 are shifted out).
static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd,
                                              uint32_t Imm) {
  uint32_t Insn = Opcode;
  Insn |= Rd << 7;
  Insn |= Imm << 12;
  return Insn;
}
66+
67+
// Assembles a J-type (jump) instruction word. Imm is a byte offset whose bits
// are scattered across the instruction as follows:
//   Imm[20]    -> bit 31
//   Imm[10:1]  -> bits 30:21
//   Imm[11]    -> bit 20
//   Imm[19:12] -> bits 19:12 (already in place)
// Bit 0 of Imm is dropped. Rd goes to bit 7; everything is OR'd into Opcode.
static inline uint32_t encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd,
                                              uint32_t Imm) {
  uint32_t Insn = Opcode | Rd << 7;
  Insn |= (Imm & 0x100000) << 11;
  Insn |= (Imm & 0x7fe) << 20;
  Insn |= (Imm & 0x800) << 9;
  Insn |= (Imm & 0xff000);
  return Insn;
}
75+
76+
// Returns the upper 20 bits of val after adding 0x800. The rounding
// compensates for a low 12-bit part that is later added as a sign-extended
// immediate. The addition wraps modulo 2^32.
static uint32_t hi20(uint32_t val) {
  const uint32_t Rounded = val + 0x800;
  return Rounded >> 12;
}
77+
// Returns the low 12 bits of val.
static uint32_t lo12(uint32_t val) {
  const uint32_t Low12Mask = 0xfff;
  return val & Low12Mask;
}
78+
79+
static inline bool patchSled(const bool Enable, const uint32_t FuncId,
80+
const XRaySledEntry &Sled,
81+
void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
82+
// When |Enable| == true,
83+
// We replace the following compile-time stub (sled):
84+
//
85+
// xray_sled_n:
86+
// J .tmpN
87+
// 21 or 33 C.NOPs (42 or 66 bytes)
88+
// .tmpN
89+
//
90+
// With one of the following runtime patches:
91+
//
92+
// xray_sled_n (32-bit):
93+
// addi sp, sp, -16 ;create stack frame
94+
// sw ra, 12(sp) ;save return address
95+
// sw a0, 8(sp) ;save register a0
96+
// lui ra, %hi(__xray_FunctionEntry/Exit)
97+
// addi ra, ra, %lo(__xray_FunctionEntry/Exit)
98+
// lui a0, %hi(function_id)
99+
// addi a0, a0, %lo(function_id) ;pass function id
100+
// jalr ra ;call Tracing hook
101+
// lw a0, 8(sp) ;restore register a0
102+
// lw ra, 12(sp) ;restore return address
103+
// addi sp, sp, 16 ;delete stack frame
104+
//
105+
// xray_sled_n (64-bit):
106+
// addi sp, sp, -32 ;create stack frame
107+
// sd ra, 24(sp) ;save return address
108+
// sd a0, 16(sp) ;save register a0
109+
// sd t1, 8(sp) ;save register t1
110+
// lui t1, %highest(__xray_FunctionEntry/Exit)
111+
// addi t1, t1, %higher(__xray_FunctionEntry/Exit)
112+
// slli t1, t1, 32
113+
// lui ra, ra, %hi(__xray_FunctionEntry/Exit)
114+
// addi ra, ra, %lo(__xray_FunctionEntry/Exit)
115+
// add ra, t1, ra
116+
// lui a0, %hi(function_id)
117+
// addi a0, a0, %lo(function_id) ;pass function id
118+
// jalr ra ;call Tracing hook
119+
// ld t1, 8(sp) ;restore register t1
120+
// ld a0, 16(sp) ;restore register a0
121+
// ld ra, 24(sp) ;restore return address
122+
// addi sp, sp, 32 ;delete stack frame
123+
//
124+
// Replacement of the first 4-byte instruction should be the last and atomic
125+
// operation, so that the user code which reaches the sled concurrently
126+
// either jumps over the whole sled, or executes the whole sled when the
127+
// latter is ready.
128+
//
129+
// When |Enable|==false, we set back the first instruction in the sled to be
130+
// J 44 bytes (rv32)
131+
// J 68 bytes (rv64)
132+
133+
uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
134+
if (Enable) {
135+
#if __riscv_xlen == 64
136+
// If the ISA is RV64, the Tracing Hook needs to be typecast to a 64 bit
137+
// value.
138+
uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook));
139+
uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook));
140+
uint32_t HigherTracingHookAddr =
141+
lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
142+
uint32_t HighestTracingHookAddr =
143+
hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
144+
#elif __riscv_xlen == 32
145+
// We typecast the Tracing Hook to a 32 bit value for RV32
146+
uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook));
147+
uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook));
148+
#endif
149+
uint32_t LoFunctionID = lo12(FuncId);
150+
uint32_t HiFunctionID = hi20(FuncId);
151+
152+
// The sled that is patched in for RISCV64 defined below. We need the entire
153+
// sleds corresponding to both ISAs to be protected by defines because the
154+
// first few instructions are all different, because we store doubles in
155+
// case of RV64 and store words for RV32. Subsequently, we have LUI - and in
156+
// case of RV64, we need extra instructions from this point on, so we see
157+
// differences in addresses to which instructions are stored.
158+
size_t Idx = 1U;
159+
const uint32_t XLenBytes = __riscv_xlen / 8;
160+
#if __riscv_xlen == 64
161+
const uint32_t LoadOp = PatchOpcodes::PO_LD;
162+
const uint32_t StoreOp = PatchOpcodes::PO_SD;
163+
#elif __riscv_xlen == 32
164+
const uint32_t LoadOp = PatchOpcodes::PO_LW;
165+
const uint32_t StoreOp = PatchOpcodes::PO_SW;
166+
#endif
167+
168+
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
169+
RegNum::RN_RA, 3 * XLenBytes);
170+
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
171+
RegNum::RN_A0, 2 * XLenBytes);
172+
173+
#if __riscv_xlen == 64
174+
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
175+
RegNum::RN_T1, XLenBytes);
176+
Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1,
177+
HighestTracingHookAddr);
178+
Address[Idx++] =
179+
encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1,
180+
RegNum::RN_T1, HigherTracingHookAddr);
181+
Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_SLLI,
182+
RegNum::RN_T1, RegNum::RN_T1, 32);
183+
#endif
184+
Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_RA,
185+
HiTracingHookAddr);
186+
Address[Idx++] = encodeITypeInstruction(
187+
PatchOpcodes::PO_ADDI, RegNum::RN_RA, RegNum::RN_RA, LoTracingHookAddr);
188+
#if __riscv_xlen == 64
189+
Address[Idx++] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_RA,
190+
RegNum::RN_T1, RegNum::RN_RA);
191+
#endif
192+
Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0,
193+
HiFunctionID);
194+
Address[Idx++] = encodeITypeInstruction(
195+
PatchOpcodes::PO_ADDI, RegNum::RN_A0, RegNum::RN_A0, LoFunctionID);
196+
Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_JALR,
197+
RegNum::RN_RA, RegNum::RN_RA, 0);
198+
199+
#if __riscv_xlen == 64
200+
Address[Idx++] =
201+
encodeITypeInstruction(LoadOp, RegNum::RN_SP, RegNum::RN_T1, XLenBytes);
202+
#endif
203+
Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP,
204+
RegNum::RN_A0, 2 * XLenBytes);
205+
Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP,
206+
RegNum::RN_RA, 3 * XLenBytes);
207+
Address[Idx++] = encodeITypeInstruction(
208+
PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 4 * XLenBytes);
209+
210+
uint32_t CreateStackSpace = encodeITypeInstruction(
211+
PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, -4 * XLenBytes);
212+
213+
std::atomic_store_explicit(
214+
reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
215+
std::memory_order_release);
216+
} else {
217+
uint32_t CreateBranch = encodeJTypeInstruction(
218+
// Jump distance is different in both ISAs due to difference in size of
219+
// sleds
220+
#if __riscv_xlen == 64
221+
PatchOpcodes::PO_J, RegNum::RN_X0,
222+
68); // jump encodes an offset of 68
223+
#elif __riscv_xlen == 32
224+
PatchOpcodes::PO_J, RegNum::RN_X0,
225+
44); // jump encodes an offset of 44
226+
#endif
227+
std::atomic_store_explicit(
228+
reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch,
229+
std::memory_order_release);
230+
}
231+
return true;
232+
}
233+
234+
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
235+
const XRaySledEntry &Sled,
236+
const XRayTrampolines &Trampolines,
237+
bool LogArgs) XRAY_NEVER_INSTRUMENT {
238+
// We don't support logging argument at this moment, so we always
239+
// use EntryTrampoline.
240+
return patchSled(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
241+
}
242+
243+
bool patchFunctionExit(
244+
const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
245+
const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
246+
return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
247+
}
248+
249+
bool patchFunctionTailExit(
250+
const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
251+
const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
252+
return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline);
253+
}
254+
255+
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
256+
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
257+
return false;
258+
}
259+
260+
bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
261+
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
262+
return false;
263+
}
264+
} // namespace __xray
265+
266+
// C-linkage definition of __xray_ArgLoggerEntry with an empty body: calling it
// does nothing.
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {}

0 commit comments

Comments
 (0)