Skip to content
This repository was archived by the owner on May 21, 2019. It is now read-only.

Commit dcd2129

Browse files
committed
[XRay] Implement powerpc64le xray.
Summary: powerpc64 big-endian is not supported, but I believe that most logic can be shared, except for xray_powerpc64.cc. Also add a function InvalidateInstructionCache to xray_util.h, which is copied from llvm/Support/Memory.cpp. I'm not sure if I need to add a unittest, and I don't know how. Reviewers: dberris, echristo, iteratee, kbarton, hfinkel Subscribers: mehdi_amini, nemanjai, mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D29742 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@294781 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 324a908 commit dcd2129

File tree

8 files changed

+329
-1
lines changed

8 files changed

+329
-1
lines changed

cmake/config-ix.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
175175
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
176176
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
177177
set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64})
178-
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
178+
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${PPC64})
179179

180180
if(APPLE)
181181
include(CompilerRTDarwinUtils)

lib/xray/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ set(aarch64_SOURCES
2929
xray_trampoline_AArch64.S
3030
${XRAY_SOURCES})
3131

32+
set(powerpc64le_SOURCES
33+
xray_powerpc64.cc
34+
xray_trampoline_powerpc64.cc
35+
xray_trampoline_powerpc64.S
36+
${XRAY_SOURCES})
37+
3238
include_directories(..)
3339
include_directories(../../include)
3440

lib/xray/xray_interface.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static const int16_t cSledLength = 12;
3535
static const int16_t cSledLength = 32;
3636
#elif defined(__arm__)
3737
static const int16_t cSledLength = 28;
38+
#elif defined(__powerpc64__)
39+
static const int16_t cSledLength = 8;
3840
#else
3941
#error "Unsupported CPU Architecture"
4042
#endif /* CPU architecture */

lib/xray/xray_powerpc64.cc

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//===-- xray_powerpc64.cc ---------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is a part of XRay, a dynamic runtime instrumentation system.
11+
//
12+
// Implementation of powerpc64le routines (big-endian powerpc64 is rejected at
// compile time below).
13+
//
14+
//===----------------------------------------------------------------------===//
15+
#include "sanitizer_common/sanitizer_common.h"
16+
#include "xray_defs.h"
17+
#include "xray_interface_internal.h"
18+
#include "xray_utils.h"
19+
#include <atomic>
20+
#include <cassert>
21+
#include <cstring>
22+
23+
#ifndef __LITTLE_ENDIAN__
24+
#error powerpc64 big endian is not supported for now.
25+
#endif
26+
27+
namespace {

// Offset, counted in 4-byte instructions from the first word of a sled, of
// the instruction that an unpatched entry sled branches to and that an
// unpatched exit sled copies into its first slot.
constexpr unsigned long long JumpOverInstNum = 7;

// Makes freshly written instructions in [Addr, Addr + Len) visible to
// instruction fetch. The range is widened to 32-byte granules; every granule
// is first flushed from the data cache (dcbf) and, after a sync, invalidated
// in the instruction cache (icbi), followed by an isync.
void clearCache(void *Addr, size_t Len) {
  const size_t Granule = 32;

  // Round the start down and the end up to a granule boundary.
  const intptr_t AlignMask = ~(Granule - 1);
  const intptr_t First = ((intptr_t)Addr) & AlignMask;
  const intptr_t Limit = ((intptr_t)Addr + Len + Granule - 1) & AlignMask;

  // Pass 1: push the modified bytes out of the data cache.
  intptr_t Cursor = First;
  while (Cursor < Limit) {
    asm volatile("dcbf 0, %0" : : "r"(Cursor));
    Cursor += Granule;
  }
  asm volatile("sync");

  // Pass 2: drop any stale copies from the instruction cache.
  Cursor = First;
  while (Cursor < Limit) {
    asm volatile("icbi 0, %0" : : "r"(Cursor));
    Cursor += Granule;
  }
  asm volatile("isync");
}

} // namespace
48+
49+
extern "C" void __clear_cache(void *start, void *end);
50+
51+
namespace __xray {
52+
53+
bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
54+
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
55+
if (Enable) {
56+
// lis 0, FuncId[16..32]
57+
// li 0, FuncId[0..15]
58+
*reinterpret_cast<uint64_t *>(Sled.Address) =
59+
(0x3c000000ull + (FuncId >> 16)) +
60+
((0x60000000ull + (FuncId & 0xffff)) << 32);
61+
} else {
62+
// b +JumpOverInstNum instructions.
63+
*reinterpret_cast<uint32_t *>(Sled.Address) =
64+
0x48000000ull + (JumpOverInstNum << 2);
65+
}
66+
clearCache(reinterpret_cast<void *>(Sled.Address), 8);
67+
return true;
68+
}
69+
70+
bool patchFunctionExit(const bool Enable, uint32_t FuncId,
71+
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
72+
if (Enable) {
73+
// lis 0, FuncId[16..32]
74+
// li 0, FuncId[0..15]
75+
*reinterpret_cast<uint64_t *>(Sled.Address) =
76+
(0x3c000000ull + (FuncId >> 16)) +
77+
((0x60000000ull + (FuncId & 0xffff)) << 32);
78+
} else {
79+
// Copy the blr/b instruction after JumpOverInstNum instructions.
80+
*reinterpret_cast<uint32_t *>(Sled.Address) =
81+
*(reinterpret_cast<uint32_t *>(Sled.Address) + JumpOverInstNum);
82+
}
83+
clearCache(reinterpret_cast<void *>(Sled.Address), 8);
84+
return true;
85+
}
86+
87+
bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
88+
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
89+
return patchFunctionExit(Enable, FuncId, Sled);
90+
}
91+
92+
// FIXME: Maybe implement this better?
93+
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
94+
95+
} // namespace __xray

lib/xray/xray_powerpc64.inc

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file is a part of XRay, a dynamic runtime instrumentation system.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include <cstdint>
15+
#include <mutex>
16+
#include <sys/platform/ppc.h>
17+
18+
#include "xray_defs.h"
19+
20+
namespace __xray {
21+
22+
// Returns the value reported by __ppc_get_timebase(). No CPU id is
// determined: CPU is always set to 0.
ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
  const uint64_t Ticks = __ppc_get_timebase();
  CPU = 0;
  return Ticks;
}
26+
27+
// Returns the value reported by __ppc_get_timebase_freq(). The query is
// serialized through a function-local mutex, so only one thread at a time
// executes it.
inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  static std::mutex FreqLock;
  std::lock_guard<std::mutex> Hold(FreqLock);
  const uint64_t Freq = __ppc_get_timebase_freq();
  return Freq;
}
32+
33+
// Reports whether the CPU features needed for TSC reads are available. No
// probing is performed; the answer is unconditionally true.
inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
36+
37+
} // namespace __xray

lib/xray/xray_trampoline_powerpc64.S

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
.text
2+
.abiversion 2
3+
.globl __xray_FunctionEntry
.p2align 4
# Entry trampoline. Saves every parameter register, calls
# __xray::CallXRayPatchedFunction(FuncId, 0), restores the registers and
# returns.
#
# Inputs, as read by this code:
#   -8(r1) on entry : FuncId.
#   r0              : saved to 16(r1) of the caller's frame on entry and
#                     reloaded from there just before returning (presumably
#                     the instrumented function's own LR - confirm against
#                     the compiler-emitted sled).
#
# Frame layout (416 bytes, a multiple of 16 so that r1 stays quadword
# aligned across the call as the ELFv2 ABI requires):
#    32.. 95  r3-r10
#    96..199  f1-f13
#   200..391  vsr34-vsr45
#   392       r2 (TOC)
#   400       LR of this trampoline
#   408       FuncId (the -8(r1) slot of the caller, now inside the frame)
__xray_FunctionEntry:
	std 0, 16(1)
	stdu 1, -416(1)
	# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers.
	# If this appears to be slow, the caller needs to pass in number of generic,
	# floating point, and vector parameters, so that we only spill those live ones.
	std 3, 32(1)
	ld 3, 408(1)	# FuncId
	std 4, 40(1)
	std 5, 48(1)
	std 6, 56(1)
	std 7, 64(1)
	std 8, 72(1)
	std 9, 80(1)
	std 10, 88(1)
	# r4 is already saved; use it as the address register for the VSX stores.
	addi 4, 1, 96
	stxsdx 1, 0, 4
	addi 4, 1, 104
	stxsdx 2, 0, 4
	addi 4, 1, 112
	stxsdx 3, 0, 4
	addi 4, 1, 120
	stxsdx 4, 0, 4
	addi 4, 1, 128
	stxsdx 5, 0, 4
	addi 4, 1, 136
	stxsdx 6, 0, 4
	addi 4, 1, 144
	stxsdx 7, 0, 4
	addi 4, 1, 152
	stxsdx 8, 0, 4
	addi 4, 1, 160
	stxsdx 9, 0, 4
	addi 4, 1, 168
	stxsdx 10, 0, 4
	addi 4, 1, 176
	stxsdx 11, 0, 4
	addi 4, 1, 184
	stxsdx 12, 0, 4
	addi 4, 1, 192
	stxsdx 13, 0, 4
	addi 4, 1, 200
	stxvd2x 34, 0, 4
	addi 4, 1, 216
	stxvd2x 35, 0, 4
	addi 4, 1, 232
	stxvd2x 36, 0, 4
	addi 4, 1, 248
	stxvd2x 37, 0, 4
	addi 4, 1, 264
	stxvd2x 38, 0, 4
	addi 4, 1, 280
	stxvd2x 39, 0, 4
	addi 4, 1, 296
	stxvd2x 40, 0, 4
	addi 4, 1, 312
	stxvd2x 41, 0, 4
	addi 4, 1, 328
	stxvd2x 42, 0, 4
	addi 4, 1, 344
	stxvd2x 43, 0, 4
	addi 4, 1, 360
	stxvd2x 44, 0, 4
	addi 4, 1, 376
	stxvd2x 45, 0, 4
	std 2, 392(1)
	mflr 0
	std 0, 400(1)

	# r3 = FuncId (loaded above), r4 = 0 as the XRayEntryType argument.
	li 4, 0
	bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
	nop

	addi 4, 1, 96
	lxsdx 1, 0, 4
	addi 4, 1, 104
	lxsdx 2, 0, 4
	addi 4, 1, 112
	lxsdx 3, 0, 4
	addi 4, 1, 120
	lxsdx 4, 0, 4
	addi 4, 1, 128
	lxsdx 5, 0, 4
	addi 4, 1, 136
	lxsdx 6, 0, 4
	addi 4, 1, 144
	lxsdx 7, 0, 4
	addi 4, 1, 152
	lxsdx 8, 0, 4
	addi 4, 1, 160
	lxsdx 9, 0, 4
	addi 4, 1, 168
	lxsdx 10, 0, 4
	addi 4, 1, 176
	lxsdx 11, 0, 4
	addi 4, 1, 184
	lxsdx 12, 0, 4
	addi 4, 1, 192
	lxsdx 13, 0, 4
	addi 4, 1, 200
	lxvd2x 34, 0, 4
	addi 4, 1, 216
	lxvd2x 35, 0, 4
	addi 4, 1, 232
	lxvd2x 36, 0, 4
	addi 4, 1, 248
	lxvd2x 37, 0, 4
	addi 4, 1, 264
	lxvd2x 38, 0, 4
	addi 4, 1, 280
	lxvd2x 39, 0, 4
	addi 4, 1, 296
	lxvd2x 40, 0, 4
	addi 4, 1, 312
	lxvd2x 41, 0, 4
	addi 4, 1, 328
	lxvd2x 42, 0, 4
	addi 4, 1, 344
	lxvd2x 43, 0, 4
	addi 4, 1, 360
	lxvd2x 44, 0, 4
	addi 4, 1, 376
	lxvd2x 45, 0, 4
	ld 0, 400(1)
	mtlr 0
	ld 2, 392(1)
	ld 3, 32(1)
	# r4 is reloaded only now, after its last use as an address register.
	ld 4, 40(1)
	ld 5, 48(1)
	ld 6, 56(1)
	ld 7, 64(1)
	ld 8, 72(1)
	ld 9, 80(1)
	ld 10, 88(1)

	addi 1, 1, 416
	ld 0, 16(1)
	blr
143+
144+
.globl __xray_FunctionExit
.p2align 4
# Exit trampoline. Saves every register the ELFv2 ABI may use to return a
# value, calls __xray::CallXRayPatchedFunction(FuncId, 1), restores the
# registers and returns.
#
# Inputs, as read by this code:
#   -8(r1) on entry : FuncId.
#   r0              : saved to 16(r1) of the caller's frame on entry and
#                     reloaded from there just before returning (presumably
#                     the instrumented function's own LR - confirm against
#                     the compiler-emitted sled).
#
# Frame layout (256 bytes, a multiple of 16 so that r1 stays quadword
# aligned across the call as the ELFv2 ABI requires):
#    32.. 47  r3-r4
#    48..111  f1-f8
#   112..239  vsr34-vsr41
#   240       LR of this trampoline
#   248       FuncId (the -8(r1) slot of the caller, now inside the frame)
__xray_FunctionExit:
	std 0, 16(1)
	stdu 1, -256(1)
	# Spill r3-r4, f1-f8, and vsr34-vsr41, which are return registers.
	# If this appears to be slow, the caller needs to pass in number of generic,
	# floating point, and vector parameters, so that we only spill those live ones.
	std 3, 32(1)
	ld 3, 248(1)	# FuncId
	std 4, 40(1)
	# r4 is already saved; use it as the address register for the VSX stores.
	addi 4, 1, 48
	stxsdx 1, 0, 4
	addi 4, 1, 56
	stxsdx 2, 0, 4
	addi 4, 1, 64
	stxsdx 3, 0, 4
	addi 4, 1, 72
	stxsdx 4, 0, 4
	addi 4, 1, 80
	stxsdx 5, 0, 4
	addi 4, 1, 88
	stxsdx 6, 0, 4
	addi 4, 1, 96
	stxsdx 7, 0, 4
	addi 4, 1, 104
	stxsdx 8, 0, 4
	addi 4, 1, 112
	stxvd2x 34, 0, 4
	addi 4, 1, 128
	stxvd2x 35, 0, 4
	addi 4, 1, 144
	stxvd2x 36, 0, 4
	addi 4, 1, 160
	stxvd2x 37, 0, 4
	addi 4, 1, 176
	stxvd2x 38, 0, 4
	addi 4, 1, 192
	stxvd2x 39, 0, 4
	addi 4, 1, 208
	stxvd2x 40, 0, 4
	addi 4, 1, 224
	stxvd2x 41, 0, 4
	mflr 0
	std 0, 240(1)

	# r3 = FuncId (loaded above), r4 = 1 as the XRayEntryType argument.
	li 4, 1
	bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
	nop

	ld 0, 240(1)
	mtlr 0
	ld 3, 32(1)
	addi 4, 1, 48
	lxsdx 1, 0, 4
	addi 4, 1, 56
	lxsdx 2, 0, 4
	addi 4, 1, 64
	lxsdx 3, 0, 4
	addi 4, 1, 72
	lxsdx 4, 0, 4
	addi 4, 1, 80
	lxsdx 5, 0, 4
	addi 4, 1, 88
	lxsdx 6, 0, 4
	addi 4, 1, 96
	lxsdx 7, 0, 4
	addi 4, 1, 104
	lxsdx 8, 0, 4
	addi 4, 1, 112
	lxvd2x 34, 0, 4
	addi 4, 1, 128
	lxvd2x 35, 0, 4
	addi 4, 1, 144
	lxvd2x 36, 0, 4
	addi 4, 1, 160
	lxvd2x 37, 0, 4
	addi 4, 1, 176
	lxvd2x 38, 0, 4
	addi 4, 1, 192
	lxvd2x 39, 0, 4
	addi 4, 1, 208
	lxvd2x 40, 0, 4
	addi 4, 1, 224
	lxvd2x 41, 0, 4
	# r4 is reloaded only now, after its last use as an address register.
	ld 4, 40(1)

	addi 1, 1, 256
	ld 0, 16(1)
	blr

lib/xray/xray_trampoline_powerpc64.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include <atomic>
2+
#include <xray/xray_interface.h>
3+
4+
namespace __xray {
5+
6+
extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction;
7+
8+
// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand.
9+
void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) {
10+
auto fptr = __xray::XRayPatchedFunction.load();
11+
if (fptr != nullptr)
12+
(*fptr)(FuncId, Type);
13+
}
14+
15+
} // namespace __xray

lib/xray/xray_tsc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
#if defined(__x86_64__)
1717
#include "xray_x86_64.inc"
18+
#elif defined(__powerpc64__)
19+
#include "xray_powerpc64.inc"
1820
#elif defined(__arm__) || defined(__aarch64__)
1921
// Emulated TSC.
2022
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does

0 commit comments

Comments
 (0)