Commit b46c898

EliaGeretto authored and MaskRay committed
[XRay][compiler-rt][x86_64] Fix CFI directives in assembly trampolines
This patch modifies the x86_64 XRay trampolines to fix the CFI information generated by the assembler. One of the main obstacles to correct CFI directives is the `ALIGNED_CALL_RAX` macro, which makes the CFA dependent on the run-time alignment of the stack. However, this macro is not really necessary, because additional assumptions can be made about the stack alignment when the trampolines are called. The code had been written as if the stack were guaranteed to be 8-byte aligned, whereas it is in fact guaranteed to be misaligned by 8 bytes with respect to a 16-byte alignment. For this reason, always moving the stack pointer by a fixed 8 bytes is sufficient to restore the appropriate alignment.

Trampolines that are called from within a function through the builtins `__xray_typedevent` and `__xray_customevent` are necessarily called with the stack properly aligned, so in this case too `ALIGNED_CALL_RAX` can be eliminated.

Fixes https://bugs.llvm.org/show_bug.cgi?id=49060

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D96785
1 parent ca747e4 commit b46c898
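As a rough cross-check of the reasoning above (an illustrative sketch, not part of the commit): after this change, `__xray_FunctionEntry` lowers the stack pointer by a fixed 8 + 8 + 240 = 256 bytes before the indirect call, so both the alignment and the CFA offset are known at assembly time. Assuming the entry sled reaches the trampoline through a `call` that leaves %rsp 16-byte aligned (that is, 8 bytes off the ABI rule that %rsp + 8 be 16-byte aligned at function entry), the bookkeeping works out as follows:

    # Illustrative sketch; assumed state on entry to __xray_FunctionEntry:
    # %rsp % 16 == 0 (the sled's call just pushed a return address), CFA = %rsp + 8.
    subq  $8, %rsp      # ALIGN_STACK_16B:  %rsp % 16 == 8,  CFA = %rsp + 16
    pushfq              # SAVE_REGISTERS:   %rsp % 16 == 0,  CFA = %rsp + 24
    subq  $240, %rsp    #                   %rsp % 16 == 0,  CFA = %rsp + 264
    # ... registers are spilled into the 240-byte area, handler loaded into %rax ...
    callq *%rax         # 8 + 8 + 240 = 256 is a multiple of 16, so the stack
                        # is 16-byte aligned here without ALIGNED_CALL_RAX's
                        # run-time masking

The old code adjusted the stack by only 248 bytes (pushfq plus 240), which is 8 bytes short of a multiple of 16; the fixed extra 8-byte adjustment is what makes the run-time `andq $-0x10, %rsp` trick unnecessary.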

File tree: 1 file changed (+50, -27)


compiler-rt/lib/xray/xray_trampoline_x86_64.S

Lines changed: 50 additions & 27 deletions
@@ -15,12 +15,37 @@
 #include "../builtins/assembly.h"
 #include "../sanitizer_common/sanitizer_asm.h"
 
+// XRay trampolines which are not produced by intrinsics are not System V AMD64
+// ABI compliant because they are called with a stack that is always misaligned
+// by 8 bytes with respect to a 16 bytes alignment. This is because they are
+// called immediately after the call to, or immediately before returning from,
+// the function being instrumented. This saves space in the patch point, but
+// misaligns the stack by 8 bytes.
+
+.macro ALIGN_STACK_16B
+#if defined(__APPLE__)
+  subq $$8, %rsp
+#else
+  subq $8, %rsp
+#endif
+  CFI_ADJUST_CFA_OFFSET(8)
+.endm
 
+.macro RESTORE_STACK_ALIGNMENT
+#if defined(__APPLE__)
+  addq $$8, %rsp
+#else
+  addq $8, %rsp
+#endif
+  CFI_ADJUST_CFA_OFFSET(-8)
+.endm
 
+// This macro should keep the stack aligned to 16 bytes.
 .macro SAVE_REGISTERS
   pushfq
+  CFI_ADJUST_CFA_OFFSET(8)
   subq $240, %rsp
-  CFI_DEF_CFA_OFFSET(248)
+  CFI_ADJUST_CFA_OFFSET(240)
   movq %rbp, 232(%rsp)
   movupd %xmm0, 216(%rsp)
   movupd %xmm1, 200(%rsp)
@@ -45,6 +70,7 @@
   movq %r15, 0(%rsp)
 .endm
 
+// This macro should keep the stack aligned to 16 bytes.
 .macro RESTORE_REGISTERS
   movq 232(%rsp), %rbp
   movupd 216(%rsp), %xmm0
@@ -69,22 +95,9 @@
   movq 8(%rsp), %r14
   movq 0(%rsp), %r15
   addq $240, %rsp
+  CFI_ADJUST_CFA_OFFSET(-240)
   popfq
-  CFI_DEF_CFA_OFFSET(8)
-.endm
-
-.macro ALIGNED_CALL_RAX
-  // Call the logging handler, after aligning the stack to a 16-byte boundary.
-  // The approach we're taking here uses additional stack space to stash the
-  // stack pointer twice before aligning the pointer to 16-bytes. If the stack
-  // was 8-byte aligned, it will become 16-byte aligned -- when restoring the
-  // pointer, we can always look -8 bytes from the current position to get
-  // either of the values we've stashed in the first place.
-  pushq %rsp
-  pushq (%rsp)
-  andq $-0x10, %rsp
-  callq *%rax
-  movq 8(%rsp), %rsp
+  CFI_ADJUST_CFA_OFFSET(-8)
 .endm
 
 .text
@@ -104,6 +117,7 @@
 # LLVM-MCA-BEGIN __xray_FunctionEntry
 ASM_SYMBOL(__xray_FunctionEntry):
   CFI_STARTPROC
+  ALIGN_STACK_16B
   SAVE_REGISTERS
 
   // This load has to be atomic, it's concurrent with __xray_patch().
@@ -115,10 +129,11 @@ ASM_SYMBOL(__xray_FunctionEntry):
   // The patched function prologue puts its xray_instr_map index into %r10d.
   movl %r10d, %edi
   xor %esi,%esi
-  ALIGNED_CALL_RAX
+  callq *%rax
 
 .Ltmp0:
   RESTORE_REGISTERS
+  RESTORE_STACK_ALIGNMENT
   retq
 # LLVM-MCA-END
   ASM_SIZE(__xray_FunctionEntry)
@@ -133,11 +148,13 @@ ASM_SYMBOL(__xray_FunctionEntry):
 # LLVM-MCA-BEGIN __xray_FunctionExit
 ASM_SYMBOL(__xray_FunctionExit):
   CFI_STARTPROC
+  ALIGN_STACK_16B
+
   // Save the important registers first. Since we're assuming that this
   // function is only jumped into, we only preserve the registers for
   // returning.
-  subq $56, %rsp
-  CFI_DEF_CFA_OFFSET(64)
+  subq $64, %rsp
+  CFI_ADJUST_CFA_OFFSET(64)
   movq %rbp, 48(%rsp)
   movupd %xmm0, 32(%rsp)
   movupd %xmm1, 16(%rsp)
@@ -149,7 +166,7 @@ ASM_SYMBOL(__xray_FunctionExit):
 
   movl %r10d, %edi
   movl $1, %esi
-  ALIGNED_CALL_RAX
+  callq *%rax
 
 .Ltmp2:
   // Restore the important registers.
@@ -158,8 +175,10 @@ ASM_SYMBOL(__xray_FunctionExit):
   movupd 16(%rsp), %xmm1
   movq 8(%rsp), %rax
   movq 0(%rsp), %rdx
-  addq $56, %rsp
-  CFI_DEF_CFA_OFFSET(8)
+  addq $64, %rsp
+  CFI_ADJUST_CFA_OFFSET(-64)
+
+  RESTORE_STACK_ALIGNMENT
   retq
 # LLVM-MCA-END
   ASM_SIZE(__xray_FunctionExit)
@@ -174,6 +193,7 @@ ASM_SYMBOL(__xray_FunctionExit):
 # LLVM-MCA-BEGIN __xray_FunctionTailExit
 ASM_SYMBOL(__xray_FunctionTailExit):
   CFI_STARTPROC
+  ALIGN_STACK_16B
   SAVE_REGISTERS
 
   movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
@@ -182,11 +202,11 @@ ASM_SYMBOL(__xray_FunctionTailExit):
 
   movl %r10d, %edi
   movl $2, %esi
-
-  ALIGNED_CALL_RAX
+  callq *%rax
 
 .Ltmp4:
   RESTORE_REGISTERS
+  RESTORE_STACK_ALIGNMENT
   retq
 # LLVM-MCA-END
   ASM_SIZE(__xray_FunctionTailExit)
@@ -201,6 +221,7 @@ ASM_SYMBOL(__xray_FunctionTailExit):
 # LLVM-MCA-BEGIN __xray_ArgLoggerEntry
 ASM_SYMBOL(__xray_ArgLoggerEntry):
   CFI_STARTPROC
+  ALIGN_STACK_16B
   SAVE_REGISTERS
 
   // Again, these function pointer loads must be atomic; MOV is fine.
@@ -223,10 +244,12 @@ ASM_SYMBOL(__xray_ArgLoggerEntry):
 
   // 32-bit function ID becomes the first
   movl %r10d, %edi
-  ALIGNED_CALL_RAX
+
+  callq *%rax
 
 .Larg1entryFail:
   RESTORE_REGISTERS
+  RESTORE_STACK_ALIGNMENT
   retq
 # LLVM-MCA-END
   ASM_SIZE(__xray_ArgLoggerEntry)
@@ -249,7 +272,7 @@ ASM_SYMBOL(__xray_CustomEvent):
   testq %rax,%rax
   je .LcustomEventCleanup
 
-  ALIGNED_CALL_RAX
+  callq *%rax
 
 .LcustomEventCleanup:
   RESTORE_REGISTERS
@@ -275,7 +298,7 @@ ASM_SYMBOL(__xray_TypedEvent):
   testq %rax,%rax
   je .LtypedEventCleanup
 
-  ALIGNED_CALL_RAX
+  callq *%rax
 
 .LtypedEventCleanup:
   RESTORE_REGISTERS
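A note on the CFI macro change: the trampoline file includes `sanitizer_common/sanitizer_asm.h`, where the CFI macros map, roughly and only when the assembler supports CFI directives (otherwise they expand to nothing), to the raw directives shown below. Replacing the absolute `CFI_DEF_CFA_OFFSET` with the relative `CFI_ADJUST_CFA_OFFSET` lets `SAVE_REGISTERS` and `RESTORE_REGISTERS` describe only their own stack adjustments, so the CFA stays correct whether or not `ALIGN_STACK_16B` has already moved it (the builtin-based `__xray_CustomEvent` and `__xray_TypedEvent` trampolines do not use it):

    # Approximate expansions (see sanitizer_asm.h):
    CFI_DEF_CFA_OFFSET(248)      ->  .cfi_def_cfa_offset 248      # absolute: CFA = %rsp + 248
    CFI_ADJUST_CFA_OFFSET(240)   ->  .cfi_adjust_cfa_offset 240   # relative: CFA offset += 240
    CFI_ADJUST_CFA_OFFSET(-240)  ->  .cfi_adjust_cfa_offset -240  # relative: CFA offset -= 240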
