Skip to content

Commit 2b05fa8

Browse files
committed
[compiler-rt][aarch64] Add SME ABI support routines.
When compiling for SME and using the attributes to use PSTATE.ZA, Clang will emit calls to SME ABI support routines to save and restore ZA state. Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D154045
1 parent e724c7e commit 2b05fa8

File tree

5 files changed

+249
-0
lines changed

5 files changed

+249
-0
lines changed

compiler-rt/cmake/Modules/AddCompilerRT.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,10 @@ function(add_compiler_rt_runtime name type)
312312
set(COMPONENT_OPTION COMPONENT ${libname})
313313
endif()
314314

315+
# Shared-library builds append COMPILER_RT_SHARED_LIB to LIB_DEFS.
# aarch64/sme-abi-init.c tests this macro to decide whether
# __aarch64_sme_accessible is declared weak and null-checked before use.
# NOTE(review): LIB_DEFS is presumably turned into compile definitions further
# down in this function -- not visible in this hunk, confirm.
if(type STREQUAL "SHARED")
316+
list(APPEND LIB_DEFS COMPILER_RT_SHARED_LIB)
317+
endif()
318+
315319
if(type STREQUAL "OBJECT")
316320
if(CMAKE_C_COMPILER_ID MATCHES Clang AND CMAKE_C_COMPILER_TARGET)
317321
list(APPEND extra_cflags_${libname} "--target=${CMAKE_C_COMPILER_TARGET}")

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ asm(\".arch armv8-a+lse\");
3333
asm(\"cas w0, w1, [x2]\");
3434
")
3535

36+
# Probe whether the toolchain can assemble SME code: the test source switches
# the assembler to ".arch armv9-a+sme" and emits a single "smstart"
# instruction. The outcome is recorded under COMPILER_RT_HAS_ASM_SME, which
# lib/builtins/CMakeLists.txt passes to append_list_if to add HAS_ASM_SME.
builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_SME
37+
"
38+
asm(\".arch armv9-a+sme\");
39+
asm(\"smstart\");
40+
")
41+
3642
if(ANDROID)
3743
set(OS_NAME "Android")
3844
else()

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,8 @@ set(aarch64_SOURCES
551551
${GENERIC_SOURCES}
552552
cpu_model.c
553553
aarch64/fp_mode.c
554+
aarch64/sme-abi.S
555+
aarch64/sme-abi-init.c
554556
)
555557

556558
# Generate outline atomics helpers from lse.S base
@@ -780,6 +782,7 @@ else ()
780782
endif()
781783

782784
append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
785+
# Add HAS_ASM_SME to BUILTIN_DEFS, conditioned on the COMPILER_RT_HAS_ASM_SME
# assembler probe. aarch64/sme-abi.S uses HAS_ASM_SME to choose between SME
# mnemonics and raw .inst encodings.
append_list_if(COMPILER_RT_HAS_ASM_SME HAS_ASM_SME BUILTIN_DEFS)
783786

784787
foreach (arch ${BUILTIN_SUPPORTED_ARCH})
785788
if (CAN_TARGET_${arch})
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2+
// See https://llvm.org/LICENSE.txt for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
// Flag caching the result of has_sme(). It is written by the
// init_aarch64_has_sme() constructor in this file and read (as a byte) by the
// assembly routines in sme-abi.S. It is declared with hidden visibility and
// nocommon.
__attribute__((visibility("hidden"), nocommon))
6+
_Bool __aarch64_has_sme_and_tpidr2_el0;
7+
8+
// We have multiple ways to check whether SME is available, depending on our
9+
// target.
10+
// * For Linux we can use __getauxval().
11+
// * For newlib we can use __aarch64_sme_accessible().
12+
13+
#if defined(__linux__)
14+
15+
#ifndef AT_HWCAP2
16+
#define AT_HWCAP2 26
17+
#endif
18+
19+
#ifndef HWCAP2_SME
20+
#define HWCAP2_SME (1 << 23)
21+
#endif
22+
23+
extern unsigned long int __getauxval (unsigned long int);
24+
25+
static _Bool has_sme(void) {
26+
return __getauxval(AT_HWCAP2) & HWCAP2_SME;
27+
}
28+
29+
#else // defined(__linux__)
30+
31+
#if defined(COMPILER_RT_SHARED_LIB)
32+
__attribute__((weak))
33+
#endif
34+
extern _Bool __aarch64_sme_accessible(void);
35+
36+
// Non-Linux: ask the platform hook __aarch64_sme_accessible() whether SME can
// be used. In shared-library builds the hook is a weak reference, so an
// unresolved (null) hook is reported as "no SME" instead of being called.
static _Bool has_sme(void) {
#if defined(COMPILER_RT_SHARED_LIB)
  _Bool (*const sme_probe)(void) = __aarch64_sme_accessible;
  return sme_probe ? sme_probe() : 0;
#else
  return __aarch64_sme_accessible();
#endif
}
43+
44+
#endif // defined(__linux__)
45+
46+
// Constructor (priority 90) that evaluates has_sme() and stores the result in
// __aarch64_has_sme_and_tpidr2_el0, so the routines in sme-abi.S only have to
// load a byte instead of querying the platform themselves.
__attribute__((constructor(90)))
47+
static void init_aarch64_has_sme(void) {
48+
__aarch64_has_sme_and_tpidr2_el0 = has_sme();
49+
}
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2+
// See https://llvm.org/LICENSE.txt for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
// This file implements the support routines for the SME ABI,
6+
// described here:
7+
// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
8+
9+
#include "../assembly.h"
10+
11+
// Instruction and register spellings used by the routines below.
// With HAS_ASM_SME defined, the SME mnemonics and register names are used
// directly and the file is assembled for armv9-a+sme. Otherwise the file is
// assembled for armv8-a and each macro expands to a raw ".inst" word, a
// generic S<op0>_<op1>_<Cn>_<Cm>_<op2> system-register name, or a
// ".cfi_escape" byte sequence in place of the mnemonic form.
// NOTE(review): the .inst words are presumably the encodings of the mnemonics
// in the first branch -- verify against the Arm ARM before changing either.
#ifdef HAS_ASM_SME
12+
#define ARCH armv9-a+sme
13+
#define SMSTOP_SM smstop sm
14+
#define SMSTOP_ZA smstop za
15+
#define REG_TPIDR2_EL0 TPIDR2_EL0
16+
#define REG_SVCR SVCR
17+
#define ADDSVL_X16_X16_1 addsvl x16, x16, #1
18+
#define LDR_ZA_W15_0_X16 ldr za[w15,0], [x16]
19+
#define STR_ZA_W15_0_X16 str za[w15,0], [x16]
20+
#define CNTD_X0 cntd x0
21+
#define CFI_OFFSET_VG_MINUS_16 .cfi_offset vg, -16
22+
#else
23+
#define ARCH armv8-a
24+
#define SMSTOP_SM .inst 0xd503427f
25+
#define SMSTOP_ZA .inst 0xd503447f
26+
#define REG_TPIDR2_EL0 S3_3_C13_C0_5
27+
#define REG_SVCR S3_3_C4_C2_2
28+
#define ADDSVL_X16_X16_1 .inst 0x04305830
29+
#define LDR_ZA_W15_0_X16 .inst 0xe1006200
30+
#define STR_ZA_W15_0_X16 .inst 0xe1206200
31+
#define CNTD_X0 .inst 0x04e0e3e0
32+
#define CFI_OFFSET_VG_MINUS_16 .cfi_escape 0x10, 0x2e, 0x03, 0x11, 0x70, 0x22 // $vg @ cfa - 16
33+
#endif
34+
35+
.arch ARCH
36+
37+
// Utility function which calls a system's abort() routine. Because the function
38+
// is streaming-compatible it should disable streaming-SVE mode before calling
39+
// abort(). Note that there is no need to preserve any state before the call,
40+
// because the function does not return.
41+
DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
42+
.cfi_startproc
43+
.variant_pcs SYMBOL_NAME(do_abort)
44+
// Allocate a 32-byte frame: x29/x30 go to [sp] and [sp, #8]; [sp, #16] is
// used for VG below.
stp x29, x30, [sp, #-32]!
45+
CNTD_X0
46+
// Store VG to a stack location that we describe with .cfi_offset
47+
str x0, [sp, #16]
48+
.cfi_def_cfa_offset 32
49+
.cfi_offset w30, -24
50+
.cfi_offset w29, -32
51+
CFI_OFFSET_VG_MINUS_16
52+
// __arm_sme_state returns the low two bits of SVCR in x0[1:0] (and zero when
// SME is unavailable). If bit 0 is clear, SMSTOP_SM is skipped.
bl __arm_sme_state
53+
tbz x0, #0, 2f
54+
1:
55+
SMSTOP_SM
56+
2:
57+
// We can't make this into a tail-call because the unwinder would
58+
// need to restore the value of VG.
59+
bl SYMBOL_NAME(abort)
60+
.cfi_endproc
61+
END_COMPILERRT_FUNCTION(do_abort)
62+
63+
// __arm_sme_state fills the result registers based on a local
64+
// that is set as part of the compiler-rt startup code.
65+
// __aarch64_has_sme_and_tpidr2_el0
66+
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
67+
.variant_pcs __arm_sme_state
68+
// Default result when the flag below is zero: x0 = 0 and x1 = 0.
mov x0, xzr
69+
mov x1, xzr
70+
71+
// Load the byte-sized flag set by the startup constructor; x16 is the only
// scratch register used.
adrp x16, SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
72+
ldrb w16, [x16, :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)]
73+
cbz w16, 1f
74+
0:
75+
// Set bits 63 and 62 of x0, then copy SVCR[1:0] into x0[1:0].
orr x0, x0, #0xC000000000000000
76+
mrs x16, REG_SVCR
77+
bfxil x0, x16, #0, #2
78+
// x1 receives the current TPIDR2_EL0 value.
mrs x1, REG_TPIDR2_EL0
79+
1:
80+
ret
81+
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
82+
83+
// __arm_tpidr2_restore: x0 holds the address of the block (BLK) to restore
// from. The routine writes x14, x15 and x16 and the condition flags, and
// reloads ZA slices from BLK.za_save_buffer.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
84+
.variant_pcs __arm_tpidr2_restore
85+
// If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
86+
// manner.
87+
mrs x14, REG_TPIDR2_EL0
88+
cbnz x14, 2f
89+
90+
// If any of the reserved bytes in the first 16 bytes of BLK are nonzero,
91+
// the subroutine [..] aborts in some platform-defined manner.
92+
// The checks below read the halfword at offset 10 and the word at offset 12.
ldrh w14, [x0, #10]
93+
cbnz w14, 2f
94+
ldr w14, [x0, #12]
95+
cbnz w14, 2f
96+
97+
// If BLK.za_save_buffer is NULL, the subroutine does nothing.
98+
ldr x16, [x0]
99+
cbz x16, 1f
100+
101+
// If BLK.num_za_save_slices is zero, the subroutine does nothing.
102+
ldrh w14, [x0, #8]
103+
cbz x14, 1f
104+
105+
// Loop: x15 counts slices from 0 up to x14 (num_za_save_slices); each
// iteration loads one ZA slice indexed by w15 from [x16] and then advances
// x16 with ADDSVL_X16_X16_1.
mov x15, xzr
106+
0:
107+
LDR_ZA_W15_0_X16
108+
ADDSVL_X16_X16_1
109+
add x15, x15, #1
110+
cmp x14, x15
111+
b.ne 0b
112+
1:
113+
ret
114+
2:
115+
b SYMBOL_NAME(do_abort)
116+
END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
117+
118+
// __arm_tpidr2_save: if TPIDR2_EL0 points at a TPIDR2 block, store the ZA
// slices into that block's za_save_buffer. On the non-aborting paths the
// routine writes only x14, x15, x16 and the condition flags.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
  // Mark the symbol as following a variant procedure-call standard, as is
  // already done for __arm_sme_state and __arm_tpidr2_restore in this file.
  .variant_pcs __arm_tpidr2_save

  // If the current thread does not have access to TPIDR2_EL0, the subroutine
  // does nothing.
  adrp  x14, SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
  ldrb  w14, [x14, :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)]
  cbz   w14, 1f

  // If TPIDR2_EL0 is null, the subroutine does nothing.
  mrs   x16, REG_TPIDR2_EL0
  cbz   x16, 1f

  // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are
  // nonzero, the subroutine [..] aborts in some platform-defined manner.
  // The checks read the halfword at offset 10 and the word at offset 12.
  ldrh  w14, [x16, #10]
  cbnz  w14, 2f
  ldr   w14, [x16, #12]
  cbnz  w14, 2f

  // If num_za_save_slices is zero, the subroutine does nothing.
  ldrh  w14, [x16, #8]
  cbz   x14, 1f

  // If za_save_buffer is NULL, the subroutine does nothing.
  ldr   x16, [x16]
  cbz   x16, 1f

  // Loop: x15 counts slices from 0 up to x14 (num_za_save_slices); each
  // iteration stores the ZA slice indexed by w15 to [x16] and then advances
  // x16 with ADDSVL_X16_X16_1.
  mov   x15, xzr
0:
  STR_ZA_W15_0_X16
  ADDSVL_X16_X16_1
  add   x15, x15, #1
  cmp   x14, x15
  b.ne  0b
1:
  ret
2:
  b     SYMBOL_NAME(do_abort)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
156+
157+
// __arm_za_disable: when the SME flag is set, call __arm_tpidr2_save, clear
// TPIDR2_EL0 and execute SMSTOP_ZA; otherwise return immediately.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
  // Mark the symbol as following a variant procedure-call standard, as is
  // already done for __arm_sme_state and __arm_tpidr2_restore in this file.
  .variant_pcs __arm_za_disable

  // If the current thread does not have access to SME, the subroutine does
  // nothing.
  adrp  x14, SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
  ldrb  w14, [x14, :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)]
  cbz   w14, 0f

  // Otherwise, the subroutine behaves as if it did the following:
  // * Call __arm_tpidr2_save.
  // A frame record is pushed first so x30 survives the nested call; the CFI
  // directives describe that frame.
  stp   x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov   x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16
  bl    __arm_tpidr2_save

  // * Set TPIDR2_EL0 to null.
  msr   REG_TPIDR2_EL0, xzr

  // * Set PSTATE.ZA to 0.
  SMSTOP_ZA

  // Pop the frame record and return the CFI state to that of function entry.
  .cfi_def_cfa wsp, 16
  ldp   x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
0:
  ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)

0 commit comments

Comments
 (0)