
Commit 12e1815

[Compiler-rt] Add AArch64 routines for __arm_agnostic("sme_za_state")
The specification of these routines can be found here: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
1 parent e0fb3ac commit 12e1815
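
These three routines (`__arm_sme_state_size`, `__arm_sme_save`, `__arm_sme_restore`) back functions with agnostic ZA state: the caller asks how large a save buffer is needed, saves any live ZA/ZT0 state into it, and restores that state afterwards. The sketch below only illustrates that flow; the C prototypes and the heap-allocated buffer are assumptions for readability. In practice the compiler emits these calls with the special calling convention defined in the AAPCS64 document linked above and keeps the buffer on the stack.

```c
#include <stdint.h>
#include <stdlib.h>

/* Illustrative prototypes only; the real routines use a variant PCS and are
   normally called by compiler-generated code, not user code. */
uint64_t __arm_sme_state_size(void);
void __arm_sme_save(void *buf);     /* buf must be 16-byte aligned */
void __arm_sme_restore(void *buf);  /* same buf that was passed to the save */

/* Rough picture of what an __arm_agnostic("sme_za_state") function does
   around code that may clobber ZA/ZT0. */
static void call_with_za_preserved(void (*may_clobber_za)(void)) {
  void *buf = aligned_alloc(16, __arm_sme_state_size());
  __arm_sme_save(buf);    /* records ZA/ZT0 state, or marks the buffer invalid */
  may_clobber_za();
  __arm_sme_restore(buf); /* puts the recorded state back, if any */
  free(buf);
}
```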

File tree

3 files changed: +161 lines, -2 lines


compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 2 additions & 1 deletion
@@ -43,8 +43,9 @@ asm(\"cas w0, w1, [x2]\");
 builtin_check_c_compiler_source(COMPILER_RT_HAS_AARCH64_SME
 "
 void foo(void) __arm_streaming_compatible {
-  asm(\".arch armv9-a+sme\");
+  asm(\".arch armv9-a+sme2\");
   asm(\"smstart\");
+  asm(\"ldr zt0, [sp]\");
 }
 ")

compiler-rt/lib/builtins/aarch64/sme-abi-assert.c

Lines changed: 1 addition & 0 deletions
@@ -8,3 +8,4 @@
 #include "../cpu_model/AArch64CPUFeatures.inc"
 _Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30");
 _Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42");
+_Static_assert(FEAT_SME2 == 57, "sme-abi.S assumes FEAT_SME2 = 57");

compiler-rt/lib/builtins/aarch64/sme-abi.S

Lines changed: 158 additions & 1 deletion
@@ -10,6 +10,8 @@
 
 .set FEAT_SVE_BIT, 30
 .set FEAT_SME_BIT, 42
+.set FEAT_SME2_BIT, 57
+.set FEAT_SME2_MASK, 1 << 57
 .set SVCR_PSTATE_SM_BIT, 0
 
 #if !defined(__APPLE__)
@@ -22,7 +24,7 @@
 #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
 
-.arch armv9-a+sme
+.arch armv9-a+sme2
 
 // Utility function which calls a system's abort() routine. Because the function
 // is streaming-compatible it should disable streaming-SVE mode before calling
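
The new FEAT_SME2_BIT and FEAT_SME2_MASK values are kept in sync with the FEAT_SME2 enumerator asserted in sme-abi-assert.c above, and the routines below test them against the runtime feature word that compiler-rt's CPU model exports as `__aarch64_cpu_features`. As a rough C equivalent of the repeated `adrp`/`ldr`/`tbz` sequences, assuming the first 64-bit word of that symbol is the feature bitmask (which is how the assembly reads it):

```c
#include <stdbool.h>
#include <stdint.h>

/* Assumed C-level view of the CPU-features symbol loaded via CPU_FEATS_SYMBOL;
   the real definition lives in compiler-rt's cpu_model. */
extern struct {
  uint64_t features;
} __aarch64_cpu_features;

enum { FEAT_SME_BIT = 42, FEAT_SME2_BIT = 57 };

/* Equivalent of: adrp x16, ...; ldr x16, [...]; tbz x16, #FEAT_SME2_BIT, ... */
static bool has_sme2(void) {
  return (__aarch64_cpu_features.features >> FEAT_SME2_BIT) & 1;
}
```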
@@ -204,6 +206,161 @@ DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
   ret
 END_COMPILERRT_FUNCTION(__arm_get_current_vg)
 
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
+  .variant_pcs __arm_sme_state_size
+  BTI_C
+
+  // Test if SME is available and PSTATE.ZA = 1.
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME_BIT, 0f
+  mrs x16, SVCR
+  tbz x16, #1, 0f
+
+  // Size = HAS_FEAT_SME2 ? 96 : 32 (fixed part of the buffer).
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tst x16, #FEAT_SME2_MASK
+  mov w17, #32
+  mov w16, #96
+  csel x16, x17, x16, eq
+
+  // Size = Size + (SVL.B * SVL.B), the ZA save area.
+  rdsvl x17, #1
+  madd x0, x17, x17, x16
+  ret
+
+0:
+  // Default case: 16 bytes is the minimum (to encode the VALID bit, a multiple of 16 bytes).
+  mov w0, #16
+  ret
+END_COMPILERRT_FUNCTION(__arm_sme_state_size)
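
Reading the size computation together with __arm_sme_save and __arm_sme_restore below, the buffer appears to be laid out as sketched here. This is an interpretation of the assembly (offsets taken from the code), not a layout spelled out in the commit:

```c
#include <stdint.h>

/* Interpreted layout of the buffer sized by __arm_sme_state_size.
   Field names are descriptive guesses; offsets come from the assembly. */
struct za_state_buffer_header {
  uint64_t valid;             /* offset  0: bit 0 = VALID, set by __arm_sme_save */
  uint64_t reserved;          /* offset  8: zeroed by the initial stp xzr, xzr   */
  uint8_t  tpidr2_block[16];  /* offset 16: lazy-save block pointed to by TPIDR2 */
  /* With FEAT_SME2: 64 bytes of ZT0 at offset 32 and the ZA save area at 96.
     Without SME2:   the ZA save area starts at offset 32.
     The ZA area is SVL.B * SVL.B bytes, so size = (SME2 ? 96 : 32) + SVL.B^2,
     or just 16 bytes when there is no live ZA state to describe. */
};
```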
+
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
+  .variant_pcs __arm_sme_save
+  BTI_C
+
+  // Clear internal state bits.
+  stp xzr, xzr, [x0]
+
+  // If PTR is not 16-byte aligned, abort.
+  tst x0, #0xF
+  b.ne 3f
+
+  // If SME is not available, PSTATE.ZA = 0, or TPIDR2_EL0 != 0, return.
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME_BIT, 2f
+  mrs x16, SVCR
+  tbz x16, #1, 2f
+  mrs x16, TPIDR2_EL0
+  cbnz x16, 2f
+
+  // ZA or ZT0 needs saving; we can now set the internal VALID bit to 1.
+  mov w16, #1
+  str x16, [x0]
+
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME2_BIT, 0f
+
+  // Store ZT0 and place the ZA save area after it.
+  add x16, x0, #32
+  str zt0, [x16]
+  add x18, x0, #96
+  b 1f
+
+0:
+  // Has SME only.
+  add x18, x0, #32
+
+1:
+  // Set up a lazy-save of ZA (x18 = pointer to the ZA save area).
+  rdsvl x17, #1
+  str x18, [x0, #16]!
+  strh w17, [x0, #8]
+  stur wzr, [x0, #10]
+  strh wzr, [x0, #14]
+  msr TPIDR2_EL0, x0
+  ret
+
+2:
+  // Do nothing.
+  ret
+
+3:
+  b SYMBOL_NAME(do_abort)
+END_COMPILERRT_FUNCTION(__arm_sme_save)
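
The stores under label 1 fill in a TPIDR2 lazy-save block at offset 16 of the buffer and then point TPIDR2_EL0 at it. A hedged sketch of that 16-byte block, with field names chosen to match the AAPCS64 lazy-save scheme (the offsets follow directly from the `str`/`strh`/`stur` instructions above):

```c
#include <stdint.h>

/* TPIDR2 block as written by __arm_sme_save at buffer offset 16.
   Descriptive sketch; the authoritative definition is in the AAPCS64. */
struct tpidr2_block {
  void    *za_save_buffer;      /* +0:  x18, start of the ZA save area    */
  uint16_t num_za_save_slices;  /* +8:  SVL.B, from rdsvl x17, #1         */
  uint8_t  reserved[6];         /* +10: zeroed by the stur/strh of wzr    */
};
```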
+
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
+  .cfi_startproc
+  .variant_pcs __arm_sme_restore
+  BTI_C
+
+  stp x29, x30, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  mov x29, sp
+  .cfi_def_cfa w29, 16
+  .cfi_offset w30, -8
+  .cfi_offset w29, -16
+
+  // If PTR is not 16-byte aligned, abort.
+  tst x0, #0xF
+  b.ne 3f
+
+  // If the VALID bit is 0, return early.
+  ldr x16, [x0]
+  tbz x16, #0, 2f
+
+  // If SME is not available, abort.
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME_BIT, 3f
+
+  // If TPIDR2_EL0 != nullptr, no lazy-save was committed; only reload zt0.
+  mrs x16, TPIDR2_EL0
+  cbnz x16, 0f
+
+  // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
+  // abort.
+  mrs x16, SVCR
+  tbnz x16, #1, 3f
+
+  // Restore ZA.
+  smstart za
+  mov x16, x0
+  add x0, x0, #16
+  bl __arm_tpidr2_restore
+  mov x0, x16
+  msr TPIDR2_EL0, xzr
+
+0:
+  smstart za
+
+1:
+  // Check if zt0 needs restoring.
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME2_BIT, 2f
+
+  // Restore zt0.
+  add x16, x0, #32
+  ldr zt0, [x16]
+
+2:
+  // Do nothing.
+  .cfi_def_cfa wsp, 16
+  ldp x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+
+3:
+  b SYMBOL_NAME(do_abort)
+  .cfi_endproc
+END_COMPILERRT_FUNCTION(__arm_sme_restore)
+
 NO_EXEC_STACK_DIRECTIVE
 
 // GNU property note for BTI and PAC
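
As a reading aid, here is a hedged C-level paraphrase of the control flow in __arm_sme_restore. The helper names are hypothetical stand-ins for system-register accesses and for the existing __arm_tpidr2_restore routine, and this is not a drop-in replacement for the assembly, which is streaming-compatible and preserves registers that a normal C function would not:

```c
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helpers standing in for system-register accesses and for the
   existing __arm_tpidr2_restore routine; names are illustrative only. */
extern uint64_t read_tpidr2_el0(void);
extern void     write_tpidr2_el0(uint64_t v);
extern bool     pstate_za_is_set(void);              /* SVCR.ZA              */
extern void     smstart_za(void);
extern void     tpidr2_restore(void *tpidr2_block);  /* __arm_tpidr2_restore */
extern void     restore_zt0(const void *src);        /* ldr zt0, [x16]       */
extern bool     has_sme(void), has_sme2(void);
extern void     do_abort(void);

/* C-level paraphrase of __arm_sme_restore, using the buffer layout sketched
   earlier (VALID word at offset 0, TPIDR2 block at 16, ZT0 at 32). */
void arm_sme_restore_sketch(char *buf) {
  if (((uintptr_t)buf & 0xF) != 0)     /* buffer must be 16-byte aligned */
    do_abort();
  if ((*(uint64_t *)buf & 1) == 0)     /* VALID bit clear: nothing to do */
    return;
  if (!has_sme())                      /* VALID set but no SME: abort    */
    do_abort();

  if (read_tpidr2_el0() == 0) {
    /* A callee committed the lazy-save; ZA must not still be active. */
    if (pstate_za_is_set())
      do_abort();
    smstart_za();
    tpidr2_restore(buf + 16);          /* reload ZA from the save area   */
    write_tpidr2_el0(0);
  } else {
    smstart_za();                      /* lazy-save was never committed  */
  }
  if (has_sme2())
    restore_zt0(buf + 32);             /* ZT0 is kept at offset 32       */
}
```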
