@@ -206,23 +206,41 @@ DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
206
206
ret
207
207
END_COMPILERRT_FUNCTION(__arm_get_current_vg)
208
208
209
+ // The diagram below describes the layout used in the following routines:
210
+ // * __arm_sme_state_size
211
+ // * __arm_sme_save
212
+ // * __arm_sme_restore
213
+ //
214
+ // +---------------------------------+
215
+ // |               ...               |
216
+ // |            ZA buffer            |
217
+ // |               ...               |
218
+ // +---------------------------------+ <- @ 96
219
+ // |          ZT0 contents           |
220
+ // +---------------------------------+ <- @ 32
221
+ // | byte 15 - 10 : zero (reserved)  |
222
+ // | byte 9 - 8 : num_za_save_slices | TPIDR2 block
223
+ // | byte 7 - 0 : za_save_buffer     |
224
+ // +---------------------------------+ <- @ 16
225
+ // | bit 127 - 1 : zero (reserved)   | Internal state for __arm_sme_save/restore
226
+ // | bit 0 : VALID                   |
227
+ // +---------------------------------+ <- @ 0
228
+
209
229
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
210
230
.variant_pcs __arm_sme_state_size
211
231
BTI_C
212
232
213
233
// Test if SME is available and ZA state is 'active' .
214
- adrp x16 , CPU_FEATS_SYMBOL
215
- ldr x16 , [ x16 , CPU_FEATS_SYMBOL_OFFSET ]
216
- tbz x16 , #FEAT_SME_BIT , 0f
234
+ adrp x17 , CPU_FEATS_SYMBOL
235
+ ldr x17 , [ x17 , CPU_FEATS_SYMBOL_OFFSET ]
236
+ tbz x17 , #FEAT_SME_BIT , 0f
217
237
mrs x16 , SVCR
218
238
tbz x16 , # 1 , 0f
219
239
mrs x16 , TPIDR2_EL0
220
240
cbnz x16 , 0f
221
241
222
242
// Size = HAS_FEAT_SME2 ? 96 : 32
223
- adrp x16 , CPU_FEATS_SYMBOL
224
- ldr x16 , [ x16 , CPU_FEATS_SYMBOL_OFFSET ]
225
- tst x16 , #FEAT_SME2_MASK
243
+ tst x17 , #FEAT_SME2_MASK
226
244
mov w17 , # 32
227
245
mov w16 , # 96
228
246
csel x16 , x17 , x16 , eq
@@ -242,17 +260,17 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
242
260
.variant_pcs __arm_sme_save
243
261
BTI_C
244
262
245
- // Clear internal state bits
246
- stp xzr , xzr , [ x0 ]
247
-
248
263
// If PTR is not 16 - byte aligned , abort.
249
264
tst x0 , # 0xF
250
265
b.ne 3f
251
266
267
+ // Clear internal state bits
268
+ stp xzr , xzr , [ x0 ]
269
+
252
270
// If SME is not available , PSTATE.ZA = 0 or TPIDR2_EL0 != 0 , return.
253
- adrp x16 , CPU_FEATS_SYMBOL
254
- ldr x16 , [ x16 , CPU_FEATS_SYMBOL_OFFSET ]
255
- tbz x16 , #FEAT_SME_BIT , 2f
271
+ adrp x17 , CPU_FEATS_SYMBOL
272
+ ldr x17 , [ x17 , CPU_FEATS_SYMBOL_OFFSET ]
273
+ tbz x17 , #FEAT_SME_BIT , 2f
256
274
mrs x16 , SVCR
257
275
tbz x16 , # 1 , 2f
258
276
mrs x16 , TPIDR2_EL0
@@ -262,20 +280,15 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
262
280
mov w16 , # 1
263
281
str x16 , [ x0 ]
264
282
265
- adrp x16 , CPU_FEATS_SYMBOL
266
- ldr x16 , [ x16 , CPU_FEATS_SYMBOL_OFFSET ]
267
- tbz x16 , #FEAT_SME2_BIT , 0f
283
+ add x18 , x0 , # 32
284
+ tbz x17 , #FEAT_SME2_BIT , 1f
268
285
269
286
// Store ZT0 at PTR + 32 and point x18 at the ZA buffer that follows it (PTR + 96)
270
287
add x16 , x0 , # 32
271
288
str zt0 , [ x16 ]
272
- add x18 , x0 , # 96
289
+ add x18 , x18 , # 64
273
290
b 1f
274
291
275
- 0 :
276
- // Has SME only
277
- add x18 , x0 , # 32
278
-
279
292
1 :
280
293
// Set up lazy - save (x18 = pointer to buffer)
281
294
rdsvl x17 , # 1
@@ -284,7 +297,6 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
284
297
stur wzr , [ x0 , # 10 ]
285
298
strh wzr , [ x0 , # 14 ]
286
299
msr TPIDR2_EL0 , x0
287
- ret
288
300
289
301
2 :
290
302
// Do nothing
@@ -296,7 +308,7 @@ END_COMPILERRT_FUNCTION(__arm_sme_save)
296
308
297
309
DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
298
310
.cfi_startproc
299
- .variant_pcs __arm_sme_save
311
+ .variant_pcs __arm_sme_restore
300
312
BTI_C
301
313
302
314
stp x29 , x30 , [ sp , # - 16 ] !
@@ -312,16 +324,16 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
312
324
313
325
// If the VALID bit is 0 , return early.
314
326
ldr x16 , [ x0 ]
315
- tbz x16 , # 0 , 2f
327
+ cbz x16 , 2f
316
328
317
329
// If SME is not available , abort.
318
- adrp x16 , CPU_FEATS_SYMBOL
319
- ldr x16 , [ x16 , CPU_FEATS_SYMBOL_OFFSET ]
320
- tbz x16 , #FEAT_SME_BIT , 3f
330
+ adrp x17 , CPU_FEATS_SYMBOL
331
+ ldr x17 , [ x17 , CPU_FEATS_SYMBOL_OFFSET ]
332
+ tbz x17 , #FEAT_SME_BIT , 3f
321
333
322
334
// If TPIDR2_EL0 != nullptr , no lazy - save was committed , try to reload zt0.
323
335
mrs x16 , TPIDR2_EL0
324
- cbnz x16 , 0f
336
+ cbnz x16 , 1f
325
337
326
338
// If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active' ) ,
327
339
// abort.
@@ -330,20 +342,16 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
330
342
331
343
// Restore za.
332
344
smstart za
333
- mov x16 , x0
334
345
add x0 , x0 , # 16
335
346
bl __arm_tpidr2_restore
336
- mov x0 , x16
337
- msr TPIDR2_EL0 , xzr
347
+ sub x0 , x0 , # 16
338
348
339
- 0 :
349
+ 1 :
340
350
smstart za
351
+ msr TPIDR2_EL0 , xzr
341
352
342
- 1 :
343
353
// Check if zt0 needs restoring.
344
- adrp x16 , CPU_FEATS_SYMBOL
345
- ldr x16 , [ x16 , CPU_FEATS_SYMBOL_OFFSET ]
346
- tbz x16 , #FEAT_SME2_BIT , 2f
354
+ tbz x17 , #FEAT_SME2_BIT , 2f
347
355
348
356
// Restore zt0.
349
357
add x16 , x0 , # 32
0 commit comments