Skip to content

Commit aaecb05

Browse files
committed
Address more comments
1 parent 88e7dc4 commit aaecb05

File tree

1 file changed

+43
-35
lines changed

1 file changed

+43
-35
lines changed

compiler-rt/lib/builtins/aarch64/sme-abi.S

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -206,23 +206,41 @@ DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
206206
ret
207207
END_COMPILERRT_FUNCTION(__arm_get_current_vg)
208208

209+
// The diagram below describes the layout used in the following routines:
210+
// * __arm_sme_state_size
211+
// * __arm_sme_save
212+
// * __arm_sme_restore
213+
//
214+
// +---------------------------------+
215+
// | ... |
216+
// | ZA buffer |
217+
// | ... |
218+
// +---------------------------------+ <- @96
219+
// | ZT0 contents |
220+
// +---------------------------------+ <- @32
221+
// | byte 15-10: zero (reserved) |
222+
// | byte 9-8: num_za_save_slices | TPIDR2 block
223+
// | byte 7-0: za_save_buffer |
224+
// +---------------------------------+ <- @16
225+
// | bit 127-1: zero (reserved) | Internal state for __arm_sme_save/restore
226+
// | bit 0: VALID |
227+
// +---------------------------------+ <- @0
228+
209229
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
210230
.variant_pcs __arm_sme_state_size
211231
BTI_C
212232

213233
// Test if SME is available and ZA state is 'active'.
214-
adrp x16, CPU_FEATS_SYMBOL
215-
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
216-
tbz x16, #FEAT_SME_BIT, 0f
234+
adrp x17, CPU_FEATS_SYMBOL
235+
ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
236+
tbz x17, #FEAT_SME_BIT, 0f
217237
mrs x16, SVCR
218238
tbz x16, #1, 0f
219239
mrs x16, TPIDR2_EL0
220240
cbnz x16, 0f
221241

222242
// Size = HAS_FEAT_SME2 ? 96 : 32 (16-byte internal state + 16-byte TPIDR2 block, plus 64 bytes of ZT0 contents with SME2)
223-
adrp x16, CPU_FEATS_SYMBOL
224-
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
225-
tst x16, #FEAT_SME2_MASK
243+
tst x17, #FEAT_SME2_MASK
226244
mov w17, #32
227245
mov w16, #96
228246
csel x16, x17, x16, eq
@@ -242,17 +260,17 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
242260
.variant_pcs __arm_sme_save
243261
BTI_C
244262

245-
// Clear internal state bits
246-
stp xzr, xzr, [x0]
247-
248263
// If PTR is not 16-byte aligned, abort.
249264
tst x0, #0xF
250265
b.ne 3f
251266

267+
// Clear internal state bits
268+
stp xzr, xzr, [x0]
269+
252270
// If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return.
253-
adrp x16, CPU_FEATS_SYMBOL
254-
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
255-
tbz x16, #FEAT_SME_BIT, 2f
271+
adrp x17, CPU_FEATS_SYMBOL
272+
ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
273+
tbz x17, #FEAT_SME_BIT, 2f
256274
mrs x16, SVCR
257275
tbz x16, #1, 2f
258276
mrs x16, TPIDR2_EL0
@@ -262,20 +280,15 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
262280
mov w16, #1
263281
str x16, [x0]
264282

265-
adrp x16, CPU_FEATS_SYMBOL
266-
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
267-
tbz x16, #FEAT_SME2_BIT, 0f
283+
add x18, x0, #32
284+
tbz x17, #FEAT_SME2_BIT, 1f
268285

269286
// Store ZT0 and ZA
270287
add x16, x0, #32
271288
str zt0, [x16]
272-
add x18, x0, #96
289+
add x18, x18, #64
273290
b 1f
274291

275-
0:
276-
// Has SME only
277-
add x18, x0, #32
278-
279292
1:
280293
// Set up lazy-save (x18 = pointer to buffer)
281294
rdsvl x17, #1
@@ -284,7 +297,6 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
284297
stur wzr, [x0, #10]
285298
strh wzr, [x0, #14]
286299
msr TPIDR2_EL0, x0
287-
ret
288300

289301
2:
290302
// Do nothing
@@ -296,7 +308,7 @@ END_COMPILERRT_FUNCTION(__arm_sme_save)
296308

297309
DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
298310
.cfi_startproc
299-
.variant_pcs __arm_sme_save
311+
.variant_pcs __arm_sme_restore
300312
BTI_C
301313

302314
stp x29, x30, [sp, #-16]!
@@ -312,16 +324,16 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
312324

313325
// If the VALID bit is 0, return early (the other bits of this word are reserved and written as zero by __arm_sme_save).
314326
ldr x16, [x0]
315-
tbz x16, #0, 2f
327+
cbz x16, 2f
316328

317329
// If SME is not available, abort.
318-
adrp x16, CPU_FEATS_SYMBOL
319-
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
320-
tbz x16, #FEAT_SME_BIT, 3f
330+
adrp x17, CPU_FEATS_SYMBOL
331+
ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
332+
tbz x17, #FEAT_SME_BIT, 3f
321333

322334
// If TPIDR2_EL0 != nullptr, the lazy-save was never committed: skip restoring ZA and only try to reload zt0.
323335
mrs x16, TPIDR2_EL0
324-
cbnz x16, 0f
336+
cbnz x16, 1f
325337

326338
// If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
327339
// abort.
@@ -330,20 +342,16 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
330342

331343
// Restore za.
332344
smstart za
333-
mov x16, x0
334345
add x0, x0, #16
335346
bl __arm_tpidr2_restore
336-
mov x0, x16
337-
msr TPIDR2_EL0, xzr
347+
sub x0, x0, #16
338348

339-
0:
349+
1:
340350
smstart za
351+
msr TPIDR2_EL0, xzr
341352

342-
1:
343353
// Check if zt0 needs restoring.
344-
adrp x16, CPU_FEATS_SYMBOL
345-
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
346-
tbz x16, #FEAT_SME2_BIT, 2f
354+
tbz x17, #FEAT_SME2_BIT, 2f
347355

348356
// Restore zt0.
349357
add x16, x0, #32

0 commit comments

Comments
 (0)