|
10 | 10 |
|
11 | 11 | .set FEAT_SVE_BIT, 30
|
12 | 12 | .set FEAT_SME_BIT, 42
|
| 13 | +.set FEAT_SME2_BIT, 57 |
| 14 | +.set FEAT_SME2_MASK, 1 << 57 |
13 | 15 | .set SVCR_PSTATE_SM_BIT, 0
|
14 | 16 |
|
15 | 17 | #if !defined(__APPLE__)
|
|
22 | 24 | #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
|
23 | 25 | #endif
|
24 | 26 |
|
25 |
| -.arch armv9-a+sme |
| 27 | +.arch armv9-a+sme2 |
26 | 28 |
|
27 | 29 | // Utility function which calls a system's abort() routine. Because the function
|
28 | 30 | // is streaming-compatible it should disable streaming-SVE mode before calling
|
@@ -204,6 +206,161 @@ DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
|
204 | 206 | ret
|
205 | 207 | END_COMPILERRT_FUNCTION(__arm_get_current_vg)
|
206 | 208 |
|
// __arm_sme_state_size
//
// Leaves a byte count in x0:
//   * 16 when SME is not available or PSTATE.ZA (SVCR bit 1) is 0;
//   * otherwise Base + SVLB * SVLB, where SVLB is the value produced by
//     `rdsvl #1` and Base is 96 when SME2 is available, 32 when it is not.
// The two Base values equal the offsets at which __arm_sme_save places its
// ZA save buffer (PTR + 96 with SME2, PTR + 32 without).
//
// Scratch registers: x16, x17. The condition flags are modified.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
  .variant_pcs __arm_sme_state_size
  BTI_C

  // Test if SME is available and PSTATE.ZA = 1.
  // The feature word is kept in x17 so it does not have to be reloaded for
  // the SME2 test below.
  adrp  x17, CPU_FEATS_SYMBOL
  ldr   x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x17, #FEAT_SME_BIT, 0f
  mrs   x16, SVCR
  tbz   x16, #1, 0f

  // Size = HAS_FEAT_SME2 ? 96 : 32
  // tst sets Z ("eq") when the SME2 bit is clear; the two mov instructions do
  // not touch the flags, so csel still sees the result of tst.
  tst   x17, #FEAT_SME2_MASK
  mov   w17, #32
  mov   w16, #96
  csel  x16, x17, x16, eq

  // Size = Size + (SVLB * SVLB)
  rdsvl x17, #1
  madd  x0, x17, x17, x16
  ret

0:
  // Default case, 16 bytes is minimum (to encode VALID bit, multiple of 16 bytes)
  mov   w0, #16
  ret
END_COMPILERRT_FUNCTION(__arm_sme_state_size)
| 238 | + |
// __arm_sme_save
//
// In:  x0 = PTR, the state buffer.
//
// * Branches to do_abort if PTR is not 16-byte aligned (nothing is written).
// * Zeroes the first 16 bytes at PTR (internal state).
// * Returns with nothing else done if SME is not available, PSTATE.ZA = 0,
//   or TPIDR2_EL0 is already non-zero.
// * Otherwise:
//     - writes 1 to the doubleword at PTR (VALID bit);
//     - with SME2, stores ZT0 at PTR + 32;
//     - fills the 16-byte block at PTR + 16: the save-buffer pointer at +0
//       (PTR + 96 with SME2, PTR + 32 without), the `rdsvl #1` value as a
//       halfword at +8, and zeroes in bytes +10..+15;
//     - writes the address of that block to TPIDR2_EL0.
//   On this path x0 is left equal to PTR + 16 by the pre-index writeback.
//
// Scratch registers: x16, x17, x18. The condition flags are modified.
// NOTE(review): x18 is platform-reserved on some targets; confirm that this
// routine's PCS allows it to be clobbered everywhere this file is built.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
  .variant_pcs __arm_sme_save
  BTI_C

  // If PTR is not 16-byte aligned, abort. Checked before the first store so
  // a rejected pointer is never written through.
  tst   x0, #0xF
  b.ne  3f

  // Clear internal state bits
  stp   xzr, xzr, [x0]

  // If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return.
  // x17 keeps the feature word for the SME2 test further down.
  adrp  x17, CPU_FEATS_SYMBOL
  ldr   x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x17, #FEAT_SME_BIT, 2f
  mrs   x16, SVCR
  tbz   x16, #1, 2f
  mrs   x16, TPIDR2_EL0
  cbnz  x16, 2f

  // ZA or ZT0 need saving, we can now set internal VALID bit to 1
  mov   w16, #1
  str   x16, [x0]

  // Has SME only: the save buffer starts at PTR + 32.
  add   x18, x0, #32
  tbz   x17, #FEAT_SME2_BIT, 1f

  // Has SME2: store ZT0 at PTR + 32, the save buffer then starts at PTR + 96.
  str   zt0, [x18]
  add   x18, x0, #96

1:
  // Set up lazy-save (x18 = pointer to buffer)
  rdsvl x17, #1
  str   x18, [x0, #16]!          // x0 = PTR + 16 from here on
  strh  w17, [x0, #8]
  stur  wzr, [x0, #10]           // bytes +10..+13
  strh  wzr, [x0, #14]           // bytes +14..+15
  msr   TPIDR2_EL0, x0
  ret

2:
  // Nothing to save
  ret

3:
  b     SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_sme_save)
| 294 | + |
// __arm_sme_restore
//
// In:  x0 = PTR, a state buffer; the offsets read here (doubleword at +0,
//      block at +16, ZT0 image at +32) are the ones __arm_sme_save writes.
//
// * Branches to do_abort if PTR is not 16-byte aligned.
// * Returns immediately if bit 0 (VALID) of the doubleword at PTR is 0.
// * Branches to do_abort if SME is not available.
// * If TPIDR2_EL0 is zero: branches to do_abort when PSTATE.ZA = 1, otherwise
//   enables ZA and calls __arm_tpidr2_restore with x0 = PTR + 16.
// * In both remaining cases: enables ZA, clears TPIDR2_EL0 and, when SME2 is
//   available, reloads ZT0 from PTR + 32.
//
// A frame record (x29, x30) is pushed because of the call.
// Scratch registers: x16. The condition flags are modified.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
  .cfi_startproc
  .variant_pcs __arm_sme_restore
  BTI_C

  stp   x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov   x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16

  // If PTR is not 16-byte aligned, abort.
  tst   x0, #0xF
  b.ne  3f

  // If the VALID bit is 0, return early.
  ldr   x16, [x0]
  tbz   x16, #0, 2f

  // If SME is not available, abort.
  adrp  x16, CPU_FEATS_SYMBOL
  ldr   x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x16, #FEAT_SME_BIT, 3f

  // If TPIDR2_EL0 != nullptr, no lazy-save was committed, try to reload zt0.
  mrs   x16, TPIDR2_EL0
  cbnz  x16, 0f

  // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
  // abort.
  mrs   x16, SVCR
  tbnz  x16, #1, 3f

  // Restore za from the block at PTR + 16.
  // PTR is recovered by undoing the add rather than by parking it in x16:
  // x16 (IP0) may be clobbered by a veneer on the way to the callee.
  // NOTE(review): this relies on __arm_tpidr2_restore preserving x0; confirm
  // against that routine's PCS.
  smstart za
  add   x0, x0, #16
  bl    SYMBOL_NAME(__arm_tpidr2_restore)
  sub   x0, x0, #16

0:
  // ZA is live again on both paths, so drop the lazy-save pointer on both:
  // TPIDR2_EL0 must not keep pointing into the buffer after we return.
  smstart za
  msr   TPIDR2_EL0, xzr

  // Check if zt0 needs restoring.
  adrp  x16, CPU_FEATS_SYMBOL
  ldr   x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
  tbz   x16, #FEAT_SME2_BIT, 2f

  // Restore zt0.
  add   x16, x0, #32
  ldr   zt0, [x16]

2:
  // Common exit: pop the frame record and return.
  .cfi_def_cfa wsp, 16
  ldp   x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
  ret

3:
  b     SYMBOL_NAME(do_abort)
  .cfi_endproc
END_COMPILERRT_FUNCTION(__arm_sme_restore)
| 363 | + |
207 | 364 | NO_EXEC_STACK_DIRECTIVE
|
208 | 365 |
|
209 | 366 | // GNU property note for BTI and PAC
|
|
0 commit comments