Skip to content

Commit a38e1ae

Browse files
MacDue authored and tstellar committed
[AArch64][SME2] Don't preserve ZT0 around SME ABI routines (llvm#132722)
This caused ZT0 to be preserved around `__arm_tpidr2_save` in functions with "aarch64_new_zt0". The block in which `__arm_tpidr2_save` is called is added by the SMEABIPass and may be reachable in cases where ZA has not been enabled* (so using `str zt0` is invalid).

* (when za_save_buffer is null and num_za_save_slices is zero)
1 parent be4097b commit a38e1ae

File tree

3 files changed

+46
-27
lines changed

3 files changed

+46
-27
lines changed

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,8 @@ class SMEAttrs {
133133
bool hasZT0State() const { return isNewZT0() || sharesZT0(); }
134134
bool requiresPreservingZT0(const SMEAttrs &Callee) const {
135135
return hasZT0State() && !Callee.sharesZT0() &&
136-
!Callee.hasAgnosticZAInterface();
136+
!Callee.hasAgnosticZAInterface() &&
137+
!(Callee.Bitmask & SME_ABI_Routine);
137138
}
138139
bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const {
139140
return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() &&

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -475,16 +475,12 @@ declare double @zt0_shared_callee(double) "aarch64_inout_zt0"
475475
define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" {
476476
; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee:
477477
; CHECK-COMMON: // %bb.0: // %prelude
478-
; CHECK-COMMON-NEXT: sub sp, sp, #80
479-
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
478+
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
480479
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
481480
; CHECK-COMMON-NEXT: cbz x8, .LBB13_2
482481
; CHECK-COMMON-NEXT: b .LBB13_1
483482
; CHECK-COMMON-NEXT: .LBB13_1: // %save.za
484-
; CHECK-COMMON-NEXT: mov x8, sp
485-
; CHECK-COMMON-NEXT: str zt0, [x8]
486483
; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
487-
; CHECK-COMMON-NEXT: ldr zt0, [x8]
488484
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
489485
; CHECK-COMMON-NEXT: b .LBB13_2
490486
; CHECK-COMMON-NEXT: .LBB13_2: // %entry
@@ -495,8 +491,7 @@ define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline
495491
; CHECK-COMMON-NEXT: fmov d1, x8
496492
; CHECK-COMMON-NEXT: fadd d0, d0, d1
497493
; CHECK-COMMON-NEXT: smstop za
498-
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
499-
; CHECK-COMMON-NEXT: add sp, sp, #80
494+
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
500495
; CHECK-COMMON-NEXT: ret
501496
entry:
502497
%call = call double @zt0_shared_callee(double %x)

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

Lines changed: 42 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar
112112
ret void;
113113
}
114114

115-
; New-ZA Callee
115+
; New-ZT0 Callee
116116

117117
; Expect spill & fill of ZT0 around call
118118
; Expect smstop/smstart za around call
@@ -134,6 +134,39 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
134134
ret void;
135135
}
136136

137+
; New-ZT0 Callee
138+
139+
; Expect commit of lazy-save if ZA is dormant
140+
; Expect smstart ZA & clear ZT0
141+
; Expect spill & fill of ZT0 around call
142+
; Before return, expect smstop ZA
143+
define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind {
144+
; CHECK-LABEL: zt0_new_caller_zt0_new_callee:
145+
; CHECK: // %bb.0: // %prelude
146+
; CHECK-NEXT: sub sp, sp, #80
147+
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
148+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
149+
; CHECK-NEXT: cbz x8, .LBB6_2
150+
; CHECK-NEXT: // %bb.1: // %save.za
151+
; CHECK-NEXT: bl __arm_tpidr2_save
152+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
153+
; CHECK-NEXT: .LBB6_2:
154+
; CHECK-NEXT: smstart za
155+
; CHECK-NEXT: zero { zt0 }
156+
; CHECK-NEXT: mov x19, sp
157+
; CHECK-NEXT: str zt0, [x19]
158+
; CHECK-NEXT: smstop za
159+
; CHECK-NEXT: bl callee
160+
; CHECK-NEXT: smstart za
161+
; CHECK-NEXT: ldr zt0, [x19]
162+
; CHECK-NEXT: smstop za
163+
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
164+
; CHECK-NEXT: add sp, sp, #80
165+
; CHECK-NEXT: ret
166+
call void @callee() "aarch64_new_zt0";
167+
ret void;
168+
}
169+
137170
;
138171
; New-ZA Caller
139172
;
@@ -144,23 +177,18 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
144177
define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
145178
; CHECK-LABEL: zt0_new_caller:
146179
; CHECK: // %bb.0: // %prelude
147-
; CHECK-NEXT: sub sp, sp, #80
148-
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
180+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
149181
; CHECK-NEXT: mrs x8, TPIDR2_EL0
150-
; CHECK-NEXT: cbz x8, .LBB6_2
182+
; CHECK-NEXT: cbz x8, .LBB7_2
151183
; CHECK-NEXT: // %bb.1: // %save.za
152-
; CHECK-NEXT: mov x8, sp
153-
; CHECK-NEXT: str zt0, [x8]
154184
; CHECK-NEXT: bl __arm_tpidr2_save
155-
; CHECK-NEXT: ldr zt0, [x8]
156185
; CHECK-NEXT: msr TPIDR2_EL0, xzr
157-
; CHECK-NEXT: .LBB6_2:
186+
; CHECK-NEXT: .LBB7_2:
158187
; CHECK-NEXT: smstart za
159188
; CHECK-NEXT: zero { zt0 }
160189
; CHECK-NEXT: bl callee
161190
; CHECK-NEXT: smstop za
162-
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
163-
; CHECK-NEXT: add sp, sp, #80
191+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
164192
; CHECK-NEXT: ret
165193
call void @callee() "aarch64_in_zt0";
166194
ret void;
@@ -172,24 +200,19 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
172200
define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
173201
; CHECK-LABEL: new_za_zt0_caller:
174202
; CHECK: // %bb.0: // %prelude
175-
; CHECK-NEXT: sub sp, sp, #80
176-
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
203+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
177204
; CHECK-NEXT: mrs x8, TPIDR2_EL0
178-
; CHECK-NEXT: cbz x8, .LBB7_2
205+
; CHECK-NEXT: cbz x8, .LBB8_2
179206
; CHECK-NEXT: // %bb.1: // %save.za
180-
; CHECK-NEXT: mov x8, sp
181-
; CHECK-NEXT: str zt0, [x8]
182207
; CHECK-NEXT: bl __arm_tpidr2_save
183-
; CHECK-NEXT: ldr zt0, [x8]
184208
; CHECK-NEXT: msr TPIDR2_EL0, xzr
185-
; CHECK-NEXT: .LBB7_2:
209+
; CHECK-NEXT: .LBB8_2:
186210
; CHECK-NEXT: smstart za
187211
; CHECK-NEXT: zero {za}
188212
; CHECK-NEXT: zero { zt0 }
189213
; CHECK-NEXT: bl callee
190214
; CHECK-NEXT: smstop za
191-
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
192-
; CHECK-NEXT: add sp, sp, #80
215+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
193216
; CHECK-NEXT: ret
194217
call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
195218
ret void;

0 commit comments

Comments
 (0)