
Commit d4d81ac

[AArch64][SME2] Extend SMEABIPass to handle functions with new ZT0 state (#78848)
updateNewZAFunctions is extended to generate the following on entry to a function with either the "aarch64_pstate_za_new" or "aarch64_new_zt0" attribute:

- Private-ZA interface: commit any active lazy-save and enable PSTATE.ZA.
- "aarch64_pstate_za_new": zero ZA.
- "aarch64_new_zt0": zero ZT0.

Additionally, PSTATE.ZA should be disabled before returning if the function has a private-ZA interface. A sketch of this decision table follows below.
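As a reading aid, here is a minimal C++ sketch (not code from this patch) of which prologue/epilogue steps the pass now emits for a given attribute combination. The boolean parameters and planEntryExit helper are hypothetical stand-ins for the SMEAttrs queries used in the diff below.

// Hedged sketch: restates the commit description as a decision table.
struct EntryExitPlan {
  bool CommitLazySave; // call __arm_tpidr2_save if TPIDR2_EL0 != 0
  bool EnableZA;       // smstart za on entry
  bool ZeroZA;         // zero {za} on entry
  bool ZeroZT0;        // zero { zt0 } on entry
  bool DisableZAOnRet; // smstop za before every return
};

// HasPrivateZAInterface: the function shares neither ZA nor ZT0.
EntryExitPlan planEntryExit(bool HasNewZA, bool HasNewZT0,
                            bool HasPrivateZAInterface) {
  EntryExitPlan P{};
  P.CommitLazySave = HasPrivateZAInterface; // commit any dormant lazy-save
  P.EnableZA = HasPrivateZAInterface;
  P.ZeroZA = HasNewZA;                      // "aarch64_pstate_za_new"
  P.ZeroZT0 = HasNewZT0;                    // "aarch64_new_zt0"
  P.DisableZAOnRet = HasPrivateZAInterface;
  return P;
}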
1 parent ae99966 commit d4d81ac

File tree

llvm/lib/Target/AArch64/SMEABIPass.cpp
llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
llvm/test/CodeGen/AArch64/sme-zt0-state.ll

4 files changed: +211 -66 lines changed

llvm/lib/Target/AArch64/SMEABIPass.cpp

Lines changed: 82 additions & 50 deletions
@@ -40,7 +40,8 @@ struct SMEABI : public FunctionPass {
   bool runOnFunction(Function &F) override;
 
 private:
-  bool updateNewZAFunctions(Module *M, Function *F, IRBuilder<> &Builder);
+  bool updateNewStateFunctions(Module *M, Function *F, IRBuilder<> &Builder,
+                               SMEAttrs FnAttrs);
 };
 } // end anonymous namespace
 
@@ -76,56 +77,87 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
                      Builder.getInt64(0));
 }
 
-/// This function generates code to commit a lazy save at the beginning of a
-/// function marked with `aarch64_pstate_za_new`. If the value read from
-/// TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme
-/// is active and we should call __arm_tpidr2_save to commit the lazy save.
-/// Additionally, PSTATE.ZA should be enabled at the beginning of the function
-/// and disabled before returning.
-bool SMEABI::updateNewZAFunctions(Module *M, Function *F,
-                                  IRBuilder<> &Builder) {
+/// This function generates code at the beginning and end of a function marked
+/// with either `aarch64_pstate_za_new` or `aarch64_new_zt0`.
+/// At the beginning of the function, the following code is generated:
+///  - Commit lazy-save if active   [Private-ZA Interface*]
+///  - Enable PSTATE.ZA             [Private-ZA Interface]
+///  - Zero ZA                      [Has New ZA State]
+///  - Zero ZT0                     [Has New ZT0 State]
+///
+/// * A function with new ZT0 state will not change ZA, so committing the
+/// lazy-save is not strictly necessary. However, the lazy-save mechanism
+/// may be active on entry to the function, with PSTATE.ZA set to 1. If
+/// the new ZT0 function calls a function that does not share ZT0, we will
+/// need to conditionally SMSTOP ZA before the call, setting PSTATE.ZA to 0.
+/// For this reason, it's easier to always commit the lazy-save at the
+/// beginning of the function regardless of whether it has ZA state.
+///
+/// At the end of the function, PSTATE.ZA is disabled if the function has a
+/// Private-ZA Interface. A function is considered to have a Private-ZA
+/// interface if it does not share ZA or ZT0.
+///
+bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
+                                     IRBuilder<> &Builder, SMEAttrs FnAttrs) {
   LLVMContext &Context = F->getContext();
   BasicBlock *OrigBB = &F->getEntryBlock();
-
-  // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
-  auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true);
-  auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
-
-  // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
-  Builder.SetInsertPoint(PreludeBB);
-  Function *TPIDR2Intr =
-      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
-  auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
-                                    {}, "tpidr2");
-  auto *Cmp =
-      Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2, Builder.getInt64(0), "cmp");
-  Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
-
-  // Create a call __arm_tpidr2_save, which commits the lazy save.
-  Builder.SetInsertPoint(&SaveBB->back());
-  emitTPIDR2Save(M, Builder);
-
-  // Enable pstate.za at the start of the function.
   Builder.SetInsertPoint(&OrigBB->front());
-  Function *EnableZAIntr =
-      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
-  Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
-
-  // ZA state must be zeroed upon entry to a function with NewZA
-  Function *ZeroIntr =
-      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero);
-  Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
-                     Builder.getInt32(0xff));
-
-  // Before returning, disable pstate.za
-  for (BasicBlock &BB : *F) {
-    Instruction *T = BB.getTerminator();
-    if (!T || !isa<ReturnInst>(T))
-      continue;
-    Builder.SetInsertPoint(T);
-    Function *DisableZAIntr =
-        Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable);
-    Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
+
+  // Commit any active lazy-saves if this is a Private-ZA function. If the
+  // value read from TPIDR2_EL0 is not null on entry to the function then
+  // the lazy-saving scheme is active and we should call __arm_tpidr2_save
+  // to commit the lazy save.
+  if (FnAttrs.hasPrivateZAInterface()) {
+    // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
+    auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true);
+    auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
+
+    // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
+    Builder.SetInsertPoint(PreludeBB);
+    Function *TPIDR2Intr =
+        Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
+    auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
+                                      {}, "tpidr2");
+    auto *Cmp = Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2,
+                                  Builder.getInt64(0), "cmp");
+    Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
+
+    // Create a call __arm_tpidr2_save, which commits the lazy save.
+    Builder.SetInsertPoint(&SaveBB->back());
+    emitTPIDR2Save(M, Builder);
+
+    // Enable pstate.za at the start of the function.
+    Builder.SetInsertPoint(&OrigBB->front());
+    Function *EnableZAIntr =
+        Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
+    Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
+  }
+
+  if (FnAttrs.hasNewZABody()) {
+    Function *ZeroIntr =
+        Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero);
+    Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
+                       Builder.getInt32(0xff));
+  }
+
+  if (FnAttrs.isNewZT0()) {
+    Function *ClearZT0Intr =
+        Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero_zt);
+    Builder.CreateCall(ClearZT0Intr->getFunctionType(), ClearZT0Intr,
+                       {Builder.getInt32(0)});
+  }
+
+  if (FnAttrs.hasPrivateZAInterface()) {
+    // Before returning, disable pstate.za
+    for (BasicBlock &BB : *F) {
+      Instruction *T = BB.getTerminator();
+      if (!T || !isa<ReturnInst>(T))
+        continue;
+      Builder.SetInsertPoint(T);
+      Function *DisableZAIntr =
+          Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable);
+      Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
+    }
   }
 
   F->addFnAttr("aarch64_expanded_pstate_za");
@@ -142,8 +174,8 @@ bool SMEABI::runOnFunction(Function &F) {
 
   bool Changed = false;
   SMEAttrs FnAttrs(F);
-  if (FnAttrs.hasNewZABody())
-    Changed |= updateNewZAFunctions(M, &F, Builder);
+  if (FnAttrs.hasNewZABody() || FnAttrs.isNewZT0())
+    Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs);
 
   return Changed;
 }
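For orientation, here is a hedged, self-contained C++ restatement (not LLVM code) of the control flow the pass builds for a function with a private-ZA interface. The readTPIDR2, commitLazySave, smstartZA, zeroZA, and zeroZT0 functions are placeholder stand-ins for the intrinsic calls emitted in the diff above; the block labels mirror the %prelude and %save.za blocks created by splitBasicBlock.

#include <cstdint>

namespace sketch {

uint64_t readTPIDR2() { return 0; } // stand-in for llvm.aarch64.sme.get.tpidr2
void commitLazySave() {}            // stand-in for emitTPIDR2Save()
void smstartZA() {}                 // stand-in for llvm.aarch64.sme.za.enable
void zeroZA() {}                    // stand-in for llvm.aarch64.sme.zero(0xff)
void zeroZT0() {}                   // stand-in for llvm.aarch64.sme.zero.zt(0)

// Shape of the code inserted at the entry of a private-ZA function.
void prologue(bool HasNewZA, bool HasNewZT0) {
  // %prelude: branch to %save.za only if a lazy-save is dormant.
  if (readTPIDR2() != 0)
    commitLazySave(); // %save.za: call __arm_tpidr2_save, clear TPIDR2_EL0
  // Original entry block: enable ZA, then zero any new state.
  smstartZA();
  if (HasNewZA)
    zeroZA();
  if (HasNewZT0)
    zeroZT0();
  // Epilogue (not shown): smstop za before each return, again only for
  // functions with a private-ZA interface.
}

} // namespace sketch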

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp

Lines changed: 4 additions & 7 deletions
@@ -27,10 +27,8 @@ void SMEAttrs::set(unsigned M, bool Enable) {
          "ZA_New and ZA_Shared are mutually exclusive");
   assert(!(hasNewZABody() && preservesZA()) &&
          "ZA_New and ZA_Preserved are mutually exclusive");
-  assert(!(hasNewZABody() && (Bitmask & ZA_NoLazySave)) &&
-         "ZA_New and ZA_NoLazySave are mutually exclusive");
-  assert(!(sharesZA() && (Bitmask & ZA_NoLazySave)) &&
-         "ZA_Shared and ZA_NoLazySave are mutually exclusive");
+  assert(!(hasNewZABody() && (Bitmask & SME_ABI_Routine)) &&
+         "ZA_New and SME_ABI_Routine are mutually exclusive");
 
   // ZT0 Attrs
   assert(
@@ -49,11 +47,10 @@ SMEAttrs::SMEAttrs(const CallBase &CB) {
 
 SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
   if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state")
-    Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved |
-                SMEAttrs::ZA_NoLazySave);
+    Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine);
   if (FuncName == "__arm_tpidr2_restore")
     Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
-                SMEAttrs::ZA_NoLazySave);
+                SMEAttrs::SME_ABI_Routine);
 }
 
 SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
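To make the renaming concrete, here is a small self-contained C++ sketch of the name-to-attribute mapping the SMEAttrs(StringRef) constructor applies after this change. The constants mirror the bit values in AArch64SMEAttributes.h, and attrsForRuntimeRoutine is a hypothetical helper, not part of the patch.

#include <string_view>

// Bit values as defined in AArch64SMEAttributes.h (see the header diff below).
constexpr unsigned SM_Compatible = 1u << 1;
constexpr unsigned ZA_Shared = 1u << 3;
constexpr unsigned SME_ABI_Routine = 1u << 6;

// Hypothetical helper: the mapping applied to the SME ABI support routines.
// Note that __arm_tpidr2_save and __arm_sme_state no longer carry
// ZA_Preserved; lazy-save suppression now comes from the SME_ABI_Routine
// check in requiresLazySave.
unsigned attrsForRuntimeRoutine(std::string_view Name) {
  if (Name == "__arm_tpidr2_save" || Name == "__arm_sme_state")
    return SM_Compatible | SME_ABI_Routine;
  if (Name == "__arm_tpidr2_restore")
    return SM_Compatible | ZA_Shared | SME_ABI_Routine;
  return 0; // Normal
}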

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h

Lines changed: 10 additions & 9 deletions
@@ -38,13 +38,13 @@ class SMEAttrs {
   // Enum with bitmasks for each individual SME feature.
   enum Mask {
     Normal = 0,
-    SM_Enabled = 1 << 0,    // aarch64_pstate_sm_enabled
-    SM_Compatible = 1 << 1, // aarch64_pstate_sm_compatible
-    SM_Body = 1 << 2,       // aarch64_pstate_sm_body
-    ZA_Shared = 1 << 3,     // aarch64_pstate_sm_shared
-    ZA_New = 1 << 4,        // aarch64_pstate_sm_new
-    ZA_Preserved = 1 << 5,  // aarch64_pstate_sm_preserved
-    ZA_NoLazySave = 1 << 6, // Used for SME ABI routines to avoid lazy saves
+    SM_Enabled = 1 << 0,      // aarch64_pstate_sm_enabled
+    SM_Compatible = 1 << 1,   // aarch64_pstate_sm_compatible
+    SM_Body = 1 << 2,         // aarch64_pstate_sm_body
+    ZA_Shared = 1 << 3,       // aarch64_pstate_sm_shared
+    ZA_New = 1 << 4,          // aarch64_pstate_sm_new
+    ZA_Preserved = 1 << 5,    // aarch64_pstate_sm_preserved
+    SME_ABI_Routine = 1 << 6, // Used for SME ABI routines to avoid lazy saves
     ZT0_Shift = 7,
     ZT0_Mask = 0b111 << ZT0_Shift
   };
@@ -86,7 +86,7 @@ class SMEAttrs {
   bool hasZAState() const { return hasNewZABody() || sharesZA(); }
   bool requiresLazySave(const SMEAttrs &Callee) const {
     return hasZAState() && Callee.hasPrivateZAInterface() &&
-           !(Callee.Bitmask & ZA_NoLazySave);
+           !(Callee.Bitmask & SME_ABI_Routine);
   }
 
   // Interfaces to query ZT0 State
@@ -116,7 +116,8 @@ class SMEAttrs {
     return hasZT0State() && !Callee.sharesZT0();
   }
   bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const {
-    return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface();
+    return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() &&
+           !(Callee.Bitmask & SME_ABI_Routine);
   }
   bool requiresEnablingZAAfterCall(const SMEAttrs &Callee) const {
     return requiresLazySave(Callee) || requiresDisablingZABeforeCall(Callee);
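Below is a hedged sketch of the caller-side queries from this header, restated over plain flags to show the effect of the SME_ABI_Routine check that this patch adds to requiresDisablingZABeforeCall. The Fn struct and the free functions are illustrative only, not part of the LLVM API.

// Hedged restatement of the call-site queries, using plain flags.
struct Fn {
  bool HasZAState;         // new or shared ZA
  bool HasZT0State;        // any ZT0 attribute
  bool PrivateZAInterface; // shares neither ZA nor ZT0
  bool IsSMEABIRoutine;    // SME_ABI_Routine bit
};

bool requiresLazySave(const Fn &Caller, const Fn &Callee) {
  return Caller.HasZAState && Callee.PrivateZAInterface &&
         !Callee.IsSMEABIRoutine;
}

bool requiresDisablingZABeforeCall(const Fn &Caller, const Fn &Callee) {
  // New in this patch: calls to SME ABI routines no longer force SMSTOP ZA.
  return Caller.HasZT0State && !Caller.HasZAState &&
         Callee.PrivateZAInterface && !Callee.IsSMEABIRoutine;
}

bool requiresEnablingZAAfterCall(const Fn &Caller, const Fn &Callee) {
  return requiresLazySave(Caller, Callee) ||
         requiresDisablingZABeforeCall(Caller, Callee);
}

For example, a caller with only new ZT0 state that calls __arm_tpidr2_save no longer toggles PSTATE.ZA around the call, which is consistent with the zt0_new_caller CHECK lines in the test diff below.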

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

Lines changed: 115 additions & 0 deletions
@@ -153,3 +153,118 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
   call void @callee() "aarch64_new_zt0";
   ret void;
 }
+
+;
+; New-ZA Caller
+;
+
+; Expect commit of lazy-save if ZA is dormant
+; Expect smstart ZA & clear ZT0
+; Before return, expect smstop ZA
+define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
+; CHECK-LABEL: zt0_new_caller:
+; CHECK:       // %bb.0: // %prelude
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    cbz x8, .LBB6_2
+; CHECK-NEXT:  // %bb.1: // %save.za
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    str zt0, [x8]
+; CHECK-NEXT:    bl __arm_tpidr2_save
+; CHECK-NEXT:    ldr zt0, [x8]
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:  .LBB6_2:
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+  call void @callee() "aarch64_in_zt0";
+  ret void;
+}
+
+; Expect commit of lazy-save if ZA is dormant
+; Expect smstart ZA, clear ZA & clear ZT0
+; Before return, expect smstop ZA
+define void @new_za_zt0_caller() "aarch64_pstate_za_new" "aarch64_new_zt0" nounwind {
+; CHECK-LABEL: new_za_zt0_caller:
+; CHECK:       // %bb.0: // %prelude
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x8, x8, x8, x9
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
+; CHECK-NEXT:    stur x8, [x29, #-16]
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    cbz x8, .LBB7_2
+; CHECK-NEXT:  // %bb.1: // %save.za
+; CHECK-NEXT:    sub x8, x29, #80
+; CHECK-NEXT:    str zt0, [x8]
+; CHECK-NEXT:    bl __arm_tpidr2_save
+; CHECK-NEXT:    ldr zt0, [x8]
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    zero {za}
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0";
+  ret void;
+}
+
+; Expect clear ZA on entry
+define void @new_za_shared_zt0_caller() "aarch64_pstate_za_new" "aarch64_in_zt0" nounwind {
+; CHECK-LABEL: new_za_shared_zt0_caller:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x8, x8, x8, x9
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
+; CHECK-NEXT:    stur x8, [x29, #-16]
+; CHECK-NEXT:    zero {za}
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0";
+  ret void;
+}
+
+; Expect clear ZT0 on entry
+define void @shared_za_new_zt0() "aarch64_pstate_za_shared" "aarch64_new_zt0" nounwind {
+; CHECK-LABEL: shared_za_new_zt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x8, x8, x8, x9
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    stur wzr, [x29, #-4]
+; CHECK-NEXT:    sturh wzr, [x29, #-6]
+; CHECK-NEXT:    stur x8, [x29, #-16]
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  call void @callee() "aarch64_pstate_za_shared" "aarch64_in_zt0";
+  ret void;
+}
