@@ -40,7 +40,8 @@ struct SMEABI : public FunctionPass {
40
40
bool runOnFunction (Function &F) override ;
41
41
42
42
private:
43
- bool updateNewZAFunctions (Module *M, Function *F, IRBuilder<> &Builder);
43
+ bool updateNewStateFunctions (Module *M, Function *F, IRBuilder<> &Builder,
44
+ SMEAttrs FnAttrs);
44
45
};
45
46
} // end anonymous namespace
46
47
@@ -76,56 +77,87 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
76
77
Builder.getInt64 (0 ));
77
78
}
78
79
79
- // / This function generates code to commit a lazy save at the beginning of a
80
- // / function marked with `aarch64_pstate_za_new`. If the value read from
81
- // / TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme
82
- // / is active and we should call __arm_tpidr2_save to commit the lazy save.
83
- // / Additionally, PSTATE.ZA should be enabled at the beginning of the function
84
- // / and disabled before returning.
85
- bool SMEABI::updateNewZAFunctions (Module *M, Function *F,
86
- IRBuilder<> &Builder) {
80
+ // / This function generates code at the beginning and end of a function marked
81
+ // / with either `aarch64_pstate_za_new` or `aarch64_new_zt0`.
82
+ // / At the beginning of the function, the following code is generated:
83
+ // / - Commit lazy-save if active [Private-ZA Interface*]
84
+ // / - Enable PSTATE.ZA [Private-ZA Interface]
85
+ // / - Zero ZA [Has New ZA State]
86
+ // / - Zero ZT0 [Has New ZT0 State]
87
+ // /
88
+ // / * A function with new ZT0 state will not change ZA, so committing the
89
+ // / lazy-save is not strictly necessary. However, the lazy-save mechanism
90
+ // / may be active on entry to the function, with PSTATE.ZA set to 1. If
91
+ // / the new ZT0 function calls a function that does not share ZT0, we will
92
+ // / need to conditionally SMSTOP ZA before the call, setting PSTATE.ZA to 0.
93
+ // / For this reason, it's easier to always commit the lazy-save at the
94
+ // / beginning of the function regardless of whether it has ZA state.
95
+ // /
96
+ // / At the end of the function, PSTATE.ZA is disabled if the function has a
97
+ // / Private-ZA Interface. A function is considered to have a Private-ZA
98
+ // / interface if it does not share ZA or ZT0.
99
+ // /
100
+ bool SMEABI::updateNewStateFunctions (Module *M, Function *F,
101
+ IRBuilder<> &Builder, SMEAttrs FnAttrs) {
87
102
LLVMContext &Context = F->getContext ();
88
103
BasicBlock *OrigBB = &F->getEntryBlock ();
89
-
90
- // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
91
- auto *SaveBB = OrigBB->splitBasicBlock (OrigBB->begin (), " save.za" , true );
92
- auto *PreludeBB = BasicBlock::Create (Context, " prelude" , F, SaveBB);
93
-
94
- // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
95
- Builder.SetInsertPoint (PreludeBB);
96
- Function *TPIDR2Intr =
97
- Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_get_tpidr2);
98
- auto *TPIDR2 = Builder.CreateCall (TPIDR2Intr->getFunctionType (), TPIDR2Intr,
99
- {}, " tpidr2" );
100
- auto *Cmp =
101
- Builder.CreateCmp (ICmpInst::ICMP_NE, TPIDR2, Builder.getInt64 (0 ), " cmp" );
102
- Builder.CreateCondBr (Cmp, SaveBB, OrigBB);
103
-
104
- // Create a call __arm_tpidr2_save, which commits the lazy save.
105
- Builder.SetInsertPoint (&SaveBB->back ());
106
- emitTPIDR2Save (M, Builder);
107
-
108
- // Enable pstate.za at the start of the function.
109
104
Builder.SetInsertPoint (&OrigBB->front ());
110
- Function *EnableZAIntr =
111
- Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_za_enable);
112
- Builder.CreateCall (EnableZAIntr->getFunctionType (), EnableZAIntr);
113
-
114
- // ZA state must be zeroed upon entry to a function with NewZA
115
- Function *ZeroIntr =
116
- Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_zero);
117
- Builder.CreateCall (ZeroIntr->getFunctionType (), ZeroIntr,
118
- Builder.getInt32 (0xff ));
119
-
120
- // Before returning, disable pstate.za
121
- for (BasicBlock &BB : *F) {
122
- Instruction *T = BB.getTerminator ();
123
- if (!T || !isa<ReturnInst>(T))
124
- continue ;
125
- Builder.SetInsertPoint (T);
126
- Function *DisableZAIntr =
127
- Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_za_disable);
128
- Builder.CreateCall (DisableZAIntr->getFunctionType (), DisableZAIntr);
105
+
106
+ // Commit any active lazy-saves if this is a Private-ZA function. If the
107
+ // value read from TPIDR2_EL0 is not null on entry to the function then
108
+ // the lazy-saving scheme is active and we should call __arm_tpidr2_save
109
+ // to commit the lazy save.
110
+ if (FnAttrs.hasPrivateZAInterface ()) {
111
+ // Create the new blocks for reading TPIDR2_EL0 & committing the lazy save.
112
+ auto *SaveBB = OrigBB->splitBasicBlock (OrigBB->begin (), " save.za" , true );
113
+ auto *PreludeBB = BasicBlock::Create (Context, " prelude" , F, SaveBB);
114
+
115
+ // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
116
+ Builder.SetInsertPoint (PreludeBB);
117
+ Function *TPIDR2Intr =
118
+ Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_get_tpidr2);
119
+ auto *TPIDR2 = Builder.CreateCall (TPIDR2Intr->getFunctionType (), TPIDR2Intr,
120
+ {}, " tpidr2" );
121
+ auto *Cmp = Builder.CreateCmp (ICmpInst::ICMP_NE, TPIDR2,
122
+ Builder.getInt64 (0 ), " cmp" );
123
+ Builder.CreateCondBr (Cmp, SaveBB, OrigBB);
124
+
125
+ // Create a call __arm_tpidr2_save, which commits the lazy save.
126
+ Builder.SetInsertPoint (&SaveBB->back ());
127
+ emitTPIDR2Save (M, Builder);
128
+
129
+ // Enable pstate.za at the start of the function.
130
+ Builder.SetInsertPoint (&OrigBB->front ());
131
+ Function *EnableZAIntr =
132
+ Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_za_enable);
133
+ Builder.CreateCall (EnableZAIntr->getFunctionType (), EnableZAIntr);
134
+ }
135
+
136
+ if (FnAttrs.hasNewZABody ()) {
137
+ Function *ZeroIntr =
138
+ Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_zero);
139
+ Builder.CreateCall (ZeroIntr->getFunctionType (), ZeroIntr,
140
+ Builder.getInt32 (0xff ));
141
+ }
142
+
143
+ if (FnAttrs.isNewZT0 ()) {
144
+ Function *ClearZT0Intr =
145
+ Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_zero_zt);
146
+ Builder.CreateCall (ClearZT0Intr->getFunctionType (), ClearZT0Intr,
147
+ {Builder.getInt32 (0 )});
148
+ }
149
+
150
+ if (FnAttrs.hasPrivateZAInterface ()) {
151
+ // Before returning, disable pstate.za
152
+ for (BasicBlock &BB : *F) {
153
+ Instruction *T = BB.getTerminator ();
154
+ if (!T || !isa<ReturnInst>(T))
155
+ continue ;
156
+ Builder.SetInsertPoint (T);
157
+ Function *DisableZAIntr =
158
+ Intrinsic::getDeclaration (M, Intrinsic::aarch64_sme_za_disable);
159
+ Builder.CreateCall (DisableZAIntr->getFunctionType (), DisableZAIntr);
160
+ }
129
161
}
130
162
131
163
F->addFnAttr (" aarch64_expanded_pstate_za" );
@@ -142,8 +174,8 @@ bool SMEABI::runOnFunction(Function &F) {
142
174
143
175
bool Changed = false ;
144
176
SMEAttrs FnAttrs (F);
145
- if (FnAttrs.hasNewZABody ())
146
- Changed |= updateNewZAFunctions (M, &F, Builder);
177
+ if (FnAttrs.hasNewZABody () || FnAttrs. isNewZT0 () )
178
+ Changed |= updateNewStateFunctions (M, &F, Builder, FnAttrs );
147
179
148
180
return Changed;
149
181
}
0 commit comments