@@ -285,6 +285,11 @@ static cl::opt<bool>
285
285
StackHazardInNonStreaming (" aarch64-stack-hazard-in-non-streaming" ,
286
286
cl::init (false ), cl::Hidden);
287
287
288
+ static cl::opt<bool > DisableMultiVectorSpillFill (
289
+ " aarch64-disable-multivector-spill-fill" ,
290
+ cl::desc (" Disable use of LD/ST pairs for SME2 or SVE2p1" ), cl::init(false ),
291
+ cl::Hidden); // Defaults to false; when set, enableMultiVectorSpillFill() returns false.
292
+
288
293
STATISTIC (NumRedZoneFunctions, " Number of functions using red zone" );
289
294
290
295
/// Returns how much of the incoming argument stack area (in bytes) we should
@@ -2954,6 +2959,24 @@ unsigned findFreePredicateReg(BitVector &SavedRegs) {
2954
2959
return AArch64::NoRegister;
2955
2960
}
2956
2961
2962
+ // Returns true if multi-vector LD/ST may be used for spill/fill: requires an SVE2p1 target, or an SME2 target in streaming mode that is not locally streaming.
2963
+ bool enableMultiVectorSpillFill (const AArch64Subtarget &Subtarget,
2964
+ MachineFunction &MF) {
2965
+ if (DisableMultiVectorSpillFill) // Command-line override takes precedence.
2966
+ return false ;
2967
+
2968
+ SMEAttrs FuncAttrs (MF.getFunction ());
2969
+ bool IsLocallyStreaming =
2970
+ FuncAttrs.hasStreamingBody () && !FuncAttrs.hasStreamingInterface (); // Streaming body without a streaming interface.
2971
+
2972
+ // SME2 instructions can be used safely only when in streaming mode.
2973
+ // It is not safe to use SME2 instructions when in streaming-compatible or
2974
+ // locally streaming mode.
2975
+ return Subtarget.hasSVE2p1 () ||
2976
+ (Subtarget.hasSME2 () &&
2977
+ (!IsLocallyStreaming && Subtarget.isStreaming ()));
2978
+ }
2979
+
2957
2980
static void computeCalleeSaveRegisterPairs (
2958
2981
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
2959
2982
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@@ -3330,7 +3353,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
3330
3353
MF.getSubtarget <AArch64Subtarget>();
3331
3354
AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
3332
3355
unsigned PnReg = AFI->getPredicateRegForFillSpill ();
3333
- assert (((Subtarget. hasSVE2p1 () || Subtarget. hasSME2 ()) && PnReg != 0 ) &&
3356
+ assert ((PnReg != 0 && enableMultiVectorSpillFill (Subtarget, MF) ) &&
3334
3357
" Expects SVE2.1 or SME2 target and a predicate register" );
3335
3358
#ifdef EXPENSIVE_CHECKS
3336
3359
auto IsPPR = [](const RegPairInfo &c) {
@@ -3508,7 +3531,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
3508
3531
[[maybe_unused]] const AArch64Subtarget &Subtarget =
3509
3532
MF.getSubtarget <AArch64Subtarget>();
3510
3533
unsigned PnReg = AFI->getPredicateRegForFillSpill ();
3511
- assert (((Subtarget. hasSVE2p1 () || Subtarget. hasSME2 ()) && PnReg != 0 ) &&
3534
+ assert ((PnReg != 0 && enableMultiVectorSpillFill (Subtarget, MF) ) &&
3512
3535
" Expects SVE2.1 or SME2 target and a predicate register" );
3513
3536
#ifdef EXPENSIVE_CHECKS
3514
3537
assert (!(PPRBegin < ZPRBegin) &&
@@ -3722,7 +3745,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
3722
3745
SavedRegs.test (CSRegs[i ^ 1 ]));
3723
3746
}
3724
3747
3725
- if (HasPairZReg && (Subtarget. hasSVE2p1 () || Subtarget. hasSME2 () )) {
3748
+ if (HasPairZReg && enableMultiVectorSpillFill (Subtarget, MF )) {
3726
3749
AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
3727
3750
// Find a suitable predicate register for the multi-vector spill/fill
3728
3751
// instructions.
0 commit comments