@@ -3530,8 +3530,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
3530
3530
3531
3531
// Stack hazards are only needed in streaming functions.
3532
3532
SMEAttrs Attrs (MF.getFunction ());
3533
- if (!StackHazardInNonStreaming &&
3534
- Attrs.hasNonStreamingInterfaceAndBody ())
3533
+ if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody ())
3535
3534
return ;
3536
3535
3537
3536
MachineFrameInfo &MFI = MF.getFrameInfo ();
@@ -4651,9 +4650,10 @@ struct FrameObject {
4651
4650
// ObjectFirst==true) should be placed first.
4652
4651
bool GroupFirst = false ;
4653
4652
4654
- // Used to distinguish between FP and GPR accesses.
4655
- // 1 = GPR, 2 = FPR, 8 = Hazard Object .
4653
+ // Used to distinguish between FP and GPR accesses. The values are chosen so
4654
+ // that they sort FPR < Hazard < GPR and can be OR'd together.
4656
4655
unsigned Accesses = 0 ;
4656
+ enum { AccessFPR = 1 , AccessHazard = 2 , AccessGPR = 4 };
4657
4657
};
4658
4658
4659
4659
class GroupBuilder {
@@ -4691,7 +4691,7 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
4691
4691
//
4692
4692
// If we want to include a stack hazard region, order FPR accesses < the
4693
4693
// hazard object < GPRs accesses in order to create a separation between the
4694
- // two. For the Accesses field 1 = GPR , 2 = FPR, 8 = Hazard Object .
4694
+ // two. For the Accesses field 1 = FPR, 2 = Hazard Object, 4 = GPR.
4695
4695
//
4696
4696
// Otherwise the "first" object goes first (closest to SP), followed by the
4697
4697
// members of the "first" group.
@@ -4703,16 +4703,10 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
4703
4703
//
4704
4704
// If all else equal, sort by the object index to keep the objects in the
4705
4705
// original order.
4706
- if (A.IsValid != B.IsValid )
4707
- return A.IsValid ;
4708
- if (A.Accesses == 2 && B.Accesses != 2 )
4709
- return true ;
4710
- if (A.Accesses == 8 && B.Accesses != 2 )
4711
- return true ;
4712
- return std::make_tuple (A.ObjectFirst , A.GroupFirst , A.GroupIndex ,
4713
- A.ObjectIndex ) <
4714
- std::make_tuple (B.ObjectFirst , B.GroupFirst , B.GroupIndex ,
4715
- B.ObjectIndex );
4706
+ return std::make_tuple (!A.IsValid , A.Accesses , A.ObjectFirst , A.GroupFirst ,
4707
+ A.GroupIndex , A.ObjectIndex ) <
4708
+ std::make_tuple (!B.IsValid , B.Accesses , B.ObjectFirst , B.GroupFirst ,
4709
+ B.GroupIndex , B.ObjectIndex );
4716
4710
}
4717
4711
} // namespace
4718
4712
@@ -4729,12 +4723,24 @@ void AArch64FrameLowering::orderFrameObjects(
4729
4723
FrameObjects[Obj].ObjectIndex = Obj;
4730
4724
}
4731
4725
4732
- // Identify stack slots that are tagged at the same time.
4726
+ // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
4727
+ // the same time.
4733
4728
GroupBuilder GB (FrameObjects);
4734
4729
for (auto &MBB : MF) {
4735
4730
for (auto &MI : MBB) {
4736
4731
if (MI.isDebugInstr ())
4737
4732
continue ;
4733
+
4734
+ if (AFI.hasStackHazardSlotIndex ()) {
4735
+ std::optional<int > FI = getLdStFrameID (MI, MFI);
4736
+ if (FI && *FI >= 0 && *FI < (int )FrameObjects.size ()) {
4737
+ if (MFI.getStackID (*FI) == 2 || AArch64InstrInfo::isFpOrNEON (MI))
4738
+ FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
4739
+ else
4740
+ FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
4741
+ }
4742
+ }
4743
+
4738
4744
int OpIndex;
4739
4745
switch (MI.getOpcode ()) {
4740
4746
case AArch64::STGloop:
@@ -4768,23 +4774,20 @@ void AArch64FrameLowering::orderFrameObjects(
4768
4774
GB.AddMember (TaggedFI);
4769
4775
else
4770
4776
GB.EndCurrentGroup ();
4771
-
4772
- if (AFI.hasStackHazardSlotIndex ()) {
4773
- std::optional<int > FI = getLdStFrameID (MI, MFI);
4774
- if (FI && *FI >= 0 && *FI < (int )FrameObjects.size ()) {
4775
- if (MFI.getStackID (*FI) == 2 || AArch64InstrInfo::isFpOrNEON (MI))
4776
- FrameObjects[*FI].Accesses |= 2 ;
4777
- else
4778
- FrameObjects[*FI].Accesses |= 1 ;
4779
- }
4780
- }
4781
4777
}
4782
4778
// Groups should never span multiple basic blocks.
4783
4779
GB.EndCurrentGroup ();
4784
4780
}
4785
4781
4786
- if (AFI.hasStackHazardSlotIndex ())
4787
- FrameObjects[AFI.getStackHazardSlotIndex ()].Accesses = 8 ;
4782
+ if (AFI.hasStackHazardSlotIndex ()) {
4783
+ FrameObjects[AFI.getStackHazardSlotIndex ()].Accesses =
4784
+ FrameObject::AccessHazard;
4785
+ // If a stack object is unknown or both GPR and FPR, sort it into GPR.
4786
+ for (auto &Obj : FrameObjects)
4787
+ if (!Obj.Accesses ||
4788
+ Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
4789
+ Obj.Accesses = FrameObject::AccessGPR;
4790
+ }
4788
4791
4789
4792
// If the function's tagged base pointer is pinned to a stack slot, we want to
4790
4793
// put that slot first when possible. This will likely place it at SP + 0,
0 commit comments