 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
 // Stack hazard padding size. 0 = disabled.
 static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                          cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt<unsigned>
+    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+                          cl::Hidden);
 // Whether to insert padding into non-streaming functions (for testing).
 static cl::opt<bool>
     StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
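The remark-size option only sets the distance threshold used by the analysis remarks; it inserts no padding, and a nonzero aarch64-stack-hazard-size takes precedence. As a rough sketch of how the remarks might be requested (assuming the generic llc remark flags and a hypothetical input.ll):

    llc -mtriple=aarch64 -aarch64-stack-hazard-remark-size=1024 \
        -pass-remarks-analysis=sme input.ll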
@@ -2662,9 +2667,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
   const auto &MFI = MF.getFrameInfo();
 
   int64_t ObjectOffset = MFI.getObjectOffset(FI);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+
+  // For VLA-area objects, just emit an offset at the end of the stack frame.
+  // Whilst not quite correct, these objects do live at the end of the frame and
+  // so it is more useful for analysis for the offset to reflect this.
+  if (MFI.isVariableSizedObjectIndex(FI)) {
+    return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
+  }
 
   // This is correct in the absence of any SVE stack objects.
-  StackOffset SVEStackSize = getSVEStackSize(MF);
   if (!SVEStackSize)
     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
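To make the VLA case concrete (frame sizes hypothetical):

    // Suppose MFI.getStackSize() == 64 and the SVE area is 32 scalable bytes.
    // Then for a variable-sized object FI:
    //   getFrameIndexReferenceFromSP(MF, FI)
    //     == StackOffset::getFixed(-64) - StackOffset::getScalable(32)
    // which StackAccess::print below renders as "[SP-64-32 * vscale]".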
@@ -3575,13 +3587,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional<int> getLdStFrameID(const MachineInstr &MI,
-                                         const MachineFrameInfo &MFI) {
-  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
-    return std::nullopt;
-
-  MachineMemOperand *MMO = *MI.memoperands_begin();
+// Return the FrameID for a MMO.
+static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
+                                        const MachineFrameInfo &MFI) {
   auto *PSV =
       dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
   if (PSV)
@@ -3599,6 +3607,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
   return std::nullopt;
 }
 
+// Return the FrameID for a Load/Store instruction by looking at the first MMO.
+static std::optional<int> getLdStFrameID(const MachineInstr &MI,
+                                         const MachineFrameInfo &MFI) {
+  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+    return std::nullopt;
+
+  return getMMOFrameID(*MI.memoperands_begin(), MFI);
+}
+
 // Check if a Hazard slot is needed for the current function, and if so create
 // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
 // which can be used to determine if any hazard padding is needed.
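Splitting the MMO lookup out of getLdStFrameID lets the remarks code below inspect every memoperand of an instruction rather than only the first. A minimal sketch of the two entry points (MI and MFI stand for some MachineInstr and MachineFrameInfo already in scope):

    // Per-instruction query, first memoperand only (previous behaviour):
    std::optional<int> FI = getLdStFrameID(MI, MFI);

    // Per-memoperand walk, as emitRemarks below does:
    for (MachineMemOperand *MMO : MI.memoperands())
      if (std::optional<int> FrameIdx = getMMOFrameID(MMO, MFI)) {
        // ... record an access to frame index *FrameIdx ...
      }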
@@ -5076,3 +5093,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
     MI->eraseFromParent();
   }
 }
+
+struct StackAccess {
+  enum AccessType {
+    NotAccessed = 0, // Stack object not accessed by load/store instructions.
+    GPR = 1 << 0,    // A general purpose register.
+    PPR = 1 << 1,    // A predicate register.
+    FPR = 1 << 2,    // A floating point/Neon/SVE register.
+  };
+
+  int Idx;
+  StackOffset Offset;
+  int64_t Size;
+  unsigned AccessTypes;
+
+  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
+
+  bool operator<(const StackAccess &Rhs) const {
+    return std::make_tuple(start(), Idx) <
+           std::make_tuple(Rhs.start(), Rhs.Idx);
+  }
+
+  bool isCPU() const {
+    // Predicate register load and store instructions execute on the CPU.
+    return AccessTypes & (AccessType::GPR | AccessType::PPR);
+  }
+  bool isSME() const { return AccessTypes & AccessType::FPR; }
+  bool isMixed() const { return isCPU() && isSME(); }
+
+  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
+  int64_t end() const { return start() + Size; }
+
+  std::string getTypeString() const {
+    switch (AccessTypes) {
+    case AccessType::FPR:
+      return "FPR";
+    case AccessType::PPR:
+      return "PPR";
+    case AccessType::GPR:
+      return "GPR";
+    case AccessType::NotAccessed:
+      return "NA";
+    default:
+      return "Mixed";
+    }
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << getTypeString() << " stack object at [SP"
+       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
+    if (Offset.getScalable())
+      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
+         << " * vscale";
+    OS << "]";
+  }
+};
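Since AccessTypes is a bitmask, one slot can accumulate several access kinds; a sketch with hypothetical accesses:

    StackAccess SA;
    SA.AccessTypes |= StackAccess::GPR; // e.g. from a str x0, [sp, #16]
    SA.AccessTypes |= StackAccess::FPR; // e.g. from a str q0, [sp, #16]
    // Now isCPU() and isSME() both hold, isMixed() is true, and
    // getTypeString() falls through to "Mixed".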
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
+  SA.print(OS);
+  return OS;
+}
+
+void AArch64FrameLowering::emitRemarks(
+    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
+
+  SMEAttrs Attrs(MF.getFunction());
+  if (Attrs.hasNonStreamingInterfaceAndBody())
+    return;
+
+  const uint64_t HazardSize =
+      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
+
+  if (HazardSize == 0)
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Bail if function has no stack objects.
+  if (!MFI.hasStackObjects())
+    return;
+
+  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
+
+  size_t NumFPLdSt = 0;
+  size_t NumNonFPLdSt = 0;
+
+  // Collect stack accesses via Load/Store instructions.
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+        continue;
+      for (MachineMemOperand *MMO : MI.memoperands()) {
+        std::optional<int> FI = getMMOFrameID(MMO, MFI);
+        if (FI && !MFI.isDeadObjectIndex(*FI)) {
+          int FrameIdx = *FI;
+
+          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
+          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
+            StackAccesses[ArrIdx].Idx = FrameIdx;
+            StackAccesses[ArrIdx].Offset =
+                getFrameIndexReferenceFromSP(MF, FrameIdx);
+            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
+          }
+
+          unsigned RegTy = StackAccess::AccessType::GPR;
+          if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+            if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+              RegTy = StackAccess::PPR;
+            else
+              RegTy = StackAccess::FPR;
+          } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
+            RegTy = StackAccess::FPR;
+          }
+
+          StackAccesses[ArrIdx].AccessTypes |= RegTy;
+
+          if (RegTy == StackAccess::FPR)
+            ++NumFPLdSt;
+          else
+            ++NumNonFPLdSt;
+        }
+      }
+    }
+  }
+
+  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
+    return;
+
+  llvm::sort(StackAccesses);
+  StackAccesses.erase(llvm::remove_if(StackAccesses,
+                                      [](const StackAccess &S) {
+                                        return S.AccessTypes ==
+                                               StackAccess::NotAccessed;
+                                      }),
+                      StackAccesses.end());
+
+  SmallVector<const StackAccess *> MixedObjects;
+  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
+
+  if (StackAccesses.front().isMixed())
+    MixedObjects.push_back(&StackAccesses.front());
+
+  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
+       It != End; ++It) {
+    const auto &First = *It;
+    const auto &Second = *(It + 1);
+
+    if (Second.isMixed())
+      MixedObjects.push_back(&Second);
+
+    if ((First.isSME() && Second.isCPU()) ||
+        (First.isCPU() && Second.isSME())) {
+      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
+      if (Distance < HazardSize)
+        HazardPairs.emplace_back(&First, &Second);
+    }
+  }
+
+  auto EmitRemark = [&](llvm::StringRef Str) {
+    ORE->emit([&]() {
+      auto R = MachineOptimizationRemarkAnalysis(
+          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
+      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
+    });
+  };
+
+  for (const auto &P : HazardPairs)
+    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
+
+  for (const auto *Obj : MixedObjects)
+    EmitRemark(
+        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
+}
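Given the formatv strings above, the emitted remarks read along these lines (function name and offsets hypothetical):

    remark: stack hazard in 'foo': GPR stack object at [SP-8] is too close to FPR stack object at [SP-16]
    remark: stack hazard in 'foo': Mixed stack object at [SP-32] accessed by both GP and FP instructions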