Skip to content

Commit a2057f2

Browse files
author
git apple-llvm automerger
committed
Merge commit '8fbe69a407b2' from llvm.org/release/19.x into stable/20240723
2 parents b8abb14 + 8fbe69a commit a2057f2

File tree

6 files changed

+364
-11
lines changed

6 files changed

+364
-11
lines changed

llvm/include/llvm/CodeGen/TargetFrameLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/ADT/BitVector.h"
1717
#include "llvm/CodeGen/MachineBasicBlock.h"
18+
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
1819
#include "llvm/Support/TypeSize.h"
1920
#include <vector>
2021

@@ -473,6 +474,11 @@ class TargetFrameLowering {
473474
/// Return the frame base information to be encoded in the DWARF subprogram
474475
/// debug info.
475476
virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
477+
478+
/// This method is called at the end of prolog/epilog code insertion, so
479+
/// targets can emit remarks based on the final frame layout.
480+
virtual void emitRemarks(const MachineFunction &MF,
481+
MachineOptimizationRemarkEmitter *ORE) const {};
476482
};
477483

478484
} // End llvm namespace

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
341341
<< ore::NV("Function", MF.getFunction().getName()) << "'";
342342
});
343343

344+
// Emit any remarks implemented for the target, based on final frame layout.
345+
TFI->emitRemarks(MF, ORE);
346+
344347
delete RS;
345348
SaveBlocks.clear();
346349
RestoreBlocks.clear();

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 196 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@
240240
#include "llvm/Support/CommandLine.h"
241241
#include "llvm/Support/Debug.h"
242242
#include "llvm/Support/ErrorHandling.h"
243+
#include "llvm/Support/FormatVariadic.h"
243244
#include "llvm/Support/MathExtras.h"
244245
#include "llvm/Support/raw_ostream.h"
245246
#include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
275276
// Stack hazard padding size. 0 = disabled.
276277
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
277278
cl::init(0), cl::Hidden);
279+
// Stack hazard size used only for the analysis remarks. A non-zero
// StackHazardSize takes precedence; if both are 0 no remarks are emitted.
280+
static cl::opt<unsigned>
281+
StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
282+
cl::Hidden);
278283
// Whether to insert padding into non-streaming functions (for testing).
279284
static cl::opt<bool>
280285
StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
@@ -2662,9 +2667,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
26622667
const auto &MFI = MF.getFrameInfo();
26632668

26642669
int64_t ObjectOffset = MFI.getObjectOffset(FI);
2670+
StackOffset SVEStackSize = getSVEStackSize(MF);
2671+
2672+
// For VLA-area objects, just emit an offset at the end of the stack frame.
2673+
// Whilst not quite correct, these objects do live at the end of the frame and
2674+
// so it is more useful for analysis for the offset to reflect this.
2675+
if (MFI.isVariableSizedObjectIndex(FI)) {
2676+
return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
2677+
}
26652678

26662679
// This is correct in the absence of any SVE stack objects.
2667-
StackOffset SVEStackSize = getSVEStackSize(MF);
26682680
if (!SVEStackSize)
26692681
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
26702682

@@ -3575,13 +3587,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
35753587
return true;
35763588
}
35773589

3578-
// Return the FrameID for a Load/Store instruction by looking at the MMO.
3579-
static std::optional<int> getLdStFrameID(const MachineInstr &MI,
3580-
const MachineFrameInfo &MFI) {
3581-
if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
3582-
return std::nullopt;
3583-
3584-
MachineMemOperand *MMO = *MI.memoperands_begin();
3590+
// Return the FrameID for a MMO.
3591+
static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
3592+
const MachineFrameInfo &MFI) {
35853593
auto *PSV =
35863594
dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
35873595
if (PSV)
@@ -3599,6 +3607,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
35993607
return std::nullopt;
36003608
}
36013609

3610+
// Return the FrameID for a Load/Store instruction by looking at the first MMO.
3611+
static std::optional<int> getLdStFrameID(const MachineInstr &MI,
3612+
const MachineFrameInfo &MFI) {
3613+
if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
3614+
return std::nullopt;
3615+
3616+
return getMMOFrameID(*MI.memoperands_begin(), MFI);
3617+
}
3618+
36023619
// Check if a Hazard slot is needed for the current function, and if so create
36033620
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
36043621
// which can be used to determine if any hazard padding is needed.
@@ -5076,3 +5093,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
50765093
MI->eraseFromParent();
50775094
}
50785095
}
5096+
5097+
struct StackAccess {
5098+
enum AccessType {
5099+
NotAccessed = 0, // Stack object not accessed by load/store instructions.
5100+
GPR = 1 << 0, // A general purpose register.
5101+
PPR = 1 << 1, // A predicate register.
5102+
FPR = 1 << 2, // A floating point/Neon/SVE register.
5103+
};
5104+
5105+
int Idx;
5106+
StackOffset Offset;
5107+
int64_t Size;
5108+
unsigned AccessTypes;
5109+
5110+
StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
5111+
5112+
bool operator<(const StackAccess &Rhs) const {
5113+
return std::make_tuple(start(), Idx) <
5114+
std::make_tuple(Rhs.start(), Rhs.Idx);
5115+
}
5116+
5117+
bool isCPU() const {
5118+
// Predicate register load and store instructions execute on the CPU.
5119+
return AccessTypes & (AccessType::GPR | AccessType::PPR);
5120+
}
5121+
bool isSME() const { return AccessTypes & AccessType::FPR; }
5122+
bool isMixed() const { return isCPU() && isSME(); }
5123+
5124+
int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
5125+
int64_t end() const { return start() + Size; }
5126+
5127+
std::string getTypeString() const {
5128+
switch (AccessTypes) {
5129+
case AccessType::FPR:
5130+
return "FPR";
5131+
case AccessType::PPR:
5132+
return "PPR";
5133+
case AccessType::GPR:
5134+
return "GPR";
5135+
case AccessType::NotAccessed:
5136+
return "NA";
5137+
default:
5138+
return "Mixed";
5139+
}
5140+
}
5141+
5142+
void print(raw_ostream &OS) const {
5143+
OS << getTypeString() << " stack object at [SP"
5144+
<< (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
5145+
if (Offset.getScalable())
5146+
OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
5147+
<< " * vscale";
5148+
OS << "]";
5149+
}
5150+
};
5151+
5152+
// Stream insertion for StackAccess: writes SA to OS through StackAccess::print
// and returns OS so that further insertions can be chained.
static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
  SA.print(OS);
  return OS;
}
5156+
5157+
void AArch64FrameLowering::emitRemarks(
5158+
const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
5159+
5160+
SMEAttrs Attrs(MF.getFunction());
5161+
if (Attrs.hasNonStreamingInterfaceAndBody())
5162+
return;
5163+
5164+
const uint64_t HazardSize =
5165+
(StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
5166+
5167+
if (HazardSize == 0)
5168+
return;
5169+
5170+
const MachineFrameInfo &MFI = MF.getFrameInfo();
5171+
// Bail if function has no stack objects.
5172+
if (!MFI.hasStackObjects())
5173+
return;
5174+
5175+
std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
5176+
5177+
size_t NumFPLdSt = 0;
5178+
size_t NumNonFPLdSt = 0;
5179+
5180+
// Collect stack accesses via Load/Store instructions.
5181+
for (const MachineBasicBlock &MBB : MF) {
5182+
for (const MachineInstr &MI : MBB) {
5183+
if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
5184+
continue;
5185+
for (MachineMemOperand *MMO : MI.memoperands()) {
5186+
std::optional<int> FI = getMMOFrameID(MMO, MFI);
5187+
if (FI && !MFI.isDeadObjectIndex(*FI)) {
5188+
int FrameIdx = *FI;
5189+
5190+
size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
5191+
if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
5192+
StackAccesses[ArrIdx].Idx = FrameIdx;
5193+
StackAccesses[ArrIdx].Offset =
5194+
getFrameIndexReferenceFromSP(MF, FrameIdx);
5195+
StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
5196+
}
5197+
5198+
unsigned RegTy = StackAccess::AccessType::GPR;
5199+
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
5200+
if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
5201+
RegTy = StackAccess::PPR;
5202+
else
5203+
RegTy = StackAccess::FPR;
5204+
} else if (AArch64InstrInfo::isFpOrNEON(MI)) {
5205+
RegTy = StackAccess::FPR;
5206+
}
5207+
5208+
StackAccesses[ArrIdx].AccessTypes |= RegTy;
5209+
5210+
if (RegTy == StackAccess::FPR)
5211+
++NumFPLdSt;
5212+
else
5213+
++NumNonFPLdSt;
5214+
}
5215+
}
5216+
}
5217+
}
5218+
5219+
if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
5220+
return;
5221+
5222+
llvm::sort(StackAccesses);
5223+
StackAccesses.erase(llvm::remove_if(StackAccesses,
5224+
[](const StackAccess &S) {
5225+
return S.AccessTypes ==
5226+
StackAccess::NotAccessed;
5227+
}),
5228+
StackAccesses.end());
5229+
5230+
SmallVector<const StackAccess *> MixedObjects;
5231+
SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
5232+
5233+
if (StackAccesses.front().isMixed())
5234+
MixedObjects.push_back(&StackAccesses.front());
5235+
5236+
for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
5237+
It != End; ++It) {
5238+
const auto &First = *It;
5239+
const auto &Second = *(It + 1);
5240+
5241+
if (Second.isMixed())
5242+
MixedObjects.push_back(&Second);
5243+
5244+
if ((First.isSME() && Second.isCPU()) ||
5245+
(First.isCPU() && Second.isSME())) {
5246+
uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
5247+
if (Distance < HazardSize)
5248+
HazardPairs.emplace_back(&First, &Second);
5249+
}
5250+
}
5251+
5252+
auto EmitRemark = [&](llvm::StringRef Str) {
5253+
ORE->emit([&]() {
5254+
auto R = MachineOptimizationRemarkAnalysis(
5255+
"sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
5256+
return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
5257+
});
5258+
};
5259+
5260+
for (const auto &P : HazardPairs)
5261+
EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
5262+
5263+
for (const auto *Obj : MixedObjects)
5264+
EmitRemark(
5265+
formatv("{0} accessed by both GP and FP instructions", *Obj).str());
5266+
}

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
1414
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
1515

16-
#include "llvm/Support/TypeSize.h"
16+
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
1717
#include "llvm/CodeGen/TargetFrameLowering.h"
18+
#include "llvm/Support/TypeSize.h"
1819

1920
namespace llvm {
2021

@@ -187,6 +188,9 @@ class AArch64FrameLowering : public TargetFrameLowering {
187188
inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
188189
int64_t NegProbeSize,
189190
Register TargetReg) const;
191+
192+
void emitRemarks(const MachineFunction &MF,
193+
MachineOptimizationRemarkEmitter *ORE) const override;
190194
};
191195

192196
} // End llvm namespace

0 commit comments

Comments
 (0)