Skip to content

[AArch64] Add getStreamingHazardSize() to AArch64Subtarget #113679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,15 @@
// and the SME unit try to access the same area of memory, including if the
// access is to an area of the stack. To try to alleviate this we attempt to
// introduce extra padding into the stack frame between FP and GPR accesses,
// controlled by the StackHazardSize option. Without changing the layout of the
// stack frame in the diagram above, a stack object of size StackHazardSize is
// added between GPR and FPR CSRs. Another is added to the stack objects
// section, and stack objects are sorted so that FPR > Hazard padding slot >
// GPRs (where possible). Unfortunately some things are not handled well (VLA
// area, arguments on the stack, object with both GPR and FPR accesses), but if
// those are controlled by the user then the entire stack frame becomes GPR at
// the start/end with FPR in the middle, surrounded by Hazard padding.
// controlled by the aarch64-stack-hazard-size option. Without changing the
// layout of the stack frame in the diagram above, a stack object of size
// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added
// to the stack objects section, and stack objects are sorted so that FPR >
// Hazard padding slot > GPRs (where possible). Unfortunately some things are
// not handled well (VLA area, arguments on the stack, objects with both GPR and
// FPR accesses), but if those are controlled by the user then the entire stack
// frame becomes GPR at the start/end with FPR in the middle, surrounded by
// Hazard padding.
//
// An example of the prologue:
//
Expand Down Expand Up @@ -273,9 +274,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
cl::desc("Emit homogeneous prologue and epilogue for the size "
"optimization (default = off)"));

// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
cl::init(0), cl::Hidden);
// Stack hazard size for analysis remarks. The value of
// aarch64-stack-hazard-size (via the subtarget) takes precedence.
static cl::opt<unsigned>
StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
Expand Down Expand Up @@ -1614,6 +1612,10 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}

static unsigned getStackHazardSize(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
}

// Convenience function to determine whether I is an SVE callee save.
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
Expand Down Expand Up @@ -2985,6 +2987,7 @@ static void computeCalleeSaveRegisterPairs(
bool IsWindows = isTargetWindows(MF);
bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned StackHazardSize = getStackHazardSize(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
unsigned Count = CSI.size();
Expand Down Expand Up @@ -3612,6 +3615,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
// which can be used to determine if any hazard padding is needed.
void AArch64FrameLowering::determineStackHazardSlot(
MachineFunction &MF, BitVector &SavedRegs) const {
unsigned StackHazardSize = getStackHazardSize(MF);
if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
return;
Expand Down Expand Up @@ -3802,7 +3806,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// StackHazardSize if so.
determineStackHazardSlot(MF, SavedRegs);
if (AFI->hasStackHazardSlotIndex())
CSStackSize += StackHazardSize;
CSStackSize += getStackHazardSize(MF);

// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
Expand Down Expand Up @@ -3917,6 +3921,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
unsigned &MaxCSFrameIndex) const {
bool NeedsWinCFI = needsWinCFI(MF);
unsigned StackHazardSize = getStackHazardSize(MF);
// To match the canonical windows frame layout, reverse the list of
// callee saved registers to get them laid out by PrologEpilogInserter
// in the right order. (PrologEpilogInserter allocates stack objects top
Expand Down Expand Up @@ -5151,6 +5156,7 @@ void AArch64FrameLowering::emitRemarks(
if (Attrs.hasNonStreamingInterfaceAndBody())
return;

unsigned StackHazardSize = getStackHazardSize(MF);
const uint64_t HazardSize =
(StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;

Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
"aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on AArch64"));

// Size (in bytes) of the padding inserted to mitigate hazards between
// streaming-mode (SME) and CPU accesses to the same stack region.
// 0 (the default) disables hazard padding.
static cl::opt<unsigned> AArch64StreamingHazardSize(
"aarch64-streaming-hazard-size",
cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
cl::init(0), cl::Hidden);

// Backwards-compatible alias: the option was previously registered as
// -aarch64-stack-hazard-size, so keep that spelling working.
static cl::alias AArch64StreamingStackHazardSize(
"aarch64-stack-hazard-size",
cl::desc("alias for -aarch64-streaming-hazard-size"),
cl::aliasopt(AArch64StreamingHazardSize));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
return OverrideVectorInsertExtractBaseCost;
Expand Down Expand Up @@ -333,6 +343,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian), IsStreaming(IsStreaming),
IsStreamingCompatible(IsStreamingCompatible),
StreamingHazardSize(AArch64StreamingHazardSize),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {

bool IsStreaming;
bool IsStreamingCompatible;
unsigned StreamingHazardSize;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
Expand Down Expand Up @@ -172,6 +173,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
/// Returns true if the function has a streaming-compatible body.
bool isStreamingCompatible() const { return IsStreamingCompatible; }

/// Returns the size of memory region that if accessed by both the CPU and
/// the SME unit could result in a hazard. 0 = disabled.
/// NOTE(review): value presumably comes from the -aarch64-streaming-hazard-size
/// option via the subtarget constructor — confirm against AArch64Subtarget.cpp.
unsigned getStreamingHazardSize() const { return StreamingHazardSize; }

/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
Expand Down
Loading