Skip to content

Commit 6e00a34

Browse files
committed
[AArch64] Add support for -fzero-call-used-regs
Support the "-fzero-call-used-regs" option on AArch64. This involves much less specialized code than the X86 version. Most of the checks can be done with TableGen. Reviewed By: nickdesaulniers, MaskRay Differential Revision: https://reviews.llvm.org/D124836
1 parent a9a19f5 commit 6e00a34

File tree

10 files changed

+897
-3
lines changed

10 files changed

+897
-3
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2986,7 +2986,7 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>,
29862986

29872987
def fzero_call_used_regs_EQ
29882988
: Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>, Flags<[CC1Option]>,
2989-
HelpText<"Clear call-used registers upon function return.">,
2989+
HelpText<"Clear call-used registers upon function return (AArch64/x86 only)">,
29902990
Values<"skip,used-gpr-arg,used-gpr,used-arg,used,all-gpr-arg,all-gpr,all-arg,all">,
29912991
NormalizedValues<["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used",
29922992
"AllGPRArg", "AllGPR", "AllArg", "All"]>,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5993,7 +5993,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
59935993
// FIXME: There's no reason for this to be restricted to X86. The backend
59945994
// code needs to be changed to include the appropriate function calls
59955995
// automatically.
5996-
if (!Triple.isX86())
5996+
if (!Triple.isX86() && !Triple.isAArch64())
59975997
D.Diag(diag::err_drv_unsupported_opt_for_target)
59985998
<< A->getAsString(Args) << TripleStr;
59995999
}

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,138 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
675675
emitCalleeSavedRestores(MBB, MBBI, true);
676676
}
677677

678+
/// Map a call-used register onto the register that should be written in order
/// to clear it, or return the null register (0) when it must be left alone.
///
/// \param Reg    Any register; only the W/X aliases of r0-r18 and the
///               B/H/S/D/Q aliases of v0-v31 are recognised.
/// \param HasSVE When true, FP/SIMD aliases map to the matching Z register;
///               otherwise they map to the matching Q register.
/// \returns The X register for a recognised GPR alias, the Z or Q register for
///          a recognised FP/SIMD alias, and 0 for everything else.
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
// Both the 32-bit and the 64-bit view of GPR `n` resolve to X`n`.
#define ZERO_GPR_ALIASES(n)                                                    \
  case AArch64::W##n:                                                          \
  case AArch64::X##n:                                                          \
    return AArch64::X##n;

// Every scalar view of FP/SIMD register `n` resolves to Z`n` (SVE) or Q`n`.
#define ZERO_FPR_ALIASES(n)                                                    \
  case AArch64::B##n:                                                          \
  case AArch64::H##n:                                                          \
  case AArch64::S##n:                                                          \
  case AArch64::D##n:                                                          \
  case AArch64::Q##n:                                                          \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n;

  switch (Reg.id()) {
    // GPRs r0-r18.
    ZERO_GPR_ALIASES(0)
    ZERO_GPR_ALIASES(1)
    ZERO_GPR_ALIASES(2)
    ZERO_GPR_ALIASES(3)
    ZERO_GPR_ALIASES(4)
    ZERO_GPR_ALIASES(5)
    ZERO_GPR_ALIASES(6)
    ZERO_GPR_ALIASES(7)
    ZERO_GPR_ALIASES(8)
    ZERO_GPR_ALIASES(9)
    ZERO_GPR_ALIASES(10)
    ZERO_GPR_ALIASES(11)
    ZERO_GPR_ALIASES(12)
    ZERO_GPR_ALIASES(13)
    ZERO_GPR_ALIASES(14)
    ZERO_GPR_ALIASES(15)
    ZERO_GPR_ALIASES(16)
    ZERO_GPR_ALIASES(17)
    ZERO_GPR_ALIASES(18)

    // FP/SIMD registers v0-v31.
    ZERO_FPR_ALIASES(0)
    ZERO_FPR_ALIASES(1)
    ZERO_FPR_ALIASES(2)
    ZERO_FPR_ALIASES(3)
    ZERO_FPR_ALIASES(4)
    ZERO_FPR_ALIASES(5)
    ZERO_FPR_ALIASES(6)
    ZERO_FPR_ALIASES(7)
    ZERO_FPR_ALIASES(8)
    ZERO_FPR_ALIASES(9)
    ZERO_FPR_ALIASES(10)
    ZERO_FPR_ALIASES(11)
    ZERO_FPR_ALIASES(12)
    ZERO_FPR_ALIASES(13)
    ZERO_FPR_ALIASES(14)
    ZERO_FPR_ALIASES(15)
    ZERO_FPR_ALIASES(16)
    ZERO_FPR_ALIASES(17)
    ZERO_FPR_ALIASES(18)
    ZERO_FPR_ALIASES(19)
    ZERO_FPR_ALIASES(20)
    ZERO_FPR_ALIASES(21)
    ZERO_FPR_ALIASES(22)
    ZERO_FPR_ALIASES(23)
    ZERO_FPR_ALIASES(24)
    ZERO_FPR_ALIASES(25)
    ZERO_FPR_ALIASES(26)
    ZERO_FPR_ALIASES(27)
    ZERO_FPR_ALIASES(28)
    ZERO_FPR_ALIASES(29)
    ZERO_FPR_ALIASES(30)
    ZERO_FPR_ALIASES(31)

  default:
    break;
  }

#undef ZERO_FPR_ALIASES
#undef ZERO_GPR_ALIASES

  // Anything else is never cleared: the called routine is expected to
  // preserve r19-r28, and r29/r30 serve as frame pointer and link register.
  return 0;
}
754+
755+
/// Insert register-clearing instructions before the first terminator of
/// \p MBB for the registers selected in \p RegsToZero.
///
/// Registers are first folded onto the register returned by
/// getRegisterOrZero() so that each physical register is cleared once no
/// matter how many of its aliases were requested. Instructions are emitted in
/// three groups: general purpose registers, FP/vector registers, and (only
/// when the subtarget has SVE) the predicate registers P0-P15.
///
/// \param RegsToZero Bit vector indexed by register number; set bits mark the
///                   registers to clear.
/// \param MBB        Block that receives the new instructions.
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                                MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
  const AArch64InstrInfo &InstrInfo = *Subtarget.getInstrInfo();
  const bool HasSVE = Subtarget.hasSVE();

  // New instructions go in front of the first terminator. There is no real
  // source location for them, so borrow the terminator's when one exists.
  MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
  DebugLoc Loc = InsertPt != MBB.end() ? InsertPt->getDebugLoc() : DebugLoc();

  // True for any scalar FP/SIMD view (B/H/S/D/Q) of a vector register.
  const auto IsScalarFPR = [](MCRegister R) {
    return AArch64::FPR8RegClass.contains(R) ||
           AArch64::FPR16RegClass.contains(R) ||
           AArch64::FPR32RegClass.contains(R) ||
           AArch64::FPR64RegClass.contains(R) ||
           AArch64::FPR128RegClass.contains(R);
  };

  // Fold the requested registers onto the ones that will actually be written.
  BitVector GPRsToClear(RegInfo.getNumRegs());
  BitVector FPRsToClear(RegInfo.getNumRegs());
  for (MCRegister Requested : RegsToZero.set_bits()) {
    const bool IsGPR = RegInfo.isGeneralPurposeRegister(MF, Requested);
    if (!IsGPR && !IsScalarFPR(Requested))
      continue;

    // GPRs collapse to their 64-bit X register; FP/SIMD registers collapse to
    // the Q register, or to the Z register when SVE is available.
    BitVector &Bucket = IsGPR ? GPRsToClear : FPRsToClear;
    if (MCRegister Widest = getRegisterOrZero(Requested, HasSVE))
      Bucket.set(Widest);
  }

  // Clear the general purpose registers.
  for (MCRegister GPR : GPRsToClear.set_bits())
    BuildMI(MBB, InsertPt, Loc, InstrInfo.get(AArch64::MOVi64imm), GPR)
        .addImm(0);

  // Clear the FP/vector registers.
  // NOTE(review): MOVID is built here with the Q/Z register selected above;
  // confirm that this opcode accepts those register classes as destination.
  for (MCRegister FPR : FPRsToClear.set_bits())
    BuildMI(MBB, InsertPt, Loc, InstrInfo.get(AArch64::MOVID), FPR).addImm(0);

  if (!HasSVE)
    return;

  // Clear each SVE predicate register that was requested directly.
  const MCRegister PredicateRegs[] = {
      AArch64::P0,  AArch64::P1,  AArch64::P2,  AArch64::P3,
      AArch64::P4,  AArch64::P5,  AArch64::P6,  AArch64::P7,
      AArch64::P8,  AArch64::P9,  AArch64::P10, AArch64::P11,
      AArch64::P12, AArch64::P13, AArch64::P14, AArch64::P15};
  for (MCRegister Pred : PredicateRegs) {
    if (RegsToZero[Pred])
      BuildMI(MBB, InsertPt, Loc, InstrInfo.get(AArch64::PFALSE), Pred);
  }
}
809+
678810
// Find a scratch register that we can use at the start of the prologue to
679811
// re-align the stack pointer. We avoid using callee-save registers since they
680812
// may appear to be free when this is called from canUseAsPrologue (during

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,10 @@ class AArch64FrameLowering : public TargetFrameLowering {
153153
MachineBasicBlock::iterator MBBI) const;
154154
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
155155
MachineBasicBlock::iterator MBBI) const;
156+
157+
/// Emit target zero call-used regs.
158+
void emitZeroCallUsedRegs(BitVector RegsToZero,
159+
MachineBasicBlock &MBB) const override;
156160
};
157161

158162
} // End llvm namespace

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
using namespace llvm;
3535

36+
#define GET_CC_REGISTER_LISTS
37+
#include "AArch64GenCallingConv.inc"
3638
#define GET_REGINFO_TARGET_DESC
3739
#include "AArch64GenRegisterInfo.inc"
3840

@@ -418,6 +420,68 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
418420
return false;
419421
}
420422

423+
bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
424+
MCRegister Reg) const {
425+
CallingConv::ID CC = MF.getFunction().getCallingConv();
426+
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
427+
bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv());
428+
429+
auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
430+
return llvm::any_of(RegList,
431+
[Reg](const MCRegister R) { return R == Reg; });
432+
};
433+
434+
switch (CC) {
435+
default:
436+
report_fatal_error("Unsupported calling convention.");
437+
case CallingConv::WebKit_JS:
438+
return HasReg(CC_AArch64_WebKit_JS_ArgRegs, Reg);
439+
case CallingConv::GHC:
440+
return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
441+
case CallingConv::C:
442+
case CallingConv::Fast:
443+
case CallingConv::PreserveMost:
444+
case CallingConv::CXX_FAST_TLS:
445+
case CallingConv::Swift:
446+
case CallingConv::SwiftTail:
447+
case CallingConv::Tail:
448+
if (STI.isTargetWindows() && IsVarArg)
449+
return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
450+
if (!STI.isTargetDarwin()) {
451+
switch (CC) {
452+
default:
453+
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
454+
case CallingConv::Swift:
455+
case CallingConv::SwiftTail:
456+
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) ||
457+
HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg);
458+
}
459+
}
460+
if (!IsVarArg) {
461+
switch (CC) {
462+
default:
463+
return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg);
464+
case CallingConv::Swift:
465+
case CallingConv::SwiftTail:
466+
return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) ||
467+
HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg);
468+
}
469+
}
470+
if (STI.isTargetILP32())
471+
return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg);
472+
return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg);
473+
case CallingConv::Win64:
474+
if (IsVarArg)
475+
HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
476+
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
477+
case CallingConv::CFGuard_Check:
478+
return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
479+
case CallingConv::AArch64_VectorCall:
480+
case CallingConv::AArch64_SVE_VectorCall:
481+
return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
482+
}
483+
}
484+
421485
Register
422486
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
423487
const AArch64FrameLowering *TFI = getFrameLowering(MF);

llvm/lib/Target/AArch64/AArch64RegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
120120
bool hasBasePointer(const MachineFunction &MF) const;
121121
unsigned getBaseRegister() const;
122122

123+
bool isArgumentRegister(const MachineFunction &MF,
124+
MCRegister Reg) const override;
125+
123126
// Debug information queries.
124127
Register getFrameRegister(const MachineFunction &MF) const override;
125128

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,3 +1385,12 @@ def svcr_op : Operand<i32> {
13851385
return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
13861386
}];
13871387
}
1388+
1389+
//===----------------------------------------------------------------------===//
1390+
// Register categories.
1391+
//
1392+
1393+
def GeneralPurposeRegisters : RegisterCategory<[GPR64, GPR32]>;
1394+
1395+
def FIXED_REGS : RegisterClass<"AArch64", [i64], 64, (add FP, SP, VG, FFR)>;
1396+
def FixedRegisters : RegisterCategory<[CCR, FIXED_REGS]>;

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
656656
[&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
657657
return true;
658658

659-
return false;
659+
return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
660660
}
661661

662662
bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,

0 commit comments

Comments
 (0)