Skip to content

Commit c8bdbb9

Browse files
Address comments about PN8 register use
1 parent ecb0f57 commit c8bdbb9

File tree

3 files changed

+66
-36
lines changed

3 files changed

+66
-36
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2784,28 +2784,6 @@ struct RegPairInfo {
27842784

27852785
} // end anonymous namespace
27862786

2787-
static unsigned getPredicateAsCounterReg(unsigned Reg) {
2788-
switch (Reg) {
2789-
case AArch64::P8:
2790-
return AArch64::PN8;
2791-
case AArch64::P9:
2792-
return AArch64::PN9;
2793-
case AArch64::P10:
2794-
return AArch64::PN10;
2795-
case AArch64::P11:
2796-
return AArch64::PN11;
2797-
case AArch64::P12:
2798-
return AArch64::PN12;
2799-
case AArch64::P13:
2800-
return AArch64::PN13;
2801-
case AArch64::P14:
2802-
return AArch64::PN14;
2803-
case AArch64::P15:
2804-
return AArch64::PN15;
2805-
}
2806-
return 0;
2807-
}
2808-
28092787
static void computeCalleeSaveRegisterPairs(
28102788
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
28112789
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@@ -2816,7 +2794,6 @@ static void computeCalleeSaveRegisterPairs(
28162794

28172795
bool IsWindows = isTargetWindows(MF);
28182796
bool NeedsWinCFI = needsWinCFI(MF);
2819-
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
28202797
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
28212798
MachineFrameInfo &MFI = MF.getFrameInfo();
28222799
CallingConv::ID CC = MF.getFunction().getCallingConv();
@@ -2887,7 +2864,7 @@ static void computeCalleeSaveRegisterPairs(
28872864
case RegPairInfo::PPR:
28882865
break;
28892866
case RegPairInfo::ZPR:
2890-
if (Subtarget.hasSVE2p1() || Subtarget.hasSME2())
2867+
if (AFI->getPredicateRegForFillSpill() != 0)
28912868
if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
28922869
RPI.Reg2 = NextReg;
28932870
break;
@@ -3107,8 +3084,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
31073084
MBB.addLiveIn(Reg1);
31083085
if (!MRI.isReserved(Reg2))
31093086
MBB.addLiveIn(Reg2);
3110-
unsigned PairRegs = AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0);
3111-
MIB.addReg(PairRegs);
3087+
MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
31123088
MIB.addMemOperand(MF.getMachineMemOperand(
31133089
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
31143090
MachineMemOperand::MOStore, Size, Alignment));
@@ -3258,8 +3234,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
32583234
.setMIFlags(MachineInstr::FrameDestroy);
32593235
}
32603236
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
3261-
unsigned PairRegs = AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0);
3262-
MIB.addReg(PairRegs, getDefRegState(true));
3237+
MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
3238+
getDefRegState(true));
32633239
MIB.addMemOperand(MF.getMachineMemOperand(
32643240
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
32653241
MachineMemOperand::MOLoad, Size, Alignment));
@@ -3381,9 +3357,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
33813357
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
33823358
if (Subtarget.hasSVE2p1() || Subtarget.hasSME2()) {
33833359
if (AArch64::PPRRegClass.contains(Reg) &&
3384-
(Reg > AArch64::P8 || Reg < AArch64::P15) && SavedRegs.test(Reg) &&
3360+
(Reg >= AArch64::P8 && Reg <= AArch64::P15) && SavedRegs.test(Reg) &&
33853361
AFI->getPredicateRegForFillSpill() == 0)
3386-
AFI->setPredicateRegForFillSpill(getPredicateAsCounterReg(Reg));
3362+
AFI->setPredicateRegForFillSpill((Reg - AArch64::P0) + AArch64::PN0);
33873363

33883364
// Check if there is a pair of ZRegs, so it can select P8 to create PTRUE,
33893365
// in case there is no PReg being saved (above)
@@ -3395,10 +3371,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
33953371

33963372
// Make sure there is a PReg saved to be used in save and restore when there
33973373
// is a ZReg pair.
3398-
if (AFI->getPredicateRegForFillSpill() == 0 && HasPairZReg) {
3399-
SavedRegs.set(AArch64::P8);
3400-
AFI->setPredicateRegForFillSpill(AArch64::PN8);
3401-
}
3374+
if ((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
3375+
(MF.getFunction().getCallingConv() ==
3376+
CallingConv::AArch64_SVE_VectorCall ||
3377+
MF.getFunction().getCallingConv() ==
3378+
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 ||
3379+
MF.getFunction().getCallingConv() ==
3380+
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2))
3381+
if (AFI->getPredicateRegForFillSpill() == 0 && HasPairZReg) {
3382+
assert(!RegInfo->isReservedReg(MF, AArch64::P8) && "P8 is reserved");
3383+
SavedRegs.set(AArch64::P8);
3384+
AFI->setPredicateRegForFillSpill(AArch64::PN8);
3385+
}
34023386

34033387
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
34043388
!Subtarget.isTargetWindows()) {

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
227227
void setPredicateRegForFillSpill(unsigned Reg) {
228228
PredicateRegForFillSpill = Reg;
229229
}
230-
unsigned getPredicateRegForFillSpill() { return PredicateRegForFillSpill; }
230+
unsigned getPredicateRegForFillSpill() const {
231+
return PredicateRegForFillSpill;
232+
}
231233

232234
Register getPStateSMReg() const { return PStateSMReg; };
233235
void setPStateSMReg(Register Reg) { PStateSMReg = Reg; };

llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ define void @test_clobbers_2_z_regs_(<vscale x 16 x i8> %v) {
334334

335335
;; Do NOT group Z10
336336
;; DO group Z8 and Z9 and save P8
337-
define void @test_clobbers_z_p_regs(<vscale x 16 x i8> %v) {
337+
define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs(<vscale x 16 x i8> %v) {
338338
; NOPAIR-LABEL: test_clobbers_z_p_regs:
339339
; NOPAIR: // %bb.0:
340340
; NOPAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -446,3 +446,47 @@ define void @test_clobbers_z_p_regs2(<vscale x 16 x i8> %v) {
446446
ret void
447447
}
448448

449+
450+
;; DO NOT group Z8 and Z9 and
451+
;; DO NOT save P8
452+
;; This function does not use one of the allowed calling conventions
453+
;; NOPAIR and PAIR should have the same assembly
454+
define void @test_clobbers_z_p_regs_negative(<vscale x 16 x i8> %v) {
455+
; NOPAIR-LABEL: test_clobbers_z_p_regs_negative:
456+
; NOPAIR: // %bb.0:
457+
; NOPAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
458+
; NOPAIR-NEXT: addvl sp, sp, #-2
459+
; NOPAIR-NEXT: str z9, [sp] // 16-byte Folded Spill
460+
; NOPAIR-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
461+
; NOPAIR-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
462+
; NOPAIR-NEXT: .cfi_offset w29, -16
463+
; NOPAIR-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
464+
; NOPAIR-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
465+
; NOPAIR-NEXT: //APP
466+
; NOPAIR-NEXT: //NO_APP
467+
; NOPAIR-NEXT: ldr z9, [sp] // 16-byte Folded Reload
468+
; NOPAIR-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
469+
; NOPAIR-NEXT: addvl sp, sp, #2
470+
; NOPAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
471+
; NOPAIR-NEXT: ret
472+
;
473+
; PAIR-LABEL: test_clobbers_z_p_regs_negative:
474+
; PAIR: // %bb.0:
475+
; PAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
476+
; PAIR-NEXT: addvl sp, sp, #-2
477+
; PAIR-NEXT: str z9, [sp] // 16-byte Folded Spill
478+
; PAIR-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
479+
; PAIR-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
480+
; PAIR-NEXT: .cfi_offset w29, -16
481+
; PAIR-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
482+
; PAIR-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
483+
; PAIR-NEXT: //APP
484+
; PAIR-NEXT: //NO_APP
485+
; PAIR-NEXT: ldr z9, [sp] // 16-byte Folded Reload
486+
; PAIR-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
487+
; PAIR-NEXT: addvl sp, sp, #2
488+
; PAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
489+
; PAIR-NEXT: ret
490+
call void asm sideeffect "", "~{z8},~{z9}"()
491+
ret void
492+
}

0 commit comments

Comments
 (0)