-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][SME] Reduce ptrue count when filling p-regs from z-regs #125523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
@llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) ChangesCurrently, each expansion of Patch is 22.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125523.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 81523adeefceed..d3abd79b85a75f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4175,7 +4175,10 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
/// registers in \p UsedRegs.
static Register tryScavengeRegister(LiveRegUnits const &UsedRegs,
- BitVector const &ScavengeableRegs) {
+ BitVector const &ScavengeableRegs,
+ Register PreferredReg) {
+ if (PreferredReg != AArch64::NoRegister && UsedRegs.available(PreferredReg))
+ return PreferredReg;
for (auto Reg : ScavengeableRegs.set_bits()) {
if (UsedRegs.available(Reg))
return Reg;
@@ -4212,11 +4215,12 @@ struct ScopedScavengeOrSpill {
Register SpillCandidate, const TargetRegisterClass &RC,
LiveRegUnits const &UsedRegs,
BitVector const &AllocatableRegs,
- std::optional<int> *MaybeSpillFI)
+ std::optional<int> *MaybeSpillFI,
+ Register PreferredReg = AArch64::NoRegister)
: MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
*MF.getSubtarget().getInstrInfo())),
TRI(*MF.getSubtarget().getRegisterInfo()) {
- FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
+ FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs, PreferredReg);
if (FreeReg != AArch64::NoRegister)
return;
assert(MaybeSpillFI && "Expected emergency spill slot FI information "
@@ -4331,12 +4335,10 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
/// spilling if necessary). If the status flags are in use at the point of
/// expansion they are preserved (by moving them to/from a GPR). This may cause
/// an additional spill if no GPR is free at the expansion point.
-static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
- MachineInstr &MI,
- const TargetRegisterInfo &TRI,
- LiveRegUnits const &UsedRegs,
- ScavengeableRegs const &SR,
- EmergencyStackSlots &SpillSlots) {
+static bool expandFillPPRFromZPRSlotPseudo(
+ MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs, ScavengeableRegs const &SR,
+ MachineInstr *&LastPTrue, EmergencyStackSlots &SpillSlots) {
MachineFunction &MF = *MBB.getParent();
auto *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -4347,7 +4349,9 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
ScopedScavengeOrSpill PredReg(
MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
- isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI,
+ /*PreferredReg=*/
+ LastPTrue ? LastPTrue->getOperand(0).getReg() : AArch64::NoRegister);
// Elide NZCV spills if we know it is not used.
bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
@@ -4371,9 +4375,17 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
.addImm(AArch64SysReg::NZCV)
.addReg(AArch64::NZCV, RegState::Implicit)
.getInstr());
- MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
- .addReg(*PredReg, RegState::Define)
- .addImm(31));
+
+ // Reuse previous ptrue if we know it has not been clobbered.
+ if (LastPTrue) {
+ assert(*PredReg == LastPTrue->getOperand(0).getReg());
+ LastPTrue->moveBefore(&MI);
+ } else {
+ LastPTrue = BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
+ .addReg(*PredReg, RegState::Define)
+ .addImm(31);
+ }
+ MachineInstrs.push_back(LastPTrue);
MachineInstrs.push_back(
BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
.addReg(MI.getOperand(0).getReg(), RegState::Define)
@@ -4402,19 +4414,24 @@ static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
LiveRegUnits UsedRegs(TRI);
UsedRegs.addLiveOuts(MBB);
bool HasPPRSpills = false;
+ MachineInstr *LastPTrue = nullptr;
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
UsedRegs.stepBackward(MI);
switch (MI.getOpcode()) {
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+ if (LastPTrue &&
+ MI.definesRegister(LastPTrue->getOperand(0).getReg(), &TRI))
+ LastPTrue = nullptr;
HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR,
- SpillSlots);
+ LastPTrue, SpillSlots);
MI.eraseFromParent();
break;
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots);
MI.eraseFromParent();
- break;
+ [[fallthrough]];
default:
+ LastPTrue = nullptr;
break;
}
}
diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
index b58f91ac68a932..bff0cacfd5190c 100644
--- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
@@ -116,46 +116,34 @@ body: |
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0)
- ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
;
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
@@ -299,37 +287,26 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
@@ -509,37 +486,26 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
@@ -754,37 +720,26 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.20)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.17)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.16)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.15)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.14)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22)
@@ -953,37 +908,26 @@ body: |
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $...
[truncated]
|
…vm#125523) Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
…vm#125523) Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
…vm#125523) Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
Currently, each expansion of
FILL_PPR_FROM_ZPR_SLOT_PSEUDO
creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.