Skip to content

Commit f0df48a

Browse files
committed
[AArch64] Remove copy in SVE/SME predicate spill
7dc20ab introduced an extra COPY when spilling a PNR register, which can't be elided because the input (PNR predicate) and output (PPR predicate) register classes differ. This patch emits a new ConvertPNRtoPPR pseudo instruction instead. When the pseudo is expanded, it is simply erased if the PNR is a subregister of the PPR, since the conversion is then implicit; otherwise it is lowered to an ORR.
1 parent 72a60e7 commit f0df48a

File tree

5 files changed

+54
-4
lines changed

5 files changed

+54
-4
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,23 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
11121112
default:
11131113
break;
11141114

1115+
case AArch64::ConvertPNRtoPPR: {
1116+
auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1117+
MachineOperand DstMO = MI.getOperand(0);
1118+
MachineOperand SrcMO = MI.getOperand(1);
1119+
unsigned SrcReg = SrcMO.getReg();
1120+
if (!TRI->isSubRegister(DstMO.getReg(), SrcReg)) {
1121+
unsigned SrcSuperReg = TRI->getMatchingSuperReg(SrcReg, AArch64::psub,
1122+
&AArch64::PPRRegClass);
1123+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_PPzPP))
1124+
.add(DstMO)
1125+
.addReg(SrcSuperReg)
1126+
.addReg(SrcSuperReg)
1127+
.addReg(SrcSuperReg);
1128+
}
1129+
MI.eraseFromParent();
1130+
return true;
1131+
}
11151132
case AArch64::BSPv8i8:
11161133
case AArch64::BSPv16i8: {
11171134
Register DstReg = MI.getOperand(0).getReg();

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4805,6 +4805,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
48054805
bool Offset = true;
48064806
MCRegister PNRReg = MCRegister::NoRegister;
48074807
unsigned StackID = TargetStackID::Default;
4808+
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
48084809
switch (TRI->getSpillSize(*RC)) {
48094810
case 1:
48104811
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -4823,8 +4824,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
48234824
"Unexpected register store without SVE2p1 or SME2");
48244825
if (SrcReg.isVirtual()) {
48254826
auto NewSrcReg =
4826-
MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
4827-
BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
4827+
MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);
4828+
BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ConvertPNRtoPPR),
4829+
NewSrcReg)
48284830
.addReg(SrcReg);
48294831
SrcReg = NewSrcReg;
48304832
} else

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2313,6 +2313,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
23132313
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
23142314
} // End HasBF16, HasSVEorSME
23152315

2316+
// Pseudo instruction that converts a PNR register ($Pm) into a PPR register
// ($Pd). It is emitted when spilling a virtual PNR register and is expanded in
// AArch64ExpandPseudoInsts.cpp: the pseudo is erased when the source PNR is a
// subregister of the destination PPR, and lowered to ORR_PPzPP otherwise.
def ConvertPNRtoPPR : Pseudo<(outs PPRAny:$Pd), (ins PNRAny:$Pm), []>, Sched<[]>;
2317+
23162318
let Predicates = [HasSVEorSME] in {
23172319
// InstAliases
23182320
def : InstAlias<"mov $Zd, $Zn",
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# RUN: llc -mtriple=aarch64-linux-gnu -start-after=virtregrewriter -stop-after=aarch64-expand-pseudo -mattr=+sme2 -verify-machineinstrs -o - %s \
2+
# RUN: | FileCheck %s
3+
4+
--- |
5+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6+
target triple = "aarch64-unknown-linux-gnu"
7+
8+
define void @test_convert_different_reg() #0 { entry: unreachable }
9+
10+
attributes #0 = { "target-features"="+sme2" }
11+
12+
---
13+
name: test_convert_different_reg
14+
tracksRegLiveness: true
15+
body: |
16+
bb.0.entry:
17+
; CHECK-LABEL: name: test_convert_different_reg
18+
; CHECK: renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
19+
; CHECK-NEXT: renamable $p9 = ORR_PPzPP $p8, $p8, $p8
20+
; CHECK-NEXT: STR_PXI killed renamable $p9, $sp, 7
21+
; CHECK-NEXT: renamable $p0 = LDR_PXI $sp, 7
22+
early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16
23+
frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
24+
frame-setup CFI_INSTRUCTION offset $w29, -16
25+
renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
26+
renamable $p9 = ConvertPNRtoPPR killed renamable $pn8
27+
STR_PXI killed renamable $p9, $sp, 7
28+
renamable $p0 = LDR_PXI $sp, 7
29+
early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16
30+
RET undef $lr

llvm/test/CodeGen/AArch64/spillfill-sve.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,7 @@ body: |
213213
214214
; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr
215215
; EXPAND: renamable $pn8 = WHILEGE_CXX_B
216-
; EXPAND: $p0 = ORR_PPzPP $p8, $p8, killed $p8
217-
; EXPAND: STR_PXI killed renamable $p0, $sp, 7
216+
; EXPAND: STR_PXI killed renamable $p8, $sp, 7
218217
;
219218
; EXPAND: renamable $p0 = LDR_PXI $sp, 7
220219
; EXPAND: $p8 = ORR_PPzPP $p0, $p0, killed $p0, implicit-def $pn8

0 commit comments

Comments
 (0)