Skip to content

Commit 4b69689

Browse files
[AArch64] Implement spill/fill of predicate pair register classes (#76068)
We are getting an ICE (internal compiler error) with, e.g.:
```
#include <arm_sve.h>

void g();

svboolx2_t f0(int64_t i, int64_t n) {
  svboolx2_t r = svwhilelt_b16_x2(i, n);
  g();
  return r;
}
```
1 parent d82eccc commit 4b69689

File tree

5 files changed

+191
-3
lines changed

5 files changed

+191
-3
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,15 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
747747
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
748748
MachineBasicBlock::iterator MBBI,
749749
unsigned Opc, unsigned N) {
750+
assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
751+
Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
752+
"Unexpected opcode");
753+
unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
754+
? RegState::Define
755+
: 0;
756+
unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
757+
? AArch64::zsub0
758+
: AArch64::psub0;
750759
const TargetRegisterInfo *TRI =
751760
MBB.getParent()->getSubtarget().getRegisterInfo();
752761
MachineInstr &MI = *MBBI;
@@ -756,9 +765,8 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
756765
assert(ImmOffset >= -256 && ImmOffset < 256 &&
757766
"Immediate spill offset out of range");
758767
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
759-
.addReg(
760-
TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
761-
Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
768+
.addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
769+
RState)
762770
.addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
763771
.addImm(ImmOffset);
764772
}
@@ -1492,12 +1500,16 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
14921500
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
14931501
case AArch64::STR_ZZXI:
14941502
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1503+
case AArch64::STR_PPXI:
1504+
return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
14951505
case AArch64::LDR_ZZZZXI:
14961506
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
14971507
case AArch64::LDR_ZZZXI:
14981508
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
14991509
case AArch64::LDR_ZZXI:
15001510
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1511+
case AArch64::LDR_PPXI:
1512+
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
15011513
case AArch64::BLR_RVMARKER:
15021514
return expandCALL_RVMARKER(MBB, MBBI);
15031515
case AArch64::BLR_BTI:

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3771,6 +3771,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
37713771
MinOffset = -256;
37723772
MaxOffset = 255;
37733773
break;
3774+
case AArch64::LDR_PPXI:
3775+
case AArch64::STR_PPXI:
3776+
Scale = TypeSize::getScalable(2);
3777+
Width = TypeSize::getScalable(2 * 2);
3778+
MinOffset = -256;
3779+
MaxOffset = 254;
3780+
break;
37743781
case AArch64::LDR_ZXI:
37753782
case AArch64::STR_ZXI:
37763783
Scale = TypeSize::getScalable(16);
@@ -4814,6 +4821,10 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
48144821
assert(SrcReg != AArch64::WSP);
48154822
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
48164823
Opc = AArch64::STRSui;
4824+
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4825+
Opc = AArch64::STR_PPXI;
4826+
StackID = TargetStackID::ScalableVector;
4827+
}
48174828
break;
48184829
case 8:
48194830
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
@@ -4990,6 +5001,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
49905001
assert(DestReg != AArch64::WSP);
49915002
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
49925003
Opc = AArch64::LDRSui;
5004+
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5005+
Opc = AArch64::LDR_PPXI;
5006+
StackID = TargetStackID::ScalableVector;
5007+
}
49935008
break;
49945009
case 8:
49955010
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2398,11 +2398,13 @@ let Predicates = [HasSVEorSME] in {
23982398
def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
23992399
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
24002400
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2401+
def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
24012402
}
24022403
let mayStore = 1, hasSideEffects = 0 in {
24032404
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
24042405
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
24052406
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
2407+
def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
24062408
}
24072409

24082410
let AddedComplexity = 1 in {

llvm/test/CodeGen/AArch64/spillfill-sve.mir

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
target triple = "aarch64--linux-gnu"
88

99
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
10+
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr2() #0 { entry: unreachable }
11+
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr2mul2() #0 { entry: unreachable }
1012
define aarch64_sve_vector_pcs void @spills_fills_stack_id_pnr() #1 { entry: unreachable }
1113
define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_pnr() #1 { entry: unreachable }
1214
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
@@ -64,6 +66,96 @@ body: |
6466
RET_ReallyLR
6567
...
6668
---
69+
name: spills_fills_stack_id_ppr2
70+
tracksRegLiveness: true
71+
registers:
72+
- { id: 0, class: ppr2 }
73+
stack:
74+
liveins:
75+
- { reg: '$p0_p1', virtual-reg: '%0' }
76+
body: |
77+
bb.0.entry:
78+
liveins: $p0_p1
79+
80+
; CHECK-LABEL: name: spills_fills_stack_id_ppr2
81+
; CHECK: stack:
82+
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 2
83+
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
84+
85+
; EXPAND-LABEL: name: spills_fills_stack_id_ppr2
86+
; EXPAND: STR_PXI $p0, $sp, 6
87+
; EXPAND: STR_PXI $p1, $sp, 7
88+
; EXPAND: $p0 = LDR_PXI $sp, 6
89+
; EXPAND: $p1 = LDR_PXI $sp, 7
90+
91+
%0:ppr2 = COPY $p0_p1
92+
93+
$p0 = IMPLICIT_DEF
94+
$p1 = IMPLICIT_DEF
95+
$p2 = IMPLICIT_DEF
96+
$p3 = IMPLICIT_DEF
97+
$p4 = IMPLICIT_DEF
98+
$p5 = IMPLICIT_DEF
99+
$p6 = IMPLICIT_DEF
100+
$p7 = IMPLICIT_DEF
101+
$p8 = IMPLICIT_DEF
102+
$p9 = IMPLICIT_DEF
103+
$p10 = IMPLICIT_DEF
104+
$p11 = IMPLICIT_DEF
105+
$p12 = IMPLICIT_DEF
106+
$p13 = IMPLICIT_DEF
107+
$p14 = IMPLICIT_DEF
108+
$p15 = IMPLICIT_DEF
109+
110+
$p0_p1 = COPY %0
111+
RET_ReallyLR
112+
...
113+
---
114+
name: spills_fills_stack_id_ppr2mul2
115+
tracksRegLiveness: true
116+
registers:
117+
- { id: 0, class: ppr2mul2 }
118+
stack:
119+
liveins:
120+
- { reg: '$p0_p1', virtual-reg: '%0' }
121+
body: |
122+
bb.0.entry:
123+
liveins: $p0_p1
124+
125+
; CHECK-LABEL: name: spills_fills_stack_id_ppr2mul2
126+
; CHECK: stack:
127+
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 2
128+
; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ''
129+
130+
; EXPAND-LABEL: name: spills_fills_stack_id_ppr2mul2
131+
; EXPAND: STR_PXI $p0, $sp, 6
132+
; EXPAND: STR_PXI $p1, $sp, 7
133+
; EXPAND: $p0 = LDR_PXI $sp, 6
134+
; EXPAND: $p1 = LDR_PXI $sp, 7
135+
136+
%0:ppr2mul2 = COPY $p0_p1
137+
138+
$p0 = IMPLICIT_DEF
139+
$p1 = IMPLICIT_DEF
140+
$p2 = IMPLICIT_DEF
141+
$p3 = IMPLICIT_DEF
142+
$p4 = IMPLICIT_DEF
143+
$p5 = IMPLICIT_DEF
144+
$p6 = IMPLICIT_DEF
145+
$p7 = IMPLICIT_DEF
146+
$p8 = IMPLICIT_DEF
147+
$p9 = IMPLICIT_DEF
148+
$p10 = IMPLICIT_DEF
149+
$p11 = IMPLICIT_DEF
150+
$p12 = IMPLICIT_DEF
151+
$p13 = IMPLICIT_DEF
152+
$p14 = IMPLICIT_DEF
153+
$p15 = IMPLICIT_DEF
154+
155+
$p0_p1 = COPY %0
156+
RET_ReallyLR
157+
...
158+
---
67159
name: spills_fills_stack_id_pnr
68160
tracksRegLiveness: true
69161
registers:
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; RUN: llc < %s | FileCheck %s
2+
3+
; Derived from
4+
; #include <arm_sve.h>
5+
6+
; void g();
7+
8+
; svboolx2_t f0(int64_t i, int64_t n) {
9+
; svboolx2_t r = svwhilelt_b16_x2(i, n);
10+
; g();
11+
; return r;
12+
; }
13+
14+
; svboolx2_t f1(svcount_t n) {
15+
; svboolx2_t r = svpext_lane_c8_x2(n, 1);
16+
; g();
17+
; return r;
18+
; }
19+
;
20+
; Check that predicate register pairs are spilled/filled without an ICE in the backend.
21+
22+
target triple = "aarch64-unknown-linux"
23+
24+
define <vscale x 32 x i1> @f0(i64 %i, i64 %n) #0 {
25+
entry:
26+
%0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %i, i64 %n)
27+
%1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
28+
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
29+
%3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
30+
%4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
31+
%5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
32+
%6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
33+
tail call void @g()
34+
ret <vscale x 32 x i1> %6
35+
}
36+
; CHECK-LABEL: f0:
37+
; CHECK: whilelt { p0.h, p1.h }
38+
; CHECK: str p0, [sp, #6, mul vl]
39+
; CHECK: str p1, [sp, #7, mul vl]
40+
; CHECK: ldr p0, [sp, #6, mul vl]
41+
; CHECK: ldr p1, [sp, #7, mul vl]
42+
43+
define <vscale x 32 x i1> @f1(target("aarch64.svcount") %n) #0 {
44+
entry:
45+
%0 = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") %n, i32 1)
46+
%1 = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } %0, 0
47+
%2 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %1, i64 0)
48+
%3 = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } %0, 1
49+
%4 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %2, <vscale x 16 x i1> %3, i64 16)
50+
tail call void @g()
51+
ret <vscale x 32 x i1> %4
52+
}
53+
54+
; CHECK-LABEL: f1:
55+
; CHECK: pext { p0.b, p1.b }
56+
; CHECK: str p0, [sp, #6, mul vl]
57+
; CHECK: str p1, [sp, #7, mul vl]
58+
; CHECK: ldr p0, [sp, #6, mul vl]
59+
; CHECK: ldr p1, [sp, #7, mul vl]
60+
61+
declare void @g(...)
62+
declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64, i64)
63+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
64+
declare <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1>, <vscale x 16 x i1>, i64 immarg)
65+
declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount"), i32 immarg) #1
66+
67+
attributes #0 = { nounwind "target-features"="+sve,+sve2,+sve2p1" }

0 commit comments

Comments
 (0)