Skip to content

Commit 64da981

Browse files
committed
[AArch64][SME] Add support for Copy/Spill/Fill of strided ZPR2/ZPR4 registers.
This patch contains a few changes:
* It changes the alignment of the strided/contiguous ZPR2/ZPR4 registers to 128 bits. This is important, because when we spill these registers to the stack, the address doesn't need to be 256/512-bit aligned because we split the single-store/reload pseudo instruction up into multiple STR_ZXI/LDR_ZXI (single vector store/load) instructions, which only require a 128-bit alignment. Additionally, an alignment larger than the stack-alignment is not supported for scalable vectors.
* It adds support for these register classes in storeRegToStackSlot, loadRegFromStackSlot and copyPhysReg.
* It adds tests only for the strided forms. There is no need to also test the contiguous forms, because registers such as z2_z3 or z4_z5_z6_z7 are also part of the regular ZPR2 and ZPR4 register classes, respectively, which are already covered and tested.

Reviewed By: dtemirbulatov
Differential Revision: https://reviews.llvm.org/D159189
1 parent 0f61612 commit 64da981

File tree

4 files changed

+150
-12
lines changed

4 files changed

+150
-12
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3669,8 +3669,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
36693669
}
36703670

36713671
// Copy a Z register pair by copying the individual sub-registers.
3672-
if (AArch64::ZPR2RegClass.contains(DestReg) &&
3673-
AArch64::ZPR2RegClass.contains(SrcReg)) {
3672+
if ((AArch64::ZPR2RegClass.contains(DestReg) ||
3673+
AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
3674+
(AArch64::ZPR2RegClass.contains(SrcReg) ||
3675+
AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
36743676
assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
36753677
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
36763678
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -3690,8 +3692,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
36903692
}
36913693

36923694
// Copy a Z register quad by copying the individual sub-registers.
3693-
if (AArch64::ZPR4RegClass.contains(DestReg) &&
3694-
AArch64::ZPR4RegClass.contains(SrcReg)) {
3695+
if ((AArch64::ZPR4RegClass.contains(DestReg) ||
3696+
AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
3697+
(AArch64::ZPR4RegClass.contains(SrcReg) ||
3698+
AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
36953699
assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
36963700
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
36973701
AArch64::zsub2, AArch64::zsub3};
@@ -4022,7 +4026,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
40224026
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
40234027
Opc = AArch64::ST1Twov2d;
40244028
Offset = false;
4025-
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
4029+
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4030+
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
40264031
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
40274032
Opc = AArch64::STR_ZZXI;
40284033
StackID = TargetStackID::ScalableVector;
@@ -4044,7 +4049,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
40444049
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
40454050
Opc = AArch64::ST1Fourv2d;
40464051
Offset = false;
4047-
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
4052+
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4053+
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
40484054
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
40494055
Opc = AArch64::STR_ZZZZXI;
40504056
StackID = TargetStackID::ScalableVector;
@@ -4178,7 +4184,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
41784184
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
41794185
Opc = AArch64::LD1Twov2d;
41804186
Offset = false;
4181-
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
4187+
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4188+
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
41824189
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
41834190
Opc = AArch64::LDR_ZZXI;
41844191
StackID = TargetStackID::ScalableVector;
@@ -4200,7 +4207,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
42004207
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
42014208
Opc = AArch64::LD1Fourv2d;
42024209
Offset = false;
4203-
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
4210+
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4211+
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
42044212
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
42054213
Opc = AArch64::LDR_ZZZZXI;
42064214
StackID = TargetStackID::ScalableVector;

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,16 +1331,16 @@ def ZStridedQuadsHi : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [
13311331
(trunc (rotl ZPR, 24), 4), (trunc (rotl ZPR, 28), 4)
13321332
]>;
13331333

1334-
def ZPR2Strided : RegisterClass<"AArch64", [untyped], 256,
1334+
def ZPR2Strided : RegisterClass<"AArch64", [untyped], 128,
13351335
(add ZStridedPairsLo, ZStridedPairsHi)> {
13361336
let Size = 256;
13371337
}
1338-
def ZPR4Strided : RegisterClass<"AArch64", [untyped], 512,
1338+
def ZPR4Strided : RegisterClass<"AArch64", [untyped], 128,
13391339
(add ZStridedQuadsLo, ZStridedQuadsHi)> {
13401340
let Size = 512;
13411341
}
13421342

1343-
def ZPR2StridedOrContiguous : RegisterClass<"AArch64", [untyped], 256,
1343+
def ZPR2StridedOrContiguous : RegisterClass<"AArch64", [untyped], 128,
13441344
(add ZStridedPairsLo, ZStridedPairsHi,
13451345
(decimate ZSeqPairs, 2))> {
13461346
let Size = 256;
@@ -1387,7 +1387,7 @@ let EncoderMethod = "EncodeZPR2StridedRegisterClass",
13871387
: RegisterOperand<ZPR2StridedOrContiguous, "printTypedVectorList<0,'d'>">;
13881388
}
13891389

1390-
def ZPR4StridedOrContiguous : RegisterClass<"AArch64", [untyped], 512,
1390+
def ZPR4StridedOrContiguous : RegisterClass<"AArch64", [untyped], 128,
13911391
(add ZStridedQuadsLo, ZStridedQuadsHi,
13921392
(decimate ZSeqQuads, 4))> {
13931393
let Size = 512;

llvm/test/CodeGen/AArch64/spillfill-sve.mir

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
1010
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
1111
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
12+
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
1213
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
1314
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
15+
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable }
1416

1517
attributes #0 = { nounwind "target-features"="+sve" }
1618

@@ -131,6 +133,43 @@ body: |
131133
RET_ReallyLR
132134
...
133135
---
136+
name: spills_fills_stack_id_zpr2strided
137+
tracksRegLiveness: true
138+
registers:
139+
- { id: 0, class: zpr2strided }
140+
stack:
141+
liveins:
142+
- { reg: '$z0_z8', virtual-reg: '%0' }
143+
body: |
144+
bb.0.entry:
145+
liveins: $z0_z8
146+
147+
; CHECK-LABEL: name: spills_fills_stack_id_zpr2strided
148+
; CHECK: stack:
149+
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
150+
; CHECK-NEXT: stack-id: scalable-vector
151+
152+
; EXPAND-LABEL: name: spills_fills_stack_id_zpr2strided
153+
; EXPAND: STR_ZXI $z0, $sp, 0
154+
; EXPAND: STR_ZXI $z8, $sp, 1
155+
; EXPAND: $z0 = LDR_ZXI $sp, 0
156+
; EXPAND: $z8 = LDR_ZXI $sp, 1
157+
158+
%0:zpr2strided = COPY $z0_z8
159+
160+
$z0_z1_z2_z3 = IMPLICIT_DEF
161+
$z4_z5_z6_z7 = IMPLICIT_DEF
162+
$z8_z9_z10_z11 = IMPLICIT_DEF
163+
$z12_z13_z14_z15 = IMPLICIT_DEF
164+
$z16_z17_z18_z19 = IMPLICIT_DEF
165+
$z20_z21_z22_z23 = IMPLICIT_DEF
166+
$z24_z25_z26_z27 = IMPLICIT_DEF
167+
$z28_z29_z30_z31 = IMPLICIT_DEF
168+
169+
$z0_z8 = COPY %0
170+
RET_ReallyLR
171+
...
172+
---
134173
name: spills_fills_stack_id_zpr3
135174
tracksRegLiveness: true
136175
registers:
@@ -210,3 +249,44 @@ body: |
210249
$z0_z1_z2_z3 = COPY %0
211250
RET_ReallyLR
212251
...
252+
---
253+
name: spills_fills_stack_id_zpr4strided
254+
tracksRegLiveness: true
255+
registers:
256+
- { id: 0, class: zpr4strided }
257+
stack:
258+
liveins:
259+
- { reg: '$z0_z4_z8_z12', virtual-reg: '%0' }
260+
body: |
261+
bb.0.entry:
262+
liveins: $z0_z4_z8_z12
263+
264+
; CHECK-LABEL: name: spills_fills_stack_id_zpr4strided
265+
; CHECK: stack:
266+
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
267+
; CHECK-NEXT: stack-id: scalable-vector
268+
269+
; EXPAND-LABEL: name: spills_fills_stack_id_zpr4strided
270+
; EXPAND: STR_ZXI $z0, $sp, 0
271+
; EXPAND: STR_ZXI $z4, $sp, 1
272+
; EXPAND: STR_ZXI $z8, $sp, 2
273+
; EXPAND: STR_ZXI $z12, $sp, 3
274+
; EXPAND: $z0 = LDR_ZXI $sp, 0
275+
; EXPAND: $z4 = LDR_ZXI $sp, 1
276+
; EXPAND: $z8 = LDR_ZXI $sp, 2
277+
; EXPAND: $z12 = LDR_ZXI $sp, 3
278+
279+
%0:zpr4strided = COPY $z0_z4_z8_z12
280+
281+
$z0_z1_z2_z3 = IMPLICIT_DEF
282+
$z4_z5_z6_z7 = IMPLICIT_DEF
283+
$z8_z9_z10_z11 = IMPLICIT_DEF
284+
$z12_z13_z14_z15 = IMPLICIT_DEF
285+
$z16_z17_z18_z19 = IMPLICIT_DEF
286+
$z20_z21_z22_z23 = IMPLICIT_DEF
287+
$z24_z25_z26_z27 = IMPLICIT_DEF
288+
$z28_z29_z30_z31 = IMPLICIT_DEF
289+
290+
$z0_z4_z8_z12 = COPY %0
291+
RET_ReallyLR
292+
...

llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,29 @@ body: |
2323
$z0_z1 = COPY $z1_z2
2424
RET_ReallyLR
2525
26+
...
27+
---
28+
name: copy_zpr2strided
29+
alignment: 4
30+
tracksRegLiveness: true
31+
liveins:
32+
- { reg: '$z0_z1' }
33+
frameInfo:
34+
maxCallFrameSize: 0
35+
body: |
36+
bb.0:
37+
liveins: $z0_z1
38+
; CHECK-LABEL: name: copy_zpr2strided
39+
; CHECK: liveins: $z0_z1
40+
; CHECK: $z8 = ORR_ZZZ $z1, $z1
41+
; CHECK: $z0 = ORR_ZZZ $z0, $z0
42+
; CHECK: $z1 = ORR_ZZZ $z8, $z8
43+
; CHECK: $z0 = ORR_ZZZ $z0, $z0
44+
; CHECK: RET_ReallyLR
45+
$z0_z8 = COPY $z0_z1
46+
$z0_z1 = COPY $z0_z8
47+
RET_ReallyLR
48+
2649
...
2750
---
2851
name: copy_zpr3
@@ -76,3 +99,30 @@ body: |
7699
RET_ReallyLR
77100
78101
...
102+
---
103+
name: copy_zpr4strided
104+
alignment: 4
105+
tracksRegLiveness: true
106+
liveins:
107+
- { reg: '$z0_z1_z2_z3' }
108+
frameInfo:
109+
maxCallFrameSize: 0
110+
body: |
111+
bb.0:
112+
liveins: $z0_z1_z2_z3
113+
; CHECK-LABEL: name: copy_zpr4strided
114+
; CHECK: liveins: $z0_z1_z2_z3
115+
; CHECK: $z12 = ORR_ZZZ $z3, $z3
116+
; CHECK: $z8 = ORR_ZZZ $z2, $z2
117+
; CHECK: $z4 = ORR_ZZZ $z1, $z1
118+
; CHECK: $z0 = ORR_ZZZ $z0, $z0
119+
; CHECK: $z3 = ORR_ZZZ $z12, $z12
120+
; CHECK: $z2 = ORR_ZZZ $z8, $z8
121+
; CHECK: $z1 = ORR_ZZZ $z4, $z4
122+
; CHECK: $z0 = ORR_ZZZ $z0, $z0
123+
; CHECK: RET_ReallyLR
124+
$z0_z4_z8_z12 = COPY $z0_z1_z2_z3
125+
$z0_z1_z2_z3 = COPY $z0_z4_z8_z12
126+
RET_ReallyLR
127+
128+
...

0 commit comments

Comments
 (0)