Skip to content

Commit 0e21f12

Browse files
authored
[AArch64][GISel] Translate legal SVE formal arguments and select COPY for SVE (#95236)
This patch adds support for legal SVE formal arguments in IRTranslator, and support for COPY with SVE. SVE arguments are allowed only if the hidden option `-aarch64-enable-gisel-sve` is enabled. Illegal types and predicates like `nxv8i1` are not supported yet.
1 parent 8b0d38b commit 0e21f12

10 files changed

+1103
-46
lines changed

llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,9 @@ bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
137137
unsigned Offset) {
138138
unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min;
139139
const ValueMapping &Map =
140-
AArch64GenRegisterBankInfo::getValueMapping((PartialMappingIdx)FirstInBank, Size)[Offset];
140+
AArch64GenRegisterBankInfo::getValueMapping(
141+
(PartialMappingIdx)FirstInBank,
142+
TypeSize::getFixed(Size))[Offset];
141143
return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
142144
Map.NumBreakDowns == 1;
143145
}
@@ -167,7 +169,7 @@ bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
167169
}
168170

169171
unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
170-
unsigned Size) {
172+
TypeSize Size) {
171173
if (RBIdx == PMI_FirstGPR) {
172174
if (Size <= 32)
173175
return 0;
@@ -178,17 +180,20 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
178180
return -1;
179181
}
180182
if (RBIdx == PMI_FirstFPR) {
181-
if (Size <= 16)
183+
const unsigned MinSize = Size.getKnownMinValue();
184+
assert(!Size.isScalable() || MinSize >= 128
185+
&& "Scalable vector types should have size of at least 128 bits");
186+
if (MinSize <= 16)
182187
return 0;
183-
if (Size <= 32)
188+
if (MinSize <= 32)
184189
return 1;
185-
if (Size <= 64)
190+
if (MinSize <= 64)
186191
return 2;
187-
if (Size <= 128)
192+
if (MinSize <= 128)
188193
return 3;
189-
if (Size <= 256)
194+
if (MinSize <= 256)
190195
return 4;
191-
if (Size <= 512)
196+
if (MinSize <= 512)
192197
return 5;
193198
return -1;
194199
}
@@ -197,7 +202,7 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
197202

198203
const RegisterBankInfo::ValueMapping *
199204
AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
200-
unsigned Size) {
205+
const TypeSize Size) {
201206
assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
202207
unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
203208
if (BaseIdxOffset == -1u)
@@ -221,7 +226,8 @@ const AArch64GenRegisterBankInfo::PartialMappingIdx
221226

222227
const RegisterBankInfo::ValueMapping *
223228
AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
224-
unsigned SrcBankID, unsigned Size) {
229+
unsigned SrcBankID,
230+
const TypeSize Size) {
225231
assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
226232
assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
227233
PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
149149
// scalable vector types for all instruction, even if SVE is not yet supported
150150
// with some instructions.
151151
// See [AArch64TargetLowering::fallbackToDAGISel] for implementation details.
152-
static cl::opt<bool> EnableSVEGISel(
152+
cl::opt<bool> EnableSVEGISel(
153153
"aarch64-enable-gisel-sve", cl::Hidden,
154154
cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
155155
cl::init(false));

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
using namespace llvm;
5454
using namespace AArch64GISelUtils;
5555

56+
extern cl::opt<bool> EnableSVEGISel;
57+
5658
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
5759
: CallLowering(&TLI) {}
5860

@@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
525527

526528
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
527529
auto &F = MF.getFunction();
528-
if (F.getReturnType()->isScalableTy() ||
529-
llvm::any_of(F.args(), [](const Argument &A) {
530-
return A.getType()->isScalableTy();
531-
}))
530+
if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
531+
llvm::any_of(F.args(), [](const Argument &A) {
532+
return A.getType()->isScalableTy();
533+
})))
532534
return true;
533535
const auto &ST = MF.getSubtarget<AArch64Subtarget>();
534536
if (!ST.hasNEON() || !ST.hasFPARMv8()) {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
597597
/// Given a register bank, and size in bits, return the smallest register class
598598
/// that can represent that combination.
599599
static const TargetRegisterClass *
600-
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
600+
getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
601601
bool GetAllRegSet = false) {
602+
if (SizeInBits.isScalable()) {
603+
assert(RB.getID() == AArch64::FPRRegBankID &&
604+
"Expected FPR regbank for scalable type size");
605+
return &AArch64::ZPRRegClass;
606+
}
607+
602608
unsigned RegBankID = RB.getID();
603609

604610
if (RegBankID == AArch64::GPRRegBankID) {
@@ -939,8 +945,9 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
939945
Register SrcReg = I.getOperand(1).getReg();
940946
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
941947
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
942-
unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
943-
unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
948+
949+
TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
950+
TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
944951

945952
// Special casing for cross-bank copies of s1s. We can technically represent
946953
// a 1-bit value with any size of register. The minimum size for a GPR is 32
@@ -951,7 +958,7 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
951958
// register bank. Or make a new helper that carries along some constraint
952959
// information.
953960
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
954-
SrcSize = DstSize = 32;
961+
SrcSize = DstSize = TypeSize::getFixed(32);
955962

956963
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
957964
getMinClassForRegBank(DstRegBank, DstSize, true)};
@@ -1016,8 +1023,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
10161023
return false;
10171024
}
10181025

1019-
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
1020-
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
1026+
const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1027+
const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
10211028
unsigned SubReg;
10221029

10231030
// If the source bank doesn't support a subregister copy small enough,

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -163,17 +163,18 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(
163163
unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
164164
(void)PartialMapDstIdx; \
165165
(void)PartialMapSrcIdx; \
166-
const ValueMapping *Map = getCopyMapping( \
167-
AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \
166+
const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID, \
167+
AArch64::RBNameSrc##RegBankID, \
168+
TypeSize::getFixed(Size)); \
168169
(void)Map; \
169170
assert(Map[0].BreakDown == \
170171
&AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
171-
Map[0].NumBreakDowns == 1 && #RBNameDst #Size \
172-
" Dst is incorrectly initialized"); \
172+
Map[0].NumBreakDowns == 1 && \
173+
#RBNameDst #Size " Dst is incorrectly initialized"); \
173174
assert(Map[1].BreakDown == \
174175
&AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
175-
Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \
176-
" Src is incorrectly initialized"); \
176+
Map[1].NumBreakDowns == 1 && \
177+
#RBNameSrc #Size " Src is incorrectly initialized"); \
177178
\
178179
} while (false)
179180

@@ -218,7 +219,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(
218219

219220
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
220221
const RegisterBank &B,
221-
TypeSize Size) const {
222+
const TypeSize Size) const {
222223
// What do we do with different size?
223224
// copy are same size.
224225
// Will introduce other hooks for different size:
@@ -258,6 +259,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
258259
case AArch64::QQQRegClassID:
259260
case AArch64::QQQQRegClassID:
260261
case AArch64::ZPRRegClassID:
262+
case AArch64::ZPR_3bRegClassID:
261263
return getRegBank(AArch64::FPRRegBankID);
262264
case AArch64::GPR32commonRegClassID:
263265
case AArch64::GPR32RegClassID:
@@ -304,7 +306,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
304306
case TargetOpcode::G_OR: {
305307
// 32 and 64-bit or can be mapped on either FPR or
306308
// GPR for the same cost.
307-
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
309+
TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
308310
if (Size != 32 && Size != 64)
309311
break;
310312

@@ -325,7 +327,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
325327
return AltMappings;
326328
}
327329
case TargetOpcode::G_BITCAST: {
328-
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
330+
TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
329331
if (Size != 32 && Size != 64)
330332
break;
331333

@@ -365,7 +367,7 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
365367
return AltMappings;
366368
}
367369
case TargetOpcode::G_LOAD: {
368-
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
370+
TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
369371
if (Size != 64)
370372
break;
371373

@@ -377,15 +379,17 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
377379
InstructionMappings AltMappings;
378380
const InstructionMapping &GPRMapping = getInstructionMapping(
379381
/*ID*/ 1, /*Cost*/ 1,
380-
getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
381-
// Addresses are GPR 64-bit.
382-
getValueMapping(PMI_FirstGPR, 64)}),
382+
getOperandsMapping(
383+
{getValueMapping(PMI_FirstGPR, Size),
384+
// Addresses are GPR 64-bit.
385+
getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
383386
/*NumOperands*/ 2);
384387
const InstructionMapping &FPRMapping = getInstructionMapping(
385388
/*ID*/ 2, /*Cost*/ 1,
386-
getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
387-
// Addresses are GPR 64-bit.
388-
getValueMapping(PMI_FirstGPR, 64)}),
389+
getOperandsMapping(
390+
{getValueMapping(PMI_FirstFPR, Size),
391+
// Addresses are GPR 64-bit.
392+
getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
389393
/*NumOperands*/ 2);
390394

391395
AltMappings.push_back(&GPRMapping);
@@ -437,7 +441,7 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
437441
"This code is for instructions with 3 or less operands");
438442

439443
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
440-
unsigned Size = Ty.getSizeInBits();
444+
TypeSize Size = Ty.getSizeInBits();
441445
bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
442446

443447
PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
@@ -714,9 +718,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
714718
// If both RB are null that means both registers are generic.
715719
// We shouldn't be here.
716720
assert(DstRB && SrcRB && "Both RegBank were nullptr");
717-
unsigned Size = getSizeInBits(DstReg, MRI, TRI);
721+
TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
718722
return getInstructionMapping(
719-
DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
723+
DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
720724
getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
721725
// We only care about the mapping of the destination.
722726
/*NumOperands*/ 1);
@@ -727,15 +731,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
727731
case TargetOpcode::G_BITCAST: {
728732
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
729733
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
730-
unsigned Size = DstTy.getSizeInBits();
734+
TypeSize Size = DstTy.getSizeInBits();
731735
bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
732736
bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
733737
const RegisterBank &DstRB =
734738
DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
735739
const RegisterBank &SrcRB =
736740
SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
737741
return getInstructionMapping(
738-
DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)),
742+
DefaultMappingID, copyCost(DstRB, SrcRB, Size),
739743
getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
740744
// We only care about the mapping of the destination for COPY.
741745
/*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
@@ -1126,7 +1130,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
11261130
LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
11271131
if (!Ty.isValid())
11281132
continue;
1129-
auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
1133+
auto Mapping =
1134+
getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
11301135
if (!Mapping->isValid())
11311136
return getInvalidInstructionMapping();
11321137

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class AArch64GenRegisterBankInfo : public RegisterBankInfo {
7070
PartialMappingIdx LastAlias,
7171
ArrayRef<PartialMappingIdx> Order);
7272

73-
static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, unsigned Size);
73+
static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, TypeSize Size);
7474

7575
/// Get the pointer to the ValueMapping representing the RegisterBank
7676
/// at \p RBIdx with a size of \p Size.
@@ -80,13 +80,13 @@ class AArch64GenRegisterBankInfo : public RegisterBankInfo {
8080
///
8181
/// \pre \p RBIdx != PartialMappingIdx::None
8282
static const RegisterBankInfo::ValueMapping *
83-
getValueMapping(PartialMappingIdx RBIdx, unsigned Size);
83+
getValueMapping(PartialMappingIdx RBIdx, TypeSize Size);
8484

8585
/// Get the pointer to the ValueMapping of the operands of a copy
8686
/// instruction from the \p SrcBankID register bank to the \p DstBankID
8787
/// register bank with a size of \p Size.
8888
static const RegisterBankInfo::ValueMapping *
89-
getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size);
89+
getCopyMapping(unsigned DstBankID, unsigned SrcBankID, TypeSize Size);
9090

9191
/// Get the instruction mapping for G_FPEXT.
9292
///
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s
3+
4+
;; Test the correct usage of the Z registers with multiple SVE arguments.
5+
6+
define void @formal_argument_nxv16i8_2(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, ptr %p) {
7+
; CHECK-LABEL: formal_argument_nxv16i8_2:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: ptrue p0.b
10+
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
11+
; CHECK-NEXT: st1b { z1.b }, p0, [x0]
12+
; CHECK-NEXT: ret
13+
store <vscale x 16 x i8> %0, ptr %p, align 16
14+
store <vscale x 16 x i8> %1, ptr %p, align 16
15+
ret void
16+
}
17+
18+
define void @formal_argument_nxv16i8_8(
19+
; CHECK-LABEL: formal_argument_nxv16i8_8:
20+
; CHECK: // %bb.0:
21+
; CHECK-NEXT: ptrue p0.b
22+
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
23+
; CHECK-NEXT: st1b { z1.b }, p0, [x0]
24+
; CHECK-NEXT: st1b { z2.b }, p0, [x0]
25+
; CHECK-NEXT: st1b { z3.b }, p0, [x0]
26+
; CHECK-NEXT: st1b { z4.b }, p0, [x0]
27+
; CHECK-NEXT: st1b { z5.b }, p0, [x0]
28+
; CHECK-NEXT: st1b { z6.b }, p0, [x0]
29+
; CHECK-NEXT: st1b { z7.b }, p0, [x0]
30+
; CHECK-NEXT: ret
31+
<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3,
32+
<vscale x 16 x i8> %4, <vscale x 16 x i8> %5, <vscale x 16 x i8> %6, <vscale x 16 x i8> %7,
33+
ptr %p) {
34+
35+
store <vscale x 16 x i8> %0, ptr %p, align 16
36+
store <vscale x 16 x i8> %1, ptr %p, align 16
37+
store <vscale x 16 x i8> %2, ptr %p, align 16
38+
store <vscale x 16 x i8> %3, ptr %p, align 16
39+
store <vscale x 16 x i8> %4, ptr %p, align 16
40+
store <vscale x 16 x i8> %5, ptr %p, align 16
41+
store <vscale x 16 x i8> %6, ptr %p, align 16
42+
store <vscale x 16 x i8> %7, ptr %p, align 16
43+
ret void
44+
}

0 commit comments

Comments
 (0)