Skip to content

Commit fbd77b0

Browse files
committed
[GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE.

Credits: llvm#72976

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)

;; mul

define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
  %c = mul <vscale x 2 x i64> %a, %b
  store <vscale x 2 x i64> %c, ptr %p, align 16
  ret void
}

define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
  %c = mul <vscale x 4 x i32> %a, %b
  store <vscale x 4 x i32> %c, ptr %p, align 16
  ret void
}

define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
  %c = mul <vscale x 8 x i16> %a, %b
  store <vscale x 8 x i16> %c, ptr %p, align 16
  ret void
}

define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
  %c = mul <vscale x 16 x i8> %a, %b
  store <vscale x 16 x i8> %c, ptr %p, align 16
  ret void
}
1 parent 61a456b commit fbd77b0

File tree

7 files changed

+347
-9
lines changed

7 files changed

+347
-9
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,7 @@ class LegalizeRuleSet {
998998
LegalizeAction::WidenScalar,
999999
[=](const LegalityQuery &Query) {
10001000
const LLT VecTy = Query.Types[TypeIdx];
1001-
return VecTy.isVector() && !VecTy.isScalable() &&
1002-
VecTy.getSizeInBits() < VectorSize;
1001+
return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
10031002
},
10041003
[=](const LegalityQuery &Query) {
10051004
const LLT VecTy = Query.Types[TypeIdx];
@@ -1172,7 +1171,7 @@ class LegalizeRuleSet {
11721171
LegalizeAction::MoreElements,
11731172
[=](const LegalityQuery &Query) {
11741173
LLT VecTy = Query.Types[TypeIdx];
1175-
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
1174+
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
11761175
VecTy.getNumElements() < MinElements;
11771176
},
11781177
[=](const LegalityQuery &Query) {
@@ -1190,7 +1189,7 @@ class LegalizeRuleSet {
11901189
LegalizeAction::MoreElements,
11911190
[=](const LegalityQuery &Query) {
11921191
LLT VecTy = Query.Types[TypeIdx];
1193-
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
1192+
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
11941193
(VecTy.getNumElements() % NumElts != 0);
11951194
},
11961195
[=](const LegalityQuery &Query) {
@@ -1210,7 +1209,7 @@ class LegalizeRuleSet {
12101209
LegalizeAction::FewerElements,
12111210
[=](const LegalityQuery &Query) {
12121211
LLT VecTy = Query.Types[TypeIdx];
1213-
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
1212+
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
12141213
VecTy.getNumElements() > MaxElements;
12151214
},
12161215
[=](const LegalityQuery &Query) {
@@ -1231,6 +1230,11 @@ class LegalizeRuleSet {
12311230
assert(MinTy.getElementType() == MaxTy.getElementType() &&
12321231
"Expected element types to agree");
12331232

1233+
if (MinTy.isScalableVector())
1234+
return actionIf(LegalizeAction::Unsupported, always);
1235+
if (MaxTy.isScalableVector())
1236+
return actionIf(LegalizeAction::Unsupported, always);
1237+
12341238
const LLT EltTy = MinTy.getElementType();
12351239
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
12361240
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());

llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
183183
const unsigned MinSize = Size.getKnownMinValue();
184184
assert((!Size.isScalable() || MinSize >= 128) &&
185185
"Scalable vector types should have size of at least 128 bits");
186+
if (Size.isScalable())
187+
return 3;
186188
if (MinSize <= 16)
187189
return 0;
188190
if (MinSize <= 32)

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
393393
// i1 is a special case because SDAG i1 true is naturally zero extended
394394
// when widened using ANYEXT. We need to do it explicitly here.
395395
auto &Flags = CurArgInfo.Flags[0];
396-
if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
397-
!Flags.isZExt()) {
396+
if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
397+
!Flags.isSExt() && !Flags.isZExt()) {
398398
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
399399
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
400400
1) {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
615615
unsigned RegBankID = RB.getID();
616616

617617
if (RegBankID == AArch64::GPRRegBankID) {
618+
assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
618619
if (SizeInBits <= 32)
619620
return GetAllRegSet ? &AArch64::GPR32allRegClass
620621
: &AArch64::GPR32RegClass;
@@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
626627
}
627628

628629
if (RegBankID == AArch64::FPRRegBankID) {
630+
if (SizeInBits.isScalable()) {
631+
assert(SizeInBits == TypeSize::getScalable(128) &&
632+
"Unexpected scalable register size");
633+
return &AArch64::ZPRRegClass;
634+
}
635+
629636
switch (SizeInBits) {
630637
default:
631638
return nullptr;
@@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
964971
// then we can pull it into the helpers that get the appropriate class for a
965972
// register bank. Or make a new helper that carries along some constraint
966973
// information.
967-
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
974+
if (SrcRegBank != DstRegBank &&
975+
(DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
968976
SrcSize = DstSize = TypeSize::getFixed(32);
969977

970978
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
9191

9292
const bool HasCSSC = ST.hasCSSC();
9393
const bool HasRCPC3 = ST.hasRCPC3();
94+
const bool HasSVE = ST.hasSVE();
9495

9596
getActionDefinitionsBuilder(
9697
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
@@ -127,7 +128,37 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
127128
.clampNumElements(0, v2s64, v2s64)
128129
.moreElementsToNextPow2(0);
129130

130-
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
131+
auto &IntegerArithmeticActions =
132+
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR});
133+
if (HasSVE)
134+
IntegerArithmeticActions.legalFor({nxv16s8, nxv8s16, nxv4s32, nxv2s64});
135+
IntegerArithmeticActions
136+
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
137+
.widenScalarToNextPow2(0)
138+
.clampScalar(0, s32, s64)
139+
.clampMaxNumElements(0, s8, 16)
140+
.clampMaxNumElements(0, s16, 8)
141+
.clampNumElements(0, v2s32, v4s32)
142+
.clampNumElements(0, v2s64, v2s64)
143+
.minScalarOrEltIf(
144+
[=](const LegalityQuery &Query) {
145+
return Query.Types[0].getNumElements() <= 2;
146+
},
147+
0, s32)
148+
.minScalarOrEltIf(
149+
[=](const LegalityQuery &Query) {
150+
return Query.Types[0].getNumElements() <= 4;
151+
},
152+
0, s16)
153+
.minScalarOrEltIf(
154+
[=](const LegalityQuery &Query) {
155+
return Query.Types[0].getNumElements() <= 16;
156+
},
157+
0, s8)
158+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
159+
.moreElementsToNextPow2(0);
160+
161+
getActionDefinitionsBuilder(G_MUL)
131162
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
132163
.widenScalarToNextPow2(0)
133164
.clampScalar(0, s32, s64)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
...
5+
---
6+
name: fp_inputs
7+
legalized: true
8+
body: |
9+
bb.0:
10+
liveins: $s0, $s1
11+
12+
; CHECK-LABEL: name: fp_inputs
13+
; CHECK: liveins: $s0, $s1
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: %lhs:fpr(<vscale x 2 x s64>) = COPY $z0
16+
; CHECK-NEXT: %rhs:fpr(<vscale x 2 x s64>) = COPY $z1
17+
; CHECK-NEXT: %res:fpr(<vscale x 2 x s64>) = G_MUL %lhs, %rhs
18+
; CHECK-NEXT: $z0 = COPY %res(<vscale x 2 x s64>)
19+
; CHECK-NEXT: RET_ReallyLR implicit $z0
20+
%lhs:_(<vscale x 2 x s64>) = COPY $z0
21+
%rhs:_(<vscale x 2 x s64>) = COPY $z1
22+
%res:_(<vscale x 2 x s64>) = G_MUL %lhs, %rhs
23+
$z0 = COPY %res(<vscale x 2 x s64>)
24+
RET_ReallyLR implicit $z0
25+

0 commit comments

Comments
 (0)