
[GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE #110561

Merged · 6 commits · Oct 27, 2024
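This patch lets GlobalISel legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE's scalable vector types (nxv16s8, nxv8s16, nxv4s32, nxv2s64), mapping them onto the FPR bank's ZPR register class. For example (taken from the new sve-integer.ll test below), a scalable add now selects to a single unpredicated SVE instruction:

  %c = add <vscale x 2 x i64> %a, %b
  ; selects to: add z0.d, z0.d, z1.d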
12 changes: 7 additions & 5 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -998,8 +998,7 @@ class LegalizeRuleSet {
LegalizeAction::WidenScalar,
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
-        return VecTy.isVector() && !VecTy.isScalable() &&
-               VecTy.getSizeInBits() < VectorSize;
+        return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
},
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
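The predicate rewrites in this file lean on LLT::isFixedVector(), which folds the vector-and-not-scalable check into a single call; a paraphrased sketch of its definition in LowLevelType.h:

  // A fixed vector is a vector whose element count is not scaled by vscale.
  constexpr bool isFixedVector() const { return isVector() && !isScalable(); }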
@@ -1172,7 +1171,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
-        return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+        return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() < MinElements;
},
[=](const LegalityQuery &Query) {
@@ -1190,7 +1189,7 @@
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
-        return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+        return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
(VecTy.getNumElements() % NumElts != 0);
},
[=](const LegalityQuery &Query) {
@@ -1210,7 +1209,7 @@
LegalizeAction::FewerElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
-        return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+        return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() > MaxElements;
},
[=](const LegalityQuery &Query) {
@@ -1231,6 +1230,9 @@
assert(MinTy.getElementType() == MaxTy.getElementType() &&
"Expected element types to agree");

+    assert((!MinTy.isScalableVector() && !MaxTy.isScalableVector()) &&
+           "Unexpected scalable vectors");
+
const LLT EltTy = MinTy.getElementType();
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
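clampMinNumElements/clampMaxNumElements reason about a concrete element count, which scalable vectors do not have, hence the new assert. A minimal usage sketch (the builder B and the exact types are illustrative only):

  // OK: clamp a fixed-width vector operand into the [v2s32, v4s32] range.
  B.clampNumElements(0, LLT::fixed_vector(2, 32), LLT::fixed_vector(4, 32));
  // Would now assert: a scalable bound carries no fixed element count to clamp to.
  // B.clampNumElements(0, LLT::scalable_vector(2, 64), LLT::scalable_vector(2, 64));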
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
const unsigned MinSize = Size.getKnownMinValue();
assert((!Size.isScalable() || MinSize >= 128) &&
"Scalable vector types should have size of at least 128 bits");
+  if (Size.isScalable())
+    return 3;
if (MinSize <= 16)
return 0;
if (MinSize <= 32)
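All scalable sizes share one base-index offset, justified by the assert that SVE register types are at least 128 bits. A sketch of the resulting mapping (the fixed >64-bit branch lives in the elided code below and is assumed here):

  // TypeSize -> base index offset into the FPR partial-mapping table
  //   scalable (asserted >= 128 bits) -> 3
  //   <= 16 bits -> 0, <= 32 bits -> 1, <= 64 bits -> 2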
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
// i1 is a special case because SDAG i1 true is naturally zero extended
// when widened using ANYEXT. We need to do it explicitly here.
auto &Flags = CurArgInfo.Flags[0];
-      if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
-          !Flags.isZExt()) {
+      if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
+          !Flags.isSExt() && !Flags.isZExt()) {
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
1) {
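The i1 special case now compares against an explicit TypeSize::getFixed(1): getSizeInBits() yields a TypeSize once scalable types are in play, and fixed and scalable quantities never compare equal. A minimal illustration:

  TypeSize Fixed = TypeSize::getFixed(128);   // exactly 128 bits
  TypeSize Scal = TypeSize::getScalable(128); // vscale x 128 bits
  assert(Fixed != Scal); // same minimum value, different kinds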
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
unsigned RegBankID = RB.getID();

if (RegBankID == AArch64::GPRRegBankID) {
+    assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
@@ -626,6 +627,12 @@
}

if (RegBankID == AArch64::FPRRegBankID) {
+    if (SizeInBits.isScalable()) {
+      assert(SizeInBits == TypeSize::getScalable(128) &&
+             "Unexpected scalable register size");
+      return &AArch64::ZPRRegClass;
+    }
+
switch (SizeInBits) {
default:
return nullptr;
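Every scalable value on the FPR bank is asserted to be exactly vscale x 128 bits (one full SVE data register) and is constrained to ZPR. Illustratively (a hypothetical call, not code from this diff):

  // An SVE value such as <vscale x 4 x i32>, i.e. TypeSize::getScalable(128):
  // getMinClassForRegBank(FPRBank, TypeSize::getScalable(128), GetAllRegSet)
  //   returns &AArch64::ZPRRegClass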
@@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
-  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+  if (SrcRegBank != DstRegBank &&
+      (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
SrcSize = DstSize = TypeSize::getFixed(32);

return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
30 changes: 29 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
+  const bool HasSVE = ST.hasSVE();

getActionDefinitionsBuilder(
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
@@ -127,7 +128,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);

-  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
+      .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
+      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, s32, s64)
+      .clampMaxNumElements(0, s8, 16)
+      .clampMaxNumElements(0, s16, 8)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getNumElements() <= 2;
+          },
+          0, s32)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getNumElements() <= 4;
+          },
+          0, s16)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getNumElements() <= 16;
+          },
+          0, s8)
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .moreElementsToNextPow2(0);
+
+  getActionDefinitionsBuilder(G_MUL)
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
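The legalFor overload taking a leading bool (used above for the nxv types) adds the legality rule only when the predicate is true; without +sve the scalable types simply match no legal rule here. An equivalent, more verbose spelling (illustrative only, with Builder standing in for the rule set):

  if (HasSVE)
    Builder.legalFor({nxv16s8, nxv8s16, nxv4s32, nxv2s64});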
@@ -24,7 +24,6 @@
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
- # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
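The deleted DEBUG-NEXT line reflects that G_MUL previously shared (aliased) the rule set built for G_ADD and friends; now that G_MUL gets its own getActionDefinitionsBuilder call, the legalizer-info validation output no longer reports the alias.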
208 changes: 208 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll
@@ -0,0 +1,208 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
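; Both RUN lines share the CHECK lines below: the first exercises the default
; SelectionDAG pipeline, the second GlobalISel, so the test verifies that the
; two selectors emit identical SVE code.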

;; add
define <vscale x 2 x i64> @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: addnxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = add <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

define <vscale x 4 x i32> @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: addnxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = add <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

define <vscale x 8 x i16> @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: addnxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = add <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

define <vscale x 16 x i8> @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: addnxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = add <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; sub
define <vscale x 2 x i64> @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: subnxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

define <vscale x 4 x i32> @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: subnxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

define <vscale x 8 x i16> @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: subnxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

define <vscale x 16 x i8> @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: subnxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; and
define <vscale x 2 x i64> @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: andnxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

define <vscale x 4 x i32> @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: andnxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

define <vscale x 8 x i16> @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: andnxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

define <vscale x 16 x i8> @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: andnxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; or
define <vscale x 2 x i64> @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ornxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

define <vscale x 4 x i32> @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ornxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

define <vscale x 8 x i16> @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: ornxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

define <vscale x 16 x i8> @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ornxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; xor
define <vscale x 2 x i64> @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: xornxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

define <vscale x 4 x i32> @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: xornxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

define <vscale x 8 x i16> @xornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: xornxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

define <vscale x 16 x i8> @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: xornxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}