[GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE #110561
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Thorsten Schütt (tschuett)

Changes:

for SVE. Credits: #72976

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)

;; mul
define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) { entry:
define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) { entry:
define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) { entry:

Full diff: https://github.com/llvm/llvm-project/pull/110561.diff

7 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 82e713f30ea31c..d42bfea2bd4438 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -970,8 +970,7 @@ class LegalizeRuleSet {
LegalizeAction::WidenScalar,
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && !VecTy.isScalable() &&
- VecTy.getSizeInBits() < VectorSize;
+ return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
},
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
@@ -1139,7 +1138,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() < MinElements;
},
[=](const LegalityQuery &Query) {
@@ -1157,7 +1156,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
(VecTy.getNumElements() % NumElts != 0);
},
[=](const LegalityQuery &Query) {
@@ -1177,7 +1176,7 @@ class LegalizeRuleSet {
LegalizeAction::FewerElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() > MaxElements;
},
[=](const LegalityQuery &Query) {
@@ -1198,6 +1197,11 @@ class LegalizeRuleSet {
assert(MinTy.getElementType() == MaxTy.getElementType() &&
"Expected element types to agree");
+ if (MinTy.isScalableVector())
+ return actionIf(LegalizeAction::Unsupported, always);
+ if (MaxTy.isScalableVector())
+ return actionIf(LegalizeAction::Unsupported, always);
+
const LLT EltTy = MinTy.getElementType();
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 82066b48c84b40..8ff59f60968beb 100644
--- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
const unsigned MinSize = Size.getKnownMinValue();
assert((!Size.isScalable() || MinSize >= 128) &&
"Scalable vector types should have size of at least 128 bits");
+ if (Size.isScalable())
+ return 3;
if (MinSize <= 16)
return 0;
if (MinSize <= 32)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5aee7804de3e3f..6cbfb018b3183a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
// i1 is a special case because SDAG i1 true is naturally zero extended
// when widened using ANYEXT. We need to do it explicitly here.
auto &Flags = CurArgInfo.Flags[0];
- if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
- !Flags.isZExt()) {
+ if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
+ !Flags.isSExt() && !Flags.isZExt()) {
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
1) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index df0c09d32c074a..afea08ab092501 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
unsigned RegBankID = RB.getID();
if (RegBankID == AArch64::GPRRegBankID) {
+ assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
@@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
}
if (RegBankID == AArch64::FPRRegBankID) {
+ if (SizeInBits.isScalable()) {
+ assert(SizeInBits == TypeSize::getScalable(128) &&
+ "Unexpected scalable register size");
+ return &AArch64::ZPRRegClass;
+ }
+
switch (SizeInBits) {
default:
return nullptr;
@@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
- if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+ if (SrcRegBank != DstRegBank &&
+ (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
SrcSize = DstSize = TypeSize::getFixed(32);
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 51aeee023f2e34..910a4ab2ddc178 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
+ const bool HasSVE = ST.hasSVE();
getActionDefinitionsBuilder(
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
@@ -127,7 +128,37 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+ auto &IntegerArithmeticActions =
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR});
+ if (HasSVE)
+ IntegerArithmeticActions.legalFor({nxv16s8, nxv8s16, nxv4s32, nxv2s64});
+ IntegerArithmeticActions
+ .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .clampMaxNumElements(0, s8, 16)
+ .clampMaxNumElements(0, s16, 8)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 2;
+ },
+ 0, s32)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 4;
+ },
+ 0, s16)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 16;
+ },
+ 0, s8)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .moreElementsToNextPow2(0);
+
+ getActionDefinitionsBuilder(G_MUL)
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir
new file mode 100644
index 00000000000000..d2e76227741cb6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir
@@ -0,0 +1,25 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: fp_inputs
+legalized: true
+body: |
+ bb.0:
+ liveins: $s0, $s1
+
+ ; CHECK-LABEL: name: fp_inputs
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: %rhs:fpr(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: %res:fpr(<vscale x 2 x s64>) = G_MUL %lhs, %rhs
+ ; CHECK-NEXT: $z0 = COPY %res(<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $z0
+ %lhs:_(<vscale x 2 x s64>) = COPY $z0
+ %rhs:_(<vscale x 2 x s64>) = COPY $z1
+ %res:_(<vscale x 2 x s64>) = G_MUL %lhs, %rhs
+ $z0 = COPY %res(<vscale x 2 x s64>)
+ RET_ReallyLR implicit $z0
+
diff --git a/llvm/test/CodeGen/AArch64/sve-integer.ll b/llvm/test/CodeGen/AArch64/sve-integer.ll
new file mode 100644
index 00000000000000..ad66190839ce0f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-integer.ll
@@ -0,0 +1,268 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; add
+define void @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: addnxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: addnxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: addnxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: addnxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; sub
+define void @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: subnxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: subnxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: subnxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: subnxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; and
+define void @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: andnxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: andnxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: andnxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: andnxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; or
+define void @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: ornxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: ornxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: ornxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: ornxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; xor
+define void @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: xornxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: xornxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @xornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: xornxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: xornxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
How is GlobalISel going to handle predication, e.g.,
First bet, there is no pattern for mul for SVE.
There are indeed patterns missing for the SVE G_MUL. |
success:
The emitter does not support
but complains about multiclass. There are many interesting instructions hidden in
There are no warnings about ADD_ZZZ and SUB_ZZZ despite them being defined in
Add and Sub are SVE while Mul is SVE2?
contains the working instructions. Mul is in a different let.
The complete error message about importing MUL_ZZZ:
There are several occurrences.
where AArch64ISD::MUL_PRED is C++ and not TableGen.
Ping.
For the SVE issue you are seeing - some instructions in SVE are "unpredicated", but many require a predicate even if it is an all-ones predicate vector. mul, I believe, is one where there is a predicated +sve instruction and an unpredicated +sve2 version. SDAG will convert them into MUL_PRED, but that has obvious downsides. Do you mind if we have a first patch that just gets the arg-passing basics out of the way, and start adding instruction support separately? Or does that all work already and all of this is for instruction support? We should be able to add tests that pass in values and store them, or load values and return them. I feel we need to make sure SVE is decently designed. I suppose that without any other suggestions we can follow how SDAG behaves.
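For reference, a minimal sketch of the predicated lowering described above, assuming only +sve (no SVE2): there is no unpredicated multiply, so the operation ends up as an all-true ptrue feeding the merging mul. The function name and the expected-assembly comments below are illustrative, not output taken from this patch.

define <vscale x 2 x i64> @mul_nxv2i64_sketch(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; Expected shape of the +sve lowering (exact predicate register may differ):
;   ptrue p0.d
;   mul   z0.d, p0/m, z0.d, z1.d
;   ret
entry:
  %c = mul <vscale x 2 x i64> %a, %b
  ret <vscale x 2 x i64> %c
}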
I have ARM DDI 0487K.a. It lists under SVE integer arithmetic:
but the unpredicated form is actually SVE2, although it is listed in the SVE section. Thanks for taking a look!
At your speed.
My plan was to legalize more instructions, but eventually we will run into predication and/or import issues.
Force-pushed from 7407b90 to 3f9047b.
Translate legal SVE formal arguments and select COPY for SVE. Argument passing is already implemented.
Ping.
auto &IntegerArithmeticActions =
    getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR});
if (HasSVE)
  IntegerArithmeticActions.legalFor({nxv16s8, nxv8s16, nxv4s32, nxv2s64});
Can you rebase now that a3010c7 is in?
It should be
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
....
(You could make the argument that the HasSVE is unnecessary, as scalable vectors are not supported without it and will just hit some failure elsewhere anyway. We might as well add it in though, now that we can without messing up the Definitions).
; CHECK-NEXT:    ret
entry:
  %c = add <vscale x 2 x i64> %a, %b
  store <vscale x 2 x i64> %c, ptr %p, align 16
Can these all return directly, as opposed to storing?
Can we reuse an existing test file like llvm/test/CodeGen/AArch64/sve-int-arith.ll?
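As an illustration of the suggestion, a hypothetical variant of one test that returns the result instead of storing it; the function name and CHECK lines are what one would expect, not autogenerated output from this patch.

define <vscale x 2 x i64> @addnxv2i64_ret(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: addnxv2i64_ret:
; CHECK:       add z0.d, z0.d, z1.d
; CHECK-NEXT:  ret
entry:
  %c = add <vscale x 2 x i64> %a, %b
  ret <vscale x 2 x i64> %c
}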
Can we use one of the existing tests with a new run line?
sve-int-arith.ll has too many illegal or unsupported tests.
I see, it is still crashing trying to lower the arguments, as opposed to falling back. Can you move this into AArch64/GlobalISel/sve-integer.ll for the moment then, and we can try to de-duplicate them later on.
sve-integer.ll works with ret. Long term, we should merge the two files, but our SVE support is really limited right now.
Yep this is no longer about ret - the tests look good now. This is about the duplication and moving them to the GlobalISel directory for the time being until more stuff works without crashing.
Force-pushed from bc6c733 to f1d0b15.
✅ With the latest revision this PR passed the C/C++ code formatter.
Thanks for taking a look.
Force-pushed from 5150968 to 068e785.
for SVE. Credits: llvm#72976

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)

;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
  %c = mul <vscale x 2 x i64> %a, %b
  store <vscale x 2 x i64> %c, ptr %p, align 16
  ret void
}

define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
  %c = mul <vscale x 4 x i32> %a, %b
  store <vscale x 4 x i32> %c, ptr %p, align 16
  ret void
}

define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
  %c = mul <vscale x 8 x i16> %a, %b
  store <vscale x 8 x i16> %c, ptr %p, align 16
  ret void
}

define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
  %c = mul <vscale x 16 x i8> %a, %b
  store <vscale x 16 x i8> %c, ptr %p, align 16
  ret void
}
Force-pushed from 6390da3 to 45aed33.
Thanks. LGTM
…or SVE (llvm#110561)

Credits: llvm#72976

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)

;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
  %c = mul <vscale x 2 x i64> %a, %b
  store <vscale x 2 x i64> %c, ptr %p, align 16
  ret void
}

define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
  %c = mul <vscale x 4 x i32> %a, %b
  store <vscale x 4 x i32> %c, ptr %p, align 16
  ret void
}

define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
  %c = mul <vscale x 8 x i16> %a, %b
  store <vscale x 8 x i16> %c, ptr %p, align 16
  ret void
}

define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
  %c = mul <vscale x 16 x i8> %a, %b
  store <vscale x 16 x i8> %c, ptr %p, align 16
  ret void
}
Credits: #72976
LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)
;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
%c = mul <vscale x 2 x i64> %a, %b
store <vscale x 2 x i64> %c, ptr %p, align 16
ret void
}
define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
%c = mul <vscale x 4 x i32> %a, %b
store <vscale x 4 x i32> %c, ptr %p, align 16
ret void
}
define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
%c = mul <vscale x 8 x i16> %a, %b
store <vscale x 8 x i16> %c, ptr %p, align 16
ret void
}
define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
%c = mul <vscale x 16 x i8> %a, %b
store <vscale x 16 x i8> %c, ptr %p, align 16
ret void
}