[GlobalISel][AArch64] Legalize G_UADDSAT, G_SADDSAT, G_USUBSAT, and G_SSUBSAT #114664
Conversation
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes: …_SSUBSAT

Full diff: https://github.com/llvm/llvm-project/pull/114664.diff

3 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index b7541effafe5ce..3ea655f29fd24d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -204,6 +204,8 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
+ if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
+ return true;
return !MemTy.isByteSized() ||
!llvm::has_single_bit<uint32_t>(
MemTy.getSizeInBytes().getKnownMinValue());
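
Note: this predicate is typically consumed by a legalization rule that routes awkward memory sizes to a non-legal action. Below is a minimal sketch of such usage; the G_LOAD rule and the helper function are hypothetical illustrations, not part of this patch. With the early-out added above, a scalable memory type always satisfies the predicate instead of being classified by its known-minimum byte size.

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical helper, for illustration only: send any load whose memory
// size is not a byte-sized power of two (now including all scalable
// vectors, e.g. <vscale x 16 x i8>) to the target's custom handler.
void buildExampleLoadRule(LegalizerInfo &LI) {
  LI.getActionDefinitionsBuilder(TargetOpcode::G_LOAD)
      .customIf(LegalityPredicates::memSizeNotByteSizePow2(/*MMOIdx=*/0));
}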
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f162d1c2973cbc..8851ad7ac3503d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1280,6 +1280,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
.legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
+ .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16, nxv16s8})
.clampNumElements(0, v8s8, v16s8)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
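
The new line uses the predicate-taking legalFor overload: the listed SVE types are marked legal only when the boolean is true, and otherwise fall through to the later clamping and lowering rules. A distilled sketch of the idiom follows, assuming it runs inside AArch64LegalizerInfo's constructor (so getActionDefinitionsBuilder is the member function, ST is the AArch64Subtarget, and G_UADDSAT is in scope unqualified, as in the file above); the trailing lower() is an assumed fallback, and the in-tree rule set may differ.

const bool HasSVE = ST.hasSVE(); // subtarget feature query
// LLT constants, as defined earlier in the constructor:
const LLT v16s8   = LLT::fixed_vector(16, 8);
const LLT nxv16s8 = LLT::scalable_vector(16, 8);
getActionDefinitionsBuilder(G_UADDSAT)
    .legalFor({v16s8})           // fixed-width NEON type: always legal
    .legalFor(HasSVE, {nxv16s8}) // scalable type: legal only with SVE
    .lower();                    // assumed fallback for everything else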
diff --git a/llvm/test/CodeGen/AArch64/sve-saturating-arith.ll b/llvm/test/CodeGen/AArch64/sve-saturating-arith.ll
new file mode 100644
index 00000000000000..a14c86b3f3146f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-saturating-arith.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SDAG
+; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL
+
+; SQADD
+define <vscale x 16 x i8> @sqadd_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqadd_i8_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqadd z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @sqadd_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqadd_i16_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqadd z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @sqadd_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqadd_i32_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqadd z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @sqadd_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqadd_i64_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqadd z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %res
+}
+
+; UQADD
+define <vscale x 16 x i8> @uqadd_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqadd_i8_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqadd z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @uqadd_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqadd_i16_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqadd z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @uqadd_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqadd_i32_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @uqadd_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqadd_i64_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqadd z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %res
+}
+
+; SQSUB
+define <vscale x 16 x i8> @sqsub_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqsub_i8_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @sqsub_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqsub_i16_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @sqsub_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqsub_i32_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @sqsub_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqsub_i64_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %res
+}
+
+; UQSUB
+define <vscale x 16 x i8> @uqsub_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqsub_i8_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqsub z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @uqsub_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqsub_i16_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqsub z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @uqsub_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqsub_i32_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqsub z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @uqsub_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqsub_i64_low:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqsub z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GISEL: {{.*}}
+; CHECK-SDAG: {{.*}}
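
A note on the second RUN line above: -global-isel-abort=2 makes llc emit a diagnostic and fall back to SelectionDAG when GlobalISel cannot handle an instruction, rather than aborting. The 2>&1 redirection feeds any such diagnostic to FileCheck along with the assembly, so the shared CHECK prefix must hold on both the GlobalISel and SelectionDAG paths.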
Force-pushed from f65ea57 to ac98b16
Force-pushed from ac98b16 to 4ac7ba7
This sounds OK, but is the memsize part needed for this patch?
Thanks. LGTM
[GlobalISel][AArch64] Legalize G_UADDSAT, G_SADDSAT, G_USUBSAT, and G_SSUBSAT (llvm#114664)

sve-int-imm.ll also tests saturation, but it has unsupported splats.