Commit 2ff41b4

Author: Thorsten Schütt

[GlobalISel][AArch64] Legalize G_UADDSAT, G_SADDSAT, G_USUBSAT, and G_SSUBSAT (#114664)

sve-int-imm.ll also tests saturation, but it has unsupported splats.

1 parent bdfadb1
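
For readers unfamiliar with the opcodes: saturating arithmetic clamps to the type's range instead of wrapping on overflow. Below is a minimal scalar sketch of the per-lane semantics of G_SADDSAT and G_SSUBSAT, in plain C++ for illustration only; it is not LLVM code, and the helper names are made up.

#include <cstdint>
#include <limits>

// Reference-only sketch; names are illustrative, not from the commit.
// Signed saturating add: clamp to INT8_MAX/INT8_MIN instead of wrapping.
int8_t sadd_sat_i8(int8_t a, int8_t b) {
  int wide = int(a) + int(b);  // widen so the exact sum is representable
  if (wide > std::numeric_limits<int8_t>::max()) return std::numeric_limits<int8_t>::max();
  if (wide < std::numeric_limits<int8_t>::min()) return std::numeric_limits<int8_t>::min();
  return int8_t(wide);
}

// Signed saturating subtract: same clamping, e.g. (-100) - 100 yields -128.
int8_t ssub_sat_i8(int8_t a, int8_t b) {
  int wide = int(a) - int(b);
  if (wide > std::numeric_limits<int8_t>::max()) return std::numeric_limits<int8_t>::max();
  if (wide < std::numeric_limits<int8_t>::min()) return std::numeric_limits<int8_t>::min();
  return int8_t(wide);
}

SVE's sqadd and sqsub instructions apply exactly this clamping per lane of a scalable vector.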

File tree: 2 files changed, +169 -0 lines


llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (1 addition, 0 deletions)

@@ -1280,6 +1280,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

   getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
       .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
+      .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16, nxv16s8})
       .clampNumElements(0, v8s8, v16s8)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
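
The predicated .legalFor(HasSVE, ...) line is the only functional change: the four opcodes become legal for scalable vector types only when the subtarget has SVE, so GlobalISel can select the native instructions instead of falling back. For completeness, a matching scalar sketch of the unsigned semantics (again plain C++ for illustration, not LLVM code; the helper names are made up):

#include <cstdint>

// Reference-only sketch; names are illustrative, not from the commit.
// Unsigned saturating add: clamp to UINT8_MAX instead of wrapping around.
uint8_t uadd_sat_u8(uint8_t a, uint8_t b) {
  unsigned wide = unsigned(a) + unsigned(b);  // widen so the exact sum fits
  return wide > 0xFF ? uint8_t(0xFF) : uint8_t(wide);
}

// Unsigned saturating subtract: clamp at 0, e.g. 3 - 7 yields 0, not 252.
uint8_t usub_sat_u8(uint8_t a, uint8_t b) {
  return a > b ? uint8_t(a - b) : uint8_t(0);
}

These correspond per lane to SVE's uqadd and uqsub, which the new tests below check.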
New test file (168 additions, 0 deletions)

@@ -0,0 +1,168 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s

; SQADD
define <vscale x 16 x i8> @sqadd_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @sqadd_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sqadd_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sqadd_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}

; UQADD
define <vscale x 16 x i8> @uqadd_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqadd_i8_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @uqadd_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqadd_i16_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uqadd_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqadd_i32_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uqadd_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqadd_i64_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}

; SQSUB
define <vscale x 16 x i8> @sqsub_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsub_i8_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @sqsub_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsub_i16_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sqsub_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsub_i32_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sqsub_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsub_i64_low:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}

; UQSUB
define <vscale x 16 x i8> @uqsub_i8_low(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @uqsub_i16_low(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uqsub_i32_low(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uqsub_i64_low(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64_low:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}

declare <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
