Skip to content

Commit 61bb8b5

Browse files
committed
[AArch64] Convert sra(X, elt_size(X)-1) to cmlt(X, 0)
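CMLT has twice the execution throughput of SSHR on Arm out-of-order cores. The rewrite is exact: an arithmetic shift right by elt_size(X)-1 copies each lane's sign bit into every bit of that lane, giving all-ones for negative lanes and zero otherwise, which is precisely the result of CMLT against zero.
Differential Revision: https://reviews.llvm.org/D115457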
1 parent: ebb6bb7; commit: 61bb8b5

18 files changed, with 93 additions (+93) and 68 deletions (-68).

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4174,6 +4174,21 @@ defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
41744174
defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
41754175
defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
41764176

4177+
def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
4178+
(CMLTv8i8rz V64:$Rn)>;
4179+
def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
4180+
(CMLTv4i16rz V64:$Rn)>;
4181+
def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
4182+
(CMLTv2i32rz V64:$Rn)>;
4183+
def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
4184+
(CMLTv16i8rz V128:$Rn)>;
4185+
def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
4186+
(CMLTv8i16rz V128:$Rn)>;
4187+
def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
4188+
(CMLTv4i32rz V128:$Rn)>;
4189+
def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
4190+
(CMLTv2i64rz V128:$Rn)>;
4191+
41774192
defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
41784193
defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
41794194
defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
@@ -4825,6 +4840,9 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
48254840
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
48264841
int_aarch64_neon_usqadd>;
48274842

4843+
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
4844+
(CMLTv1i64rz V64:$Rn)>;
4845+
48284846
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
48294847
(FCVTASv1i64 FPR64:$Rn)>;
48304848
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),

llvm/test/Analysis/CostModel/AArch64/vector-select.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) {
143143
; CODE: bb.0
144144
; CODE-NEXT: ushll v{{.+}}.2d, v{{.+}}.2s, #0
145145
; CODE-NEXT: shl v{{.+}}.2d, v{{.+}}.2d, #63
146-
; CODE-NEXT: sshr v{{.+}}.2d, v{{.+}}.2d, #63
146+
; CODE-NEXT: cmlt v{{.+}}.2d, v{{.+}}.2d, #0
147147
; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
148148
; CODE-NEXT: ret
149149

llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -348,8 +348,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
348348
; CHECK-NEXT: mov.b v1[15], w8
349349
; CHECK-NEXT: shl.16b v0, v0, #7
350350
; CHECK-NEXT: shl.16b v1, v1, #7
351-
; CHECK-NEXT: sshr.16b v0, v0, #7
352-
; CHECK-NEXT: sshr.16b v1, v1, #7
351+
; CHECK-NEXT: cmlt.16b v0, v0, #0
352+
; CHECK-NEXT: cmlt.16b v1, v1, #0
353353
; CHECK-NEXT: ret
354354
%res = sext <32 x i1> %arg to <32 x i8>
355355
ret <32 x i8> %res
@@ -615,10 +615,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
615615
; CHECK-NEXT: shl.16b v2, v2, #7
616616
; CHECK-NEXT: shl.16b v4, v1, #7
617617
; CHECK-NEXT: shl.16b v5, v0, #7
618-
; CHECK-NEXT: sshr.16b v0, v3, #7
619-
; CHECK-NEXT: sshr.16b v1, v2, #7
620-
; CHECK-NEXT: sshr.16b v2, v4, #7
621-
; CHECK-NEXT: sshr.16b v3, v5, #7
618+
; CHECK-NEXT: cmlt.16b v0, v3, #0
619+
; CHECK-NEXT: cmlt.16b v1, v2, #0
620+
; CHECK-NEXT: cmlt.16b v2, v4, #0
621+
; CHECK-NEXT: cmlt.16b v3, v5, #0
622622
; CHECK-NEXT: ret
623623
%res = sext <64 x i1> %arg to <64 x i8>
624624
ret <64 x i8> %res

llvm/test/CodeGen/AArch64/arm64-vshr.ll

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,14 @@ entry:
4848

4949
define <1 x i64> @sshr_v1i64(<1 x i64> %A) nounwind {
5050
; CHECK-LABEL: sshr_v1i64:
51-
; CHECK: sshr d0, d0, #63
51+
; CHECK: sshr d0, d0, #42
52+
%tmp3 = ashr <1 x i64> %A, < i64 42 >
53+
ret <1 x i64> %tmp3
54+
}
55+
56+
define <1 x i64> @cmlt_v1i64(<1 x i64> %A) nounwind {
57+
; CHECK-LABEL: cmlt_v1i64:
58+
; CHECK: cmlt d0, d0, #0
5259
%tmp3 = ashr <1 x i64> %A, < i64 63 >
5360
ret <1 x i64> %tmp3
5461
}

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ define <7 x i8> @sign_7xi8(<7 x i8> %a) {
115115
; CHECK-LABEL: sign_7xi8:
116116
; CHECK: // %bb.0:
117117
; CHECK-NEXT: movi v1.8b, #1
118-
; CHECK-NEXT: sshr v0.8b, v0.8b, #7
118+
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
119119
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
120120
; CHECK-NEXT: ret
121121
%c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -127,7 +127,7 @@ define <8 x i8> @sign_8xi8(<8 x i8> %a) {
127127
; CHECK-LABEL: sign_8xi8:
128128
; CHECK: // %bb.0:
129129
; CHECK-NEXT: movi v1.8b, #1
130-
; CHECK-NEXT: sshr v0.8b, v0.8b, #7
130+
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
131131
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
132132
; CHECK-NEXT: ret
133133
%c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -139,7 +139,7 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) {
139139
; CHECK-LABEL: sign_16xi8:
140140
; CHECK: // %bb.0:
141141
; CHECK-NEXT: movi v1.16b, #1
142-
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
142+
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
143143
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
144144
; CHECK-NEXT: ret
145145
%c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -150,7 +150,7 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) {
150150
define <3 x i32> @sign_3xi32(<3 x i32> %a) {
151151
; CHECK-LABEL: sign_3xi32:
152152
; CHECK: // %bb.0:
153-
; CHECK-NEXT: sshr v0.4s, v0.4s, #31
153+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
154154
; CHECK-NEXT: orr v0.4s, #1
155155
; CHECK-NEXT: ret
156156
%c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
@@ -161,7 +161,7 @@ define <3 x i32> @sign_3xi32(<3 x i32> %a) {
161161
define <4 x i32> @sign_4xi32(<4 x i32> %a) {
162162
; CHECK-LABEL: sign_4xi32:
163163
; CHECK: // %bb.0:
164-
; CHECK-NEXT: sshr v0.4s, v0.4s, #31
164+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
165165
; CHECK-NEXT: orr v0.4s, #1
166166
; CHECK-NEXT: ret
167167
%c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -177,7 +177,7 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
177177
; CHECK-NEXT: .cfi_def_cfa_offset 32
178178
; CHECK-NEXT: .cfi_offset w30, -16
179179
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
180-
; CHECK-NEXT: sshr v2.4s, v0.4s, #31
180+
; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
181181
; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
182182
; CHECK-NEXT: orr v2.4s, #1
183183
; CHECK-NEXT: xtn v0.4h, v0.4s
@@ -214,7 +214,7 @@ define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) {
214214
; CHECK-LABEL: not_sign_4xi32_2:
215215
; CHECK: // %bb.0:
216216
; CHECK-NEXT: adrp x8, .LCPI17_0
217-
; CHECK-NEXT: sshr v0.4s, v0.4s, #31
217+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
218218
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
219219
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
220220
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/dag-numsignbits.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ define void @signbits_vXi1(<4 x i16> %a1) {
1919
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1]
2020
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
2121
; CHECK-NEXT: shl v0.4h, v0.4h, #15
22-
; CHECK-NEXT: sshr v0.4h, v0.4h, #15
22+
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
2323
; CHECK-NEXT: umov w0, v0.h[0]
2424
; CHECK-NEXT: umov w3, v0.h[3]
2525
; CHECK-NEXT: b foo

llvm/test/CodeGen/AArch64/div_minsize.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ entry:
3535
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
3636
entry:
3737
; CHECK: sdiv_vec8x16_minsize
38-
; CHECK: sshr v1.8h, v0.8h, #15
38+
; CHECK: cmlt v1.8h, v0.8h, #0
3939
; CHECK: usra v0.8h, v1.8h, #11
4040
; CHECK: sshr v0.8h, v0.8h, #5
4141
; CHECK: ret

llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
167167
; CHECK: // %bb.0:
168168
; CHECK-NEXT: shl v0.16b, v0.16b, #7
169169
; CHECK-NEXT: movi v1.16b, #128
170-
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
170+
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
171171
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
172172
; CHECK-NEXT: ret
173173
%shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer
@@ -180,7 +180,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
180180
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
181181
; CHECK-NEXT: movi v1.8h, #128
182182
; CHECK-NEXT: shl v0.8h, v0.8h, #15
183-
; CHECK-NEXT: sshr v0.8h, v0.8h, #15
183+
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
184184
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
185185
; CHECK-NEXT: ret
186186
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
@@ -193,7 +193,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
193193
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
194194
; CHECK-NEXT: movi v1.4s, #64
195195
; CHECK-NEXT: shl v0.4s, v0.4s, #31
196-
; CHECK-NEXT: sshr v0.4s, v0.4s, #31
196+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
197197
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
198198
; CHECK-NEXT: ret
199199
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
@@ -207,7 +207,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
207207
; CHECK-NEXT: mov w8, #65536
208208
; CHECK-NEXT: dup v1.2d, x8
209209
; CHECK-NEXT: shl v0.2d, v0.2d, #63
210-
; CHECK-NEXT: sshr v0.2d, v0.2d, #63
210+
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
211211
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
212212
; CHECK-NEXT: ret
213213
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer

llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
184184
define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
185185
; CHECK-LABEL: test_srem_pow2:
186186
; CHECK: // %bb.0:
187-
; CHECK-NEXT: sshr v2.4s, v0.4s, #31
187+
; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
188188
; CHECK-NEXT: mov v3.16b, v0.16b
189189
; CHECK-NEXT: movi v1.4s, #1
190190
; CHECK-NEXT: usra v3.4s, v2.4s, #28
@@ -203,7 +203,7 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
203203
define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
204204
; CHECK-LABEL: test_srem_int_min:
205205
; CHECK: // %bb.0:
206-
; CHECK-NEXT: sshr v2.4s, v0.4s, #31
206+
; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
207207
; CHECK-NEXT: mov v3.16b, v0.16b
208208
; CHECK-NEXT: movi v1.4s, #128, lsl #24
209209
; CHECK-NEXT: usra v3.4s, v2.4s, #1

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
2525
; CHECK-LABEL: select_v4f16:
2626
; CHECK: // %bb.0:
2727
; CHECK-NEXT: shl v2.4h, v2.4h, #15
28-
; CHECK-NEXT: sshr v2.4h, v2.4h, #15
28+
; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
2929
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
3030
; CHECK-NEXT: ret
3131
%sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
@@ -38,7 +38,7 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
3838
; CHECK: // %bb.0:
3939
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
4040
; CHECK-NEXT: shl v2.8h, v2.8h, #15
41-
; CHECK-NEXT: sshr v2.8h, v2.8h, #15
41+
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
4242
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
4343
; CHECK-NEXT: ret
4444
%sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
@@ -122,7 +122,7 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
122122
; CHECK-LABEL: select_v2f32:
123123
; CHECK: // %bb.0:
124124
; CHECK-NEXT: shl v2.2s, v2.2s, #31
125-
; CHECK-NEXT: sshr v2.2s, v2.2s, #31
125+
; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
126126
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
127127
; CHECK-NEXT: ret
128128
%sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
@@ -135,7 +135,7 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
135135
; CHECK: // %bb.0:
136136
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
137137
; CHECK-NEXT: shl v2.4s, v2.4s, #31
138-
; CHECK-NEXT: sshr v2.4s, v2.4s, #31
138+
; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
139139
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
140140
; CHECK-NEXT: ret
141141
%sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
@@ -233,7 +233,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
233233
; CHECK: // %bb.0:
234234
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
235235
; CHECK-NEXT: shl v2.2d, v2.2d, #63
236-
; CHECK-NEXT: sshr v2.2d, v2.2d, #63
236+
; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
237237
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
238238
; CHECK-NEXT: ret
239239
%sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2

llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
2525
; CHECK-LABEL: select_v8i8:
2626
; CHECK: // %bb.0:
2727
; CHECK-NEXT: shl v2.8b, v2.8b, #7
28-
; CHECK-NEXT: sshr v2.8b, v2.8b, #7
28+
; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
2929
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
3030
; CHECK-NEXT: ret
3131
%sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
@@ -37,7 +37,7 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
3737
; CHECK-LABEL: select_v16i8:
3838
; CHECK: // %bb.0:
3939
; CHECK-NEXT: shl v2.16b, v2.16b, #7
40-
; CHECK-NEXT: sshr v2.16b, v2.16b, #7
40+
; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
4141
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
4242
; CHECK-NEXT: ret
4343
%sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
@@ -1137,7 +1137,7 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #
11371137
; CHECK-LABEL: select_v4i16:
11381138
; CHECK: // %bb.0:
11391139
; CHECK-NEXT: shl v2.4h, v2.4h, #15
1140-
; CHECK-NEXT: sshr v2.4h, v2.4h, #15
1140+
; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
11411141
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
11421142
; CHECK-NEXT: ret
11431143
%sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
@@ -1150,7 +1150,7 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #
11501150
; CHECK: // %bb.0:
11511151
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
11521152
; CHECK-NEXT: shl v2.8h, v2.8h, #15
1153-
; CHECK-NEXT: sshr v2.8h, v2.8h, #15
1153+
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
11541154
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
11551155
; CHECK-NEXT: ret
11561156
%sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
@@ -1767,7 +1767,7 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #
17671767
; CHECK-LABEL: select_v2i32:
17681768
; CHECK: // %bb.0:
17691769
; CHECK-NEXT: shl v2.2s, v2.2s, #31
1770-
; CHECK-NEXT: sshr v2.2s, v2.2s, #31
1770+
; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
17711771
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
17721772
; CHECK-NEXT: ret
17731773
%sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
@@ -1780,7 +1780,7 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #
17801780
; CHECK: // %bb.0:
17811781
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
17821782
; CHECK-NEXT: shl v2.4s, v2.4s, #31
1783-
; CHECK-NEXT: sshr v2.4s, v2.4s, #31
1783+
; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
17841784
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
17851785
; CHECK-NEXT: ret
17861786
%sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
@@ -2110,7 +2110,7 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #
21102110
; CHECK: // %bb.0:
21112111
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
21122112
; CHECK-NEXT: shl v2.2d, v2.2d, #63
2113-
; CHECK-NEXT: sshr v2.2d, v2.2d, #63
2113+
; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
21142114
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
21152115
; CHECK-NEXT: ret
21162116
%sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -632,7 +632,7 @@ define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 {
632632
; CHECK-NEXT: mov v0.h[0], w8
633633
; CHECK-NEXT: mov v0.h[1], w9
634634
; CHECK-NEXT: shl v0.4h, v0.4h, #15
635-
; CHECK-NEXT: sshr v0.4h, v0.4h, #15
635+
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
636636
; CHECK-NEXT: sunpklo z0.s, z0.h
637637
; CHECK-NEXT: sunpklo z0.d, z0.s
638638
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
4242
; CHECK-NEXT: mov v0.h[0], w8
4343
; CHECK-NEXT: mov v0.h[1], w9
4444
; CHECK-NEXT: shl v0.4h, v0.4h, #15
45-
; CHECK-NEXT: sshr v0.4h, v0.4h, #15
45+
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
4646
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
4747
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
4848
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -581,7 +581,7 @@ define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 {
581581
; CHECK-NEXT: mov v0.h[0], w8
582582
; CHECK-NEXT: mov v0.h[1], w9
583583
; CHECK-NEXT: shl v0.4h, v0.4h, #15
584-
; CHECK-NEXT: sshr v0.4h, v0.4h, #15
584+
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
585585
; CHECK-NEXT: sunpklo z0.s, z0.h
586586
; CHECK-NEXT: sunpklo z0.d, z0.s
587587
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
4242
; CHECK-NEXT: mov v0.h[0], w8
4343
; CHECK-NEXT: mov v0.h[1], w9
4444
; CHECK-NEXT: shl v0.4h, v0.4h, #15
45-
; CHECK-NEXT: sshr v0.4h, v0.4h, #15
45+
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
4646
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
4747
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
4848
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/vec_uaddo.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -152,12 +152,12 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
152152
; CHECK-NEXT: ushll v3.4s, v3.4h, #0
153153
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
154154
; CHECK-NEXT: shl v5.4s, v0.4s, #31
155-
; CHECK-NEXT: sshr v0.4s, v2.4s, #31
155+
; CHECK-NEXT: cmlt v0.4s, v2.4s, #0
156156
; CHECK-NEXT: shl v3.4s, v3.4s, #31
157157
; CHECK-NEXT: shl v6.4s, v1.4s, #31
158-
; CHECK-NEXT: sshr v1.4s, v5.4s, #31
159-
; CHECK-NEXT: sshr v2.4s, v3.4s, #31
160-
; CHECK-NEXT: sshr v3.4s, v6.4s, #31
158+
; CHECK-NEXT: cmlt v1.4s, v5.4s, #0
159+
; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
160+
; CHECK-NEXT: cmlt v3.4s, v6.4s, #0
161161
; CHECK-NEXT: ret
162162
%t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
163163
%val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
@@ -180,8 +180,8 @@ define <8 x i32> @uaddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
180180
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
181181
; CHECK-NEXT: shl v1.4s, v1.4s, #31
182182
; CHECK-NEXT: shl v3.4s, v0.4s, #31
183-
; CHECK-NEXT: sshr v0.4s, v1.4s, #31
184-
; CHECK-NEXT: sshr v1.4s, v3.4s, #31
183+
; CHECK-NEXT: cmlt v0.4s, v1.4s, #0
184+
; CHECK-NEXT: cmlt v1.4s, v3.4s, #0
185185
; CHECK-NEXT: ret
186186
%t = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1)
187187
%val = extractvalue {<8 x i16>, <8 x i1>} %t, 0
@@ -296,7 +296,7 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
296296
; CHECK-NEXT: stp x8, x9, [x10, #16]
297297
; CHECK-NEXT: shl v0.2s, v0.2s, #31
298298
; CHECK-NEXT: stp x11, x12, [x10]
299-
; CHECK-NEXT: sshr v0.2s, v0.2s, #31
299+
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
300300
; CHECK-NEXT: ret
301301
%t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
302302
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

0 commit comments

Comments
 (0)