Skip to content

Commit e2faf72

Browse files
committed
[x86] add tests for psubus; NFC
1 parent c983aed commit e2faf72

File tree

1 file changed

+86
-1
lines changed

1 file changed

+86
-1
lines changed

llvm/test/CodeGen/X86/psubus.ll

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
1010
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
1111

12+
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
13+
1214
define <8 x i16> @test1(<8 x i16> %x) nounwind {
1315
; SSE-LABEL: test1:
1416
; SSE: # %bb.0: # %vector.ph
@@ -90,7 +92,7 @@ define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x
9092
ret <16 x i8> %res
9193
}
9294

93-
define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
95+
define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
9496
; SSE-LABEL: ashr_xor_and_custom:
9597
; SSE: # %bb.0:
9698
; SSE-NEXT: movdqa %xmm0, %xmm1
@@ -125,6 +127,48 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %
125127
ret <4 x i32> %res
126128
}
127129

130+
; Baseline codegen test for llvm.usub.sat on <4 x i32> with a constant
; subtrahend whose first three lanes are 2147483648 (0x80000000, the i32 sign
; bit) and whose last lane is undef.
; The check lines below record two lowerings: a pxor/pcmpgtd/psubd/pand
; sequence under the SSE2OR3 prefix, and an unsigned max (pmaxud) followed by
; a subtract (psubd), or their v-prefixed forms, under the remaining prefixes.
define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
131+
; SSE2OR3-LABEL: usubsat_custom:
132+
; SSE2OR3: # %bb.0:
133+
; SSE2OR3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
134+
; SSE2OR3-NEXT: pxor %xmm0, %xmm1
135+
; SSE2OR3-NEXT: pxor %xmm2, %xmm2
136+
; SSE2OR3-NEXT: pcmpgtd %xmm2, %xmm1
137+
; SSE2OR3-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
138+
; SSE2OR3-NEXT: pand %xmm1, %xmm0
139+
; SSE2OR3-NEXT: retq
140+
;
141+
; SSE41-LABEL: usubsat_custom:
142+
; SSE41: # %bb.0:
143+
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
144+
; SSE41-NEXT: pmaxud %xmm1, %xmm0
145+
; SSE41-NEXT: psubd %xmm1, %xmm0
146+
; SSE41-NEXT: retq
147+
;
148+
; AVX1-LABEL: usubsat_custom:
149+
; AVX1: # %bb.0:
150+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
151+
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
152+
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
153+
; AVX1-NEXT: retq
154+
;
155+
; AVX2-LABEL: usubsat_custom:
156+
; AVX2: # %bb.0:
157+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
158+
; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
159+
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
160+
; AVX2-NEXT: retq
161+
;
162+
; AVX512-LABEL: usubsat_custom:
163+
; AVX512: # %bb.0:
164+
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
165+
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
166+
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0
167+
; AVX512-NEXT: retq
168+
%res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
169+
ret <4 x i32> %res
170+
}
171+
128172
define <8 x i16> @test2(<8 x i16> %x) nounwind {
129173
; SSE-LABEL: test2:
130174
; SSE: # %bb.0: # %vector.ph
@@ -302,6 +346,47 @@ vector.ph:
302346
ret <16 x i16> %res
303347
}
304348

349+
; Baseline codegen test for the sign-splat / sign-flip mask pattern on
; <16 x i16>:
;   and (ashr %x, 15), (xor %x, 32768)
; Lane 0 of the xor constant is undef; every other lane is 32768 (0x8000, the
; i16 sign bit).
; For the lanes with a defined constant this equals an unsigned saturating
; subtract of 0x8000: a lane with its sign bit set yields %x - 0x8000, and any
; other lane yields 0.
define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
350+
; SSE-LABEL: ashr_xor_and_v16i16:
351+
; SSE: # %bb.0:
352+
; SSE-NEXT: movdqa %xmm1, %xmm2
353+
; SSE-NEXT: psraw $15, %xmm2
354+
; SSE-NEXT: movdqa %xmm0, %xmm3
355+
; SSE-NEXT: psraw $15, %xmm3
356+
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
357+
; SSE-NEXT: pand %xmm2, %xmm1
358+
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
359+
; SSE-NEXT: pand %xmm3, %xmm0
360+
; SSE-NEXT: retq
361+
;
362+
; AVX1-LABEL: ashr_xor_and_v16i16:
363+
; AVX1: # %bb.0:
364+
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
365+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
366+
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
367+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
368+
; AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
369+
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
370+
; AVX1-NEXT: retq
371+
;
372+
; AVX2-LABEL: ashr_xor_and_v16i16:
373+
; AVX2: # %bb.0:
374+
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
375+
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
376+
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
377+
; AVX2-NEXT: retq
378+
;
379+
; AVX512-LABEL: ashr_xor_and_v16i16:
380+
; AVX512: # %bb.0:
381+
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
382+
; AVX512-NEXT: vpternlogq $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
383+
; AVX512-NEXT: retq
384+
%signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
385+
%flipsign = xor <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
386+
%res = and <16 x i16> %signsplat, %flipsign
387+
ret <16 x i16> %res
388+
}
389+
305390
define <16 x i16> @test8(<16 x i16> %x) nounwind {
306391
; SSE-LABEL: test8:
307392
; SSE: # %bb.0: # %vector.ph

0 commit comments

Comments
 (0)