 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
 
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+
 define <8 x i16> @test1(<8 x i16> %x) nounwind {
 ; SSE-LABEL: test1:
 ; SSE:       # %bb.0: # %vector.ph
@@ -90,7 +92,7 @@ define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x
   ret <16 x i8> %res
 }
 
-define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
+define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
 ; SSE-LABEL: ashr_xor_and_custom:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
@@ -125,6 +127,48 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %
   ret <4 x i32> %res
 }
 
+define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
+; SSE2OR3-LABEL: usubsat_custom:
+; SSE2OR3:       # %bb.0:
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT:    pxor %xmm0, %xmm1
+; SSE2OR3-NEXT:    pxor %xmm2, %xmm2
+; SSE2OR3-NEXT:    pcmpgtd %xmm2, %xmm1
+; SSE2OR3-NEXT:    psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2OR3-NEXT:    pand %xmm1, %xmm0
+; SSE2OR3-NEXT:    retq
+;
+; SSE41-LABEL: usubsat_custom:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
+; SSE41-NEXT:    pmaxud %xmm1, %xmm0
+; SSE41-NEXT:    psubd %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: usubsat_custom:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: usubsat_custom:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: usubsat_custom:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
+  ret <4 x i32> %res
+}
+
 define <8 x i16> @test2(<8 x i16> %x) nounwind {
 ; SSE-LABEL: test2:
 ; SSE:       # %bb.0: # %vector.ph
@@ -302,6 +346,47 @@ vector.ph:
   ret <16 x i16> %res
 }
 
+define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
+; SSE-LABEL: ashr_xor_and_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    psraw $15, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psraw $15, %xmm3
+; SSE-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    pand %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: ashr_xor_and_v16i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsraw $15, %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ashr_xor_and_v16i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: ashr_xor_and_v16i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm1
+; AVX512-NEXT:    vpternlogq $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512-NEXT:    retq
+  %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %flipsign = xor <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+  %res = and <16 x i16> %signsplat, %flipsign
+  ret <16 x i16> %res
+}
+
 define <16 x i16> @test8(<16 x i16> %x) nounwind {
 ; SSE-LABEL: test8:
 ; SSE:       # %bb.0: # %vector.ph
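Note on what the new tests exercise (a sketch for readers of this diff, not part of the patch): llvm.usub.sat is unsigned saturating subtraction, usub.sat(x, c) = x >= c ? x - c : 0. Since umax(x, c) - c computes the same value, the SSE41/AVX CHECK lines above expect pmaxud/vpmaxud followed by psubd/vpsubd. When the subtracted constant is the sign bit, the same operation can also be spelled as the ashr/xor/and idiom matched by ashr_xor_and_v16i16. The two functions below (names @sat_direct and @sat_expanded are illustrative, not from the patch) show both spellings at v4i32:

; Minimal equivalence sketch: both functions compute
; usub.sat(%x, 0x80000000) lane by lane.
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @sat_direct(<4 x i32> %x) nounwind {
  ; Direct intrinsic form: umax(x, c) - c, hence the pmaxud + psubd
  ; pair in the usubsat_custom CHECK lines.
  %res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>)
  ret <4 x i32> %res
}

define <4 x i32> @sat_expanded(<4 x i32> %x) nounwind {
  ; Expanded form: ashr by 31 builds an all-ones mask exactly for lanes
  ; whose sign bit is set; xor with 0x80000000 clears that bit (i.e.
  ; subtracts it where it was set); the and zeroes the remaining lanes.
  %signsplat = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %flipsign = xor <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %res = and <4 x i32> %signsplat, %flipsign
  ret <4 x i32> %res
}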