|
6 | 6 |
|
7 | 7 | ; should fold select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
|
8 | 8 | define <4 x i32> @masked_select_const(<4 x i32> %a, <4 x i32> %x, <4 x i32> %y) {
|
9 |
| -; SSE2-LABEL: masked_select_const: |
10 |
| -; SSE2: # %bb.0: |
11 |
| -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272] |
12 |
| -; SSE2-NEXT: paddd %xmm0, %xmm3 |
13 |
| -; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 |
14 |
| -; SSE2-NEXT: pand %xmm1, %xmm3 |
15 |
| -; SSE2-NEXT: pandn %xmm0, %xmm1 |
16 |
| -; SSE2-NEXT: por %xmm1, %xmm3 |
17 |
| -; SSE2-NEXT: movdqa %xmm3, %xmm0 |
18 |
| -; SSE2-NEXT: retq |
19 |
| -; |
20 |
| -; SSE41-LABEL: masked_select_const: |
21 |
| -; SSE41: # %bb.0: |
22 |
| -; SSE41-NEXT: movdqa %xmm0, %xmm3 |
23 |
| -; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967272,4294967272,4294967272,4294967272] |
24 |
| -; SSE41-NEXT: paddd %xmm0, %xmm4 |
25 |
| -; SSE41-NEXT: pcmpgtd %xmm2, %xmm1 |
26 |
| -; SSE41-NEXT: movdqa %xmm1, %xmm0 |
27 |
| -; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm3 |
28 |
| -; SSE41-NEXT: movaps %xmm3, %xmm0 |
29 |
| -; SSE41-NEXT: retq |
| 9 | +; SSE-LABEL: masked_select_const: |
| 10 | +; SSE: # %bb.0: |
| 11 | +; SSE-NEXT: pcmpgtd %xmm2, %xmm1 |
| 12 | +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 13 | +; SSE-NEXT: paddd %xmm1, %xmm0 |
| 14 | +; SSE-NEXT: retq |
30 | 15 | ;
|
31 | 16 | ; AVX1-LABEL: masked_select_const:
|
32 | 17 | ; AVX1: # %bb.0:
|
33 |
| -; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 |
34 | 18 | ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
|
35 |
| -; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0 |
| 19 | +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 |
| 20 | +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
36 | 21 | ; AVX1-NEXT: retq
|
37 | 22 | ;
|
38 | 23 | ; AVX2-LABEL: masked_select_const:
|
39 | 24 | ; AVX2: # %bb.0:
|
40 | 25 | ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
|
41 |
| -; AVX2-NEXT: vpaddd %xmm3, %xmm0, %xmm3 |
42 | 26 | ; AVX2-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
|
43 |
| -; AVX2-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0 |
| 27 | +; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1 |
| 28 | +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
44 | 29 | ; AVX2-NEXT: retq
|
45 | 30 | %sub.i = add <4 x i32> %a, <i32 -24, i32 -24, i32 -24, i32 -24>
|
46 | 31 | %cmp.i = icmp sgt <4 x i32> %x, %y
|
|
0 commit comments