; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512BW

define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) {
; CHECK-LABEL: test_demandedbits_umin_ult:
@@ -60,26 +60,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
}

define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
- ; SSE2-LABEL: test_v16i8_reassociation:
- ; SSE2: # %bb.0:
- ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- ; SSE2-NEXT: pminub %xmm1, %xmm0
- ; SSE2-NEXT: pminub %xmm1, %xmm0
- ; SSE2-NEXT: retq
- ;
- ; SSE41-LABEL: test_v16i8_reassociation:
- ; SSE41: # %bb.0:
- ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- ; SSE41-NEXT: pminub %xmm1, %xmm0
- ; SSE41-NEXT: pminub %xmm1, %xmm0
- ; SSE41-NEXT: retq
- ;
- ; SSE42-LABEL: test_v16i8_reassociation:
- ; SSE42: # %bb.0:
- ; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- ; SSE42-NEXT: pminub %xmm1, %xmm0
- ; SSE42-NEXT: pminub %xmm1, %xmm0
- ; SSE42-NEXT: retq
+ ; SSE-LABEL: test_v16i8_reassociation:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+ ; SSE-NEXT: pminub %xmm1, %xmm0
+ ; SSE-NEXT: pminub %xmm1, %xmm0
+ ; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
@@ -91,4 +77,60 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
%2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %2
}
+
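+ ; Only the sign bit of the umin result is demanded by the sign-compare
+ ; select below, and signbit(umin(x,y)) == signbit(x) & signbit(y), which
+ ; is why the blendv targets lower this to a single AND feeding the blend.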
+ define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
+ ; SSE2-LABEL: test_v16i8_demandedbits:
+ ; SSE2: # %bb.0:
+ ; SSE2-NEXT: pminub %xmm1, %xmm0
+ ; SSE2-NEXT: pxor %xmm1, %xmm1
+ ; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
+ ; SSE2-NEXT: pand %xmm1, %xmm3
+ ; SSE2-NEXT: pandn %xmm2, %xmm1
+ ; SSE2-NEXT: por %xmm3, %xmm1
+ ; SSE2-NEXT: movdqa %xmm1, %xmm0
+ ; SSE2-NEXT: retq
+ ;
+ ; SSE41-LABEL: test_v16i8_demandedbits:
+ ; SSE41: # %bb.0:
+ ; SSE41-NEXT: andps %xmm1, %xmm0
+ ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+ ; SSE41-NEXT: movdqa %xmm2, %xmm0
+ ; SSE41-NEXT: retq
+ ;
+ ; SSE42-LABEL: test_v16i8_demandedbits:
+ ; SSE42: # %bb.0:
+ ; SSE42-NEXT: andps %xmm1, %xmm0
+ ; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
+ ; SSE42-NEXT: movdqa %xmm2, %xmm0
+ ; SSE42-NEXT: retq
+ ;
+ ; AVX1OR2-LABEL: test_v16i8_demandedbits:
+ ; AVX1OR2: # %bb.0:
+ ; AVX1OR2-NEXT: vpand %xmm1, %xmm0, %xmm0
+ ; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+ ; AVX1OR2-NEXT: retq
+ ;
+ ; AVX512F-LABEL: test_v16i8_demandedbits:
+ ; AVX512F: # %bb.0:
+ ; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
+ ; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
+ ; AVX512F-NEXT: retq
+ ;
+ ; AVX512BW-LABEL: test_v16i8_demandedbits:
+ ; AVX512BW: # %bb.0:
+ ; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
+ ; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+ ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+ ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+ ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
+ ; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
+ ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+ ; AVX512BW-NEXT: vzeroupper
+ ; AVX512BW-NEXT: retq
+ %umin = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %cmp = icmp sge <16 x i8> %umin, zeroinitializer
+ %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %res
+ }
+
declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)