Skip to content

Commit ca10a6c

Browse files
committed
[X86] Add test coverage for min/max signbit simplification
If we're only demanding the signbit from a min/max, then we can simplify this to a logic op.
1 parent 96377e5 commit ca10a6c

File tree

4 files changed

+264
-62
lines changed

4 files changed

+264
-62
lines changed

llvm/test/CodeGen/X86/combine-smax.ll

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
33
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
44
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2
8-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2,AVX512F
8+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512BW
99

1010
define <16 x i8> @test_v16i8_nosignbit(<16 x i8> %a, <16 x i8> %b) {
1111
; SSE2-LABEL: test_v16i8_nosignbit:
@@ -87,4 +87,63 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
8787
%2 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
8888
ret <16 x i8> %2
8989
}
90+
91+
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
92+
; SSE2-LABEL: test_v16i8_demandedbits:
93+
; SSE2: # %bb.0:
94+
; SSE2-NEXT: movdqa %xmm0, %xmm4
95+
; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
96+
; SSE2-NEXT: pand %xmm4, %xmm0
97+
; SSE2-NEXT: pandn %xmm1, %xmm4
98+
; SSE2-NEXT: por %xmm0, %xmm4
99+
; SSE2-NEXT: pxor %xmm0, %xmm0
100+
; SSE2-NEXT: pcmpgtb %xmm4, %xmm0
101+
; SSE2-NEXT: pand %xmm0, %xmm3
102+
; SSE2-NEXT: pandn %xmm2, %xmm0
103+
; SSE2-NEXT: por %xmm3, %xmm0
104+
; SSE2-NEXT: retq
105+
;
106+
; SSE41-LABEL: test_v16i8_demandedbits:
107+
; SSE41: # %bb.0:
108+
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
109+
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
110+
; SSE41-NEXT: movdqa %xmm2, %xmm0
111+
; SSE41-NEXT: retq
112+
;
113+
; SSE42-LABEL: test_v16i8_demandedbits:
114+
; SSE42: # %bb.0:
115+
; SSE42-NEXT: pmaxsb %xmm1, %xmm0
116+
; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
117+
; SSE42-NEXT: movdqa %xmm2, %xmm0
118+
; SSE42-NEXT: retq
119+
;
120+
; AVX1OR2-LABEL: test_v16i8_demandedbits:
121+
; AVX1OR2: # %bb.0:
122+
; AVX1OR2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
123+
; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
124+
; AVX1OR2-NEXT: retq
125+
;
126+
; AVX512F-LABEL: test_v16i8_demandedbits:
127+
; AVX512F: # %bb.0:
128+
; AVX512F-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
129+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
130+
; AVX512F-NEXT: retq
131+
;
132+
; AVX512BW-LABEL: test_v16i8_demandedbits:
133+
; AVX512BW: # %bb.0:
134+
; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
135+
; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
136+
; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
137+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
138+
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
139+
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
140+
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
141+
; AVX512BW-NEXT: vzeroupper
142+
; AVX512BW-NEXT: retq
143+
%smax = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
144+
%cmp = icmp sge <16 x i8> %smax, zeroinitializer
145+
%res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
146+
ret <16 x i8> %res
147+
}
148+
90149
declare <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)

llvm/test/CodeGen/X86/combine-smin.ll

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
33
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
44
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2
8-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX2,AVX512F
8+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2,AVX512BW
99

1010
define <16 x i8> @test_v16i8_nosignbit(<16 x i8> %a, <16 x i8> %b) {
1111
; SSE2-LABEL: test_v16i8_nosignbit:
@@ -89,4 +89,63 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
8989
%2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
9090
ret <16 x i8> %2
9191
}
92+
93+
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
94+
; SSE2-LABEL: test_v16i8_demandedbits:
95+
; SSE2: # %bb.0:
96+
; SSE2-NEXT: movdqa %xmm1, %xmm4
97+
; SSE2-NEXT: pcmpgtb %xmm0, %xmm4
98+
; SSE2-NEXT: pand %xmm4, %xmm0
99+
; SSE2-NEXT: pandn %xmm1, %xmm4
100+
; SSE2-NEXT: por %xmm0, %xmm4
101+
; SSE2-NEXT: pxor %xmm0, %xmm0
102+
; SSE2-NEXT: pcmpgtb %xmm4, %xmm0
103+
; SSE2-NEXT: pand %xmm0, %xmm3
104+
; SSE2-NEXT: pandn %xmm2, %xmm0
105+
; SSE2-NEXT: por %xmm3, %xmm0
106+
; SSE2-NEXT: retq
107+
;
108+
; SSE41-LABEL: test_v16i8_demandedbits:
109+
; SSE41: # %bb.0:
110+
; SSE41-NEXT: pminsb %xmm1, %xmm0
111+
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
112+
; SSE41-NEXT: movdqa %xmm2, %xmm0
113+
; SSE41-NEXT: retq
114+
;
115+
; SSE42-LABEL: test_v16i8_demandedbits:
116+
; SSE42: # %bb.0:
117+
; SSE42-NEXT: pminsb %xmm1, %xmm0
118+
; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
119+
; SSE42-NEXT: movdqa %xmm2, %xmm0
120+
; SSE42-NEXT: retq
121+
;
122+
; AVX1OR2-LABEL: test_v16i8_demandedbits:
123+
; AVX1OR2: # %bb.0:
124+
; AVX1OR2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
125+
; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
126+
; AVX1OR2-NEXT: retq
127+
;
128+
; AVX512F-LABEL: test_v16i8_demandedbits:
129+
; AVX512F: # %bb.0:
130+
; AVX512F-NEXT: vpminsb %xmm1, %xmm0, %xmm0
131+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
132+
; AVX512F-NEXT: retq
133+
;
134+
; AVX512BW-LABEL: test_v16i8_demandedbits:
135+
; AVX512BW: # %bb.0:
136+
; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
137+
; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
138+
; AVX512BW-NEXT: vpminsb %xmm1, %xmm0, %xmm0
139+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
140+
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
141+
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
142+
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
143+
; AVX512BW-NEXT: vzeroupper
144+
; AVX512BW-NEXT: retq
145+
%smin = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
146+
%cmp = icmp sge <16 x i8> %smin, zeroinitializer
147+
%res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
148+
ret <16 x i8> %res
149+
}
150+
92151
declare <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)

llvm/test/CodeGen/X86/combine-umax.ll

Lines changed: 69 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
7-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
8-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
8+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512BW
99

1010
define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
1111
; SSE2-LABEL: test_v8i16_nosignbit:
@@ -43,26 +43,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
4343
}
4444

4545
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
46-
; SSE2-LABEL: test_v16i8_reassociation:
47-
; SSE2: # %bb.0:
48-
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
49-
; SSE2-NEXT: pmaxub %xmm1, %xmm0
50-
; SSE2-NEXT: pmaxub %xmm1, %xmm0
51-
; SSE2-NEXT: retq
52-
;
53-
; SSE41-LABEL: test_v16i8_reassociation:
54-
; SSE41: # %bb.0:
55-
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
56-
; SSE41-NEXT: pmaxub %xmm1, %xmm0
57-
; SSE41-NEXT: pmaxub %xmm1, %xmm0
58-
; SSE41-NEXT: retq
59-
;
60-
; SSE42-LABEL: test_v16i8_reassociation:
61-
; SSE42: # %bb.0:
62-
; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
63-
; SSE42-NEXT: pmaxub %xmm1, %xmm0
64-
; SSE42-NEXT: pmaxub %xmm1, %xmm0
65-
; SSE42-NEXT: retq
46+
; SSE-LABEL: test_v16i8_reassociation:
47+
; SSE: # %bb.0:
48+
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
49+
; SSE-NEXT: pmaxub %xmm1, %xmm0
50+
; SSE-NEXT: pmaxub %xmm1, %xmm0
51+
; SSE-NEXT: retq
6652
;
6753
; AVX-LABEL: test_v16i8_reassociation:
6854
; AVX: # %bb.0:
@@ -74,4 +60,60 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
7460
%2 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
7561
ret <16 x i8> %2
7662
}
63+
64+
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
65+
; SSE2-LABEL: test_v16i8_demandedbits:
66+
; SSE2: # %bb.0:
67+
; SSE2-NEXT: pmaxub %xmm1, %xmm0
68+
; SSE2-NEXT: pxor %xmm1, %xmm1
69+
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
70+
; SSE2-NEXT: pand %xmm1, %xmm3
71+
; SSE2-NEXT: pandn %xmm2, %xmm1
72+
; SSE2-NEXT: por %xmm3, %xmm1
73+
; SSE2-NEXT: movdqa %xmm1, %xmm0
74+
; SSE2-NEXT: retq
75+
;
76+
; SSE41-LABEL: test_v16i8_demandedbits:
77+
; SSE41: # %bb.0:
78+
; SSE41-NEXT: orps %xmm1, %xmm0
79+
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
80+
; SSE41-NEXT: movdqa %xmm2, %xmm0
81+
; SSE41-NEXT: retq
82+
;
83+
; SSE42-LABEL: test_v16i8_demandedbits:
84+
; SSE42: # %bb.0:
85+
; SSE42-NEXT: orps %xmm1, %xmm0
86+
; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
87+
; SSE42-NEXT: movdqa %xmm2, %xmm0
88+
; SSE42-NEXT: retq
89+
;
90+
; AVX1OR2-LABEL: test_v16i8_demandedbits:
91+
; AVX1OR2: # %bb.0:
92+
; AVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
93+
; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
94+
; AVX1OR2-NEXT: retq
95+
;
96+
; AVX512F-LABEL: test_v16i8_demandedbits:
97+
; AVX512F: # %bb.0:
98+
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
99+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
100+
; AVX512F-NEXT: retq
101+
;
102+
; AVX512BW-LABEL: test_v16i8_demandedbits:
103+
; AVX512BW: # %bb.0:
104+
; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
105+
; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
106+
; AVX512BW-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
107+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
108+
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
109+
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
110+
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
111+
; AVX512BW-NEXT: vzeroupper
112+
; AVX512BW-NEXT: retq
113+
%umax = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
114+
%cmp = icmp sge <16 x i8> %umax, zeroinitializer
115+
%res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
116+
ret <16 x i8> %res
117+
}
118+
77119
declare <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)

llvm/test/CodeGen/X86/combine-umin.ll

Lines changed: 69 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
7-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX
8-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
8+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512BW
99

1010
define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) {
1111
; CHECK-LABEL: test_demandedbits_umin_ult:
@@ -60,26 +60,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
6060
}
6161

6262
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
63-
; SSE2-LABEL: test_v16i8_reassociation:
64-
; SSE2: # %bb.0:
65-
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
66-
; SSE2-NEXT: pminub %xmm1, %xmm0
67-
; SSE2-NEXT: pminub %xmm1, %xmm0
68-
; SSE2-NEXT: retq
69-
;
70-
; SSE41-LABEL: test_v16i8_reassociation:
71-
; SSE41: # %bb.0:
72-
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
73-
; SSE41-NEXT: pminub %xmm1, %xmm0
74-
; SSE41-NEXT: pminub %xmm1, %xmm0
75-
; SSE41-NEXT: retq
76-
;
77-
; SSE42-LABEL: test_v16i8_reassociation:
78-
; SSE42: # %bb.0:
79-
; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
80-
; SSE42-NEXT: pminub %xmm1, %xmm0
81-
; SSE42-NEXT: pminub %xmm1, %xmm0
82-
; SSE42-NEXT: retq
63+
; SSE-LABEL: test_v16i8_reassociation:
64+
; SSE: # %bb.0:
65+
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
66+
; SSE-NEXT: pminub %xmm1, %xmm0
67+
; SSE-NEXT: pminub %xmm1, %xmm0
68+
; SSE-NEXT: retq
8369
;
8470
; AVX-LABEL: test_v16i8_reassociation:
8571
; AVX: # %bb.0:
@@ -91,4 +77,60 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
9177
%2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
9278
ret <16 x i8> %2
9379
}
80+
81+
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
82+
; SSE2-LABEL: test_v16i8_demandedbits:
83+
; SSE2: # %bb.0:
84+
; SSE2-NEXT: pminub %xmm1, %xmm0
85+
; SSE2-NEXT: pxor %xmm1, %xmm1
86+
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
87+
; SSE2-NEXT: pand %xmm1, %xmm3
88+
; SSE2-NEXT: pandn %xmm2, %xmm1
89+
; SSE2-NEXT: por %xmm3, %xmm1
90+
; SSE2-NEXT: movdqa %xmm1, %xmm0
91+
; SSE2-NEXT: retq
92+
;
93+
; SSE41-LABEL: test_v16i8_demandedbits:
94+
; SSE41: # %bb.0:
95+
; SSE41-NEXT: andps %xmm1, %xmm0
96+
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
97+
; SSE41-NEXT: movdqa %xmm2, %xmm0
98+
; SSE41-NEXT: retq
99+
;
100+
; SSE42-LABEL: test_v16i8_demandedbits:
101+
; SSE42: # %bb.0:
102+
; SSE42-NEXT: andps %xmm1, %xmm0
103+
; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
104+
; SSE42-NEXT: movdqa %xmm2, %xmm0
105+
; SSE42-NEXT: retq
106+
;
107+
; AVX1OR2-LABEL: test_v16i8_demandedbits:
108+
; AVX1OR2: # %bb.0:
109+
; AVX1OR2-NEXT: vpand %xmm1, %xmm0, %xmm0
110+
; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
111+
; AVX1OR2-NEXT: retq
112+
;
113+
; AVX512F-LABEL: test_v16i8_demandedbits:
114+
; AVX512F: # %bb.0:
115+
; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
116+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
117+
; AVX512F-NEXT: retq
118+
;
119+
; AVX512BW-LABEL: test_v16i8_demandedbits:
120+
; AVX512BW: # %bb.0:
121+
; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
122+
; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
123+
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
124+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
125+
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
126+
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
127+
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
128+
; AVX512BW-NEXT: vzeroupper
129+
; AVX512BW-NEXT: retq
130+
%umin = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
131+
%cmp = icmp sge <16 x i8> %umin, zeroinitializer
132+
%res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
133+
ret <16 x i8> %res
134+
}
135+
94136
declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)

0 commit comments

Comments (0)