Skip to content

Commit b12b999

Browse files
committed
[ValueTracking][X86][NFC] Add Tests for KnownBits of phadd/phsub
1 parent 026686b commit b12b999

File tree

2 files changed

+468
-0
lines changed

2 files changed

+468
-0
lines changed
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
3+
4+
; Baseline test for the 128-bit SSSE3 phadd.d intrinsic on <4 x i32>.
; Both inputs are masked to their low two bits (and with 3) before the call; the
; result is masked with -8 (low three bits cleared) and compared lane-wise for
; equality against <3, 4, 5, 6>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phadd.d sums adjacent lanes, every result lane is at most 6, so
; the masked value would be known zero and the compare foldable -- confirm against
; the intrinsic's definition.
define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
5+
; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
6+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3>
9+
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3>
10+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
11+
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 -8, i32 -8, i32 -8, i32 -8>
12+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <4 x i32> [[TMP3]], <i32 3, i32 4, i32 5, i32 6>
13+
; CHECK-NEXT: ret <4 x i1> [[RET]]
14+
;
15+
entry:
16+
%0 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
17+
%1 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
18+
%2 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %0, <4 x i32> %1)
19+
%3 = and <4 x i32> %2, <i32 -8, i32 -8, i32 -8, i32 -8>
20+
%ret = icmp eq <4 x i32> %3, <i32 3, i32 4, i32 5, i32 6>
21+
ret <4 x i1> %ret
22+
}
23+
24+
; Baseline test for the 128-bit SSSE3 phadd.w intrinsic on <8 x i16>.
; Both inputs are masked to their low two bits (and with 3) before the call; the
; result is masked with -8 (low three bits cleared) and compared lane-wise for
; equality against <0, 1, 2, 3, 4, 5, 6, 0>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phadd.w sums adjacent lanes, every result lane is at most 6, so
; the masked value would be known zero; only the lanes compared against 0 could
; then be true -- confirm against the intrinsic's definition.
define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
25+
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
26+
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
27+
; CHECK-NEXT: entry:
28+
; CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
29+
; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
30+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
31+
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
32+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
33+
; CHECK-NEXT: ret <8 x i1> [[RET]]
34+
;
35+
entry:
36+
%0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
37+
%1 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
38+
%2 = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %0, <8 x i16> %1)
39+
%3 = and <8 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
40+
%ret = icmp eq <8 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
41+
ret <8 x i1> %ret
42+
}
43+
44+
; Baseline test for the 128-bit SSSE3 phadd.sw intrinsic on <8 x i16>.
; Same shape as the phadd.w test: both inputs are masked with 3, the intrinsic
; result is masked with -8, and the outcome is compared lane-wise for equality
; against <0, 1, 2, 3, 4, 5, 6, 0>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): the "_sat" suffix and ".sw" presumably denote the signed-saturating
; form; with operand lanes at most 3 a pairwise sum would not reach saturation --
; confirm against the intrinsic's definition.
define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
45+
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
46+
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
47+
; CHECK-NEXT: entry:
48+
; CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
49+
; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
50+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
51+
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
52+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
53+
; CHECK-NEXT: ret <8 x i1> [[RET]]
54+
;
55+
entry:
56+
%0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
57+
%1 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
58+
%2 = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %0, <8 x i16> %1)
59+
%3 = and <8 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
60+
%ret = icmp eq <8 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
61+
ret <8 x i1> %ret
62+
}
63+
64+
; Baseline test for the AVX2 phadd.d intrinsic on <8 x i32>.
; Both inputs are masked to their low two bits (and with 3) before the call; the
; result is masked with -8 (low three bits cleared) and compared lane-wise for
; equality against <3, 4, 5, 6, 3, 4, 5, 6>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phadd.d sums adjacent lanes, every result lane is at most 6, so
; the masked value would be known zero and the compare foldable -- confirm against
; the intrinsic's definition.
define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
65+
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
66+
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
67+
; CHECK-NEXT: entry:
68+
; CHECK-NEXT: [[TMP0:%.*]] = and <8 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
69+
; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
70+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP0]], <8 x i32> [[TMP1]])
71+
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i32> [[TMP2]], <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
72+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i32> [[TMP3]], <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
73+
; CHECK-NEXT: ret <8 x i1> [[RET]]
74+
;
75+
entry:
76+
%0 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
77+
%1 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
78+
%2 = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %0, <8 x i32> %1)
79+
%3 = and <8 x i32> %2, <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
80+
%ret = icmp eq <8 x i32> %3, <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
81+
ret <8 x i1> %ret
82+
}
83+
84+
; Baseline test for the AVX2 phadd.w intrinsic on <16 x i16>.
; Both inputs are masked to their low two bits (and with 3) before the call; the
; result is masked with -8 (low three bits cleared) and compared lane-wise for
; equality against the pattern <0, 1, 2, 3, 4, 5, 6, 0> repeated twice.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phadd.w sums adjacent lanes, every result lane is at most 6, so
; the masked value would be known zero; only the lanes compared against 0 could
; then be true -- confirm against the intrinsic's definition.
define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
85+
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
86+
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
87+
; CHECK-NEXT: entry:
88+
; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
89+
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
90+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
91+
; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
92+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <16 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
93+
; CHECK-NEXT: ret <16 x i1> [[RET]]
94+
;
95+
entry:
96+
%0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
97+
%1 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
98+
%2 = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %0, <16 x i16> %1)
99+
%3 = and <16 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
100+
%ret = icmp eq <16 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
101+
ret <16 x i1> %ret
102+
}
103+
104+
; Baseline test for the AVX2 phadd.sw intrinsic on <16 x i16>.
; Same shape as the AVX2 phadd.w test: both inputs are masked with 3, the intrinsic
; result is masked with -8, and the outcome is compared lane-wise for equality
; against the pattern <0, 1, 2, 3, 4, 5, 6, 0> repeated twice.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): the "_sat" suffix and ".sw" presumably denote the signed-saturating
; form; with operand lanes at most 3 a pairwise sum would not reach saturation --
; confirm against the intrinsic's definition.
define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
105+
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
106+
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
107+
; CHECK-NEXT: entry:
108+
; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
109+
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
110+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
111+
; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
112+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <16 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
113+
; CHECK-NEXT: ret <16 x i1> [[RET]]
114+
;
115+
entry:
116+
%0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
117+
%1 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
118+
%2 = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %0, <16 x i16> %1)
119+
%3 = and <16 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
120+
%ret = icmp eq <16 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
121+
ret <16 x i1> %ret
122+
}
123+
124+
; Baseline test for the 128-bit SSSE3 phsub.d intrinsic on <4 x i32>.
; Both inputs have their low 16 bits forced to one (or with 65535) before the call;
; the result is truncated to <4 x i16> and compared lane-wise for equality against
; <3, 4, 5, 6>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phsub.d subtracts adjacent lanes, the low 16 bits of every
; difference would be zero, so the truncated value would be known zero and the
; compare foldable -- confirm against the intrinsic's definition.
define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
125+
; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
126+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
127+
; CHECK-NEXT: entry:
128+
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[X]], <i32 65535, i32 65535, i32 65535, i32 65535>
129+
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Y]], <i32 65535, i32 65535, i32 65535, i32 65535>
130+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
131+
; CHECK-NEXT: [[CONV:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
132+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <4 x i16> [[CONV]], <i16 3, i16 4, i16 5, i16 6>
133+
; CHECK-NEXT: ret <4 x i1> [[RET]]
134+
;
135+
entry:
136+
%0 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
137+
%1 = or <4 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535>
138+
%2 = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %0, <4 x i32> %1)
139+
%conv = trunc <4 x i32> %2 to <4 x i16>
140+
%ret = icmp eq <4 x i16> %conv, <i16 3, i16 4, i16 5, i16 6>
141+
ret <4 x i1> %ret
142+
}
143+
144+
; Baseline test for the 128-bit SSSE3 phsub.w intrinsic on <8 x i16>.
; Both inputs have their low 8 bits forced to one (or with 255) before the call;
; the result is truncated to <8 x i8> and compared lane-wise for equality against
; <0, 1, 2, 3, 4, 5, 6, 0>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phsub.w subtracts adjacent lanes, the low 8 bits of every
; difference would be zero, so only the lanes compared against 0 could be true --
; confirm against the intrinsic's definition.
define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
145+
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
146+
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
147+
; CHECK-NEXT: entry:
148+
; CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[X]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
149+
; CHECK-NEXT: [[TMP1:%.*]] = or <8 x i16> [[Y]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
150+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
151+
; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
152+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i8> [[CONV]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
153+
; CHECK-NEXT: ret <8 x i1> [[RET]]
154+
;
155+
entry:
156+
%0 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
157+
%1 = or <8 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
158+
%2 = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %0, <8 x i16> %1)
159+
%conv = trunc <8 x i16> %2 to <8 x i8>
160+
%ret = icmp eq <8 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
161+
ret <8 x i1> %ret
162+
}
163+
164+
; Baseline test for the 128-bit SSSE3 phsub.sw intrinsic on <8 x i16>.
; Both inputs have their low three bits forced to one (or with 7) before the call;
; the low three bits of the result are then extracted (and with 7) and compared
; lane-wise for equality against zero.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): the "_sat" suffix and ".sw" presumably denote the signed-saturating
; form; the inputs' high bits are unconstrained here, so a saturated result may not
; keep zero low bits and this may be intended as a case that must not fold --
; confirm against the intrinsic's definition.
define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
165+
; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat(
166+
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
167+
; CHECK-NEXT: entry:
168+
; CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[X]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
169+
; CHECK-NEXT: [[TMP1:%.*]] = or <8 x i16> [[Y]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
170+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
171+
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
172+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[TMP3]], zeroinitializer
173+
; CHECK-NEXT: ret <8 x i1> [[TMP4]]
174+
;
175+
entry:
176+
%0 = or <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
177+
%1 = or <8 x i16> %y, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
178+
%2 = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %0, <8 x i16> %1)
179+
%3 = and <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
180+
%4 = icmp eq <8 x i16> %3, zeroinitializer
181+
ret <8 x i1> %4
182+
}
183+
184+
; Baseline test for the AVX2 phsub.d intrinsic on <8 x i32>.
; Both inputs have their low 16 bits forced to one (or with 65535) before the call;
; the result is truncated to <8 x i16> and compared lane-wise for equality against
; <3, 4, 5, 6, 3, 4, 5, 6>.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phsub.d subtracts adjacent lanes, the low 16 bits of every
; difference would be zero, so the truncated value would be known zero and the
; compare foldable -- confirm against the intrinsic's definition.
define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
185+
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
186+
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
187+
; CHECK-NEXT: entry:
188+
; CHECK-NEXT: [[TMP0:%.*]] = or <8 x i32> [[X]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
189+
; CHECK-NEXT: [[TMP1:%.*]] = or <8 x i32> [[Y]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
190+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP0]], <8 x i32> [[TMP1]])
191+
; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>
192+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i16> [[CONV]], <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
193+
; CHECK-NEXT: ret <8 x i1> [[RET]]
194+
;
195+
entry:
196+
%0 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
197+
%1 = or <8 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
198+
%2 = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %0, <8 x i32> %1)
199+
%conv = trunc <8 x i32> %2 to <8 x i16>
200+
%ret = icmp eq <8 x i16> %conv, <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
201+
ret <8 x i1> %ret
202+
}
203+
204+
; Baseline test for the AVX2 phsub.w intrinsic on <16 x i16>.
; Both inputs have their low 8 bits forced to one (or with 255) before the call;
; the result is truncated to <16 x i8> and compared lane-wise for equality against
; the pattern <0, 1, 2, 3, 4, 5, 6, 0> repeated twice.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): if phsub.w subtracts adjacent lanes, the low 8 bits of every
; difference would be zero, so only the lanes compared against 0 could be true --
; confirm against the intrinsic's definition.
define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
205+
; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
206+
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
207+
; CHECK-NEXT: entry:
208+
; CHECK-NEXT: [[TMP0:%.*]] = or <16 x i16> [[X]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
209+
; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i16> [[Y]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
210+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
211+
; CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
212+
; CHECK-NEXT: [[RET:%.*]] = icmp eq <16 x i8> [[CONV]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
213+
; CHECK-NEXT: ret <16 x i1> [[RET]]
214+
;
215+
entry:
216+
%0 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
217+
%1 = or <16 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
218+
%2 = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %0, <16 x i16> %1)
219+
%conv = trunc <16 x i16> %2 to <16 x i8>
220+
%ret = icmp eq <16 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
221+
ret <16 x i1> %ret
222+
}
223+
224+
; Baseline test for the AVX2 phsub.sw intrinsic on <16 x i16>.
; Both inputs have their low three bits forced to one (or with 7) before the call;
; the low three bits of the result are then extracted (and with 7) and compared
; lane-wise for equality against zero.
; The autogenerated assertions mirror the input IR, i.e. instcombine is currently
; expected to leave the sequence unchanged.
; NOTE(review): the "_sat" suffix and ".sw" presumably denote the signed-saturating
; form; the inputs' high bits are unconstrained here, so a saturated result may not
; keep zero low bits and this may be intended as a case that must not fold --
; confirm against the intrinsic's definition.
define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
225+
; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat(
226+
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
227+
; CHECK-NEXT: entry:
228+
; CHECK-NEXT: [[TMP0:%.*]] = or <16 x i16> [[X]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
229+
; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i16> [[Y]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
230+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
231+
; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
232+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <16 x i16> [[TMP3]], zeroinitializer
233+
; CHECK-NEXT: ret <16 x i1> [[TMP4]]
234+
;
235+
entry:
236+
%0 = or <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
237+
%1 = or <16 x i16> %y, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
238+
%2 = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %0, <16 x i16> %1)
239+
%3 = and <16 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
240+
%4 = icmp eq <16 x i16> %3, zeroinitializer
241+
ret <16 x i1> %4
242+
}

0 commit comments

Comments
 (0)