Skip to content

Commit 789cc81

Browse files
committed
[X86] Add -x86-experimental-vector-widening-legalization command lines to pmulh.ll
I've only added SSE2 and SSE4.1 variants, as I'm only interested in the two v4i16 tests and I don't expect the output to differ with AVX other than a v prefix. llvm-svn: 346834
1 parent dd1a928 commit 789cc81

File tree

1 file changed

+83
-43
lines changed

1 file changed

+83
-43
lines changed

llvm/test/CodeGen/X86/pmulh.ll

Lines changed: 83 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,53 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 --check-prefix=SSE2-PROMOTE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 --check-prefix=SSE2-WIDEN
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 --check-prefix=SSE41-PROMOTE
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 --check-prefix=SSE41-WIDEN
46
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
57
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
68
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
79

810
define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
9-
; SSE2-LABEL: mulhuw_v4i16:
10-
; SSE2: # %bb.0:
11-
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
12-
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
13-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
14-
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
15-
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
16-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
17-
; SSE2-NEXT: pmulhuw %xmm1, %xmm0
18-
; SSE2-NEXT: pxor %xmm1, %xmm1
19-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
20-
; SSE2-NEXT: retq
11+
; SSE2-PROMOTE-LABEL: mulhuw_v4i16:
12+
; SSE2-PROMOTE: # %bb.0:
13+
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
14+
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
15+
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
16+
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
17+
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
18+
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
19+
; SSE2-PROMOTE-NEXT: pmulhuw %xmm1, %xmm0
20+
; SSE2-PROMOTE-NEXT: pxor %xmm1, %xmm1
21+
; SSE2-PROMOTE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
22+
; SSE2-PROMOTE-NEXT: retq
2123
;
22-
; SSE41-LABEL: mulhuw_v4i16:
23-
; SSE41: # %bb.0:
24-
; SSE41-NEXT: pxor %xmm2, %xmm2
25-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
26-
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
27-
; SSE41-NEXT: pmulld %xmm1, %xmm0
28-
; SSE41-NEXT: psrld $16, %xmm0
29-
; SSE41-NEXT: retq
24+
; SSE2-WIDEN-LABEL: mulhuw_v4i16:
25+
; SSE2-WIDEN: # %bb.0:
26+
; SSE2-WIDEN-NEXT: pmulhuw %xmm1, %xmm0
27+
; SSE2-WIDEN-NEXT: pxor %xmm1, %xmm1
28+
; SSE2-WIDEN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
29+
; SSE2-WIDEN-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
30+
; SSE2-WIDEN-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
31+
; SSE2-WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
32+
; SSE2-WIDEN-NEXT: retq
33+
;
34+
; SSE41-PROMOTE-LABEL: mulhuw_v4i16:
35+
; SSE41-PROMOTE: # %bb.0:
36+
; SSE41-PROMOTE-NEXT: pxor %xmm2, %xmm2
37+
; SSE41-PROMOTE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
38+
; SSE41-PROMOTE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
39+
; SSE41-PROMOTE-NEXT: pmulld %xmm1, %xmm0
40+
; SSE41-PROMOTE-NEXT: psrld $16, %xmm0
41+
; SSE41-PROMOTE-NEXT: retq
42+
;
43+
; SSE41-WIDEN-LABEL: mulhuw_v4i16:
44+
; SSE41-WIDEN: # %bb.0:
45+
; SSE41-WIDEN-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
46+
; SSE41-WIDEN-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
47+
; SSE41-WIDEN-NEXT: pmulld %xmm2, %xmm0
48+
; SSE41-WIDEN-NEXT: psrld $16, %xmm0
49+
; SSE41-WIDEN-NEXT: packusdw %xmm0, %xmm0
50+
; SSE41-WIDEN-NEXT: retq
3051
;
3152
; AVX-LABEL: mulhuw_v4i16:
3253
; AVX: # %bb.0:
@@ -45,28 +66,47 @@ define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
4566
}
4667

4768
define <4 x i16> @mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
48-
; SSE2-LABEL: mulhw_v4i16:
49-
; SSE2: # %bb.0:
50-
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
51-
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
52-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
53-
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
54-
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
55-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
56-
; SSE2-NEXT: pmulhw %xmm1, %xmm0
57-
; SSE2-NEXT: pxor %xmm1, %xmm1
58-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
59-
; SSE2-NEXT: retq
69+
; SSE2-PROMOTE-LABEL: mulhw_v4i16:
70+
; SSE2-PROMOTE: # %bb.0:
71+
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
72+
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
73+
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
74+
; SSE2-PROMOTE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
75+
; SSE2-PROMOTE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
76+
; SSE2-PROMOTE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
77+
; SSE2-PROMOTE-NEXT: pmulhw %xmm1, %xmm0
78+
; SSE2-PROMOTE-NEXT: pxor %xmm1, %xmm1
79+
; SSE2-PROMOTE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
80+
; SSE2-PROMOTE-NEXT: retq
81+
;
82+
; SSE2-WIDEN-LABEL: mulhw_v4i16:
83+
; SSE2-WIDEN: # %bb.0:
84+
; SSE2-WIDEN-NEXT: pmulhw %xmm1, %xmm0
85+
; SSE2-WIDEN-NEXT: pxor %xmm1, %xmm1
86+
; SSE2-WIDEN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
87+
; SSE2-WIDEN-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
88+
; SSE2-WIDEN-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
89+
; SSE2-WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
90+
; SSE2-WIDEN-NEXT: retq
91+
;
92+
; SSE41-PROMOTE-LABEL: mulhw_v4i16:
93+
; SSE41-PROMOTE: # %bb.0:
94+
; SSE41-PROMOTE-NEXT: pslld $16, %xmm0
95+
; SSE41-PROMOTE-NEXT: psrad $16, %xmm0
96+
; SSE41-PROMOTE-NEXT: pslld $16, %xmm1
97+
; SSE41-PROMOTE-NEXT: psrad $16, %xmm1
98+
; SSE41-PROMOTE-NEXT: pmulld %xmm1, %xmm0
99+
; SSE41-PROMOTE-NEXT: psrld $16, %xmm0
100+
; SSE41-PROMOTE-NEXT: retq
60101
;
61-
; SSE41-LABEL: mulhw_v4i16:
62-
; SSE41: # %bb.0:
63-
; SSE41-NEXT: pslld $16, %xmm0
64-
; SSE41-NEXT: psrad $16, %xmm0
65-
; SSE41-NEXT: pslld $16, %xmm1
66-
; SSE41-NEXT: psrad $16, %xmm1
67-
; SSE41-NEXT: pmulld %xmm1, %xmm0
68-
; SSE41-NEXT: psrld $16, %xmm0
69-
; SSE41-NEXT: retq
102+
; SSE41-WIDEN-LABEL: mulhw_v4i16:
103+
; SSE41-WIDEN: # %bb.0:
104+
; SSE41-WIDEN-NEXT: pmovsxwd %xmm0, %xmm2
105+
; SSE41-WIDEN-NEXT: pmovsxwd %xmm1, %xmm0
106+
; SSE41-WIDEN-NEXT: pmulld %xmm2, %xmm0
107+
; SSE41-WIDEN-NEXT: psrld $16, %xmm0
108+
; SSE41-WIDEN-NEXT: packusdw %xmm0, %xmm0
109+
; SSE41-WIDEN-NEXT: retq
70110
;
71111
; AVX-LABEL: mulhw_v4i16:
72112
; AVX: # %bb.0:

0 commit comments

Comments
 (0)