; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
- ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
- ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
- ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; Combine tests involving SSE41 target shuffles (BLEND,INSERTPS,MOVZX)
@@ -29,6 +29,30 @@ define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1)
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: combine_blend_of_permutes_v4i32:
+ ; AVX1: # %bb.0:
+ ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+ ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+ ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: combine_blend_of_permutes_v4i32:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+ ; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+ ; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: combine_blend_of_permutes_v4i32:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+ ; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm2 = [2,19,0,17]
+ ; AVX512-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+ ; AVX512-NEXT: vzeroupper
+ ; AVX512-NEXT: retq
%s0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
%s1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
%x0 = bitcast <2 x i64> %s0 to <4 x i32>
@@ -71,6 +95,107 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
; SSE-NEXT: pand %xmm5, %xmm1
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
+ ;
+ ; AVX1-LABEL: PR50049:
+ ; AVX1: # %bb.0:
+ ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+ ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
+ ; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
+ ; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+ ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+ ; AVX1-NEXT: vmovdqa (%rsi), %xmm2
+ ; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5
+ ; AVX1-NEXT: vmovdqa 32(%rsi), %xmm6
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+ ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+ ; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
+ ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm5
+ ; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
+ ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+ ; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+ ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+ ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+ ; AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1
+ ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+ ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+ ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+ ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+ ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+ ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+ ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: PR50049:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+ ; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
+ ; AVX2-NEXT: vmovdqa 32(%rdi), %xmm2
+ ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
+ ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+ ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
+ ; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+ ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
+ ; AVX2-NEXT: vmovdqa (%rsi), %xmm2
+ ; AVX2-NEXT: vmovdqa 16(%rsi), %xmm5
+ ; AVX2-NEXT: vmovdqa 32(%rsi), %xmm6
+ ; AVX2-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+ ; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+ ; AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2
+ ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
+ ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+ ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
+ ; AVX2-NEXT: vpshufb %xmm4, %xmm5, %xmm5
+ ; AVX2-NEXT: vpor %xmm5, %xmm2, %xmm2
+ ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+ ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+ ; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+ ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+ ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+ ; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
+ ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+ ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+ ; AVX2-NEXT: vzeroupper
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512-LABEL: PR50049:
+ ; AVX512: # %bb.0:
+ ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+ ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
+ ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
+ ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
+ ; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+ ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
+ ; AVX512-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+ ; AVX512-NEXT: vpor %xmm2, %xmm0, %xmm0
+ ; AVX512-NEXT: vmovdqa (%rsi), %xmm2
+ ; AVX512-NEXT: vmovdqa 16(%rsi), %xmm5
+ ; AVX512-NEXT: vmovdqa 32(%rsi), %xmm6
+ ; AVX512-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+ ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+ ; AVX512-NEXT: vpor %xmm3, %xmm2, %xmm2
+ ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
+ ; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+ ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
+ ; AVX512-NEXT: vpshufb %xmm4, %xmm5, %xmm5
+ ; AVX512-NEXT: vpor %xmm5, %xmm2, %xmm2
+ ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+ ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+ ; AVX512-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+ ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+ ; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+ ; AVX512-NEXT: vpmullw %ymm2, %ymm0, %ymm0
+ ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+ ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+ ; AVX512-NEXT: vzeroupper
+ ; AVX512-NEXT: retq
%x1 = load <48 x i8>, ptr %p1, align 16
%x2 = load <48 x i8>, ptr %p2, align 16
%s1 = shufflevector <48 x i8> %x1, <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>