
Commit 84ae50e

Author: git apple-llvm automerger
Commit message: Merge commit '6c7853080451' from llvm.org/main into next
Parents: b327310, 6c78530


llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll

Lines changed: 129 additions & 4 deletions
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
 
 ; Combine tests involving SSE41 target shuffles (BLEND,INSERTPS,MOVZX)
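The RUN-line change replaces each target's single --check-prefix with a --check-prefixes list, so the three AVX targets keep sharing the AVX-prefixed checks where their codegen agrees while gaining AVX1/AVX2/AVX512 checks where it diverges, as in the hunks below. A minimal sketch of the FileCheck mechanism, using a hypothetical test function that is not part of this commit:

; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
define <4 x i32> @ret_arg(<4 x i32> %v) {
; AVX-LABEL: ret_arg:
; AVX: retq
  ret <4 x i32> %v
}

Both runs enforce the shared AVX lines; a line prefixed AVX1 or AVX512 is enforced only by the matching run, which is what lets the per-target output below differ without duplicating the common checks.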

@@ -29,6 +29,30 @@ define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1)
 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
 ; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_blend_of_permutes_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_blend_of_permutes_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_blend_of_permutes_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm2 = [2,19,0,17]
+; AVX512-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 %s0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
 %s1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
 %x0 = bitcast <2 x i64> %s0 to <4 x i32>
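In the new AVX512 output above, the '# kill' annotations mark the xmm arguments as living in full zmm registers so that a single vpermt2d can perform the whole blend-of-permutes. vpermt2d indexes a 32-dword table formed from its two zmm sources, indices 0-15 selecting the first and 16-31 the second, so in [2,19,0,17] the entries 19 and 17 pick dwords 3 and 1 of the second input. On the <4 x i32> views of the original arguments, the combined operation collapses to one shufflevector; a hypothetical equivalent for illustration, not taken from the test file:

define <4 x i32> @blend_of_permutes(<4 x i32> %v0, <4 x i32> %v1) {
  ; result = [ %v0[2], %v1[3], %v0[0], %v1[1] ]; mask entries 4-7 select from
  ; the second operand, mirroring vpermt2d indices 16-31 on the zmm-wide table
  %r = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
  ret <4 x i32> %r
}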
@@ -71,6 +95,107 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
 ; SSE-NEXT: pand %xmm5, %xmm1
 ; SSE-NEXT: packuswb %xmm1, %xmm0
 ; SSE-NEXT: retq
+;
+; AVX1-LABEL: PR50049:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
+; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa (%rsi), %xmm2
+; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5
+; AVX1-NEXT: vmovdqa 32(%rsi), %xmm6
+; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
+; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm5
+; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR50049:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX2-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
+; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
+; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa (%rsi), %xmm2
+; AVX2-NEXT: vmovdqa 16(%rsi), %xmm5
+; AVX2-NEXT: vmovdqa 32(%rsi), %xmm6
+; AVX2-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
+; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
+; AVX2-NEXT: vpshufb %xmm4, %xmm5, %xmm5
+; AVX2-NEXT: vpor %xmm5, %xmm2, %xmm2
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR50049:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
+; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
+; AVX512-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa (%rsi), %xmm2
+; AVX512-NEXT: vmovdqa 16(%rsi), %xmm5
+; AVX512-NEXT: vmovdqa 32(%rsi), %xmm6
+; AVX512-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+; AVX512-NEXT: vpor %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
+; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
+; AVX512-NEXT: vpshufb %xmm4, %xmm5, %xmm5
+; AVX512-NEXT: vpor %xmm5, %xmm2, %xmm2
+; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512-NEXT: vpmullw %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 %x1 = load <48 x i8>, ptr %p1, align 16
 %x2 = load <48 x i8>, ptr %p2, align 16
 %s1 = shufflevector <48 x i8> %x1, <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
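Most of the new PR50049 assembly comes from the <16 x i8> multiply at the end of the test: x86 has no byte-wide vector multiply, so llc zero-extends both operands to i16 (vpmovzxbw), multiplies with vpmullw, and truncates the products back to bytes, via vpand + vpackuswb on SSE/AVX1/AVX2 and via vpmovzxwd + vpmovdb on AVX512F (which, without AVX512BW, lacks 512-bit byte/word operations). A reduced sketch of just that widening pattern, as a hypothetical function rather than the committed test:

define <16 x i8> @mul_v16i8(<16 x i8> %a, <16 x i8> %b) {
  ; lowered on x86 by zero-extending to <16 x i16>, multiplying with vpmullw,
  ; and truncating each 16-bit product back to its low byte
  %m = mul <16 x i8> %a, %b
  ret <16 x i8> %m
}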
