Skip to content

Commit 6195ed8

Browse files
committed
[X86] Match (or (and A, B), (andn A, C)) to VPTERNLOG with AVX512.
This uses an isel pattern similar to the one we used for vpcmov with XOP. llvm-svn: 373154
1 parent fef62e1 commit 6195ed8

File tree

6 files changed

+122
-127
lines changed

6 files changed

+122
-127
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11533,6 +11533,49 @@ let Predicates = [HasVLX] in {
1153311533
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
1153411534
}
1153511535

11536+
// Bitwise select: (or (and src1, src2), (X86andnp src1, src3)) is matched to a
// single VPTERNLOGQ for every 128-bit and 256-bit integer vector type.
// Immediate 202 (0xCA) is the ternary-logic truth table for
// (src1 & src2) | (~src1 & src3), i.e. "src1 ? src2 : src3" per bit.
// NOTE(review): the Q (64-bit element) form is used for all element widths;
// presumably fine because these patterns are unmasked and the operation is
// purely bitwise - confirm.
let Predicates = [HasVLX] in {
11537+
def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
11538+
(X86andnp VR128X:$src1, VR128X:$src3))),
11539+
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11540+
def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
11541+
(X86andnp VR128X:$src1, VR128X:$src3))),
11542+
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11543+
def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
11544+
(X86andnp VR128X:$src1, VR128X:$src3))),
11545+
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11546+
def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
11547+
(X86andnp VR128X:$src1, VR128X:$src3))),
11548+
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11549+
11550+
def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
11551+
(X86andnp VR256X:$src1, VR256X:$src3))),
11552+
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11553+
def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
11554+
(X86andnp VR256X:$src1, VR256X:$src3))),
11555+
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11556+
def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
11557+
(X86andnp VR256X:$src1, VR256X:$src3))),
11558+
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11559+
def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
11560+
(X86andnp VR256X:$src1, VR256X:$src3))),
11561+
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11562+
}
11563+
11564+
// 512-bit bitwise select: (or (and src1, src2), (X86andnp src1, src3)) is
// matched to a single VPTERNLOGQZrri for every 512-bit integer vector type.
// Immediate 202 (0xCA) is the ternary-logic truth table for
// (src1 & src2) | (~src1 & src3), i.e. "src1 ? src2 : src3" per bit.
// NOTE(review): the Q (64-bit element) form is used for all element widths;
// presumably fine because these patterns are unmasked and the operation is
// purely bitwise - confirm.
let Predicates = [HasAVX512] in {
11565+
def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
11566+
(X86andnp VR512:$src1, VR512:$src3))),
11567+
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11568+
def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
11569+
(X86andnp VR512:$src1, VR512:$src3))),
11570+
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11571+
def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
11572+
(X86andnp VR512:$src1, VR512:$src3))),
11573+
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11574+
def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
11575+
(X86andnp VR512:$src1, VR512:$src3))),
11576+
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11577+
}
11578+
1153611579
//===----------------------------------------------------------------------===//
1153711580
// AVX-512 - FixupImm
1153811581
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/X86/vector-fshl-512.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,16 +1548,12 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
15481548
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
15491549
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
15501550
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm3
1551-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1552-
; AVX512VL-NEXT: vpandn %ymm3, %ymm4, %ymm3
15531551
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
1554-
; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
1555-
; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
1552+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1553+
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm4, %ymm2
15561554
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
1557-
; AVX512VL-NEXT: vpandn %ymm1, %ymm4, %ymm1
15581555
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
1559-
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
1560-
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
1556+
; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm4, %ymm0
15611557
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
15621558
; AVX512VL-NEXT: retq
15631559
;

llvm/test/CodeGen/X86/vector-fshl-rot-512.ll

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -172,47 +172,39 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
172172
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
173173
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
174174
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
175-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
176-
; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
177-
; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm6
178-
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
179-
; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
180-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
181-
; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2
175+
; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm5
176+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
177+
; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm6, %ymm5
178+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
179+
; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
182180
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
183-
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
184-
; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm4
185-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
186-
; AVX512VL-NEXT: vpandn %ymm4, %ymm7, %ymm4
187-
; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm8
188-
; AVX512VL-NEXT: vpand %ymm7, %ymm8, %ymm8
189-
; AVX512VL-NEXT: vpor %ymm4, %ymm8, %ymm4
181+
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
182+
; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm5
183+
; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm7
184+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
185+
; AVX512VL-NEXT: vpternlogq $226, %ymm5, %ymm8, %ymm7
190186
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
191-
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
192-
; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4
193-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
194-
; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
187+
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm7, %ymm3, %ymm3
188+
; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm5
189+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
190+
; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5
195191
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm9
196-
; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
192+
; AVX512VL-NEXT: vpor %ymm5, %ymm9, %ymm5
197193
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
198-
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
194+
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
199195
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
200-
; AVX512VL-NEXT: vpandn %ymm3, %ymm5, %ymm3
201-
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
202-
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
203-
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
204-
; AVX512VL-NEXT: vpand %ymm6, %ymm1, %ymm1
196+
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5
197+
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm5
198+
; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
205199
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
206-
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
200+
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm5, %ymm0, %ymm0
207201
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
208-
; AVX512VL-NEXT: vpandn %ymm3, %ymm7, %ymm3
209202
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
210-
; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
211-
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
203+
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm8, %ymm4
212204
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
213-
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
205+
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
214206
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
215-
; AVX512VL-NEXT: vpand %ymm8, %ymm3, %ymm3
207+
; AVX512VL-NEXT: vpand %ymm7, %ymm3, %ymm3
216208
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
217209
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
218210
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
@@ -826,16 +818,12 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
826818
; AVX512VL: # %bb.0:
827819
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
828820
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
829-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
830-
; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
831821
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
832-
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
833-
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
822+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
823+
; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
834824
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
835-
; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
836825
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
837-
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
838-
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
826+
; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm0
839827
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
840828
; AVX512VL-NEXT: retq
841829
;

llvm/test/CodeGen/X86/vector-fshr-512.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,16 +1532,12 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
15321532
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
15331533
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
15341534
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm3
1535-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1536-
; AVX512VL-NEXT: vpandn %ymm3, %ymm4, %ymm3
15371535
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
1538-
; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
1539-
; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
1536+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1537+
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm4, %ymm2
15401538
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
1541-
; AVX512VL-NEXT: vpandn %ymm1, %ymm4, %ymm1
15421539
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
1543-
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
1544-
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
1540+
; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm4, %ymm0
15451541
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
15461542
; AVX512VL-NEXT: retq
15471543
;

llvm/test/CodeGen/X86/vector-fshr-rot-512.ll

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -181,50 +181,42 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
181181
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
182182
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
183183
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
184-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
185-
; AVX512VL-NEXT: vpandn %ymm4, %ymm5, %ymm4
186-
; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm6
187-
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
188-
; AVX512VL-NEXT: vpor %ymm4, %ymm6, %ymm4
189-
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
190-
; AVX512VL-NEXT: vpsubb %ymm2, %ymm6, %ymm2
184+
; AVX512VL-NEXT: vpsllw $4, %ymm3, %ymm5
185+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
186+
; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm6, %ymm5
187+
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
188+
; AVX512VL-NEXT: vpsubb %ymm2, %ymm4, %ymm2
191189
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
192190
; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
193191
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
194-
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
195-
; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm4
196-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
197-
; AVX512VL-NEXT: vpandn %ymm4, %ymm8, %ymm4
198-
; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm9
199-
; AVX512VL-NEXT: vpand %ymm8, %ymm9, %ymm9
200-
; AVX512VL-NEXT: vpor %ymm4, %ymm9, %ymm4
192+
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
193+
; AVX512VL-NEXT: vpsrlw $6, %ymm3, %ymm5
194+
; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm8
195+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
196+
; AVX512VL-NEXT: vpternlogq $226, %ymm5, %ymm9, %ymm8
201197
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
202-
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
203-
; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4
204-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
205-
; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
198+
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm8, %ymm3, %ymm3
199+
; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm5
200+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
201+
; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm5
206202
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm10
207-
; AVX512VL-NEXT: vpor %ymm4, %ymm10, %ymm4
203+
; AVX512VL-NEXT: vpor %ymm5, %ymm10, %ymm5
208204
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
209-
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
205+
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
210206
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
211-
; AVX512VL-NEXT: vpandn %ymm3, %ymm5, %ymm3
212-
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
213-
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
214-
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
215-
; AVX512VL-NEXT: vpsubb %ymm1, %ymm6, %ymm1
207+
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5
208+
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm5
209+
; AVX512VL-NEXT: vpsubb %ymm1, %ymm4, %ymm1
216210
; AVX512VL-NEXT: vpand %ymm7, %ymm1, %ymm1
217211
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
218-
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
212+
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm5, %ymm0, %ymm0
219213
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
220-
; AVX512VL-NEXT: vpandn %ymm3, %ymm8, %ymm3
221214
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
222-
; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
223-
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
215+
; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm9, %ymm4
224216
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
225-
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
217+
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
226218
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
227-
; AVX512VL-NEXT: vpand %ymm9, %ymm3, %ymm3
219+
; AVX512VL-NEXT: vpand %ymm8, %ymm3, %ymm3
228220
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4
229221
; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3
230222
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
@@ -846,16 +838,12 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
846838
; AVX512VL: # %bb.0:
847839
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
848840
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
849-
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
850-
; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
851841
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
852-
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
853-
; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
842+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
843+
; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
854844
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
855-
; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2
856845
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
857-
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
858-
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
846+
; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm0
859847
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
860848
; AVX512VL-NEXT: retq
861849
;

0 commit comments

Comments
 (0)