Skip to content

Commit 0ac4aac

Browse files
committed
[X86] Enable canonicalizeBitSelect for AVX512 since we can use VPTERNLOG now.
llvm-svn: 373155
1 parent 6195ed8 commit 0ac4aac

18 files changed

+413
-236
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39582,7 +39582,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
3958239582
const X86Subtarget &Subtarget) {
3958339583
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
3958439584

39585-
EVT VT = N->getValueType(0);
39585+
MVT VT = N->getSimpleValueType(0);
3958639586
if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
3958739587
return SDValue();
3958839588

@@ -39591,10 +39591,12 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
3959139591
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
3959239592
return SDValue();
3959339593

39594-
// On XOP we'll lower to PCMOV so accept one use, otherwise only
39595-
// do this if either mask has multiple uses already.
39596-
if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() ||
39597-
!N1.getOperand(1).hasOneUse()))
39594+
// On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use
39595+
// VPTERNLOG. Otherwise only do this if either mask has multiple uses already.
39596+
bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
39597+
Subtarget.hasVLX();
39598+
if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
39599+
!N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
3959839600
return SDValue();
3959939601

3960039602
// Attempt to extract constant byte masks.

llvm/test/CodeGen/X86/combine-bitselect.ll

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -329,9 +329,7 @@ define <8 x i64> @bitselect_v8i64_rr(<8 x i64>, <8 x i64>) {
329329
;
330330
; AVX512F-LABEL: bitselect_v8i64_rr:
331331
; AVX512F: # %bb.0:
332-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
333-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
334-
; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
332+
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
335333
; AVX512F-NEXT: retq
336334
%3 = and <8 x i64> %0, <i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890, i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890>
337335
%4 = and <8 x i64> %1, <i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891>
@@ -405,9 +403,7 @@ define <8 x i64> @bitselect_v8i64_rm(<8 x i64>, <8 x i64>* nocapture readonly) {
405403
; AVX512F-LABEL: bitselect_v8i64_rm:
406404
; AVX512F: # %bb.0:
407405
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
408-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
409-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
410-
; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
406+
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
411407
; AVX512F-NEXT: retq
412408
%3 = load <8 x i64>, <8 x i64>* %1
413409
%4 = and <8 x i64> %0, <i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3>
@@ -482,9 +478,7 @@ define <8 x i64> @bitselect_v8i64_mr(<8 x i64>* nocapture readonly, <8 x i64>) {
482478
; AVX512F-LABEL: bitselect_v8i64_mr:
483479
; AVX512F: # %bb.0:
484480
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
485-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
486-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
487-
; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
481+
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
488482
; AVX512F-NEXT: retq
489483
%3 = load <8 x i64>, <8 x i64>* %0
490484
%4 = and <8 x i64> %3, <i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296>
@@ -554,11 +548,9 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n
554548
;
555549
; AVX512F-LABEL: bitselect_v8i64_mm:
556550
; AVX512F: # %bb.0:
557-
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
558-
; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1
559-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
560-
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
561-
; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0
551+
; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
552+
; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm0
553+
; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
562554
; AVX512F-NEXT: retq
563555
%3 = load <8 x i64>, <8 x i64>* %0
564556
%4 = load <8 x i64>, <8 x i64>* %1

llvm/test/CodeGen/X86/vec-copysign-avx512.ll

Lines changed: 26 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -3,102 +3,60 @@
33
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.10.0 -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VLDQ
44

55
define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
6-
; AVX512VL-LABEL: v4f32:
7-
; AVX512VL: ## %bb.0:
8-
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
9-
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
10-
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
11-
; AVX512VL-NEXT: retq
12-
;
13-
; AVX512VLDQ-LABEL: v4f32:
14-
; AVX512VLDQ: ## %bb.0:
15-
; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm1, %xmm1
16-
; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
17-
; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0
18-
; AVX512VLDQ-NEXT: retq
6+
; CHECK-LABEL: v4f32:
7+
; CHECK: ## %bb.0:
8+
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
9+
; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
10+
; CHECK-NEXT: retq
1911
%tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b )
2012
ret <4 x float> %tmp
2113
}
2214

2315
define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
24-
; AVX512VL-LABEL: v8f32:
25-
; AVX512VL: ## %bb.0:
26-
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
27-
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
28-
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
29-
; AVX512VL-NEXT: retq
30-
;
31-
; AVX512VLDQ-LABEL: v8f32:
32-
; AVX512VLDQ: ## %bb.0:
33-
; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm1, %ymm1
34-
; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
35-
; AVX512VLDQ-NEXT: vorps %ymm1, %ymm0, %ymm0
36-
; AVX512VLDQ-NEXT: retq
16+
; CHECK-LABEL: v8f32:
17+
; CHECK: ## %bb.0:
18+
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
19+
; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
20+
; CHECK-NEXT: retq
3721
%tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b )
3822
ret <8 x float> %tmp
3923
}
4024

4125
define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
42-
; AVX512VL-LABEL: v16f32:
43-
; AVX512VL: ## %bb.0:
44-
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
45-
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
46-
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
47-
; AVX512VL-NEXT: retq
48-
;
49-
; AVX512VLDQ-LABEL: v16f32:
50-
; AVX512VLDQ: ## %bb.0:
51-
; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm1, %zmm1
52-
; AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
53-
; AVX512VLDQ-NEXT: vorps %zmm1, %zmm0, %zmm0
54-
; AVX512VLDQ-NEXT: retq
26+
; CHECK-LABEL: v16f32:
27+
; CHECK: ## %bb.0:
28+
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
29+
; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
30+
; CHECK-NEXT: retq
5531
%tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b )
5632
ret <16 x float> %tmp
5733
}
5834

5935
define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
6036
; CHECK-LABEL: v2f64:
6137
; CHECK: ## %bb.0:
62-
; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
63-
; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
64-
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
38+
; CHECK-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
6539
; CHECK-NEXT: retq
6640
%tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
6741
ret <2 x double> %tmp
6842
}
6943

7044
define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
71-
; AVX512VL-LABEL: v4f64:
72-
; AVX512VL: ## %bb.0:
73-
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
74-
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
75-
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
76-
; AVX512VL-NEXT: retq
77-
;
78-
; AVX512VLDQ-LABEL: v4f64:
79-
; AVX512VLDQ: ## %bb.0:
80-
; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm1, %ymm1
81-
; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
82-
; AVX512VLDQ-NEXT: vorpd %ymm1, %ymm0, %ymm0
83-
; AVX512VLDQ-NEXT: retq
45+
; CHECK-LABEL: v4f64:
46+
; CHECK: ## %bb.0:
47+
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
48+
; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
49+
; CHECK-NEXT: retq
8450
%tmp = tail call <4 x double> @llvm.copysign.v4f64( <4 x double> %a, <4 x double> %b )
8551
ret <4 x double> %tmp
8652
}
8753

8854
define <8 x double> @v8f64(<8 x double> %a, <8 x double> %b) nounwind {
89-
; AVX512VL-LABEL: v8f64:
90-
; AVX512VL: ## %bb.0:
91-
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
92-
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
93-
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
94-
; AVX512VL-NEXT: retq
95-
;
96-
; AVX512VLDQ-LABEL: v8f64:
97-
; AVX512VLDQ: ## %bb.0:
98-
; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm1, %zmm1
99-
; AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
100-
; AVX512VLDQ-NEXT: vorpd %zmm1, %zmm0, %zmm0
101-
; AVX512VLDQ-NEXT: retq
55+
; CHECK-LABEL: v8f64:
56+
; CHECK: ## %bb.0:
57+
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
58+
; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
59+
; CHECK-NEXT: retq
10260
%tmp = tail call <8 x double> @llvm.copysign.v8f64( <8 x double> %a, <8 x double> %b )
10361
ret <8 x double> %tmp
10462
}

llvm/test/CodeGen/X86/vector-fshl-128.ll

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2980,14 +2980,53 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
29802980
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
29812981
; AVX-NEXT: retq
29822982
;
2983-
; AVX512-LABEL: splatconstant_funnnel_v16i8:
2984-
; AVX512: # %bb.0:
2985-
; AVX512-NEXT: vpsrlw $4, %xmm1, %xmm1
2986-
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
2987-
; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
2988-
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
2989-
; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
2990-
; AVX512-NEXT: retq
2983+
; AVX512F-LABEL: splatconstant_funnnel_v16i8:
2984+
; AVX512F: # %bb.0:
2985+
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1
2986+
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
2987+
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
2988+
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
2989+
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
2990+
; AVX512F-NEXT: retq
2991+
;
2992+
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
2993+
; AVX512VL: # %bb.0:
2994+
; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
2995+
; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
2996+
; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
2997+
; AVX512VL-NEXT: retq
2998+
;
2999+
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
3000+
; AVX512BW: # %bb.0:
3001+
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1
3002+
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
3003+
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
3004+
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3005+
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
3006+
; AVX512BW-NEXT: retq
3007+
;
3008+
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
3009+
; AVX512VBMI2: # %bb.0:
3010+
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
3011+
; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
3012+
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
3013+
; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3014+
; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
3015+
; AVX512VBMI2-NEXT: retq
3016+
;
3017+
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
3018+
; AVX512VLBW: # %bb.0:
3019+
; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
3020+
; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
3021+
; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
3022+
; AVX512VLBW-NEXT: retq
3023+
;
3024+
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
3025+
; AVX512VLVBMI2: # %bb.0:
3026+
; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
3027+
; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
3028+
; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
3029+
; AVX512VLVBMI2-NEXT: retq
29913030
;
29923031
; XOP-LABEL: splatconstant_funnnel_v16i8:
29933032
; XOP: # %bb.0:

llvm/test/CodeGen/X86/vector-fshl-256.ll

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2503,14 +2503,53 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
25032503
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
25042504
; AVX2-NEXT: retq
25052505
;
2506-
; AVX512-LABEL: splatconstant_funnnel_v32i8:
2507-
; AVX512: # %bb.0:
2508-
; AVX512-NEXT: vpsrlw $4, %ymm1, %ymm1
2509-
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
2510-
; AVX512-NEXT: vpsllw $4, %ymm0, %ymm0
2511-
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2512-
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
2513-
; AVX512-NEXT: retq
2506+
; AVX512F-LABEL: splatconstant_funnnel_v32i8:
2507+
; AVX512F: # %bb.0:
2508+
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
2509+
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
2510+
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
2511+
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2512+
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
2513+
; AVX512F-NEXT: retq
2514+
;
2515+
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
2516+
; AVX512VL: # %bb.0:
2517+
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
2518+
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
2519+
; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
2520+
; AVX512VL-NEXT: retq
2521+
;
2522+
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
2523+
; AVX512BW: # %bb.0:
2524+
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1
2525+
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
2526+
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
2527+
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2528+
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
2529+
; AVX512BW-NEXT: retq
2530+
;
2531+
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
2532+
; AVX512VBMI2: # %bb.0:
2533+
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
2534+
; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
2535+
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
2536+
; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2537+
; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
2538+
; AVX512VBMI2-NEXT: retq
2539+
;
2540+
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
2541+
; AVX512VLBW: # %bb.0:
2542+
; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
2543+
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
2544+
; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
2545+
; AVX512VLBW-NEXT: retq
2546+
;
2547+
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
2548+
; AVX512VLVBMI2: # %bb.0:
2549+
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
2550+
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
2551+
; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
2552+
; AVX512VLVBMI2-NEXT: retq
25142553
;
25152554
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
25162555
; XOPAVX1: # %bb.0:

llvm/test/CodeGen/X86/vector-fshl-512.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,37 +1560,29 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
15601560
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
15611561
; AVX512BW: # %bb.0:
15621562
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
1563-
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
15641563
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
1565-
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1566-
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1564+
; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
15671565
; AVX512BW-NEXT: retq
15681566
;
15691567
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
15701568
; AVX512VBMI2: # %bb.0:
15711569
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
1572-
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
15731570
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
1574-
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1575-
; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1571+
; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
15761572
; AVX512VBMI2-NEXT: retq
15771573
;
15781574
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
15791575
; AVX512VLBW: # %bb.0:
15801576
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
1581-
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
15821577
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
1583-
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1584-
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1578+
; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
15851579
; AVX512VLBW-NEXT: retq
15861580
;
15871581
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
15881582
; AVX512VLVBMI2: # %bb.0:
15891583
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
1590-
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
15911584
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
1592-
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1593-
; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1585+
; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
15941586
; AVX512VLVBMI2-NEXT: retq
15951587
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
15961588
ret <64 x i8> %res

0 commit comments

Comments
 (0)