Skip to content

Commit 494bfd9

Browse files
committed
[X86] Enable isel to fold broadcast loads that have been bitcasted from FP into a vpternlog.
llvm-svn: 373157
1 parent b6a2207 commit 494bfd9

File tree

2 files changed

+101
-10
lines changed

2 files changed

+101
-10
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11436,6 +11436,102 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
1143611436
// Instantiates avx512_common_ternlog under the VPTERNLOGQ prefix, using the
// "vpternlogq" mnemonic, the SchedWriteVecALU scheduling info and the
// avx512vl_i64_info vector info; the resulting records are tagged VEX_W.
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
1143711437
avx512vl_i64_info>, VEX_W;
1143811438

11439+
// Patterns to fold bitcasted FP broadcasts.
11440+
// FIXME: Need better DAG canonicalization.
11441+
// Selection patterns, guarded by HasVLX, that fold a scalar FP load which is
// broadcast (X86VBroadcast) and then bitcast to an integer vector into the
// "rmbi" form of VPTERNLOGD/VPTERNLOGQ on VR128X and VR256X operands.
//
// Each element-type/width combination gets three patterns, one per position
// the broadcast can occupy in the X86vpternlog node. The selected instruction
// always takes the address as its third operand, so when the broadcast is not
// already third the immediate is wrapped in an operand transform.
// NOTE(review): VPTERNLOG321_imm8 and VPTERNLOG132_imm8 are defined outside
// this view; presumably they rewrite the truth-table immediate to match the
// operand permutation their names suggest -- confirm against their definitions.
let Predicates = [HasVLX] in {
11442+
// v4f32 broadcast viewed as v4i32, VR128X operands -> VPTERNLOGDZ128rmbi.
// Broadcast already in the third position: immediate is passed unchanged.
def : Pat<(X86vpternlog VR128X:$src1, VR128X:$src2,
11443+
(bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src3)))),
11444+
(i8 timm:$src4)),
11445+
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11446+
timm:$src4)>;
11447+
// Broadcast in the first position (source order is bcast, $src2, $src1):
// first and third operands trade places, immediate goes through
// VPTERNLOG321_imm8.
def : Pat<(X86vpternlog (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src3)))),
11448+
VR128X:$src2, VR128X:$src1, (i8 timm:$src4)),
11449+
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11450+
(VPTERNLOG321_imm8 timm:$src4))>;
11451+
// Broadcast in the second position: second and third operands trade places,
// immediate goes through VPTERNLOG132_imm8.
def : Pat<(X86vpternlog VR128X:$src1,
11452+
(bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src3)))),
11453+
VR128X:$src2, (i8 timm:$src4)),
11454+
(VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11455+
(VPTERNLOG132_imm8 timm:$src4))>;
11456+
11457+
// v2f64 broadcast viewed as v2i64, VR128X operands -> VPTERNLOGQZ128rmbi.
// Same three broadcast positions as above (third, first, second).
def : Pat<(X86vpternlog VR128X:$src1, VR128X:$src2,
11458+
(bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src3)))),
11459+
(i8 timm:$src4)),
11460+
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11461+
timm:$src4)>;
11462+
def : Pat<(X86vpternlog (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src3)))),
11463+
VR128X:$src2, VR128X:$src1, (i8 timm:$src4)),
11464+
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11465+
(VPTERNLOG321_imm8 timm:$src4))>;
11466+
def : Pat<(X86vpternlog VR128X:$src1,
11467+
(bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src3)))),
11468+
VR128X:$src2, (i8 timm:$src4)),
11469+
(VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11470+
(VPTERNLOG132_imm8 timm:$src4))>;
11471+
11472+
// v8f32 broadcast viewed as v8i32, VR256X operands -> VPTERNLOGDZ256rmbi.
// Same three broadcast positions as above (third, first, second).
def : Pat<(X86vpternlog VR256X:$src1, VR256X:$src2,
11473+
(bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src3)))),
11474+
(i8 timm:$src4)),
11475+
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11476+
timm:$src4)>;
11477+
def : Pat<(X86vpternlog (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src3)))),
11478+
VR256X:$src2, VR256X:$src1, (i8 timm:$src4)),
11479+
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11480+
(VPTERNLOG321_imm8 timm:$src4))>;
11481+
def : Pat<(X86vpternlog VR256X:$src1,
11482+
(bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src3)))),
11483+
VR256X:$src2, (i8 timm:$src4)),
11484+
(VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11485+
(VPTERNLOG132_imm8 timm:$src4))>;
11486+
11487+
// v4f64 broadcast viewed as v4i64, VR256X operands -> VPTERNLOGQZ256rmbi.
// Same three broadcast positions as above (third, first, second).
def : Pat<(X86vpternlog VR256X:$src1, VR256X:$src2,
11488+
(bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src3)))),
11489+
(i8 timm:$src4)),
11490+
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11491+
timm:$src4)>;
11492+
def : Pat<(X86vpternlog (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src3)))),
11493+
VR256X:$src2, VR256X:$src1, (i8 timm:$src4)),
11494+
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11495+
(VPTERNLOG321_imm8 timm:$src4))>;
11496+
def : Pat<(X86vpternlog VR256X:$src1,
11497+
(bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src3)))),
11498+
VR256X:$src2, (i8 timm:$src4)),
11499+
(VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11500+
(VPTERNLOG132_imm8 timm:$src4))>;
11501+
}
11502+
11503+
// Selection patterns, guarded by HasAVX512, that fold a scalar FP load which
// is broadcast (X86VBroadcast) and then bitcast to an integer vector into the
// "rmbi" form of VPTERNLOGD/VPTERNLOGQ on VR512 operands.
//
// Each element type gets three patterns, one per position the broadcast can
// occupy in the X86vpternlog node. The selected instruction always takes the
// address as its third operand, so when the broadcast is not already third the
// immediate is wrapped in an operand transform.
// NOTE(review): VPTERNLOG321_imm8 and VPTERNLOG132_imm8 are defined outside
// this view; presumably they rewrite the truth-table immediate to match the
// operand permutation their names suggest -- confirm against their definitions.
let Predicates = [HasAVX512] in {
11504+
// v16f32 broadcast viewed as v16i32 -> VPTERNLOGDZrmbi.
// Broadcast already in the third position: immediate is passed unchanged.
def : Pat<(X86vpternlog VR512:$src1, VR512:$src2,
11505+
(bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src3)))),
11506+
(i8 timm:$src4)),
11507+
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11508+
timm:$src4)>;
11509+
// Broadcast in the first position (source order is bcast, $src2, $src1):
// first and third operands trade places, immediate goes through
// VPTERNLOG321_imm8.
def : Pat<(X86vpternlog (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src3)))),
11510+
VR512:$src2, VR512:$src1, (i8 timm:$src4)),
11511+
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11512+
(VPTERNLOG321_imm8 timm:$src4))>;
11513+
// Broadcast in the second position: second and third operands trade places,
// immediate goes through VPTERNLOG132_imm8.
def : Pat<(X86vpternlog VR512:$src1,
11514+
(bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src3)))),
11515+
VR512:$src2, (i8 timm:$src4)),
11516+
(VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11517+
(VPTERNLOG132_imm8 timm:$src4))>;
11518+
11519+
// v8f64 broadcast viewed as v8i64 -> VPTERNLOGQZrmbi.
// Same three broadcast positions as above (third, first, second).
def : Pat<(X86vpternlog VR512:$src1, VR512:$src2,
11520+
(bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src3)))),
11521+
(i8 timm:$src4)),
11522+
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11523+
timm:$src4)>;
11524+
def : Pat<(X86vpternlog (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src3)))),
11525+
VR512:$src2, VR512:$src1, (i8 timm:$src4)),
11526+
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11527+
(VPTERNLOG321_imm8 timm:$src4))>;
11528+
def : Pat<(X86vpternlog VR512:$src1,
11529+
(bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src3)))),
11530+
VR512:$src2, (i8 timm:$src4)),
11531+
(VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11532+
(VPTERNLOG132_imm8 timm:$src4))>;
11533+
}
11534+
1143911535
// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
1144011536
let Predicates = [HasVLX] in {
1144111537
def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,

llvm/test/CodeGen/X86/vec-copysign-avx512.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
66
; CHECK-LABEL: v4f32:
77
; CHECK: ## %bb.0:
8-
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
9-
; CHECK-NEXT: vpternlogd $226, %xmm1, %xmm2, %xmm0
8+
; CHECK-NEXT: vpternlogd $228, {{.*}}(%rip){1to4}, %xmm1, %xmm0
109
; CHECK-NEXT: retq
1110
%tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b )
1211
ret <4 x float> %tmp
@@ -15,8 +14,7 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
1514
define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
1615
; CHECK-LABEL: v8f32:
1716
; CHECK: ## %bb.0:
18-
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
19-
; CHECK-NEXT: vpternlogd $226, %ymm1, %ymm2, %ymm0
17+
; CHECK-NEXT: vpternlogd $228, {{.*}}(%rip){1to8}, %ymm1, %ymm0
2018
; CHECK-NEXT: retq
2119
%tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b )
2220
ret <8 x float> %tmp
@@ -25,8 +23,7 @@ define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
2523
define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
2624
; CHECK-LABEL: v16f32:
2725
; CHECK: ## %bb.0:
28-
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
29-
; CHECK-NEXT: vpternlogd $226, %zmm1, %zmm2, %zmm0
26+
; CHECK-NEXT: vpternlogd $228, {{.*}}(%rip){1to16}, %zmm1, %zmm0
3027
; CHECK-NEXT: retq
3128
%tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b )
3229
ret <16 x float> %tmp
@@ -44,8 +41,7 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
4441
define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
4542
; CHECK-LABEL: v4f64:
4643
; CHECK: ## %bb.0:
47-
; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
48-
; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
44+
; CHECK-NEXT: vpternlogq $228, {{.*}}(%rip){1to4}, %ymm1, %ymm0
4945
; CHECK-NEXT: retq
5046
%tmp = tail call <4 x double> @llvm.copysign.v4f64( <4 x double> %a, <4 x double> %b )
5147
ret <4 x double> %tmp
@@ -54,8 +50,7 @@ define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
5450
define <8 x double> @v8f64(<8 x double> %a, <8 x double> %b) nounwind {
5551
; CHECK-LABEL: v8f64:
5652
; CHECK: ## %bb.0:
57-
; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
58-
; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
53+
; CHECK-NEXT: vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0
5954
; CHECK-NEXT: retq
6055
%tmp = tail call <8 x double> @llvm.copysign.v8f64( <8 x double> %a, <8 x double> %b )
6156
ret <8 x double> %tmp

0 commit comments

Comments
 (0)