Skip to content

Commit d643a39

Browse files
committed
[X86] Use any_fadd/sub/mul/div/sqrt with the AVX512 scalar_*_patterns.
Make sure not to use them in the patterns for masked instructions, which continue to match only the non-strict nodes. Also fix the FMA patterns, which were matching strict_fma+X86selects to masked instructions.
1 parent 2f94ce0 commit d643a39

File tree

1 file changed

+70
-68
lines changed

1 file changed

+70
-68
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 70 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -6746,7 +6746,8 @@ defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRn
67466746
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
67476747
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
67486748

6749-
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6749+
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
6750+
SDNode RndOp, string Prefix,
67506751
string Suffix, SDNode Move,
67516752
X86VectorVTInfo _, PatLeaf ZeroFP> {
67526753
let Predicates = [HasAVX512] in {
@@ -6789,7 +6790,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
67896790

67906791
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
67916792
(X86selects VK1WM:$mask,
6792-
(Op _.FRC:$src2,
6793+
(MaskedOp _.FRC:$src2,
67936794
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
67946795
_.FRC:$src3),
67956796
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6800,7 +6801,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
68006801

68016802
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68026803
(X86selects VK1WM:$mask,
6803-
(Op _.FRC:$src2,
6804+
(MaskedOp _.FRC:$src2,
68046805
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
68056806
(_.ScalarLdFrag addr:$src3)),
68066807
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6810,17 +6811,17 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
68106811

68116812
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68126813
(X86selects VK1WM:$mask,
6813-
(Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6814-
(_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6814+
(MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6815+
(_.ScalarLdFrag addr:$src3), _.FRC:$src2),
68156816
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
68166817
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
68176818
VR128X:$src1, VK1WM:$mask,
68186819
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
68196820

68206821
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68216822
(X86selects VK1WM:$mask,
6822-
(Op _.FRC:$src2, _.FRC:$src3,
6823-
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6823+
(MaskedOp _.FRC:$src2, _.FRC:$src3,
6824+
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
68246825
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
68256826
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
68266827
VR128X:$src1, VK1WM:$mask,
@@ -6829,18 +6830,18 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
68296830

68306831
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68316832
(X86selects VK1WM:$mask,
6832-
(Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6833-
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6833+
(MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6834+
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
68346835
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
68356836
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
68366837
VR128X:$src1, VK1WM:$mask,
68376838
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
68386839

68396840
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68406841
(X86selects VK1WM:$mask,
6841-
(Op _.FRC:$src2,
6842-
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843-
_.FRC:$src3),
6842+
(MaskedOp _.FRC:$src2,
6843+
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6844+
_.FRC:$src3),
68446845
(_.EltVT ZeroFP)))))),
68456846
(!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
68466847
VR128X:$src1, VK1WM:$mask,
@@ -6849,8 +6850,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
68496850

68506851
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68516852
(X86selects VK1WM:$mask,
6852-
(Op _.FRC:$src2, _.FRC:$src3,
6853-
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6853+
(MaskedOp _.FRC:$src2, _.FRC:$src3,
6854+
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
68546855
(_.EltVT ZeroFP)))))),
68556856
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
68566857
VR128X:$src1, VK1WM:$mask,
@@ -6859,27 +6860,27 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
68596860

68606861
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68616862
(X86selects VK1WM:$mask,
6862-
(Op _.FRC:$src2,
6863-
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864-
(_.ScalarLdFrag addr:$src3)),
6863+
(MaskedOp _.FRC:$src2,
6864+
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6865+
(_.ScalarLdFrag addr:$src3)),
68656866
(_.EltVT ZeroFP)))))),
68666867
(!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
68676868
VR128X:$src1, VK1WM:$mask,
68686869
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
68696870

68706871
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68716872
(X86selects VK1WM:$mask,
6872-
(Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6873-
_.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6873+
(MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6874+
_.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
68746875
(_.EltVT ZeroFP)))))),
68756876
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
68766877
VR128X:$src1, VK1WM:$mask,
68776878
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
68786879

68796880
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
68806881
(X86selects VK1WM:$mask,
6881-
(Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6882-
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6882+
(MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6883+
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
68836884
(_.EltVT ZeroFP)))))),
68846885
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
68856886
VR128X:$src1, VK1WM:$mask,
@@ -6948,23 +6949,23 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
69486949
}
69496950
}
69506951

6951-
defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
6952-
X86Movss, v4f32x_info, fp32imm0>;
6953-
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6954-
X86Movss, v4f32x_info, fp32imm0>;
6955-
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6956-
X86Movss, v4f32x_info, fp32imm0>;
6957-
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6958-
X86Movss, v4f32x_info, fp32imm0>;
6952+
defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6953+
"SS", X86Movss, v4f32x_info, fp32imm0>;
6954+
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6955+
"SS", X86Movss, v4f32x_info, fp32imm0>;
6956+
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6957+
"SS", X86Movss, v4f32x_info, fp32imm0>;
6958+
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6959+
"SS", X86Movss, v4f32x_info, fp32imm0>;
69596960

6960-
defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
6961-
X86Movsd, v2f64x_info, fp64imm0>;
6962-
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6963-
X86Movsd, v2f64x_info, fp64imm0>;
6964-
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6965-
X86Movsd, v2f64x_info, fp64imm0>;
6966-
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6967-
X86Movsd, v2f64x_info, fp64imm0>;
6961+
defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6962+
"SD", X86Movsd, v2f64x_info, fp64imm0>;
6963+
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6964+
"SD", X86Movsd, v2f64x_info, fp64imm0>;
6965+
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6966+
"SD", X86Movsd, v2f64x_info, fp64imm0>;
6967+
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6968+
"SD", X86Movsd, v2f64x_info, fp64imm0>;
69686969

69696970
//===----------------------------------------------------------------------===//
69706971
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
@@ -11656,88 +11657,89 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
1165611657

1165711658
// TODO: Some canonicalization in lowering would simplify the number of
1165811659
// patterns we have to try to match.
11659-
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11660-
X86VectorVTInfo _, PatLeaf ZeroFP> {
11660+
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
11661+
string OpcPrefix, SDNode MoveNode,
11662+
X86VectorVTInfo _, PatLeaf ZeroFP> {
1166111663
let Predicates = [HasAVX512] in {
1166211664
// extracted scalar math op with insert via movss
1166311665
def : Pat<(MoveNode
1166411666
(_.VT VR128X:$dst),
1166511667
(_.VT (scalar_to_vector
1166611668
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
1166711669
_.FRC:$src)))),
11668-
(!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11670+
(!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
1166911671
(_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
1167011672
def : Pat<(MoveNode
1167111673
(_.VT VR128X:$dst),
1167211674
(_.VT (scalar_to_vector
1167311675
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
1167411676
(_.ScalarLdFrag addr:$src))))),
11675-
(!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11677+
(!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
1167611678

1167711679
// extracted masked scalar math op with insert via movss
1167811680
def : Pat<(MoveNode (_.VT VR128X:$src1),
1167911681
(scalar_to_vector
1168011682
(X86selects VK1WM:$mask,
11681-
(Op (_.EltVT
11682-
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11683-
_.FRC:$src2),
11683+
(MaskedOp (_.EltVT
11684+
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11685+
_.FRC:$src2),
1168411686
_.FRC:$src0))),
11685-
(!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11687+
(!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
1168611688
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
1168711689
VK1WM:$mask, _.VT:$src1,
1168811690
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
1168911691
def : Pat<(MoveNode (_.VT VR128X:$src1),
1169011692
(scalar_to_vector
1169111693
(X86selects VK1WM:$mask,
11692-
(Op (_.EltVT
11693-
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11694-
(_.ScalarLdFrag addr:$src2)),
11694+
(MaskedOp (_.EltVT
11695+
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11696+
(_.ScalarLdFrag addr:$src2)),
1169511697
_.FRC:$src0))),
11696-
(!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11698+
(!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
1169711699
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
1169811700
VK1WM:$mask, _.VT:$src1, addr:$src2)>;
1169911701

1170011702
// extracted masked scalar math op with insert via movss
1170111703
def : Pat<(MoveNode (_.VT VR128X:$src1),
1170211704
(scalar_to_vector
1170311705
(X86selects VK1WM:$mask,
11704-
(Op (_.EltVT
11705-
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11706-
_.FRC:$src2), (_.EltVT ZeroFP)))),
11707-
(!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11706+
(MaskedOp (_.EltVT
11707+
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11708+
_.FRC:$src2), (_.EltVT ZeroFP)))),
11709+
(!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
1170811710
VK1WM:$mask, _.VT:$src1,
1170911711
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
1171011712
def : Pat<(MoveNode (_.VT VR128X:$src1),
1171111713
(scalar_to_vector
1171211714
(X86selects VK1WM:$mask,
11713-
(Op (_.EltVT
11714-
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11715-
(_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11716-
(!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11715+
(MaskedOp (_.EltVT
11716+
(extractelt (_.VT VR128X:$src1), (iPTR 0))),
11717+
(_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11718+
(!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
1171711719
}
1171811720
}
1171911721

11720-
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11721-
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11722-
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11723-
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11722+
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11723+
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11724+
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11725+
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
1172411726

11725-
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11726-
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11727-
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11728-
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11727+
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11728+
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11729+
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11730+
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
1172911731

1173011732
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
1173111733
SDNode Move, X86VectorVTInfo _> {
1173211734
let Predicates = [HasAVX512] in {
1173311735
def : Pat<(_.VT (Move _.VT:$dst,
1173411736
(scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11735-
(!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11737+
(!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
1173611738
}
1173711739
}
1173811740

11739-
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11740-
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11741+
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11742+
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
1174111743

1174211744
//===----------------------------------------------------------------------===//
1174311745
// AES instructions

0 commit comments

Comments
 (0)