@@ -6746,7 +6746,8 @@ defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRn
6746
6746
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6747
6747
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6748
6748
6749
- multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6749
+ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
6750
+ SDNode RndOp, string Prefix,
6750
6751
string Suffix, SDNode Move,
6751
6752
X86VectorVTInfo _, PatLeaf ZeroFP> {
6752
6753
let Predicates = [HasAVX512] in {
@@ -6789,7 +6790,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6789
6790
6790
6791
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791
6792
(X86selects VK1WM:$mask,
6792
- (Op _.FRC:$src2,
6793
+ (MaskedOp _.FRC:$src2,
6793
6794
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6794
6795
_.FRC:$src3),
6795
6796
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6800,7 +6801,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6800
6801
6801
6802
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802
6803
(X86selects VK1WM:$mask,
6803
- (Op _.FRC:$src2,
6804
+ (MaskedOp _.FRC:$src2,
6804
6805
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6805
6806
(_.ScalarLdFrag addr:$src3)),
6806
6807
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6810,17 +6811,17 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6810
6811
6811
6812
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812
6813
(X86selects VK1WM:$mask,
6813
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6814
- (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6814
+ (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6815
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6815
6816
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6816
6817
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6817
6818
VR128X:$src1, VK1WM:$mask,
6818
6819
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6819
6820
6820
6821
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821
6822
(X86selects VK1WM:$mask,
6822
- (Op _.FRC:$src2, _.FRC:$src3,
6823
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6823
+ (MaskedOp _.FRC:$src2, _.FRC:$src3,
6824
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824
6825
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6825
6826
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6826
6827
VR128X:$src1, VK1WM:$mask,
@@ -6829,18 +6830,18 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6829
6830
6830
6831
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831
6832
(X86selects VK1WM:$mask,
6832
- (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6833
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6833
+ (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6834
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6834
6835
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6835
6836
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6836
6837
VR128X:$src1, VK1WM:$mask,
6837
6838
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6838
6839
6839
6840
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840
6841
(X86selects VK1WM:$mask,
6841
- (Op _.FRC:$src2,
6842
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843
- _.FRC:$src3),
6842
+ (MaskedOp _.FRC:$src2,
6843
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6844
+ _.FRC:$src3),
6844
6845
(_.EltVT ZeroFP)))))),
6845
6846
(!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6846
6847
VR128X:$src1, VK1WM:$mask,
@@ -6849,8 +6850,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6849
6850
6850
6851
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851
6852
(X86selects VK1WM:$mask,
6852
- (Op _.FRC:$src2, _.FRC:$src3,
6853
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6853
+ (MaskedOp _.FRC:$src2, _.FRC:$src3,
6854
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6854
6855
(_.EltVT ZeroFP)))))),
6855
6856
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6856
6857
VR128X:$src1, VK1WM:$mask,
@@ -6859,27 +6860,27 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6859
6860
6860
6861
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6861
6862
(X86selects VK1WM:$mask,
6862
- (Op _.FRC:$src2,
6863
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864
- (_.ScalarLdFrag addr:$src3)),
6863
+ (MaskedOp _.FRC:$src2,
6864
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6865
+ (_.ScalarLdFrag addr:$src3)),
6865
6866
(_.EltVT ZeroFP)))))),
6866
6867
(!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6867
6868
VR128X:$src1, VK1WM:$mask,
6868
6869
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6869
6870
6870
6871
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6871
6872
(X86selects VK1WM:$mask,
6872
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6873
- _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6873
+ (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6874
+ _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6874
6875
(_.EltVT ZeroFP)))))),
6875
6876
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6876
6877
VR128X:$src1, VK1WM:$mask,
6877
6878
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6878
6879
6879
6880
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6880
6881
(X86selects VK1WM:$mask,
6881
- (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6882
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6882
+ (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6883
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6883
6884
(_.EltVT ZeroFP)))))),
6884
6885
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6885
6886
VR128X:$src1, VK1WM:$mask,
@@ -6948,23 +6949,23 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6948
6949
}
6949
6950
}
6950
6951
6951
- defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
6952
- X86Movss, v4f32x_info, fp32imm0>;
6953
- defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6954
- X86Movss, v4f32x_info, fp32imm0>;
6955
- defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6956
- X86Movss, v4f32x_info, fp32imm0>;
6957
- defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6958
- X86Movss, v4f32x_info, fp32imm0>;
6952
+ defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6953
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
6954
+ defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6955
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
6956
+ defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6957
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
6958
+ defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6959
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
6959
6960
6960
- defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
6961
- X86Movsd, v2f64x_info, fp64imm0>;
6962
- defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6963
- X86Movsd, v2f64x_info, fp64imm0>;
6964
- defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6965
- X86Movsd, v2f64x_info, fp64imm0>;
6966
- defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6967
- X86Movsd, v2f64x_info, fp64imm0>;
6961
+ defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6962
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
6963
+ defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6964
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
6965
+ defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6966
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
6967
+ defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6968
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
6968
6969
6969
6970
//===----------------------------------------------------------------------===//
6970
6971
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
@@ -11656,88 +11657,89 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11656
11657
11657
11658
// TODO: Some canonicalization in lowering would simplify the number of
11658
11659
// patterns we have to try to match.
11659
- multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11660
- X86VectorVTInfo _, PatLeaf ZeroFP> {
11660
+ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
11661
+ string OpcPrefix, SDNode MoveNode,
11662
+ X86VectorVTInfo _, PatLeaf ZeroFP> {
11661
11663
let Predicates = [HasAVX512] in {
11662
11664
// extracted scalar math op with insert via movss
11663
11665
def : Pat<(MoveNode
11664
11666
(_.VT VR128X:$dst),
11665
11667
(_.VT (scalar_to_vector
11666
11668
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11667
11669
_.FRC:$src)))),
11668
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11670
+ (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
11669
11671
(_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11670
11672
def : Pat<(MoveNode
11671
11673
(_.VT VR128X:$dst),
11672
11674
(_.VT (scalar_to_vector
11673
11675
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11674
11676
(_.ScalarLdFrag addr:$src))))),
11675
- (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11677
+ (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
11676
11678
11677
11679
// extracted masked scalar math op with insert via movss
11678
11680
def : Pat<(MoveNode (_.VT VR128X:$src1),
11679
11681
(scalar_to_vector
11680
11682
(X86selects VK1WM:$mask,
11681
- (Op (_.EltVT
11682
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11683
- _.FRC:$src2),
11683
+ (MaskedOp (_.EltVT
11684
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11685
+ _.FRC:$src2),
11684
11686
_.FRC:$src0))),
11685
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11687
+ (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
11686
11688
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11687
11689
VK1WM:$mask, _.VT:$src1,
11688
11690
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11689
11691
def : Pat<(MoveNode (_.VT VR128X:$src1),
11690
11692
(scalar_to_vector
11691
11693
(X86selects VK1WM:$mask,
11692
- (Op (_.EltVT
11693
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11694
- (_.ScalarLdFrag addr:$src2)),
11694
+ (MaskedOp (_.EltVT
11695
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11696
+ (_.ScalarLdFrag addr:$src2)),
11695
11697
_.FRC:$src0))),
11696
- (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11698
+ (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
11697
11699
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11698
11700
VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11699
11701
11700
11702
// extracted masked scalar math op with insert via movss
11701
11703
def : Pat<(MoveNode (_.VT VR128X:$src1),
11702
11704
(scalar_to_vector
11703
11705
(X86selects VK1WM:$mask,
11704
- (Op (_.EltVT
11705
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11706
- _.FRC:$src2), (_.EltVT ZeroFP)))),
11707
- (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11706
+ (MaskedOp (_.EltVT
11707
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11708
+ _.FRC:$src2), (_.EltVT ZeroFP)))),
11709
+ (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
11708
11710
VK1WM:$mask, _.VT:$src1,
11709
11711
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11710
11712
def : Pat<(MoveNode (_.VT VR128X:$src1),
11711
11713
(scalar_to_vector
11712
11714
(X86selects VK1WM:$mask,
11713
- (Op (_.EltVT
11714
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11715
- (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11716
- (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11715
+ (MaskedOp (_.EltVT
11716
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11717
+ (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11718
+ (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11717
11719
}
11718
11720
}
11719
11721
11720
- defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11721
- defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11722
- defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11723
- defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11722
+ defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11723
+ defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11724
+ defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11725
+ defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11724
11726
11725
- defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11726
- defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11727
- defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11728
- defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11727
+ defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11728
+ defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11729
+ defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11730
+ defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11729
11731
11730
11732
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11731
11733
SDNode Move, X86VectorVTInfo _> {
11732
11734
let Predicates = [HasAVX512] in {
11733
11735
def : Pat<(_.VT (Move _.VT:$dst,
11734
11736
(scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11735
- (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11737
+ (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
11736
11738
}
11737
11739
}
11738
11740
11739
- defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11740
- defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11741
+ defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11742
+ defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11741
11743
11742
11744
//===----------------------------------------------------------------------===//
11743
11745
// AES instructions
0 commit comments