@@ -4714,62 +4714,122 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
4714
4714
4715
4715
// Horizontal ops
4716
4716
// S3D_Int: multiclass that emits two records for opcode `o`, both derived
// from S3DI: a register-register form (rr) and a register-memory form (rm).
//   OpcodeStr - mnemonic that is prepended to the operand string.
//   vt        - value type wrapped around the operator in the pattern.
//   RC        - register class of $dst and $src1 (and of $src2 in rr).
//   x86memop  - operand type of the memory source $src2 in rm.
//   OpNode    - operator used in the selection pattern; the '-'/'+' pair below
//               changes this parameter from `Intrinsic IntId` to `SDNode OpNode`.
//   Is2Addr   - defaults to 1; when set, the asm string prints only $src2 and
//               $dst, otherwise it also prints $src1.
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4717
- X86MemOperand x86memop, Intrinsic IntId , bit Is2Addr = 1> {
4717
+ X86MemOperand x86memop, SDNode OpNode , bit Is2Addr = 1> {
4718
4718
// Register form (MRMSrcReg): both sources are RC registers.
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4719
4719
!if(Is2Addr,
4720
4720
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4721
4721
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4722
- [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
4722
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
4723
4723
4724
4724
// Memory form (MRMSrcMem): $src2 is an x86memop, read in the pattern through
// (memop addr:$src2).
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4725
4725
!if(Is2Addr,
4726
4726
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4727
4727
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4728
- [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
4728
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
4729
4729
}
4730
4730
// S3_Int: same shape as the S3D_Int multiclass in this file, but the rr and rm
// records derive from S3I instead of S3DI.
//   o / OpcodeStr - opcode byte and mnemonic prepended to the operand string.
//   vt / RC       - value type used in the pattern and register class of the
//                   register operands.
//   x86memop      - operand type of the memory source $src2 in rm.
//   OpNode        - operator used in the selection pattern; the '-'/'+' pair
//                   below changes it from `Intrinsic IntId` to `SDNode OpNode`.
//   Is2Addr       - defaults to 1; picks the asm string without $src1 when
//                   set, and the one that also prints $src1 otherwise.
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4731
- X86MemOperand x86memop, Intrinsic IntId , bit Is2Addr = 1> {
4731
+ X86MemOperand x86memop, SDNode OpNode , bit Is2Addr = 1> {
4732
4732
// Register form (MRMSrcReg): both sources are RC registers.
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4733
4733
!if(Is2Addr,
4734
4734
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4735
4735
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4736
- [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
4736
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
4737
4737
4738
4738
// Memory form (MRMSrcMem): $src2 is an x86memop, read in the pattern through
// (memop addr:$src2).
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4739
4739
!if(Is2Addr,
4740
4740
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4741
4741
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4742
- [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
4742
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
4743
4743
}
4744
4744
4745
4745
// Horizontal add/subtract records guarded by the HasAVX predicate.
// Every defm passes Is2Addr = 0 (so the asm string also prints $src1) and is
// tagged VEX_4V. The first four instantiate VR128/f128mem versions; the four
// with a "Y" suffix instantiate VR256/f256mem versions under the same mnemonics.
// The '-'/'+' pairs replace each per-type intrinsic argument with X86fhadd
// (opcode 0x7C) or X86fhsub (opcode 0x7D). The trailing '+ }' closes this let
// in the new revision.
let Predicates = [HasAVX] in {
4746
4746
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
4747
- int_x86_sse3_hadd_ps , 0>, VEX_4V;
4747
+ X86fhadd , 0>, VEX_4V;
4748
4748
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
4749
- int_x86_sse3_hadd_pd , 0>, VEX_4V;
4749
+ X86fhadd , 0>, VEX_4V;
4750
4750
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
4751
- int_x86_sse3_hsub_ps , 0>, VEX_4V;
4751
+ X86fhsub , 0>, VEX_4V;
4752
4752
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
4753
- int_x86_sse3_hsub_pd , 0>, VEX_4V;
4753
+ X86fhsub , 0>, VEX_4V;
4754
4754
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
4755
- int_x86_avx_hadd_ps_256 , 0>, VEX_4V;
4755
+ X86fhadd , 0>, VEX_4V;
4756
4756
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
4757
- int_x86_avx_hadd_pd_256 , 0>, VEX_4V;
4757
+ X86fhadd , 0>, VEX_4V;
4758
4758
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
4759
- int_x86_avx_hsub_ps_256 , 0>, VEX_4V;
4759
+ X86fhsub , 0>, VEX_4V;
4760
4760
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
4761
- int_x86_avx_hsub_pd_256, 0>, VEX_4V;
4761
+ X86fhsub, 0>, VEX_4V;
4762
+ }
4763
+
4764
// Added in this revision: explicit selection patterns, guarded by HasAVX, that
// map each horizontal add/sub intrinsic onto the V*-prefixed records.
// Each intrinsic gets two Pats: one whose second operand is a register
// (selects the ...rr record) and one whose second operand is
// (memop addr:$src2) (selects the ...rm record).
// The int_x86_sse3_* intrinsics use VR128 operands; the int_x86_avx_*_256
// intrinsics use VR256 operands and the "Y"-suffixed records.
+ let Predicates = [HasAVX] in {
4765
+ def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), VR128:$src2),
4766
+ (VHADDPSrr VR128:$src1, VR128:$src2)>;
4767
+ def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), (memop addr:$src2)),
4768
+ (VHADDPSrm VR128:$src1, addr:$src2)>;
4769
+
4770
+ def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), VR128:$src2),
4771
+ (VHADDPDrr VR128:$src1, VR128:$src2)>;
4772
+ def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), (memop addr:$src2)),
4773
+ (VHADDPDrm VR128:$src1, addr:$src2)>;
4774
+
4775
+ def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), VR128:$src2),
4776
+ (VHSUBPSrr VR128:$src1, VR128:$src2)>;
4777
+ def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), (memop addr:$src2)),
4778
+ (VHSUBPSrm VR128:$src1, addr:$src2)>;
4779
+
4780
+ def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), VR128:$src2),
4781
+ (VHSUBPDrr VR128:$src1, VR128:$src2)>;
4782
+ def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), (memop addr:$src2)),
4783
+ (VHSUBPDrm VR128:$src1, addr:$src2)>;
4784
+
4785
+ def : Pat<(int_x86_avx_hadd_ps_256 (v8f32 VR256:$src1), VR256:$src2),
4786
+ (VHADDPSYrr VR256:$src1, VR256:$src2)>;
4787
+ def : Pat<(int_x86_avx_hadd_ps_256 (v8f32 VR256:$src1), (memop addr:$src2)),
4788
+ (VHADDPSYrm VR256:$src1, addr:$src2)>;
4789
+
4790
+ def : Pat<(int_x86_avx_hadd_pd_256 (v4f64 VR256:$src1), VR256:$src2),
4791
+ (VHADDPDYrr VR256:$src1, VR256:$src2)>;
4792
+ def : Pat<(int_x86_avx_hadd_pd_256 (v4f64 VR256:$src1), (memop addr:$src2)),
4793
+ (VHADDPDYrm VR256:$src1, addr:$src2)>;
4794
+
4795
+ def : Pat<(int_x86_avx_hsub_ps_256 (v8f32 VR256:$src1), VR256:$src2),
4796
+ (VHSUBPSYrr VR256:$src1, VR256:$src2)>;
4797
+ def : Pat<(int_x86_avx_hsub_ps_256 (v8f32 VR256:$src1), (memop addr:$src2)),
4798
+ (VHSUBPSYrm VR256:$src1, addr:$src2)>;
4799
+
4800
+ def : Pat<(int_x86_avx_hsub_pd_256 (v4f64 VR256:$src1), VR256:$src2),
4801
+ (VHSUBPDYrr VR256:$src1, VR256:$src2)>;
4802
+ def : Pat<(int_x86_avx_hsub_pd_256 (v4f64 VR256:$src1), (memop addr:$src2)),
4803
+ (VHSUBPDYrm VR256:$src1, addr:$src2)>;
4762
4804
}
4763
4805
4764
4806
// HADDPS / HADDPD / HSUBPS / HSUBPD records with $src1 tied to $dst through
// the Constraints string. Is2Addr is not passed, so the multiclass default
// applies. The '-' lines are the old two-line defms that passed the
// int_x86_sse3_* intrinsics; the '+' lines are their one-line replacements
// that pass X86fhadd / X86fhsub. The trailing '+ }' closes this let in the
// new revision.
let Constraints = "$src1 = $dst" in {
4765
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
4766
- int_x86_sse3_hadd_ps>;
4767
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
4768
- int_x86_sse3_hadd_pd>;
4769
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
4770
- int_x86_sse3_hsub_ps>;
4771
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
4772
- int_x86_sse3_hsub_pd>;
4807
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
4808
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
4809
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
4810
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
4811
+ }
4812
+
4813
// Added in this revision: explicit selection patterns, guarded by HasSSE3,
// that map the four int_x86_sse3_hadd/hsub intrinsics onto the HADD*/HSUB*
// records. Each intrinsic gets a register-operand Pat (selects ...rr) and a
// (memop addr:$src2) Pat (selects ...rm); all operands are VR128.
+ let Predicates = [HasSSE3] in {
4814
+ def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), VR128:$src2),
4815
+ (HADDPSrr VR128:$src1, VR128:$src2)>;
4816
+ def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), (memop addr:$src2)),
4817
+ (HADDPSrm VR128:$src1, addr:$src2)>;
4818
+
4819
+ def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), VR128:$src2),
4820
+ (HADDPDrr VR128:$src1, VR128:$src2)>;
4821
+ def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), (memop addr:$src2)),
4822
+ (HADDPDrm VR128:$src1, addr:$src2)>;
4823
+
4824
+ def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), VR128:$src2),
4825
+ (HSUBPSrr VR128:$src1, VR128:$src2)>;
4826
+ def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), (memop addr:$src2)),
4827
+ (HSUBPSrm VR128:$src1, addr:$src2)>;
4828
+
4829
+ def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), VR128:$src2),
4830
+ (HSUBPDrr VR128:$src1, VR128:$src2)>;
4831
+ def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), (memop addr:$src2)),
4832
+ (HSUBPDrm VR128:$src1, addr:$src2)>;
4773
4833
}
4774
4834
4775
4835
//===---------------------------------------------------------------------===//
0 commit comments