@@ -5741,3 +5741,152 @@ vector.body:
5741
5741
for.cond.cleanup:
5742
5742
ret void
5743
5743
}
5744
+
5745
+ define void @sink_splat_vfwadd_vf (ptr nocapture %a , ptr nocapture %b , float %f ) { ; Loop test: fadd of fpext'd <vscale x 2 x float> loads with a splat of fpext(%f); the expected assembly uses vfwadd.vf with fa0.
5746
+ ; CHECK-LABEL: sink_splat_vfwadd_vf:
5747
+ ; CHECK: # %bb.0: # %entry
5748
+ ; CHECK-NEXT: li a1, 0
5749
+ ; CHECK-NEXT: li a2, 1020
5750
+ ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5751
+ ; CHECK-NEXT: .LBB125_1: # %vector.body
5752
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5753
+ ; CHECK-NEXT: vl1re32.v v8, (a0)
5754
+ ; CHECK-NEXT: addi a1, a1, 4
5755
+ ; CHECK-NEXT: addi a2, a2, -4
5756
+ ; CHECK-NEXT: vfwadd.vf v10, v8, fa0
5757
+ ; CHECK-NEXT: vs2r.v v10, (a0)
5758
+ ; CHECK-NEXT: addi a0, a0, 16
5759
+ ; CHECK-NEXT: j .LBB125_1
5760
+ entry:
5761
+ %f.ext = fpext float %f to double ; widen the scalar operand to double outside the loop
5762
+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5763
+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer ; zero mask: replicate lane 0 (%f.ext) into every lane
5764
+ br label %vector.body
5765
+
5766
+ vector.body: ; preds = %vector.body, %entry
5767
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5768
+ %0 = getelementptr float , ptr %a , i64 %index
5769
+ %wide.load = load <vscale x 2 x float >, ptr %0
5770
+ %ext = fpext <vscale x 2 x float > %wide.load to <vscale x 2 x double > ; widen the loaded floats to double
5771
+ %1 = fadd <vscale x 2 x double > %ext , %broadcast.splat ; widened load + splat; the expected assembly has one vfwadd.vf taking fa0, with no separate convert or splat
5772
+ %2 = getelementptr double , ptr %b , i64 %index ; %2 is never used below
5773
+ store <vscale x 2 x double > %1 , ptr %0 ; writes through %0 (the load address), not %2 - NOTE(review): confirm %2 was not the intended destination
5774
+ %index.next = add i64 %index , 4
5775
+ %3 = icmp eq i64 %index.next , 1024 ; %3 is not referenced by the branch below
5776
+ br i1 32 , label %for.cond.cleanup , label %vector.body ; NOTE(review): condition is the literal 32 rather than %3; the expected assembly ends in an unconditional jump to the loop label - confirm this is intended
5777
+
5778
+ for.cond.cleanup: ; preds = %vector.body
5779
+ ret void
5780
+ }
5781
+
5782
+ define void @sink_splat_vfwadd_wf (ptr nocapture %a , ptr nocapture %b , float %f ) { ; Loop test: fadd of <vscale x 2 x double> loads with a splat of fpext(%f); the expected assembly uses vfwadd.wf with fa0.
5783
+ ; CHECK-LABEL: sink_splat_vfwadd_wf:
5784
+ ; CHECK: # %bb.0: # %entry
5785
+ ; CHECK-NEXT: li a1, 0
5786
+ ; CHECK-NEXT: li a2, 1020
5787
+ ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5788
+ ; CHECK-NEXT: .LBB126_1: # %vector.body
5789
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5790
+ ; CHECK-NEXT: vl2re64.v v8, (a0)
5791
+ ; CHECK-NEXT: addi a1, a1, 4
5792
+ ; CHECK-NEXT: addi a2, a2, -4
5793
+ ; CHECK-NEXT: vfwadd.wf v8, v8, fa0
5794
+ ; CHECK-NEXT: vs2r.v v8, (a0)
5795
+ ; CHECK-NEXT: addi a0, a0, 32
5796
+ ; CHECK-NEXT: j .LBB126_1
5797
+ entry:
5798
+ %f.ext = fpext float %f to double ; widen the scalar operand to double outside the loop
5799
+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5800
+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer ; zero mask: replicate lane 0 (%f.ext) into every lane
5801
+ br label %vector.body
5802
+
5803
+ vector.body: ; preds = %vector.body, %entry
5804
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5805
+ %0 = getelementptr double , ptr %a , i64 %index
5806
+ %wide.load = load <vscale x 2 x double >, ptr %0 ; vector operand is loaded as double; only the scalar side is extended
5807
+ %1 = fadd <vscale x 2 x double > %wide.load , %broadcast.splat ; double load + splat; the expected assembly has one vfwadd.wf taking fa0, with no separate convert or splat
5808
+ %2 = getelementptr double , ptr %b , i64 %index ; %2 is never used below
5809
+ store <vscale x 2 x double > %1 , ptr %0 ; writes through %0 (the load address), not %2 - NOTE(review): confirm %2 was not the intended destination
5810
+ %index.next = add i64 %index , 4
5811
+ %3 = icmp eq i64 %index.next , 1024 ; %3 is not referenced by the branch below
5812
+ br i1 32 , label %for.cond.cleanup , label %vector.body ; NOTE(review): condition is the literal 32 rather than %3; the expected assembly ends in an unconditional jump to the loop label - confirm this is intended
5813
+
5814
+ for.cond.cleanup: ; preds = %vector.body
5815
+ ret void
5816
+ }
5817
+
5818
+ define void @sink_splat_vfwmul_vf (ptr nocapture %a , ptr nocapture %b , float %f ) { ; Loop test: fmul of fpext'd <vscale x 2 x float> loads with a splat of fpext(%f); the expected assembly uses vfwmul.vf with fa0.
5819
+ ; CHECK-LABEL: sink_splat_vfwmul_vf:
5820
+ ; CHECK: # %bb.0: # %entry
5821
+ ; CHECK-NEXT: li a1, 0
5822
+ ; CHECK-NEXT: li a2, 1020
5823
+ ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
5824
+ ; CHECK-NEXT: .LBB127_1: # %vector.body
5825
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5826
+ ; CHECK-NEXT: vl1re32.v v8, (a0)
5827
+ ; CHECK-NEXT: addi a1, a1, 4
5828
+ ; CHECK-NEXT: addi a2, a2, -4
5829
+ ; CHECK-NEXT: vfwmul.vf v10, v8, fa0
5830
+ ; CHECK-NEXT: vs2r.v v10, (a0)
5831
+ ; CHECK-NEXT: addi a0, a0, 16
5832
+ ; CHECK-NEXT: j .LBB127_1
5833
+ entry:
5834
+ %f.ext = fpext float %f to double ; widen the scalar operand to double outside the loop
5835
+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5836
+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer ; zero mask: replicate lane 0 (%f.ext) into every lane
5837
+ br label %vector.body
5838
+
5839
+ vector.body: ; preds = %vector.body, %entry
5840
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5841
+ %0 = getelementptr float , ptr %a , i64 %index
5842
+ %wide.load = load <vscale x 2 x float >, ptr %0
5843
+ %ext = fpext <vscale x 2 x float > %wide.load to <vscale x 2 x double > ; widen the loaded floats to double
5844
+ %1 = fmul <vscale x 2 x double > %ext , %broadcast.splat ; widened load * splat; the expected assembly has one vfwmul.vf taking fa0, with no separate convert or splat
5845
+ %2 = getelementptr double , ptr %b , i64 %index ; %2 is never used below
5846
+ store <vscale x 2 x double > %1 , ptr %0 ; writes through %0 (the load address), not %2 - NOTE(review): confirm %2 was not the intended destination
5847
+ %index.next = add i64 %index , 4
5848
+ %3 = icmp eq i64 %index.next , 1024 ; %3 is not referenced by the branch below
5849
+ br i1 32 , label %for.cond.cleanup , label %vector.body ; NOTE(review): condition is the literal 32 rather than %3; the expected assembly ends in an unconditional jump to the loop label - confirm this is intended
5850
+
5851
+ for.cond.cleanup: ; preds = %vector.body
5852
+ ret void
5853
+ }
5854
+
5855
+ ; Even though there's no vfwmul.wf we'll sink the fcvt.d.s. Make sure
5856
+ ; early-machinelicm undoes the sink after isel.
5857
+ define void @sink_splat_vfwmul_wf (ptr nocapture %a , ptr nocapture %b , float %f ) { ; Loop test: fmul of <vscale x 2 x double> loads with a splat of fpext(%f); the expected assembly has fcvt.d.s before the loop label and a plain vfmul.vf inside the loop.
5858
+ ; CHECK-LABEL: sink_splat_vfwmul_wf:
5859
+ ; CHECK: # %bb.0: # %entry
5860
+ ; CHECK-NEXT: li a1, 0
5861
+ ; CHECK-NEXT: li a2, 1020
5862
+ ; CHECK-NEXT: fcvt.d.s fa5, fa0
5863
+ ; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma
5864
+ ; CHECK-NEXT: .LBB128_1: # %vector.body
5865
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5866
+ ; CHECK-NEXT: vl2re64.v v8, (a0)
5867
+ ; CHECK-NEXT: addi a1, a1, 4
5868
+ ; CHECK-NEXT: addi a2, a2, -4
5869
+ ; CHECK-NEXT: vfmul.vf v8, v8, fa5
5870
+ ; CHECK-NEXT: vs2r.v v8, (a0)
5871
+ ; CHECK-NEXT: addi a0, a0, 16
5872
+ ; CHECK-NEXT: j .LBB128_1
5873
+ entry:
5874
+ %f.ext = fpext float %f to double ; widen the scalar operand to double outside the loop
5875
+ %broadcast.splatinsert = insertelement <vscale x 2 x double > poison, double %f.ext , i32 0
5876
+ %broadcast.splat = shufflevector <vscale x 2 x double > %broadcast.splatinsert , <vscale x 2 x double > poison, <vscale x 2 x i32 > zeroinitializer ; zero mask: replicate lane 0 (%f.ext) into every lane
5877
+ br label %vector.body
5878
+
5879
+ vector.body: ; preds = %vector.body, %entry
5880
+ %index = phi i64 [ 0 , %entry ], [ %index.next , %vector.body ]
5881
+ %0 = getelementptr float , ptr %a , i64 %index ; NOTE(review): indexed as float although the load below reads double elements; the expected assembly advances a0 by 16 per iteration - confirm float is intended
5882
+ %wide.load = load <vscale x 2 x double >, ptr %0 ; vector operand is loaded as double; only the scalar side is extended
5883
+ %1 = fmul <vscale x 2 x double > %wide.load , %broadcast.splat ; double load * splat; the expected assembly multiplies by fa5, the fcvt.d.s result produced before the loop label
5884
+ %2 = getelementptr double , ptr %b , i64 %index ; %2 is never used below
5885
+ store <vscale x 2 x double > %1 , ptr %0 ; writes through %0 (the load address), not %2 - NOTE(review): confirm %2 was not the intended destination
5886
+ %index.next = add i64 %index , 4
5887
+ %3 = icmp eq i64 %index.next , 1024 ; %3 is not referenced by the branch below
5888
+ br i1 32 , label %for.cond.cleanup , label %vector.body ; NOTE(review): condition is the literal 32 rather than %3; the expected assembly ends in an unconditional jump to the loop label - confirm this is intended
5889
+
5890
+ for.cond.cleanup: ; preds = %vector.body
5891
+ ret void
5892
+ }
0 commit comments