@@ -889,17 +889,12 @@ define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 3
 ; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a1, a1, a0
 ; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: lui a2, 1048568
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a2
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vfredosum.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfredosum.vs v9, v8, v9
+; CHECK-NEXT: vfmv.f.s fa0, v9
 ; CHECK-NEXT: ret
   %red = call half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
   ret half %red
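
Note: the win in this hunk is that the ordered reduction no longer pads the tail of the <vscale x 3 x half> vector with -0.0 (the lui a2, 1048568 splat plus vslideup) before reducing at VLMAX; instead the AVL is set to the exact element count. A minimal C sketch of the new scalar arithmetic, assuming vlenb holds the VLENB CSR value in bytes (so vscale = vlenb / 8); the helper name is hypothetical:

    #include <stdint.h>

    /* AVL for <vscale x 3 x half>: 3 * vscale elements. */
    uint64_t avl_nxv3f16(uint64_t vlenb) {
        uint64_t x = vlenb >> 3; /* srli a0, a0, 3 : vscale */
        uint64_t t = x << 1;     /* slli a1, a0, 1 : 2 * vscale */
        return t + x;            /* add a0, a1, a0 : 3 * vscale */
    }
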
@@ -910,18 +905,15 @@ declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
 define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vfredosum.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfredosum.vs v10, v8, v10
+; CHECK-NEXT: vfmv.f.s fa0, v10
 ; CHECK-NEXT: ret
   %red = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
   ret half %red
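
Same idea for <vscale x 6 x half>, where the element count is now derived by subtraction instead of a padded full-register reduction. A hedged C sketch under the same assumptions (hypothetical helper name):

    #include <stdint.h>

    /* AVL for <vscale x 6 x half>: 6 * vscale elements,
       computed as 8*vscale - 2*vscale, since vlenb = 8 * vscale. */
    uint64_t avl_nxv6f16(uint64_t vlenb) {
        uint64_t t = (vlenb >> 3) << 1; /* srli+slli : 2 * vscale */
        return vlenb - t;               /* sub a0, a0, a1 : 6 * vscale */
    }
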
@@ -932,22 +924,15 @@ declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
 define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v11, v12
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vfredosum.vs v8, v8, v12
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfredosum.vs v12, v8, v12
+; CHECK-NEXT: vfmv.f.s fa0, v12
 ; CHECK-NEXT: ret
   %red = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
   ret half %red
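
For <vscale x 10 x half> the new code materializes the multiplier with li and uses a plain mul rather than shifts. Sketch under the same assumptions (hypothetical name):

    #include <stdint.h>

    /* AVL for <vscale x 10 x half>: 10 * vscale elements. */
    uint64_t avl_nxv10f16(uint64_t vlenb) {
        return (vlenb >> 3) * 10; /* srli ; li a1, 10 ; mul */
    }
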
@@ -958,13 +943,16 @@ declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
 define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v11, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfredosum.vs v8, v8, v12
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfredosum.vs v12, v8, v12
+; CHECK-NEXT: vfmv.f.s fa0, v12
 ; CHECK-NEXT: ret
   %red = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
   ret half %red
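
<vscale x 12 x half> gets a strength-reduced multiply: 12 * vscale = 16*vscale - 4*vscale. Sketch (hypothetical helper, same vlenb assumption):

    #include <stdint.h>

    /* AVL for <vscale x 12 x half>: 12 * vscale elements. */
    uint64_t avl_nxv12f16(uint64_t vlenb) {
        uint64_t x = vlenb >> 3;    /* vscale */
        return (x << 4) - (x << 2); /* slli, slli, sub */
    }
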
@@ -977,17 +965,14 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 3
 ; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a1, a1, a0
 ; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: lui a2, 1048568
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a2
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vfredusum.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfredusum.vs v10, v8, v9
+; CHECK-NEXT: vfmv.f.s fa0, v10
 ; CHECK-NEXT: ret
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
   ret half %red
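
In the unordered (reassoc) case the -0.0 constant is still materialized, but now as a single element via vmv.s.x rather than a whole-register vmv.v.x splat. A small C sketch of why lui a1, 1048568 yields half -0.0 at e16 (function name hypothetical):

    #include <stdint.h>

    /* lui a1, 1048568 puts 0xFFFF8 in the upper 20 bits: 0xFFFF8000.
       An e16 vmv.s.x keeps only the low 16 bits, 0x8000, which is the
       IEEE-754 half-precision bit pattern for -0.0 (sign bit only). */
    uint16_t neg_zero_f16(void) {
        uint32_t reg = 1048568u << 12; /* 0xFFFF8000 */
        return (uint16_t)reg;          /* 0x8000 */
    }
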
@@ -996,18 +981,17 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
 define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-LABEL: vreduce_fadd_nxv6f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vfredusum.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: vmv.s.x v11, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfredusum.vs v11, v8, v10
+; CHECK-NEXT: vfmv.f.s fa0, v11
 ; CHECK-NEXT: ret
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
   ret half %red
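
As a quick cross-check of the four AVL computations sketched above, a hypothetical driver for VLEN = 128 (vlenb = 16, vscale = 2) should print 6 12 20 24:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint64_t vlenb = 16;                  /* e.g. VLEN = 128 bits */
        uint64_t x = vlenb >> 3;              /* vscale = 2 */
        uint64_t avl3  = (x << 1) + x;        /* nxv3f16  -> 6  */
        uint64_t avl6  = vlenb - (x << 1);    /* nxv6f16  -> 12 */
        uint64_t avl10 = x * 10;              /* nxv10f16 -> 20 */
        uint64_t avl12 = (x << 4) - (x << 2); /* nxv12f16 -> 24 */
        printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
               avl3, avl6, avl10, avl12);
        return 0;
    }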