You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+98Lines changed: 98 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -992,6 +992,104 @@ entry:
992
992
ret <vscale x 1 x i16> %1
993
993
}
994
994
995
+
; Test reductions don't have a vmerge folded into them, since the mask affects
996
+
; the result.
997
+
998
+
declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
999
+
<vscale x 2 x i32>,
1000
+
<vscale x 2 x i32>,
1001
+
<vscale x 2 x i32>,
1002
+
i64)
1003
+
1004
+
define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) {
1005
+
; CHECK-LABEL: vredsum:
1006
+
; CHECK: # %bb.0:
1007
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1008
+
; CHECK-NEXT: vmv1r.v v11, v8
1009
+
; CHECK-NEXT: vredsum.vs v11, v9, v10
1010
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1011
+
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
1012
+
; CHECK-NEXT: ret
1013
+
%a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
1014
+
<vscale x 2 x i32> %passthru,
1015
+
<vscale x 2 x i32> %x,
1016
+
<vscale x 2 x i32> %y,
1017
+
i64 %vl)
1018
+
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl)
1019
+
ret <vscale x 2 x i32> %b
1020
+
}
1021
+
1022
+
declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1023
+
<vscale x 2 x float>,
1024
+
<vscale x 2 x float>,
1025
+
<vscale x 2 x float>,
1026
+
i64, i64)
1027
+
1028
+
define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) {
1029
+
; CHECK-LABEL: vfredusum:
1030
+
; CHECK: # %bb.0:
1031
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1032
+
; CHECK-NEXT: fsrmi a0, 0
1033
+
; CHECK-NEXT: vmv1r.v v11, v8
1034
+
; CHECK-NEXT: vfredusum.vs v11, v9, v10
1035
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1036
+
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
1037
+
; CHECK-NEXT: fsrm a0
1038
+
; CHECK-NEXT: ret
1039
+
%a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1040
+
<vscale x 2 x float> %passthru,
1041
+
<vscale x 2 x float> %x,
1042
+
<vscale x 2 x float> %y,
1043
+
i64 0, i64 %vl)
1044
+
%b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl)
1045
+
ret <vscale x 2 x float> %b
1046
+
}
1047
+
1048
+
; However we can fold it in if the mask is all ones.
1049
+
define <vscale x 2 x i32> @vredsum_allones_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl) {
1050
+
; CHECK-LABEL: vredsum_allones_mask:
1051
+
; CHECK: # %bb.0:
1052
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1053
+
; CHECK-NEXT: vmv1r.v v11, v8
1054
+
; CHECK-NEXT: vredsum.vs v11, v9, v10
1055
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1056
+
; CHECK-NEXT: vmv.v.v v8, v11
1057
+
; CHECK-NEXT: ret
1058
+
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1059
+
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1060
+
1061
+
%a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
1062
+
<vscale x 2 x i32> %passthru,
1063
+
<vscale x 2 x i32> %x,
1064
+
<vscale x 2 x i32> %y,
1065
+
i64 %vl)
1066
+
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %vl)
1067
+
ret <vscale x 2 x i32> %b
1068
+
}
1069
+
1070
+
define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, i64 %vl) {
1071
+
; CHECK-LABEL: vfredusum_allones_mask:
1072
+
; CHECK: # %bb.0:
1073
+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1074
+
; CHECK-NEXT: fsrmi a0, 0
1075
+
; CHECK-NEXT: vmv1r.v v11, v8
1076
+
; CHECK-NEXT: vfredusum.vs v11, v9, v10
1077
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1078
+
; CHECK-NEXT: vmv.v.v v8, v11
1079
+
; CHECK-NEXT: fsrm a0
1080
+
; CHECK-NEXT: ret
1081
+
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1082
+
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1083
+
1084
+
%a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1085
+
<vscale x 2 x float> %passthru,
1086
+
<vscale x 2 x float> %x,
1087
+
<vscale x 2 x float> %y,
1088
+
i64 0, i64 %vl)
1089
+
%b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %mask, i64 %vl)
1090
+
ret <vscale x 2 x float> %b
1091
+
}
1092
+
995
1093
declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>* nocapture, i64)
996
1094
declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
997
1095
declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)
0 commit comments