Skip to content

Commit fd48044

Browse files
committed
[RISCV] Add tests for pseudos that shouldn't have vmerge folded into them. NFC
1 parent c82cc62 commit fd48044

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,104 @@ entry:
992992
ret <vscale x 1 x i16> %1
993993
}
994994

995+
; Test that reductions don't have a vmerge folded into them, since the mask affects
996+
; the result.
997+
998+
declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
999+
<vscale x 2 x i32>,
1000+
<vscale x 2 x i32>,
1001+
<vscale x 2 x i32>,
1002+
i64)
1003+
1004+
; @vredsum: calls llvm.riscv.vredsum on (%passthru, %x, %y, %vl), then passes the
; result %a to llvm.riscv.vmerge with %passthru as the first two operands and %m
; as the mask. The expected assembly keeps vredsum.vs and vmerge.vvm as two
; separate instructions, i.e. the masked merge is not folded into the reduction.
define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) {
1005+
; CHECK-LABEL: vredsum:
1006+
; CHECK: # %bb.0:
1007+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1008+
; CHECK-NEXT: vmv1r.v v11, v8
1009+
; CHECK-NEXT: vredsum.vs v11, v9, v10
1010+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1011+
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
1012+
; CHECK-NEXT: ret
1013+
%a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
1014+
<vscale x 2 x i32> %passthru,
1015+
<vscale x 2 x i32> %x,
1016+
<vscale x 2 x i32> %y,
1017+
i64 %vl)
1018+
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl)
1019+
ret <vscale x 2 x i32> %b
1020+
}
1021+
1022+
declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1023+
<vscale x 2 x float>,
1024+
<vscale x 2 x float>,
1025+
<vscale x 2 x float>,
1026+
i64, i64)
1027+
1028+
; @vfredusum: floating-point reduction followed by a masked merge. Calls
; llvm.riscv.vfredusum on (%passthru, %x, %y, i64 0, %vl), then passes the result
; %a to llvm.riscv.vmerge with %passthru as the first two operands and %m as the
; mask. The expected assembly keeps vfredusum.vs and vmerge.vvm as two separate
; instructions.
; NOTE(review): the extra `i64 0` operand is presumably the rounding mode, given
; the fsrmi/fsrm pair in the expected assembly -- confirm against the intrinsic
; definition.
define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) {
1029+
; CHECK-LABEL: vfredusum:
1030+
; CHECK: # %bb.0:
1031+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1032+
; CHECK-NEXT: fsrmi a0, 0
1033+
; CHECK-NEXT: vmv1r.v v11, v8
1034+
; CHECK-NEXT: vfredusum.vs v11, v9, v10
1035+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1036+
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
1037+
; CHECK-NEXT: fsrm a0
1038+
; CHECK-NEXT: ret
1039+
%a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1040+
<vscale x 2 x float> %passthru,
1041+
<vscale x 2 x float> %x,
1042+
<vscale x 2 x float> %y,
1043+
i64 0, i64 %vl)
1044+
%b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl)
1045+
ret <vscale x 2 x float> %b
1046+
}
1047+
1048+
; However, we can fold it in if the mask is all ones.
1049+
; @vredsum_allones_mask: same reduction-then-merge sequence as a masked test, but
; the merge mask %mask is built locally as a splat of i1 -1 (insertelement into
; lane 0, then shufflevector with a zeroinitializer shuffle mask), so every lane
; of the mask is set. The expected assembly has vredsum.vs followed by a vmv.v.v
; under a `tu` vsetvli; no vmerge.vvm appears.
define <vscale x 2 x i32> @vredsum_allones_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl) {
1050+
; CHECK-LABEL: vredsum_allones_mask:
1051+
; CHECK: # %bb.0:
1052+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1053+
; CHECK-NEXT: vmv1r.v v11, v8
1054+
; CHECK-NEXT: vredsum.vs v11, v9, v10
1055+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1056+
; CHECK-NEXT: vmv.v.v v8, v11
1057+
; CHECK-NEXT: ret
1058+
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1059+
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1060+
1061+
%a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
1062+
<vscale x 2 x i32> %passthru,
1063+
<vscale x 2 x i32> %x,
1064+
<vscale x 2 x i32> %y,
1065+
i64 %vl)
1066+
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %vl)
1067+
ret <vscale x 2 x i32> %b
1068+
}
1069+
1070+
; @vfredusum_allones_mask: floating-point reduction followed by a merge whose
; mask %mask is built locally as a splat of i1 -1 (insertelement into lane 0,
; then shufflevector with a zeroinitializer shuffle mask), so every lane of the
; mask is set. The expected assembly has vfredusum.vs followed by a vmv.v.v under
; a `tu` vsetvli, bracketed by fsrmi/fsrm; no vmerge.vvm appears.
; NOTE(review): the `i64 0` operand passed before %vl is presumably the rounding
; mode (matching the `fsrmi a0, 0` in the expected assembly) -- confirm.
define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, i64 %vl) {
1071+
; CHECK-LABEL: vfredusum_allones_mask:
1072+
; CHECK: # %bb.0:
1073+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1074+
; CHECK-NEXT: fsrmi a0, 0
1075+
; CHECK-NEXT: vmv1r.v v11, v8
1076+
; CHECK-NEXT: vfredusum.vs v11, v9, v10
1077+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
1078+
; CHECK-NEXT: vmv.v.v v8, v11
1079+
; CHECK-NEXT: fsrm a0
1080+
; CHECK-NEXT: ret
1081+
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1082+
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1083+
1084+
%a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
1085+
<vscale x 2 x float> %passthru,
1086+
<vscale x 2 x float> %x,
1087+
<vscale x 2 x float> %y,
1088+
i64 0, i64 %vl)
1089+
%b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %mask, i64 %vl)
1090+
ret <vscale x 2 x float> %b
1091+
}
1092+
9951093
declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>* nocapture, i64)
9961094
declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
9971095
declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)

0 commit comments

Comments
 (0)