Skip to content

Commit b75dad0

Browse files
authored
[RISCV] Support VP_SPLAT mask operations (#132345)
When val is a constant, it lowers as: (vp.splat val, mask, vl) -> (select val, (riscv_vmset_vl vl), (riscv_vmclr_vl vl)). Otherwise, it lowers as: (vp.splat val, mask, vl) -> (vmsne_vl (vmv_v_x_vl (zext val), vl), splat(zero), mask, vl). --------- Co-authored-by: yanming <[email protected]>
1 parent 76b999d commit b75dad0

File tree

2 files changed

+128
-2
lines changed

2 files changed

+128
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
810810

811811
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
812812
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
813+
setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
813814

814815
setOperationPromotedToType(
815816
ISD::VECTOR_SPLICE, VT,
@@ -12786,8 +12787,26 @@ SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
1278612787
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
1278712788
}
1278812789

12789-
SDValue Result =
12790-
lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12790+
SDValue Result;
12791+
if (VT.getScalarType() == MVT::i1) {
12792+
if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
12793+
Result =
12794+
DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
12795+
ContainerVT, VL);
12796+
} else {
12797+
MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
12798+
SDValue LHS =
12799+
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
12800+
DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
12801+
SDValue RHS = DAG.getConstant(0, DL, WidenVT);
12802+
Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12803+
{LHS, RHS, DAG.getCondCode(ISD::SETNE),
12804+
DAG.getUNDEF(ContainerVT), Mask, VL});
12805+
}
12806+
} else {
12807+
Result =
12808+
lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12809+
}
1279112810

1279212811
if (!VT.isFixedLengthVector())
1279312812
return Result;
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4+
5+
; Constant-true i1 splat under an all-true mask: expected to fold to a single vmset.m.
define <vscale x 1 x i1> @vp_splat_nxv1i1_true_unmasked(i32 zeroext %evl) {
6+
; CHECK-LABEL: vp_splat_nxv1i1_true_unmasked:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
9+
; CHECK-NEXT: vmset.m v0
10+
; CHECK-NEXT: ret
11+
%splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
12+
ret <vscale x 1 x i1> %splat
13+
}
14+
15+
; Constant-false i1 splat under an all-true mask: expected to fold to a single vmclr.m.
define <vscale x 1 x i1> @vp_splat_nxv1i1_false_unmasked(i32 zeroext %evl) {
16+
; CHECK-LABEL: vp_splat_nxv1i1_false_unmasked:
17+
; CHECK: # %bb.0:
18+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
19+
; CHECK-NEXT: vmclr.m v0
20+
; CHECK-NEXT: ret
21+
%splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
22+
ret <vscale x 1 x i1> %splat
23+
}
24+
25+
; Non-constant i1 splat (nxv1i1, e8/mf8): expected lowering is a vmv.v.x of the
; scalar into an i8 vector, then a masked vmsne.vi against zero to form the mask.
define <vscale x 1 x i1> @vp_splat_nxv1i1(i1 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
26+
; CHECK-LABEL: vp_splat_nxv1i1:
27+
; CHECK: # %bb.0:
28+
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
29+
; CHECK-NEXT: vmv.v.x v8, a0
30+
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
31+
; CHECK-NEXT: ret
32+
%splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 %val, <vscale x 1 x i1> %m, i32 %evl)
33+
ret <vscale x 1 x i1> %splat
34+
}
35+
36+
; Same non-constant pattern as nxv1i1, at e8/mf4 (nxv2i1).
define <vscale x 2 x i1> @vp_splat_nxv2i1(i1 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
37+
; CHECK-LABEL: vp_splat_nxv2i1:
38+
; CHECK: # %bb.0:
39+
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
40+
; CHECK-NEXT: vmv.v.x v8, a0
41+
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
42+
; CHECK-NEXT: ret
43+
%splat = call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 %val, <vscale x 2 x i1> %m, i32 %evl)
44+
ret <vscale x 2 x i1> %splat
45+
}
46+
47+
; Same non-constant pattern, at e8/mf2 (nxv4i1).
define <vscale x 4 x i1> @vp_splat_nxv4i1(i1 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
48+
; CHECK-LABEL: vp_splat_nxv4i1:
49+
; CHECK: # %bb.0:
50+
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
51+
; CHECK-NEXT: vmv.v.x v8, a0
52+
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
53+
; CHECK-NEXT: ret
54+
%splat = call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 %val, <vscale x 4 x i1> %m, i32 %evl)
55+
ret <vscale x 4 x i1> %splat
56+
}
57+
58+
; Same non-constant pattern, at e8/m1 (nxv8i1).
define <vscale x 8 x i1> @vp_splat_nxv8i1(i1 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
59+
; CHECK-LABEL: vp_splat_nxv8i1:
60+
; CHECK: # %bb.0:
61+
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
62+
; CHECK-NEXT: vmv.v.x v8, a0
63+
; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
64+
; CHECK-NEXT: ret
65+
%splat = call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 %val, <vscale x 8 x i1> %m, i32 %evl)
66+
ret <vscale x 8 x i1> %splat
67+
}
68+
69+
; Non-constant pattern at e8/m2 (nxv16i1). Here the vmsne result lands in v8 and an
; extra vmv1r.v copies it into v0 (NOTE(review): presumably a register-allocation
; constraint at LMUL>1 — confirm against the RA output if this churns).
define <vscale x 16 x i1> @vp_splat_nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
70+
; CHECK-LABEL: vp_splat_nxv16i1:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
73+
; CHECK-NEXT: vmv.v.x v10, a0
74+
; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
75+
; CHECK-NEXT: vmv1r.v v0, v8
76+
; CHECK-NEXT: ret
77+
%splat = call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 %evl)
78+
ret <vscale x 16 x i1> %splat
79+
}
80+
81+
; Same as nxv16i1, at e8/m4 (nxv32i1): vmv.v.x + masked vmsne.vi, result copied to v0.
define <vscale x 32 x i1> @vp_splat_nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
82+
; CHECK-LABEL: vp_splat_nxv32i1:
83+
; CHECK: # %bb.0:
84+
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
85+
; CHECK-NEXT: vmv.v.x v12, a0
86+
; CHECK-NEXT: vmsne.vi v8, v12, 0, v0.t
87+
; CHECK-NEXT: vmv1r.v v0, v8
88+
; CHECK-NEXT: ret
89+
%splat = call <vscale x 32 x i1> @llvm.experimental.vp.splat.nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 %evl)
90+
ret <vscale x 32 x i1> %splat
91+
}
92+
93+
; Same as nxv16i1, at the largest LMUL e8/m8 (nxv64i1): vmv.v.x + masked vmsne.vi,
; result copied to v0.
define <vscale x 64 x i1> @vp_splat_nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 zeroext %evl) {
94+
; CHECK-LABEL: vp_splat_nxv64i1:
95+
; CHECK: # %bb.0:
96+
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
97+
; CHECK-NEXT: vmv.v.x v16, a0
98+
; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
99+
; CHECK-NEXT: vmv1r.v v0, v8
100+
; CHECK-NEXT: ret
101+
%splat = call <vscale x 64 x i1> @llvm.experimental.vp.splat.nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 %evl)
102+
ret <vscale x 64 x i1> %splat
103+
}
104+
105+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
106+
; RV32: {{.*}}
107+
; RV64: {{.*}}

0 commit comments

Comments
 (0)