Skip to content

Commit a44738c

Browse files
committed
[RISCV] Support VP_SPLAT mask operations
Support VP_SPLAT mask operations by converting them to select operations.
1 parent 75ab43b commit a44738c

File tree

2 files changed

+166
-2
lines changed

2 files changed

+166
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
810810

811811
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
812812
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
813+
setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
813814

814815
setOperationPromotedToType(
815816
ISD::VECTOR_SPLICE, VT,
@@ -12773,8 +12774,18 @@ SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
1277312774
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
1277412775
}
1277512776

12776-
SDValue Result =
12777-
lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12777+
SDValue Result;
12778+
if (VT.getScalarType() == MVT::i1) {
12779+
SDValue TrueV = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12780+
SDValue FalseV = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
12781+
Result = DAG.getSelect(DL, VT, Val, TrueV, FalseV);
12782+
if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
12783+
Result =
12784+
DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Result, Mask, VL);
12785+
} else {
12786+
Result =
12787+
lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12788+
}
1277812789

1277912790
if (!VT.isFixedLengthVector())
1278012791
return Result;
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4+
5+
define <vscale x 1 x i1> @vp_splat_nxv1i1_true_unmasked(i32 zeroext %evl) {
6+
; CHECK-LABEL: vp_splat_nxv1i1_true_unmasked:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
9+
; CHECK-NEXT: vmset.m v0
10+
; CHECK-NEXT: ret
11+
%splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
12+
ret <vscale x 1 x i1> %splat
13+
}
14+
15+
define <vscale x 1 x i1> @vp_splat_nxv1i1_false_unmasked(i32 zeroext %evl) {
16+
; CHECK-LABEL: vp_splat_nxv1i1_false_unmasked:
17+
; CHECK: # %bb.0:
18+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
19+
; CHECK-NEXT: vmclr.m v0
20+
; CHECK-NEXT: ret
21+
%splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
22+
ret <vscale x 1 x i1> %splat
23+
}
24+
25+
define <vscale x 1 x i1> @vp_splat_nxv1i1(i1 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
26+
; CHECK-LABEL: vp_splat_nxv1i1:
27+
; CHECK: # %bb.0:
28+
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
29+
; CHECK-NEXT: vmclr.m v8
30+
; CHECK-NEXT: andi a0, a0, 1
31+
; CHECK-NEXT: vmset.m v9
32+
; CHECK-NEXT: vmv.v.x v10, a0
33+
; CHECK-NEXT: vmsne.vi v10, v10, 0
34+
; CHECK-NEXT: vmandn.mm v8, v8, v10
35+
; CHECK-NEXT: vmand.mm v9, v9, v10
36+
; CHECK-NEXT: vmor.mm v8, v9, v8
37+
; CHECK-NEXT: vmand.mm v0, v8, v0
38+
; CHECK-NEXT: ret
39+
%splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 %val, <vscale x 1 x i1> %m, i32 %evl)
40+
ret <vscale x 1 x i1> %splat
41+
}
42+
43+
define <vscale x 2 x i1> @vp_splat_nxv2i1(i1 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
44+
; CHECK-LABEL: vp_splat_nxv2i1:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
47+
; CHECK-NEXT: vmclr.m v8
48+
; CHECK-NEXT: andi a0, a0, 1
49+
; CHECK-NEXT: vmset.m v9
50+
; CHECK-NEXT: vmv.v.x v10, a0
51+
; CHECK-NEXT: vmsne.vi v10, v10, 0
52+
; CHECK-NEXT: vmandn.mm v8, v8, v10
53+
; CHECK-NEXT: vmand.mm v9, v9, v10
54+
; CHECK-NEXT: vmor.mm v8, v9, v8
55+
; CHECK-NEXT: vmand.mm v0, v8, v0
56+
; CHECK-NEXT: ret
57+
%splat = call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 %val, <vscale x 2 x i1> %m, i32 %evl)
58+
ret <vscale x 2 x i1> %splat
59+
}
60+
61+
define <vscale x 4 x i1> @vp_splat_nxv4i1(i1 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
62+
; CHECK-LABEL: vp_splat_nxv4i1:
63+
; CHECK: # %bb.0:
64+
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
65+
; CHECK-NEXT: vmclr.m v8
66+
; CHECK-NEXT: andi a0, a0, 1
67+
; CHECK-NEXT: vmset.m v9
68+
; CHECK-NEXT: vmv.v.x v10, a0
69+
; CHECK-NEXT: vmsne.vi v10, v10, 0
70+
; CHECK-NEXT: vmandn.mm v8, v8, v10
71+
; CHECK-NEXT: vmand.mm v9, v9, v10
72+
; CHECK-NEXT: vmor.mm v8, v9, v8
73+
; CHECK-NEXT: vmand.mm v0, v8, v0
74+
; CHECK-NEXT: ret
75+
%splat = call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 %val, <vscale x 4 x i1> %m, i32 %evl)
76+
ret <vscale x 4 x i1> %splat
77+
}
78+
79+
define <vscale x 8 x i1> @vp_splat_nxv8i1(i1 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
80+
; CHECK-LABEL: vp_splat_nxv8i1:
81+
; CHECK: # %bb.0:
82+
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
83+
; CHECK-NEXT: vmclr.m v8
84+
; CHECK-NEXT: andi a0, a0, 1
85+
; CHECK-NEXT: vmset.m v9
86+
; CHECK-NEXT: vmv.v.x v10, a0
87+
; CHECK-NEXT: vmsne.vi v10, v10, 0
88+
; CHECK-NEXT: vmandn.mm v8, v8, v10
89+
; CHECK-NEXT: vmand.mm v9, v9, v10
90+
; CHECK-NEXT: vmor.mm v8, v9, v8
91+
; CHECK-NEXT: vmand.mm v0, v8, v0
92+
; CHECK-NEXT: ret
93+
%splat = call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 %val, <vscale x 8 x i1> %m, i32 %evl)
94+
ret <vscale x 8 x i1> %splat
95+
}
96+
97+
define <vscale x 16 x i1> @vp_splat_nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
98+
; CHECK-LABEL: vp_splat_nxv16i1:
99+
; CHECK: # %bb.0:
100+
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
101+
; CHECK-NEXT: vmclr.m v10
102+
; CHECK-NEXT: andi a0, a0, 1
103+
; CHECK-NEXT: vmv.v.x v8, a0
104+
; CHECK-NEXT: vmsne.vi v11, v8, 0
105+
; CHECK-NEXT: vmset.m v8
106+
; CHECK-NEXT: vmandn.mm v9, v10, v11
107+
; CHECK-NEXT: vmand.mm v8, v8, v11
108+
; CHECK-NEXT: vmor.mm v8, v8, v9
109+
; CHECK-NEXT: vmand.mm v0, v8, v0
110+
; CHECK-NEXT: ret
111+
%splat = call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 %evl)
112+
ret <vscale x 16 x i1> %splat
113+
}
114+
115+
define <vscale x 32 x i1> @vp_splat_nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
116+
; CHECK-LABEL: vp_splat_nxv32i1:
117+
; CHECK: # %bb.0:
118+
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
119+
; CHECK-NEXT: vmclr.m v12
120+
; CHECK-NEXT: andi a0, a0, 1
121+
; CHECK-NEXT: vmv.v.x v8, a0
122+
; CHECK-NEXT: vmsne.vi v13, v8, 0
123+
; CHECK-NEXT: vmset.m v8
124+
; CHECK-NEXT: vmandn.mm v9, v12, v13
125+
; CHECK-NEXT: vmand.mm v8, v8, v13
126+
; CHECK-NEXT: vmor.mm v8, v8, v9
127+
; CHECK-NEXT: vmand.mm v0, v8, v0
128+
; CHECK-NEXT: ret
129+
%splat = call <vscale x 32 x i1> @llvm.experimental.vp.splat.nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 %evl)
130+
ret <vscale x 32 x i1> %splat
131+
}
132+
133+
define <vscale x 64 x i1> @vp_splat_nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 zeroext %evl) {
134+
; CHECK-LABEL: vp_splat_nxv64i1:
135+
; CHECK: # %bb.0:
136+
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
137+
; CHECK-NEXT: vmclr.m v16
138+
; CHECK-NEXT: andi a0, a0, 1
139+
; CHECK-NEXT: vmv.v.x v8, a0
140+
; CHECK-NEXT: vmsne.vi v17, v8, 0
141+
; CHECK-NEXT: vmset.m v8
142+
; CHECK-NEXT: vmandn.mm v9, v16, v17
143+
; CHECK-NEXT: vmand.mm v8, v8, v17
144+
; CHECK-NEXT: vmor.mm v8, v8, v9
145+
; CHECK-NEXT: vmand.mm v0, v8, v0
146+
; CHECK-NEXT: ret
147+
%splat = call <vscale x 64 x i1> @llvm.experimental.vp.splat.nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 %evl)
148+
ret <vscale x 64 x i1> %splat
149+
}
150+
151+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
152+
; RV32: {{.*}}
153+
; RV64: {{.*}}

0 commit comments

Comments
 (0)