Skip to content

Commit d065b03

Browse files
committed
[RISCV] Optimize vp.load with an all-ones mask
Similar to D110206, this patch optimizes unmasked vp.load intrinsics to avoid the need for a vmset instruction to set the mask. It does so by selecting a riscv_vle intrinsic rather than a riscv_vle_mask intrinsic.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D113022
1 parent 052a291 commit d065b03

File tree

3 files changed

+35
-51
lines changed

3 files changed

+35
-51
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4871,24 +4871,38 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
48714871
PassThru = MLoad->getPassThru();
48724872
}
48734873

4874+
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
4875+
48744876
MVT XLenVT = Subtarget.getXLenVT();
48754877

48764878
MVT ContainerVT = VT;
48774879
if (VT.isFixedLengthVector()) {
48784880
ContainerVT = getContainerForFixedLengthVector(VT);
4879-
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4880-
4881-
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
48824881
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
4882+
if (!IsUnmasked) {
4883+
MVT MaskVT =
4884+
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4885+
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4886+
}
48834887
}
48844888

48854889
if (!VL)
48864890
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
48874891

4892+
unsigned IntID =
4893+
IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
4894+
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
4895+
if (!IsUnmasked)
4896+
Ops.push_back(PassThru);
4897+
Ops.push_back(BasePtr);
4898+
if (!IsUnmasked)
4899+
Ops.push_back(Mask);
4900+
Ops.push_back(VL);
4901+
if (!IsUnmasked)
4902+
Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
4903+
48884904
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4889-
SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
4890-
SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
4891-
SDValue Ops[] = {Chain, IntID, PassThru, BasePtr, Mask, VL, Policy};
4905+
48924906
SDValue Result =
48934907
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
48944908
Chain = Result.getValue(1);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll

Lines changed: 7 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,8 @@ define <4 x i8> @vpload_v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) {
3131
define <4 x i8> @vpload_v4i8_allones_mask(<4 x i8>* %ptr, i32 zeroext %evl) {
3232
; CHECK-LABEL: vpload_v4i8_allones_mask:
3333
; CHECK: # %bb.0:
34-
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
35-
; CHECK-NEXT: vmset.m v0
3634
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
37-
; CHECK-NEXT: vle8.v v8, (a0), v0.t
35+
; CHECK-NEXT: vle8.v v8, (a0)
3836
; CHECK-NEXT: ret
3937
%a = insertelement <4 x i1> undef, i1 true, i32 0
4038
%b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
@@ -93,10 +91,8 @@ define <8 x i16> @vpload_v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
9391
define <8 x i16> @vpload_v8i16_allones_mask(<8 x i16>* %ptr, i32 zeroext %evl) {
9492
; CHECK-LABEL: vpload_v8i16_allones_mask:
9593
; CHECK: # %bb.0:
96-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
97-
; CHECK-NEXT: vmset.m v0
9894
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
99-
; CHECK-NEXT: vle16.v v8, (a0), v0.t
95+
; CHECK-NEXT: vle16.v v8, (a0)
10096
; CHECK-NEXT: ret
10197
%a = insertelement <8 x i1> undef, i1 true, i32 0
10298
%b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -143,10 +139,8 @@ define <8 x i32> @vpload_v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
143139
define <8 x i32> @vpload_v8i32_allones_mask(<8 x i32>* %ptr, i32 zeroext %evl) {
144140
; CHECK-LABEL: vpload_v8i32_allones_mask:
145141
; CHECK: # %bb.0:
146-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
147-
; CHECK-NEXT: vmset.m v0
148142
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
149-
; CHECK-NEXT: vle32.v v8, (a0), v0.t
143+
; CHECK-NEXT: vle32.v v8, (a0)
150144
; CHECK-NEXT: ret
151145
%a = insertelement <8 x i1> undef, i1 true, i32 0
152146
%b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -181,10 +175,8 @@ define <4 x i64> @vpload_v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) {
181175
define <4 x i64> @vpload_v4i64_allones_mask(<4 x i64>* %ptr, i32 zeroext %evl) {
182176
; CHECK-LABEL: vpload_v4i64_allones_mask:
183177
; CHECK: # %bb.0:
184-
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
185-
; CHECK-NEXT: vmset.m v0
186178
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
187-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
179+
; CHECK-NEXT: vle64.v v8, (a0)
188180
; CHECK-NEXT: ret
189181
%a = insertelement <4 x i1> undef, i1 true, i32 0
190182
%b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
@@ -219,10 +211,8 @@ define <2 x half> @vpload_v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl)
219211
define <2 x half> @vpload_v2f16_allones_mask(<2 x half>* %ptr, i32 zeroext %evl) {
220212
; CHECK-LABEL: vpload_v2f16_allones_mask:
221213
; CHECK: # %bb.0:
222-
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
223-
; CHECK-NEXT: vmset.m v0
224214
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
225-
; CHECK-NEXT: vle16.v v8, (a0), v0.t
215+
; CHECK-NEXT: vle16.v v8, (a0)
226216
; CHECK-NEXT: ret
227217
%a = insertelement <2 x i1> undef, i1 true, i32 0
228218
%b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
@@ -293,10 +283,8 @@ define <8 x float> @vpload_v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 zeroext %ev
293283
define <8 x float> @vpload_v8f32_allones_mask(<8 x float>* %ptr, i32 zeroext %evl) {
294284
; CHECK-LABEL: vpload_v8f32_allones_mask:
295285
; CHECK: # %bb.0:
296-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
297-
; CHECK-NEXT: vmset.m v0
298286
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
299-
; CHECK-NEXT: vle32.v v8, (a0), v0.t
287+
; CHECK-NEXT: vle32.v v8, (a0)
300288
; CHECK-NEXT: ret
301289
%a = insertelement <8 x i1> undef, i1 true, i32 0
302290
%b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -331,10 +319,8 @@ define <4 x double> @vpload_v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 zeroext %
331319
define <4 x double> @vpload_v4f64_allones_mask(<4 x double>* %ptr, i32 zeroext %evl) {
332320
; CHECK-LABEL: vpload_v4f64_allones_mask:
333321
; CHECK: # %bb.0:
334-
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
335-
; CHECK-NEXT: vmset.m v0
336322
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
337-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
323+
; CHECK-NEXT: vle64.v v8, (a0)
338324
; CHECK-NEXT: ret
339325
%a = insertelement <4 x i1> undef, i1 true, i32 0
340326
%b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/CodeGen/RISCV/rvv/vpload.ll

Lines changed: 8 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,8 @@ define <vscale x 1 x i8> @vpload_nxv1i8(<vscale x 1 x i8>* %ptr, <vscale x 1 x i
1919
define <vscale x 1 x i8> @vpload_nxv1i8_allones_mask(<vscale x 1 x i8>* %ptr, i32 zeroext %evl) {
2020
; CHECK-LABEL: vpload_nxv1i8_allones_mask:
2121
; CHECK: # %bb.0:
22-
; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu
23-
; CHECK-NEXT: vmset.m v0
2422
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
25-
; CHECK-NEXT: vle8.v v8, (a0), v0.t
23+
; CHECK-NEXT: vle8.v v8, (a0)
2624
; CHECK-NEXT: ret
2725
%a = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
2826
%b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -69,10 +67,8 @@ define <vscale x 8 x i8> @vpload_nxv8i8(<vscale x 8 x i8>* %ptr, <vscale x 8 x i
6967
define <vscale x 8 x i8> @vpload_nxv8i8_allones_mask(<vscale x 8 x i8>* %ptr, i32 zeroext %evl) {
7068
; CHECK-LABEL: vpload_nxv8i8_allones_mask:
7169
; CHECK: # %bb.0:
72-
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu
73-
; CHECK-NEXT: vmset.m v0
7470
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
75-
; CHECK-NEXT: vle8.v v8, (a0), v0.t
71+
; CHECK-NEXT: vle8.v v8, (a0)
7672
; CHECK-NEXT: ret
7773
%a = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
7874
%b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -107,10 +103,8 @@ define <vscale x 2 x i16> @vpload_nxv2i16(<vscale x 2 x i16>* %ptr, <vscale x 2
107103
define <vscale x 2 x i16> @vpload_nxv2i16_allones_mask(<vscale x 2 x i16>* %ptr, i32 zeroext %evl) {
108104
; CHECK-LABEL: vpload_nxv2i16_allones_mask:
109105
; CHECK: # %bb.0:
110-
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
111-
; CHECK-NEXT: vmset.m v0
112106
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
113-
; CHECK-NEXT: vle16.v v8, (a0), v0.t
107+
; CHECK-NEXT: vle16.v v8, (a0)
114108
; CHECK-NEXT: ret
115109
%a = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
116110
%b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -181,10 +175,8 @@ define <vscale x 4 x i32> @vpload_nxv4i32(<vscale x 4 x i32>* %ptr, <vscale x 4
181175
define <vscale x 4 x i32> @vpload_nxv4i32_allones_mask(<vscale x 4 x i32>* %ptr, i32 zeroext %evl) {
182176
; CHECK-LABEL: vpload_nxv4i32_allones_mask:
183177
; CHECK: # %bb.0:
184-
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
185-
; CHECK-NEXT: vmset.m v0
186178
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
187-
; CHECK-NEXT: vle32.v v8, (a0), v0.t
179+
; CHECK-NEXT: vle32.v v8, (a0)
188180
; CHECK-NEXT: ret
189181
%a = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
190182
%b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -219,10 +211,8 @@ define <vscale x 1 x i64> @vpload_nxv1i64(<vscale x 1 x i64>* %ptr, <vscale x 1
219211
define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
220212
; CHECK-LABEL: vpload_nxv1i64_allones_mask:
221213
; CHECK: # %bb.0:
222-
; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu
223-
; CHECK-NEXT: vmset.m v0
224214
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
225-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
215+
; CHECK-NEXT: vle64.v v8, (a0)
226216
; CHECK-NEXT: ret
227217
%a = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
228218
%b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -293,10 +283,8 @@ define <vscale x 2 x half> @vpload_nxv2f16(<vscale x 2 x half>* %ptr, <vscale x
293283
define <vscale x 2 x half> @vpload_nxv2f16_allones_mask(<vscale x 2 x half>* %ptr, i32 zeroext %evl) {
294284
; CHECK-LABEL: vpload_nxv2f16_allones_mask:
295285
; CHECK: # %bb.0:
296-
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
297-
; CHECK-NEXT: vmset.m v0
298286
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
299-
; CHECK-NEXT: vle16.v v8, (a0), v0.t
287+
; CHECK-NEXT: vle16.v v8, (a0)
300288
; CHECK-NEXT: ret
301289
%a = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
302290
%b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -379,10 +367,8 @@ define <vscale x 8 x float> @vpload_nxv8f32(<vscale x 8 x float>* %ptr, <vscale
379367
define <vscale x 8 x float> @vpload_nxv8f32_allones_mask(<vscale x 8 x float>* %ptr, i32 zeroext %evl) {
380368
; CHECK-LABEL: vpload_nxv8f32_allones_mask:
381369
; CHECK: # %bb.0:
382-
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu
383-
; CHECK-NEXT: vmset.m v0
384370
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
385-
; CHECK-NEXT: vle32.v v8, (a0), v0.t
371+
; CHECK-NEXT: vle32.v v8, (a0)
386372
; CHECK-NEXT: ret
387373
%a = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
388374
%b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -429,10 +415,8 @@ define <vscale x 4 x double> @vpload_nxv4f64(<vscale x 4 x double>* %ptr, <vscal
429415
define <vscale x 4 x double> @vpload_nxv4f64_allones_mask(<vscale x 4 x double>* %ptr, i32 zeroext %evl) {
430416
; CHECK-LABEL: vpload_nxv4f64_allones_mask:
431417
; CHECK: # %bb.0:
432-
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
433-
; CHECK-NEXT: vmset.m v0
434418
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
435-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
419+
; CHECK-NEXT: vle64.v v8, (a0)
436420
; CHECK-NEXT: ret
437421
%a = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
438422
%b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer

0 commit comments

Comments
 (0)