Skip to content

Commit bd5cd43

Browse files
committed
[RISCV] Try to optimize vp.splice to vslide1up.
Fold (vp.splice (insert_elt poison, scalar, 0), vec, 0, mask, 1, vl) to (vslide1up vec, scalar, mask, vl). Fold (vp.splice (splat_vector scalar), vec, 0, mask, 1, vl) to (vslide1up vec, scalar, mask, vl).
1 parent e36658c commit bd5cd43

File tree

3 files changed

+42
-40
lines changed

3 files changed

+42
-40
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13238,6 +13238,8 @@ SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
1323813238
SDValue
1323913239
RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
1324013240
SelectionDAG &DAG) const {
13241+
using namespace SDPatternMatch;
13242+
1324113243
SDLoc DL(Op);
1324213244

1324313245
SDValue Op1 = Op.getOperand(0);
@@ -13282,6 +13284,30 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
1328213284
SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
1328313285
}
1328413286

13287+
auto getVectorFirstEle = [](SDValue Vec) {
13288+
SDValue FirstEle;
13289+
if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13290+
return FirstEle;
13291+
13292+
if (sd_match(Vec,
13293+
m_AnyOf(m_Opc(ISD::SPLAT_VECTOR), m_Opc(ISD::BUILD_VECTOR))))
13294+
return Vec.getOperand(0);
13295+
13296+
return SDValue();
13297+
};
13298+
13299+
if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13300+
if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13301+
SDValue Result =
13302+
DAG.getNode(ContainerVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13303+
: RISCVISD::VSLIDE1UP_VL,
13304+
DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2, FirstEle,
13305+
Mask, EVL2);
13306+
return VT.isFixedLengthVector()
13307+
? convertFromScalableVector(VT, Result, DAG, Subtarget)
13308+
: Result;
13309+
}
13310+
1328513311
int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
1328613312
SDValue DownOffset, UpOffset;
1328713313
if (ImmValue >= 0) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -292,11 +292,8 @@ define <8 x half> @test_vp_splice_v8f16_masked(<8 x half> %va, <8 x half> %vb, <
292292
define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) {
293293
; CHECK-LABEL: test_vp_splice_v4i32_with_firstelt:
294294
; CHECK: # %bb.0:
295-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
296-
; CHECK-NEXT: vmv.s.x v9, a0
297-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
298-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
299-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
295+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
296+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
300297
; CHECK-NEXT: vmv.v.v v8, v9
301298
; CHECK-NEXT: ret
302299
%va = insertelement <4 x i32> poison, i32 %first, i32 0
@@ -307,11 +304,8 @@ define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb,
307304
define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) {
308305
; CHECK-LABEL: test_vp_splice_v4i32_with_splat_firstelt:
309306
; CHECK: # %bb.0:
310-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
311-
; CHECK-NEXT: vmv.v.x v9, a0
312-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
313-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
314-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
307+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
308+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
315309
; CHECK-NEXT: vmv.v.v v8, v9
316310
; CHECK-NEXT: ret
317311
%ins = insertelement <4 x i32> poison, i32 %first, i32 0
@@ -323,11 +317,8 @@ define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32>
323317
define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evl) {
324318
; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt:
325319
; CHECK: # %bb.0:
326-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
327-
; CHECK-NEXT: vfmv.s.f v9, fa0
328-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
329-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
330-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
320+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
321+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
331322
; CHECK-NEXT: vmv.v.v v8, v9
332323
; CHECK-NEXT: ret
333324
%va = insertelement <4 x float> poison, float %first, i32 0
@@ -338,11 +329,8 @@ define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x floa
338329
define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> %vb, <4 x i1> %mask, i32 zeroext %evl) {
339330
; CHECK-LABEL: test_vp_splice_nxv2f16_with_firstelt:
340331
; CHECK: # %bb.0:
341-
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
342-
; CHECK-NEXT: vfmv.s.f v9, fa0
343-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
344-
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
345-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
332+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
333+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
346334
; CHECK-NEXT: vmv1r.v v8, v9
347335
; CHECK-NEXT: ret
348336
%va = insertelement <4 x half> poison, half %first, i32 0

llvm/test/CodeGen/RISCV/rvv/vp-splice.ll

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -431,11 +431,8 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
431431
define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
432432
; CHECK-LABEL: test_vp_splice_nxv2i32_with_firstelt:
433433
; CHECK: # %bb.0:
434-
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
435-
; CHECK-NEXT: vmv.s.x v9, a0
436-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
437-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
438-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
434+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
435+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
439436
; CHECK-NEXT: vmv.v.v v8, v9
440437
; CHECK-NEXT: ret
441438
%va = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0
@@ -446,11 +443,8 @@ define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_firstelt(i32 %first, <vsc
446443
define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
447444
; CHECK-LABEL: test_vp_splice_nxv2i32_with_splat_firstelt:
448445
; CHECK: # %bb.0:
449-
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
450-
; CHECK-NEXT: vmv.v.x v9, a0
451-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
452-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
453-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
446+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
447+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
454448
; CHECK-NEXT: vmv.v.v v8, v9
455449
; CHECK-NEXT: ret
456450
%ins = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0
@@ -462,11 +456,8 @@ define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first
462456
define <vscale x 2 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <vscale x 2 x float> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
463457
; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt:
464458
; CHECK: # %bb.0:
465-
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
466-
; CHECK-NEXT: vfmv.s.f v9, fa0
467-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
468-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
469-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
459+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
460+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
470461
; CHECK-NEXT: vmv.v.v v8, v9
471462
; CHECK-NEXT: ret
472463
%va = insertelement <vscale x 2 x float> poison, float %first, i32 0
@@ -477,11 +468,8 @@ define <vscale x 2 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first,
477468
define <vscale x 2 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <vscale x 2 x half> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
478469
; CHECK-LABEL: test_vp_splice_nxv2f16_with_firstelt:
479470
; CHECK: # %bb.0:
480-
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
481-
; CHECK-NEXT: vfmv.s.f v9, fa0
482-
; CHECK-NEXT: vslidedown.vi v9, v9, 0, v0.t
483-
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
484-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
471+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
472+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
485473
; CHECK-NEXT: vmv1r.v v8, v9
486474
; CHECK-NEXT: ret
487475
%va = insertelement <vscale x 2 x half> poison, half %first, i32 0

0 commit comments

Comments
 (0)