Skip to content

Commit 9c8def0

Browse files
committed
[RISCV] Try to optimize vp.splice to vslide1up.
Fold (vp.splice (insert_elt poison, scalar, 0), vec, 0, mask, 1, vl) to (vslide1up vec, scalar, mask, vl). Fold (vp.splice (splat_vector scalar), vec, 0, mask, 1, vl) to (vslide1up vec, scalar, mask, vl).
1 parent adff330 commit 9c8def0

File tree

3 files changed

+57
-40
lines changed

3 files changed

+57
-40
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13241,6 +13241,8 @@ SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
1324113241
SDValue
1324213242
RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
1324313243
SelectionDAG &DAG) const {
13244+
using namespace SDPatternMatch;
13245+
1324413246
SDLoc DL(Op);
1324513247

1324613248
SDValue Op1 = Op.getOperand(0);
@@ -13285,6 +13287,41 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
1328513287
SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
1328613288
}
1328713289

13290+
auto getVectorFirstEle = [](SDValue Vec) {
13291+
SDValue FirstEle;
13292+
if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13293+
return FirstEle;
13294+
13295+
if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13296+
Vec.getOpcode() == ISD::BUILD_VECTOR)
13297+
return Vec.getOperand(0);
13298+
13299+
return SDValue();
13300+
};
13301+
13302+
if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13303+
if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13304+
MVT EltVT = ContainerVT.getVectorElementType();
13305+
SDValue Result;
13306+
if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13307+
EltVT == MVT::bf16) {
13308+
EltVT = EltVT.changeTypeToInteger();
13309+
ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13310+
FirstEle =
13311+
DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13312+
}
13313+
Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13314+
: RISCVISD::VSLIDE1UP_VL,
13315+
DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13316+
FirstEle, Mask, EVL2);
13317+
Result = DAG.getBitcast(
13318+
ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13319+
Result);
13320+
return VT.isFixedLengthVector()
13321+
? convertFromScalableVector(VT, Result, DAG, Subtarget)
13322+
: Result;
13323+
}
13324+
1328813325
int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
1328913326
SDValue DownOffset, UpOffset;
1329013327
if (ImmValue >= 0) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -303,10 +303,8 @@ define <8 x half> @test_vp_splice_v8f16_masked(<8 x half> %va, <8 x half> %vb, <
303303
define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) {
304304
; CHECK-LABEL: test_vp_splice_v4i32_with_firstelt:
305305
; CHECK: # %bb.0:
306-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
307-
; CHECK-NEXT: vmv.s.x v9, a0
308-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
309-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
306+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
307+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
310308
; CHECK-NEXT: vmv.v.v v8, v9
311309
; CHECK-NEXT: ret
312310
%va = insertelement <4 x i32> poison, i32 %first, i32 0
@@ -317,10 +315,8 @@ define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb,
317315
define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) {
318316
; CHECK-LABEL: test_vp_splice_v4i32_with_splat_firstelt:
319317
; CHECK: # %bb.0:
320-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
321-
; CHECK-NEXT: vmv.v.x v9, a0
322-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
323-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
318+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
319+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
324320
; CHECK-NEXT: vmv.v.v v8, v9
325321
; CHECK-NEXT: ret
326322
%ins = insertelement <4 x i32> poison, i32 %first, i32 0
@@ -332,10 +328,8 @@ define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32>
332328
define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evl) {
333329
; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt:
334330
; CHECK: # %bb.0:
335-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
336-
; CHECK-NEXT: vfmv.s.f v9, fa0
337-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
338-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
331+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
332+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
339333
; CHECK-NEXT: vmv.v.v v8, v9
340334
; CHECK-NEXT: ret
341335
%va = insertelement <4 x float> poison, float %first, i32 0
@@ -346,10 +340,8 @@ define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x floa
346340
define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> %vb, <4 x i1> %mask, i32 zeroext %evl) {
347341
; CHECK-LABEL: test_vp_splice_nxv2f16_with_firstelt:
348342
; CHECK: # %bb.0:
349-
; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, ma
350-
; CHECK-NEXT: vfmv.s.f v9, fa0
351-
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
352-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
343+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
344+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
353345
; CHECK-NEXT: vmv1r.v v8, v9
354346
; CHECK-NEXT: ret
355347
%va = insertelement <4 x half> poison, half %first, i32 0

llvm/test/CodeGen/RISCV/rvv/vp-splice.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -509,10 +509,8 @@ define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_masked(<vscale x 2 x bfloa
509509
define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
510510
; CHECK-LABEL: test_vp_splice_nxv2i32_with_firstelt:
511511
; CHECK: # %bb.0:
512-
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
513-
; CHECK-NEXT: vmv.s.x v9, a0
514-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
515-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
512+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
513+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
516514
; CHECK-NEXT: vmv.v.v v8, v9
517515
; CHECK-NEXT: ret
518516
%va = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0
@@ -523,10 +521,8 @@ define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_firstelt(i32 %first, <vsc
523521
define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
524522
; CHECK-LABEL: test_vp_splice_nxv2i32_with_splat_firstelt:
525523
; CHECK: # %bb.0:
526-
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
527-
; CHECK-NEXT: vmv.v.x v9, a0
528-
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
529-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
524+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
525+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
530526
; CHECK-NEXT: vmv.v.v v8, v9
531527
; CHECK-NEXT: ret
532528
%ins = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0
@@ -538,10 +534,8 @@ define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first
538534
define <vscale x 2 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <vscale x 2 x float> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
539535
; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt:
540536
; CHECK: # %bb.0:
541-
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
542-
; CHECK-NEXT: vfmv.s.f v9, fa0
543-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
544-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
537+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
538+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
545539
; CHECK-NEXT: vmv.v.v v8, v9
546540
; CHECK-NEXT: ret
547541
%va = insertelement <vscale x 2 x float> poison, float %first, i32 0
@@ -552,20 +546,16 @@ define <vscale x 2 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first,
552546
define <vscale x 2 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <vscale x 2 x half> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
553547
; ZVFH-LABEL: test_vp_splice_nxv2f16_with_firstelt:
554548
; ZVFH: # %bb.0:
555-
; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
556-
; ZVFH-NEXT: vfmv.s.f v9, fa0
557-
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
558-
; ZVFH-NEXT: vslideup.vi v9, v8, 1, v0.t
549+
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
550+
; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
559551
; ZVFH-NEXT: vmv1r.v v8, v9
560552
; ZVFH-NEXT: ret
561553
;
562554
; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt:
563555
; ZVFHMIN: # %bb.0:
564556
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
565-
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
566-
; ZVFHMIN-NEXT: vmv.s.x v9, a1
567-
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
568-
; ZVFHMIN-NEXT: vslideup.vi v9, v8, 1, v0.t
557+
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
558+
; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a1, v0.t
569559
; ZVFHMIN-NEXT: vmv1r.v v8, v9
570560
; ZVFHMIN-NEXT: ret
571561
%va = insertelement <vscale x 2 x half> poison, half %first, i32 0
@@ -577,10 +567,8 @@ define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_with_firstelt(bfloat %firs
577567
; CHECK-LABEL: test_vp_splice_nxv2bf16_with_firstelt:
578568
; CHECK: # %bb.0:
579569
; CHECK-NEXT: fmv.x.h a1, fa0
580-
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
581-
; CHECK-NEXT: vmv.s.x v9, a1
582-
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
583-
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
570+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
571+
; CHECK-NEXT: vslide1up.vx v9, v8, a1, v0.t
584572
; CHECK-NEXT: vmv1r.v v8, v9
585573
; CHECK-NEXT: ret
586574
%va = insertelement <vscale x 2 x bfloat> poison, bfloat %first, i32 0

0 commit comments

Comments
 (0)