Skip to content

Commit 10edc3d

Browse files
authored
[RISCV] Try to optimize vp.splice to vslide1up. (#144871)
Fold (vp.splice (insert_elt poison, scalar, 0), vec, 0, mask, 1, vl) to (vslide1up vec, scalar, mask, vl). Fold (vp.splice (splat_vector scalar), vec, 0, mask, 1, vl) to (vslide1up vec, scalar, mask, vl).
1 parent 020fefb commit 10edc3d

File tree

3 files changed

+161
-4
lines changed

3 files changed

+161
-4
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13241,6 +13241,8 @@ SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
1324113241
SDValue
1324213242
RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
1324313243
SelectionDAG &DAG) const {
13244+
using namespace SDPatternMatch;
13245+
1324413246
SDLoc DL(Op);
1324513247

1324613248
SDValue Op1 = Op.getOperand(0);
@@ -13285,6 +13287,42 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
1328513287
SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
1328613288
}
1328713289

13290+
// Returns the scalar that can be read directly off Vec as its first element,
// or an empty SDValue when Vec has none of the recognised forms:
//   - an insert-element at constant index 0: the inserted scalar is returned
//     (the vector being inserted into is matched with m_Value(), so it is not
//     required to be poison);
//   - SPLAT_VECTOR or BUILD_VECTOR: operand 0 is returned.
auto getVectorFirstEle = [](SDValue Vec) {
13291+
SDValue FirstEle;
13292+
if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13293+
return FirstEle;
13294+
13295+
if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13296+
Vec.getOpcode() == ISD::BUILD_VECTOR)
13297+
return Vec.getOperand(0);
13298+
13299+
// No recognised pattern: the empty SDValue tells the caller not to fold.
return SDValue();
13300+
};
13301+
13302+
// Fold a vp.splice whose offset is 0 and whose first EVL is 1 into a single
// slide1up of the second operand, inserting the first operand's element 0:
//   (vp.splice (insert_elt poison, scalar, 0), vec, 0, mask, 1, vl)
//   (vp.splice (splat_vector scalar), vec, 0, mask, 1, vl)
//     -> (vslide1up vec, scalar, mask, vl)
// Not applied to mask vectors.
// NOTE(review): IsMaskVector, Offset, EVL1, EVL2, Op2, Mask, VT, ContainerVT
// and XLenVT are defined earlier in the function, outside this excerpt.
if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13303+
// The original (unconverted) first operand is inspected via Op->getOperand(0).
if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13304+
MVT EltVT = ContainerVT.getVectorElementType();
13305+
SDValue Result;
13306+
// f16 without vector f16 instructions, and bf16, are handled as integers of
// the same width: the vector is bitcast and the scalar is bitcast and then
// any-extended/truncated to XLenVT so the integer slide1up node is used.
if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13307+
EltVT == MVT::bf16) {
13308+
EltVT = EltVT.changeTypeToInteger();
13309+
ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13310+
Op2 = DAG.getBitcast(ContainerVT, Op2);
13311+
FirstEle =
13312+
DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13313+
}
13314+
// Floating-point element types use VFSLIDE1UP_VL, everything else
// VSLIDE1UP_VL; the passthru operand is undef.
Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13315+
: RISCVISD::VSLIDE1UP_VL,
13316+
DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13317+
FirstEle, Mask, EVL2);
13318+
// Restore the original element type (undoes the integer bitcast above, if
// one was applied).
Result = DAG.getBitcast(
13319+
ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13320+
Result);
13321+
// Fixed-length results are converted back from the scalable container type.
return VT.isFixedLengthVector()
13322+
? convertFromScalableVector(VT, Result, DAG, Subtarget)
13323+
: Result;
13324+
}
13325+
1328813326
int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
1328913327
SDValue DownOffset, UpOffset;
1329013328
if (ImmValue >= 0) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,52 @@ define <8 x half> @test_vp_splice_v8f16_masked(<8 x half> %va, <8 x half> %vb, <
299299
%v = call <8 x half> @llvm.experimental.vp.splice.v8f16(<8 x half> %va, <8 x half> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
300300
ret <8 x half> %v
301301
}
302+
303+
; Inserts %first into lane 0 of a poison <4 x i32> and splices it with %vb
; using offset 0 and a first-operand EVL of 1. The CHECK lines expect a single
; masked vslide1up.vx.
define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) {
304+
; CHECK-LABEL: test_vp_splice_v4i32_with_firstelt:
305+
; CHECK: # %bb.0:
306+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
307+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
308+
; CHECK-NEXT: vmv.v.v v8, v9
309+
; CHECK-NEXT: ret
310+
%va = insertelement <4 x i32> poison, i32 %first, i32 0
311+
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl)
312+
ret <4 x i32> %v
313+
}
314+
315+
; Same splice as the insertelement variant, but the first operand is a splat of
; %first (insertelement + zero-mask shufflevector). The CHECK lines expect the
; same single masked vslide1up.vx.
define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) {
316+
; CHECK-LABEL: test_vp_splice_v4i32_with_splat_firstelt:
317+
; CHECK: # %bb.0:
318+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
319+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
320+
; CHECK-NEXT: vmv.v.v v8, v9
321+
; CHECK-NEXT: ret
322+
%ins = insertelement <4 x i32> poison, i32 %first, i32 0
323+
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
324+
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %splat, <4 x i32> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl)
325+
ret <4 x i32> %v
326+
}
327+
328+
; Inserts %first into lane 0 of a poison <4 x float> and splices it with %vb
; using offset 0 and a first-operand EVL of 1. The CHECK lines expect a single
; masked vfslide1up.vf.
; NOTE: despite the "nxv2f32" in the function name this is a fixed-length
; <4 x float> test; the name is kept so the CHECK-LABEL stays unchanged. The
; intrinsic suffix below is v4f32 to match the operand type.
define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evl) {
329+
; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt:
330+
; CHECK: # %bb.0:
331+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
332+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
333+
; CHECK-NEXT: vmv.v.v v8, v9
334+
; CHECK-NEXT: ret
335+
%va = insertelement <4 x float> poison, float %first, i32 0
336+
%v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl)
337+
ret <4 x float> %v
338+
}
339+
340+
; Inserts %first into lane 0 of a poison <4 x half> and splices it with %vb
; using offset 0 and a first-operand EVL of 1. The CHECK lines expect a single
; masked vfslide1up.vf.
; NOTE: despite the "nxv2f16" in the function name this is a fixed-length
; <4 x half> test; the name is kept so the CHECK-LABEL stays unchanged. The
; intrinsic suffix below is v4f16 to match the operand type.
define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> %vb, <4 x i1> %mask, i32 zeroext %evl) {
341+
; CHECK-LABEL: test_vp_splice_nxv2f16_with_firstelt:
342+
; CHECK: # %bb.0:
343+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
344+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
345+
; CHECK-NEXT: vmv1r.v v8, v9
346+
; CHECK-NEXT: ret
347+
%va = insertelement <4 x half> poison, half %first, i32 0
348+
%v = call <4 x half> @llvm.experimental.vp.splice.v4f16(<4 x half> %va, <4 x half> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl)
349+
ret <4 x half> %v
350+
}

llvm/test/CodeGen/RISCV/rvv/vp-splice.ll

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs \
3-
; RUN: < %s | FileCheck %s
4-
; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \
5-
; RUN: < %s | FileCheck %s
2+
; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \
3+
; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4+
; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \
5+
; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
66

77
define <vscale x 2 x i64> @test_vp_splice_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
88
; CHECK-LABEL: test_vp_splice_nxv2i64:
@@ -505,3 +505,73 @@ define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_masked(<vscale x 2 x bfloa
505505
%v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> %mask, i32 %evla, i32 %evlb)
506506
ret <vscale x 2 x bfloat> %v
507507
}
508+
509+
; Inserts %first into lane 0 of a poison <vscale x 2 x i32> and splices it with
; %vb using offset 0 and a first-operand EVL of 1. The CHECK lines expect a
; single masked vslide1up.vx.
define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
510+
; CHECK-LABEL: test_vp_splice_nxv2i32_with_firstelt:
511+
; CHECK: # %bb.0:
512+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
513+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
514+
; CHECK-NEXT: vmv.v.v v8, v9
515+
; CHECK-NEXT: ret
516+
%va = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0
517+
%v = call <vscale x 2 x i32> @llvm.experimental.vp.splice.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl)
518+
ret <vscale x 2 x i32> %v
519+
}
520+
521+
; Same splice as the insertelement variant, but the first operand is a splat of
; %first (insertelement + zero-mask shufflevector). The CHECK lines expect the
; same single masked vslide1up.vx.
define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
522+
; CHECK-LABEL: test_vp_splice_nxv2i32_with_splat_firstelt:
523+
; CHECK: # %bb.0:
524+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
525+
; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t
526+
; CHECK-NEXT: vmv.v.v v8, v9
527+
; CHECK-NEXT: ret
528+
%ins = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0
529+
%splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
530+
%v = call <vscale x 2 x i32> @llvm.experimental.vp.splice.nxv2i32(<vscale x 2 x i32> %splat, <vscale x 2 x i32> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl)
531+
ret <vscale x 2 x i32> %v
532+
}
533+
534+
; Floating-point variant: inserts %first into lane 0 of a poison
; <vscale x 2 x float> and splices it with %vb using offset 0 and a
; first-operand EVL of 1. The CHECK lines expect a single masked vfslide1up.vf.
define <vscale x 2 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <vscale x 2 x float> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
535+
; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt:
536+
; CHECK: # %bb.0:
537+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
538+
; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
539+
; CHECK-NEXT: vmv.v.v v8, v9
540+
; CHECK-NEXT: ret
541+
%va = insertelement <vscale x 2 x float> poison, float %first, i32 0
542+
%v = call <vscale x 2 x float> @llvm.experimental.vp.splice.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl)
543+
ret <vscale x 2 x float> %v
544+
}
545+
546+
; Half-precision variant with separate expectations per RUN configuration:
; under the ZVFH prefix the CHECK lines expect vfslide1up.vf directly; under
; ZVFHMIN they expect the scalar to be moved to a GPR with fmv.x.h and the
; integer vslide1up.vx to be used instead.
define <vscale x 2 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <vscale x 2 x half> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
547+
; ZVFH-LABEL: test_vp_splice_nxv2f16_with_firstelt:
548+
; ZVFH: # %bb.0:
549+
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
550+
; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0, v0.t
551+
; ZVFH-NEXT: vmv1r.v v8, v9
552+
; ZVFH-NEXT: ret
553+
;
554+
; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt:
555+
; ZVFHMIN: # %bb.0:
556+
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
557+
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
558+
; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a1, v0.t
559+
; ZVFHMIN-NEXT: vmv1r.v v8, v9
560+
; ZVFHMIN-NEXT: ret
561+
%va = insertelement <vscale x 2 x half> poison, half %first, i32 0
562+
%v = call <vscale x 2 x half> @llvm.experimental.vp.splice.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl)
563+
ret <vscale x 2 x half> %v
564+
}
565+
566+
; bfloat variant: the expectations use the shared CHECK prefix, i.e. both RUN
; configurations are expected to move the scalar to a GPR with fmv.x.h and use
; the integer vslide1up.vx.
define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_with_firstelt(bfloat %first, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
567+
; CHECK-LABEL: test_vp_splice_nxv2bf16_with_firstelt:
568+
; CHECK: # %bb.0:
569+
; CHECK-NEXT: fmv.x.h a1, fa0
570+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
571+
; CHECK-NEXT: vslide1up.vx v9, v8, a1, v0.t
572+
; CHECK-NEXT: vmv1r.v v8, v9
573+
; CHECK-NEXT: ret
574+
%va = insertelement <vscale x 2 x bfloat> poison, bfloat %first, i32 0
575+
%v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl)
576+
ret <vscale x 2 x bfloat> %v
577+
}

0 commit comments

Comments
 (0)