Commit fd465f3

[RISCV] Move vmv_s_x and vfmv_s_f special casing to DAG combine
We'd discussed this in the original set of patches months ago, but decided against it. I think we should reverse ourselves here: the code is significantly more readable, and we pick up cases we'd previously missed because the appropriate helper routine wasn't being called.

Differential Revision: https://reviews.llvm.org/D158854
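To make the effect concrete: among the cases picked up are scalar inserts of small immediates, which can now be materialized with vmv.v.i at LMUL=1. A minimal reproducer, assembled from the insertelt_nxv2i64_imm_c10 test updated further below (the ret line is added here for completeness and is not part of the diff), is:

; Sketch of the scalable-vector case from llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll.
; Only the define and insertelement lines appear in the diff; the ret is assumed.
define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
  %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
  ret <vscale x 2 x i64> %r
}

With this patch, llc (with the V extension enabled) materializes the constant 10 via vmv.v.i v10, 10 at m1 instead of li a0, 10 followed by vmv.s.x, as the updated CHECK lines show.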
1 parent: e015d38

7 files changed: +65 −72 lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 33 additions & 34 deletions
@@ -3698,20 +3698,10 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                          DAG.getConstant(0, DL, XLenVT));
   }
 
-  if (VT.isFloatingPoint()) {
-    // TODO: Use vmv.v.i for appropriate constants
-    // Use M1 or smaller to avoid over constraining register allocation
-    const MVT M1VT = getLMUL1VT(VT);
-    auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
-    SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
-                                 DAG.getUNDEF(InnerVT), Scalar, VL);
-    if (VT != InnerVT)
-      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
-                           DAG.getUNDEF(VT),
-                           Result, DAG.getConstant(0, DL, XLenVT));
-    return Result;
-  }
 
+  if (VT.isFloatingPoint())
+    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
+                       DAG.getUNDEF(VT), Scalar, VL);
 
   // Avoid the tricky legalization cases by falling back to using the
   // splat code which already handles it gracefully.
@@ -3727,24 +3717,8 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
   unsigned ExtOpc =
       isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
-  // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
-  // higher would involve overly constraining the register allocator for
-  // no purpose.
-  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
-    if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
-        VT.bitsLE(getLMUL1VT(VT)))
-      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
-  }
-  // Use M1 or smaller to avoid over constraining register allocation
-  const MVT M1VT = getLMUL1VT(VT);
-  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
-  SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
-                               DAG.getUNDEF(InnerVT), Scalar, VL);
-  if (VT != InnerVT)
-    Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
-                         DAG.getUNDEF(VT),
-                         Result, DAG.getConstant(0, DL, XLenVT));
-  return Result;
+  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
+                     DAG.getUNDEF(VT), Scalar, VL);
 }
 
 // Is this a shuffle extracts either the even or odd elements of a vector?
@@ -13386,6 +13360,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
+  const MVT XLenVT = Subtarget.getXLenVT();
+  SDLoc DL(N);
 
   // Helper to call SimplifyDemandedBits on an operand of N where only some low
   // bits are demanded. N will be added to the Worklist if it was not deleted.
@@ -13417,8 +13393,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return DCI.CombineTo(N, Lo, Hi);
   }
 
-  SDLoc DL(N);
-
   // It's cheaper to materialise two 32-bit integers than to load a double
   // from the constant pool and transfer it to integer registers through the
   // stack.
@@ -13752,7 +13726,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
 
     }
     EVT IndexVT = Index.getValueType();
-    MVT XLenVT = Subtarget.getXLenVT();
     // RISC-V indexed loads only support the "unsigned unscaled" addressing
     // mode, so anything else must be manually legalized.
     bool NeedsIdxLegalization =
@@ -14002,6 +13975,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
         return Src.getOperand(0);
       // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
     }
+    [[fallthrough]];
+  }
+  case RISCVISD::VMV_S_X_VL: {
+    const MVT VT = N->getSimpleValueType(0);
+    SDValue Passthru = N->getOperand(0);
+    SDValue Scalar = N->getOperand(1);
+    SDValue VL = N->getOperand(2);
+
+    // Use M1 or smaller to avoid over constraining register allocation
+    const MVT M1VT = getLMUL1VT(VT);
+    if (M1VT.bitsLT(VT) && Passthru.isUndef()) {
+      SDValue Result =
+          DAG.getNode(N->getOpcode(), DL, M1VT, Passthru, Scalar, VL);
+      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                           Result, DAG.getConstant(0, DL, XLenVT));
+      return Result;
+    }
+
+    // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
+    // higher would involve overly constraining the register allocator for
+    // no purpose.
+    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+        Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
+        VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
+      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
+
     break;
   }
   case ISD::INTRINSIC_VOID:
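In effect, lowerScalarInsert now always emits VMV_S_X_VL / VFMV_S_F_VL at the requested type, and the new combine shrinks the node to LMUL1 (or rewrites it to VMV_V_X_VL when the scalar is a small non-zero immediate) whenever the passthru is undef. A fixed-vector sketch of such a case, reconstructed from the insertelt_c6_v8i64 test updated below (only the signature and CHECK lines appear in the diff, so the body and value names here are assumed), is:

; Hypothetical reconstruction of the fixed-vector test body.
define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
  %v = insertelement <8 x i64> %a, i64 6, i32 %idx
  ret <8 x i64> %v
}

Before this change the 6 was materialized with li a1, 6 and vmv.s.x v12, a1; afterwards a single vmv.v.i v12, 6 suffices.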

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll

Lines changed: 4 additions & 8 deletions
@@ -419,19 +419,17 @@ define void @insertelt_v8i64_0_store(ptr %x) {
 define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
 ; RV32-LABEL: insertelt_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: li a1, -1
 ; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, a1
+; RV32-NEXT: vmv.v.i v12, -1
 ; RV32-NEXT: addi a1, a0, 1
 ; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
 ; RV32-NEXT: vslideup.vx v8, v12, a0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: insertelt_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: li a1, -1
 ; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, a1
+; RV64-NEXT: vmv.v.i v12, -1
 ; RV64-NEXT: slli a0, a0, 32
 ; RV64-NEXT: srli a0, a0, 32
 ; RV64-NEXT: addi a1, a0, 1
@@ -499,19 +497,17 @@ define void @insertelt_c6_v8i64_0_store(ptr %x) {
 define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
 ; RV32-LABEL: insertelt_c6_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: li a1, 6
 ; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, a1
+; RV32-NEXT: vmv.v.i v12, 6
 ; RV32-NEXT: addi a1, a0, 1
 ; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
 ; RV32-NEXT: vslideup.vx v8, v12, a0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: insertelt_c6_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: li a1, 6
 ; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, a1
+; RV64-NEXT: vmv.v.i v12, 6
 ; RV64-NEXT: slli a0, a0, 32
 ; RV64-NEXT: srli a0, a0, 32
 ; RV64-NEXT: addi a1, a0, 1

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Lines changed: 5 additions & 5 deletions
@@ -696,18 +696,18 @@ define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3,
 ; CHECK-NEXT: vsrl.vi v8, v8, 1
 ; CHECK-NEXT: vadd.vi v8, v8, 3
 ; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vmv.v.i v9, 3
 ; CHECK-NEXT: vse16.v v8, (a1)
 ; CHECK-NEXT: vse16.v v8, (a2)
 ; CHECK-NEXT: vse16.v v8, (a3)
 ; CHECK-NEXT: vse16.v v8, (a4)
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vmv.v.i v9, 4
+; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 1
+; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v9, (a5)
+; CHECK-NEXT: vse16.v v8, (a5)
 ; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v9, a0
 ; CHECK-NEXT: vse16.v v8, (a6)
 ; CHECK-NEXT: ret
   store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 8 additions & 8 deletions
@@ -945,15 +945,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: li a1, 1
 ; RV64-NEXT: vmv.v.i v12, 7
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 4
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.s.x v16, a1
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v16, 1
 ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: add a1, sp, a1

llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll

Lines changed: 8 additions & 10 deletions
@@ -781,9 +781,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0_c10(<vscale x 2 x i64> %v) {
 define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_imm_c10:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 10
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 10
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vslideup.vi v8, v10, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
@@ -793,9 +793,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx_c10(<vscale x 2 x i64> %v, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx_c10:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 10
 ; CHECK-NEXT: addi a1, a0, 1
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
 ; CHECK-NEXT: vslideup.vx v8, v10, a0
@@ -818,9 +817,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0_cn1(<vscale x 2 x i64> %v) {
 define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_imm_cn1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vslideup.vi v8, v10, 3
 ; CHECK-NEXT: ret
   %r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 3
@@ -830,9 +829,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx_cn1(<vscale x 2 x i64> %v, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx_cn1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -1
 ; CHECK-NEXT: addi a1, a0, 1
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
 ; CHECK-NEXT: vslideup.vx v8, v10, a0

llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll

Lines changed: 3 additions & 3 deletions
@@ -8,9 +8,9 @@ define i32 @splat_vector_split_i64() {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 3
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 3

llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll

Lines changed: 4 additions & 4 deletions
@@ -668,10 +668,10 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT: vmv.v.i v10, 1
 ; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0
 ; RV32MV-NEXT: vand.vv v8, v8, v10
-; RV32MV-NEXT: li a0, 2
-; RV32MV-NEXT: vmv.s.x v10, a0
-; RV32MV-NEXT: li a0, 1
-; RV32MV-NEXT: vmv.s.x v12, a0
+; RV32MV-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; RV32MV-NEXT: vmv.v.i v10, 2
+; RV32MV-NEXT: vmv.v.i v12, 1
+; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32MV-NEXT: vmv.v.i v14, 0
 ; RV32MV-NEXT: vsetivli zero, 3, e32, m2, tu, ma
 ; RV32MV-NEXT: vslideup.vi v14, v12, 2
