Skip to content

Commit 4067ab3

Browse files
committed
[RISCV] use vwadd.vx for extended splat
1 parent ab6a238 commit 4067ab3

File tree

4 files changed

+181
-244
lines changed

4 files changed

+181
-244
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 48 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13597,7 +13597,8 @@ struct NodeExtensionHelper {
1359713597

1359813598
/// Check if this instance represents a splat.
1359913599
bool isSplat() const {
13600-
return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
13600+
return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13601+
OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
1360113602
}
1360213603

1360313604
/// Get the extended opcode.
@@ -13641,6 +13642,8 @@ struct NodeExtensionHelper {
1364113642
case RISCVISD::VZEXT_VL:
1364213643
case RISCVISD::FP_EXTEND_VL:
1364313644
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13645+
case ISD::SPLAT_VECTOR:
13646+
return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
1364413647
case RISCVISD::VMV_V_X_VL:
1364513648
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
1364613649
DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
@@ -13776,6 +13779,47 @@ struct NodeExtensionHelper {
1377613779
/// Check if this node needs to be fully folded or extended for all users.
1377713780
bool needToPromoteOtherUsers() const { return EnforceOneUse; }
1377813781

13782+
void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13783+
const RISCVSubtarget &Subtarget) {
13784+
unsigned Opc = OrigOperand.getOpcode();
13785+
MVT VT = OrigOperand.getSimpleValueType();
13786+
13787+
assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13788+
"Unexpected Opcode");
13789+
13790+
// The passthru must be undef for tail agnostic.
13791+
if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
13792+
return;
13793+
13794+
// Get the scalar value.
13795+
SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
13796+
: OrigOperand.getOperand(1);
13797+
13798+
// See if we have enough sign bits or zero bits in the scalar to use a
13799+
// widening opcode by splatting to smaller element size.
13800+
unsigned EltBits = VT.getScalarSizeInBits();
13801+
unsigned ScalarBits = Op.getValueSizeInBits();
13802+
// Make sure we're getting all element bits from the scalar register.
13803+
// FIXME: Support implicit sign extension of vmv.v.x?
13804+
if (ScalarBits < EltBits)
13805+
return;
13806+
13807+
unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13808+
// If the narrow type cannot be expressed with a legal VMV,
13809+
// this is not a valid candidate.
13810+
if (NarrowSize < 8)
13811+
return;
13812+
13813+
if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13814+
SupportsSExt = true;
13815+
13816+
if (DAG.MaskedValueIsZero(Op,
13817+
APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13818+
SupportsZExt = true;
13819+
13820+
EnforceOneUse = false;
13821+
}
13822+
1377913823
/// Helper method to set the various fields of this struct based on the
1378013824
/// type of \p Root.
1378113825
void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
@@ -13814,43 +13858,10 @@ struct NodeExtensionHelper {
1381413858
case RISCVISD::FP_EXTEND_VL:
1381513859
SupportsFPExt = true;
1381613860
break;
13817-
case RISCVISD::VMV_V_X_VL: {
13818-
// Historically, we didn't care about splat values not disappearing during
13819-
// combines.
13820-
EnforceOneUse = false;
13821-
13822-
// The operand is a splat of a scalar.
13823-
13824-
// The passthru must be undef for tail agnostic.
13825-
if (!OrigOperand.getOperand(0).isUndef())
13826-
break;
13827-
13828-
// Get the scalar value.
13829-
SDValue Op = OrigOperand.getOperand(1);
13830-
13831-
// See if we have enough sign bits or zero bits in the scalar to use a
13832-
// widening opcode by splatting to smaller element size.
13833-
MVT VT = Root->getSimpleValueType(0);
13834-
unsigned EltBits = VT.getScalarSizeInBits();
13835-
unsigned ScalarBits = Op.getValueSizeInBits();
13836-
// Make sure we're getting all element bits from the scalar register.
13837-
// FIXME: Support implicit sign extension of vmv.v.x?
13838-
if (ScalarBits < EltBits)
13839-
break;
13840-
13841-
unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13842-
// If the narrow type cannot be expressed with a legal VMV,
13843-
// this is not a valid candidate.
13844-
if (NarrowSize < 8)
13845-
break;
13846-
13847-
if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13848-
SupportsSExt = true;
13849-
if (DAG.MaskedValueIsZero(Op,
13850-
APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13851-
SupportsZExt = true;
13861+
case ISD::SPLAT_VECTOR:
13862+
case RISCVISD::VMV_V_X_VL:
13863+
fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
1385213864
break;
13853-
}
1385413865
default:
1385513866
break;
1385613867
}

llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll

Lines changed: 0 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,22 +1229,6 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
12291229
; RV64I-NEXT: vsrl.vx v8, v8, a0
12301230
; RV64I-NEXT: ret
12311231
;
1232-
; CHECK-F-LABEL: ctlz_nxv1i64:
1233-
; CHECK-F: # %bb.0:
1234-
; CHECK-F-NEXT: li a0, 190
1235-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1236-
; CHECK-F-NEXT: vmv.v.x v9, a0
1237-
; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1238-
; CHECK-F-NEXT: fsrmi a0, 1
1239-
; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
1240-
; CHECK-F-NEXT: vsrl.vi v8, v10, 23
1241-
; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
1242-
; CHECK-F-NEXT: li a1, 64
1243-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1244-
; CHECK-F-NEXT: vminu.vx v8, v9, a1
1245-
; CHECK-F-NEXT: fsrm a0
1246-
; CHECK-F-NEXT: ret
1247-
;
12481232
; CHECK-D-LABEL: ctlz_nxv1i64:
12491233
; CHECK-D: # %bb.0:
12501234
; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -1370,22 +1354,6 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
13701354
; RV64I-NEXT: vsrl.vx v8, v8, a0
13711355
; RV64I-NEXT: ret
13721356
;
1373-
; CHECK-F-LABEL: ctlz_nxv2i64:
1374-
; CHECK-F: # %bb.0:
1375-
; CHECK-F-NEXT: li a0, 190
1376-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1377-
; CHECK-F-NEXT: vmv.v.x v10, a0
1378-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1379-
; CHECK-F-NEXT: fsrmi a0, 1
1380-
; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
1381-
; CHECK-F-NEXT: vsrl.vi v8, v12, 23
1382-
; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
1383-
; CHECK-F-NEXT: li a1, 64
1384-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1385-
; CHECK-F-NEXT: vminu.vx v8, v10, a1
1386-
; CHECK-F-NEXT: fsrm a0
1387-
; CHECK-F-NEXT: ret
1388-
;
13891357
; CHECK-D-LABEL: ctlz_nxv2i64:
13901358
; CHECK-D: # %bb.0:
13911359
; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma
@@ -1511,22 +1479,6 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
15111479
; RV64I-NEXT: vsrl.vx v8, v8, a0
15121480
; RV64I-NEXT: ret
15131481
;
1514-
; CHECK-F-LABEL: ctlz_nxv4i64:
1515-
; CHECK-F: # %bb.0:
1516-
; CHECK-F-NEXT: li a0, 190
1517-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1518-
; CHECK-F-NEXT: vmv.v.x v12, a0
1519-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1520-
; CHECK-F-NEXT: fsrmi a0, 1
1521-
; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
1522-
; CHECK-F-NEXT: vsrl.vi v8, v16, 23
1523-
; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
1524-
; CHECK-F-NEXT: li a1, 64
1525-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
1526-
; CHECK-F-NEXT: vminu.vx v8, v12, a1
1527-
; CHECK-F-NEXT: fsrm a0
1528-
; CHECK-F-NEXT: ret
1529-
;
15301482
; CHECK-D-LABEL: ctlz_nxv4i64:
15311483
; CHECK-D: # %bb.0:
15321484
; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma
@@ -1652,22 +1604,6 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
16521604
; RV64I-NEXT: vsrl.vx v8, v8, a0
16531605
; RV64I-NEXT: ret
16541606
;
1655-
; CHECK-F-LABEL: ctlz_nxv8i64:
1656-
; CHECK-F: # %bb.0:
1657-
; CHECK-F-NEXT: li a0, 190
1658-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1659-
; CHECK-F-NEXT: vmv.v.x v16, a0
1660-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1661-
; CHECK-F-NEXT: fsrmi a0, 1
1662-
; CHECK-F-NEXT: vfncvt.f.xu.w v24, v8
1663-
; CHECK-F-NEXT: vsrl.vi v8, v24, 23
1664-
; CHECK-F-NEXT: vwsubu.wv v16, v16, v8
1665-
; CHECK-F-NEXT: li a1, 64
1666-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1667-
; CHECK-F-NEXT: vminu.vx v8, v16, a1
1668-
; CHECK-F-NEXT: fsrm a0
1669-
; CHECK-F-NEXT: ret
1670-
;
16711607
; CHECK-D-LABEL: ctlz_nxv8i64:
16721608
; CHECK-D: # %bb.0:
16731609
; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma
@@ -2835,20 +2771,6 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
28352771
; RV64I-NEXT: vsrl.vx v8, v8, a0
28362772
; RV64I-NEXT: ret
28372773
;
2838-
; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
2839-
; CHECK-F: # %bb.0:
2840-
; CHECK-F-NEXT: li a0, 190
2841-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
2842-
; CHECK-F-NEXT: vmv.v.x v9, a0
2843-
; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
2844-
; CHECK-F-NEXT: fsrmi a0, 1
2845-
; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
2846-
; CHECK-F-NEXT: vsrl.vi v8, v10, 23
2847-
; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
2848-
; CHECK-F-NEXT: fsrm a0
2849-
; CHECK-F-NEXT: vmv1r.v v8, v9
2850-
; CHECK-F-NEXT: ret
2851-
;
28522774
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
28532775
; CHECK-D: # %bb.0:
28542776
; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -2971,20 +2893,6 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
29712893
; RV64I-NEXT: vsrl.vx v8, v8, a0
29722894
; RV64I-NEXT: ret
29732895
;
2974-
; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
2975-
; CHECK-F: # %bb.0:
2976-
; CHECK-F-NEXT: li a0, 190
2977-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
2978-
; CHECK-F-NEXT: vmv.v.x v10, a0
2979-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2980-
; CHECK-F-NEXT: fsrmi a0, 1
2981-
; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
2982-
; CHECK-F-NEXT: vsrl.vi v8, v12, 23
2983-
; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
2984-
; CHECK-F-NEXT: fsrm a0
2985-
; CHECK-F-NEXT: vmv2r.v v8, v10
2986-
; CHECK-F-NEXT: ret
2987-
;
29882896
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
29892897
; CHECK-D: # %bb.0:
29902898
; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma
@@ -3107,20 +3015,6 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
31073015
; RV64I-NEXT: vsrl.vx v8, v8, a0
31083016
; RV64I-NEXT: ret
31093017
;
3110-
; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
3111-
; CHECK-F: # %bb.0:
3112-
; CHECK-F-NEXT: li a0, 190
3113-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
3114-
; CHECK-F-NEXT: vmv.v.x v12, a0
3115-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3116-
; CHECK-F-NEXT: fsrmi a0, 1
3117-
; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
3118-
; CHECK-F-NEXT: vsrl.vi v8, v16, 23
3119-
; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
3120-
; CHECK-F-NEXT: fsrm a0
3121-
; CHECK-F-NEXT: vmv4r.v v8, v12
3122-
; CHECK-F-NEXT: ret
3123-
;
31243018
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
31253019
; CHECK-D: # %bb.0:
31263020
; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma
@@ -3243,20 +3137,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
32433137
; RV64I-NEXT: vsrl.vx v8, v8, a0
32443138
; RV64I-NEXT: ret
32453139
;
3246-
; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
3247-
; CHECK-F: # %bb.0:
3248-
; CHECK-F-NEXT: vmv8r.v v16, v8
3249-
; CHECK-F-NEXT: li a0, 190
3250-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
3251-
; CHECK-F-NEXT: vmv.v.x v8, a0
3252-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
3253-
; CHECK-F-NEXT: fsrmi a0, 1
3254-
; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16
3255-
; CHECK-F-NEXT: vsrl.vi v16, v24, 23
3256-
; CHECK-F-NEXT: vwsubu.wv v8, v8, v16
3257-
; CHECK-F-NEXT: fsrm a0
3258-
; CHECK-F-NEXT: ret
3259-
;
32603140
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
32613141
; CHECK-D: # %bb.0:
32623142
; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma

0 commit comments

Comments
 (0)