Skip to content

Commit 1aeda4b

Browse files
committed
[RISCV] use vwadd.vx for extended splat
1 parent 5e5e1c1 commit 1aeda4b

File tree

4 files changed

+188
-246
lines changed

4 files changed

+188
-246
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 55 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -13605,7 +13605,8 @@ struct NodeExtensionHelper {
1360513605

1360613606
/// Check if this instance represents a splat.
1360713607
bool isSplat() const {
13608-
return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
13608+
return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13609+
OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
1360913610
}
1361013611

1361113612
/// Get the extended opcode.
@@ -13649,6 +13650,8 @@ struct NodeExtensionHelper {
1364913650
case RISCVISD::VZEXT_VL:
1365013651
case RISCVISD::FP_EXTEND_VL:
1365113652
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13653+
case ISD::SPLAT_VECTOR:
13654+
return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
1365213655
case RISCVISD::VMV_V_X_VL:
1365313656
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
1365413657
DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
@@ -13781,6 +13784,54 @@ struct NodeExtensionHelper {
1378113784
/// Check if this node needs to be fully folded or extended for all users.
1378213785
bool needToPromoteOtherUsers() const { return EnforceOneUse; }
1378313786

13787+
/// Helper that sets the extension-related fields of this struct when
/// OrigOperand is a splat, i.e. an ISD::SPLAT_VECTOR or a
/// RISCVISD::VMV_V_X_VL node (anything else trips the assert below).
///
/// On the non-bailing path it:
///   - sets SupportsSExt and/or SupportsZExt when the splatted scalar fits in
///     half of the vector element width,
///   - clears EnforceOneUse,
///   - sets CheckMask (true only for ISD::SPLAT_VECTOR),
///   - sets VL, and for ISD::SPLAT_VECTOR also Mask.
///
/// Every early return happens before any of those assignments, so when the
/// splat is rejected this helper leaves all fields exactly as they were.
///
/// \p Root is only used here to obtain the SDLoc for the default VL ops.
/// \p DAG is queried for known-bits / sign-bits information on the scalar.
/// \p Subtarget is forwarded to getDefaultScalableVLOps.
void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13788+
const RISCVSubtarget &Subtarget) {
13789+
unsigned Opc = OrigOperand.getOpcode();
13790+
// Note: the element type is taken from the splat operand itself, not from
// Root.
MVT VT = OrigOperand.getSimpleValueType();
13791+
13792+
assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13793+
"Unexpected Opcode");
13794+
13795+
// The passthru must be undef for tail agnostic.
// Only VMV_V_X_VL is checked: its operand 0 is tested for undef here, while
// SPLAT_VECTOR's operand 0 is the scalar itself (see below).
13796+
if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
13797+
return;
13798+
13799+
// Get the scalar value.
// Operand 0 for SPLAT_VECTOR, operand 1 for VMV_V_X_VL.
13800+
SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
13801+
: OrigOperand.getOperand(1);
13802+
13803+
// See if we have enough sign bits or zero bits in the scalar to use a
13804+
// widening opcode by splatting to smaller element size.
13805+
unsigned EltBits = VT.getScalarSizeInBits();
13806+
unsigned ScalarBits = Op.getValueSizeInBits();
13807+
// Make sure we're getting all element bits from the scalar register.
13808+
// FIXME: Support implicit sign extension of vmv.v.x?
13809+
if (ScalarBits < EltBits)
13810+
return;
13811+
13812+
// The candidate narrow element is half the width of the current element.
unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13813+
// If the narrow type cannot be expressed with a legal VMV,
13814+
// this is not a valid candidate.
13815+
if (NarrowSize < 8)
13816+
return;
13817+
13818+
// Sign-extension is possible when the scalar's significant bits fit in the
// narrow element.
if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13819+
SupportsSExt = true;
13820+
13821+
// Zero-extension is possible when every scalar bit from position NarrowSize
// upwards is known to be zero.
if (DAG.MaskedValueIsZero(Op,
13822+
APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13823+
SupportsZExt = true;
13824+
13825+
// Splats are not subject to the one-use restriction.
EnforceOneUse = false;
13826+
// The mask is only checked for SPLAT_VECTOR, which is given a Mask below.
CheckMask = Opc == ISD::SPLAT_VECTOR;
13827+
13828+
// SPLAT_VECTOR has no Mask/VL operands, so both are taken from
// getDefaultScalableVLOps. VMV_V_X_VL supplies VL as operand 2; Mask is not
// assigned on that path.
// NOTE(review): the helper name suggests VT must be a scalable vector here;
// confirm a fixed-length SPLAT_VECTOR cannot reach this point.
if (Opc == ISD::SPLAT_VECTOR)
13829+
std::tie(Mask, VL) =
13830+
getDefaultScalableVLOps(VT, SDLoc(Root), DAG, Subtarget);
13831+
else
13832+
VL = OrigOperand.getOperand(2);
13833+
}
13834+
1378413835
/// Helper method to set the various fields of this struct based on the
1378513836
/// type of \p Root.
1378613837
void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
@@ -13826,45 +13877,10 @@ struct NodeExtensionHelper {
1382613877
Mask = OrigOperand.getOperand(1);
1382713878
VL = OrigOperand.getOperand(2);
1382813879
break;
13829-
case RISCVISD::VMV_V_X_VL: {
13830-
// Historically, we didn't care about splat values not disappearing during
13831-
// combines.
13832-
EnforceOneUse = false;
13833-
CheckMask = false;
13834-
VL = OrigOperand.getOperand(2);
13835-
13836-
// The operand is a splat of a scalar.
13837-
13838-
// The pasthru must be undef for tail agnostic.
13839-
if (!OrigOperand.getOperand(0).isUndef())
13840-
break;
13841-
13842-
// Get the scalar value.
13843-
SDValue Op = OrigOperand.getOperand(1);
13844-
13845-
// See if we have enough sign bits or zero bits in the scalar to use a
13846-
// widening opcode by splatting to smaller element size.
13847-
MVT VT = Root->getSimpleValueType(0);
13848-
unsigned EltBits = VT.getScalarSizeInBits();
13849-
unsigned ScalarBits = Op.getValueSizeInBits();
13850-
// Make sure we're getting all element bits from the scalar register.
13851-
// FIXME: Support implicit sign extension of vmv.v.x?
13852-
if (ScalarBits < EltBits)
13853-
break;
13854-
13855-
unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13856-
// If the narrow type cannot be expressed with a legal VMV,
13857-
// this is not a valid candidate.
13858-
if (NarrowSize < 8)
13859-
break;
13860-
13861-
if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13862-
SupportsSExt = true;
13863-
if (DAG.MaskedValueIsZero(Op,
13864-
APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13865-
SupportsZExt = true;
13880+
case ISD::SPLAT_VECTOR:
13881+
case RISCVISD::VMV_V_X_VL:
13882+
fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
1386613883
break;
13867-
}
1386813884
default:
1386913885
break;
1387013886
}

llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll

Lines changed: 0 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,22 +1229,6 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
12291229
; RV64I-NEXT: vsrl.vx v8, v8, a0
12301230
; RV64I-NEXT: ret
12311231
;
1232-
; CHECK-F-LABEL: ctlz_nxv1i64:
1233-
; CHECK-F: # %bb.0:
1234-
; CHECK-F-NEXT: li a0, 190
1235-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1236-
; CHECK-F-NEXT: vmv.v.x v9, a0
1237-
; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1238-
; CHECK-F-NEXT: fsrmi a0, 1
1239-
; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
1240-
; CHECK-F-NEXT: vsrl.vi v8, v10, 23
1241-
; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
1242-
; CHECK-F-NEXT: li a1, 64
1243-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1244-
; CHECK-F-NEXT: vminu.vx v8, v9, a1
1245-
; CHECK-F-NEXT: fsrm a0
1246-
; CHECK-F-NEXT: ret
1247-
;
12481232
; CHECK-D-LABEL: ctlz_nxv1i64:
12491233
; CHECK-D: # %bb.0:
12501234
; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -1370,22 +1354,6 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
13701354
; RV64I-NEXT: vsrl.vx v8, v8, a0
13711355
; RV64I-NEXT: ret
13721356
;
1373-
; CHECK-F-LABEL: ctlz_nxv2i64:
1374-
; CHECK-F: # %bb.0:
1375-
; CHECK-F-NEXT: li a0, 190
1376-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1377-
; CHECK-F-NEXT: vmv.v.x v10, a0
1378-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1379-
; CHECK-F-NEXT: fsrmi a0, 1
1380-
; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
1381-
; CHECK-F-NEXT: vsrl.vi v8, v12, 23
1382-
; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
1383-
; CHECK-F-NEXT: li a1, 64
1384-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1385-
; CHECK-F-NEXT: vminu.vx v8, v10, a1
1386-
; CHECK-F-NEXT: fsrm a0
1387-
; CHECK-F-NEXT: ret
1388-
;
13891357
; CHECK-D-LABEL: ctlz_nxv2i64:
13901358
; CHECK-D: # %bb.0:
13911359
; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma
@@ -1511,22 +1479,6 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
15111479
; RV64I-NEXT: vsrl.vx v8, v8, a0
15121480
; RV64I-NEXT: ret
15131481
;
1514-
; CHECK-F-LABEL: ctlz_nxv4i64:
1515-
; CHECK-F: # %bb.0:
1516-
; CHECK-F-NEXT: li a0, 190
1517-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1518-
; CHECK-F-NEXT: vmv.v.x v12, a0
1519-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1520-
; CHECK-F-NEXT: fsrmi a0, 1
1521-
; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
1522-
; CHECK-F-NEXT: vsrl.vi v8, v16, 23
1523-
; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
1524-
; CHECK-F-NEXT: li a1, 64
1525-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
1526-
; CHECK-F-NEXT: vminu.vx v8, v12, a1
1527-
; CHECK-F-NEXT: fsrm a0
1528-
; CHECK-F-NEXT: ret
1529-
;
15301482
; CHECK-D-LABEL: ctlz_nxv4i64:
15311483
; CHECK-D: # %bb.0:
15321484
; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma
@@ -1652,22 +1604,6 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
16521604
; RV64I-NEXT: vsrl.vx v8, v8, a0
16531605
; RV64I-NEXT: ret
16541606
;
1655-
; CHECK-F-LABEL: ctlz_nxv8i64:
1656-
; CHECK-F: # %bb.0:
1657-
; CHECK-F-NEXT: li a0, 190
1658-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1659-
; CHECK-F-NEXT: vmv.v.x v16, a0
1660-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1661-
; CHECK-F-NEXT: fsrmi a0, 1
1662-
; CHECK-F-NEXT: vfncvt.f.xu.w v24, v8
1663-
; CHECK-F-NEXT: vsrl.vi v8, v24, 23
1664-
; CHECK-F-NEXT: vwsubu.wv v16, v16, v8
1665-
; CHECK-F-NEXT: li a1, 64
1666-
; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1667-
; CHECK-F-NEXT: vminu.vx v8, v16, a1
1668-
; CHECK-F-NEXT: fsrm a0
1669-
; CHECK-F-NEXT: ret
1670-
;
16711607
; CHECK-D-LABEL: ctlz_nxv8i64:
16721608
; CHECK-D: # %bb.0:
16731609
; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma
@@ -2835,20 +2771,6 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
28352771
; RV64I-NEXT: vsrl.vx v8, v8, a0
28362772
; RV64I-NEXT: ret
28372773
;
2838-
; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
2839-
; CHECK-F: # %bb.0:
2840-
; CHECK-F-NEXT: li a0, 190
2841-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
2842-
; CHECK-F-NEXT: vmv.v.x v9, a0
2843-
; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
2844-
; CHECK-F-NEXT: fsrmi a0, 1
2845-
; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
2846-
; CHECK-F-NEXT: vsrl.vi v8, v10, 23
2847-
; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
2848-
; CHECK-F-NEXT: fsrm a0
2849-
; CHECK-F-NEXT: vmv1r.v v8, v9
2850-
; CHECK-F-NEXT: ret
2851-
;
28522774
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
28532775
; CHECK-D: # %bb.0:
28542776
; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma
@@ -2971,20 +2893,6 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
29712893
; RV64I-NEXT: vsrl.vx v8, v8, a0
29722894
; RV64I-NEXT: ret
29732895
;
2974-
; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
2975-
; CHECK-F: # %bb.0:
2976-
; CHECK-F-NEXT: li a0, 190
2977-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
2978-
; CHECK-F-NEXT: vmv.v.x v10, a0
2979-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2980-
; CHECK-F-NEXT: fsrmi a0, 1
2981-
; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
2982-
; CHECK-F-NEXT: vsrl.vi v8, v12, 23
2983-
; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
2984-
; CHECK-F-NEXT: fsrm a0
2985-
; CHECK-F-NEXT: vmv2r.v v8, v10
2986-
; CHECK-F-NEXT: ret
2987-
;
29882896
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
29892897
; CHECK-D: # %bb.0:
29902898
; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma
@@ -3107,20 +3015,6 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
31073015
; RV64I-NEXT: vsrl.vx v8, v8, a0
31083016
; RV64I-NEXT: ret
31093017
;
3110-
; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
3111-
; CHECK-F: # %bb.0:
3112-
; CHECK-F-NEXT: li a0, 190
3113-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
3114-
; CHECK-F-NEXT: vmv.v.x v12, a0
3115-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3116-
; CHECK-F-NEXT: fsrmi a0, 1
3117-
; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
3118-
; CHECK-F-NEXT: vsrl.vi v8, v16, 23
3119-
; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
3120-
; CHECK-F-NEXT: fsrm a0
3121-
; CHECK-F-NEXT: vmv4r.v v8, v12
3122-
; CHECK-F-NEXT: ret
3123-
;
31243018
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
31253019
; CHECK-D: # %bb.0:
31263020
; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma
@@ -3243,20 +3137,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
32433137
; RV64I-NEXT: vsrl.vx v8, v8, a0
32443138
; RV64I-NEXT: ret
32453139
;
3246-
; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
3247-
; CHECK-F: # %bb.0:
3248-
; CHECK-F-NEXT: vmv8r.v v16, v8
3249-
; CHECK-F-NEXT: li a0, 190
3250-
; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
3251-
; CHECK-F-NEXT: vmv.v.x v8, a0
3252-
; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
3253-
; CHECK-F-NEXT: fsrmi a0, 1
3254-
; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16
3255-
; CHECK-F-NEXT: vsrl.vi v16, v24, 23
3256-
; CHECK-F-NEXT: vwsubu.wv v8, v8, v16
3257-
; CHECK-F-NEXT: fsrm a0
3258-
; CHECK-F-NEXT: ret
3259-
;
32603140
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
32613141
; CHECK-D: # %bb.0:
32623142
; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma

0 commit comments

Comments
 (0)