@@ -1289,6 +1289,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
         setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
         setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+
+        // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
+        setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
       }
 
       setOperationAction(
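Note on the hunk above: it only registers a Custom action so that i64 BUILD_VECTOR nodes reach RISCVTargetLowering::LowerOperation on RV32; the actual lowering is the helper added in the next hunk. As background, here is a minimal scalar model of the RVV vid.v instruction the new path targets. This is an illustration, not code from the patch, and the function name is made up:

    // Per-lane semantics of vid.v: each element receives its own index.
    // Because the indices are produced in the vector unit, an i64
    // arithmetic-progression constant never has to be assembled from pairs
    // of 32-bit GPRs on RV32. (Illustrative model, not LLVM code.)
    #include <cstdint>
    #include <vector>

    std::vector<uint64_t> vidV(unsigned AVL) {
      std::vector<uint64_t> Lanes(AVL);
      for (unsigned I = 0; I < AVL; ++I)
        Lanes[I] = I; // lane I gets the value I
      return Lanes;
    }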
@@ -3622,6 +3625,78 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
   return Gather;
 }
 
+static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
+                                      const RISCVSubtarget &Subtarget) {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+  SDLoc DL(Op);
+  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
+    int64_t StepNumerator = SimpleVID->StepNumerator;
+    unsigned StepDenominator = SimpleVID->StepDenominator;
+    int64_t Addend = SimpleVID->Addend;
+
+    assert(StepNumerator != 0 && "Invalid step");
+    bool Negate = false;
+    int64_t SplatStepVal = StepNumerator;
+    unsigned StepOpcode = ISD::MUL;
+    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
+    // anyway as the shift of 63 won't fit in uimm5.
+    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
+        isPowerOf2_64(std::abs(StepNumerator))) {
+      Negate = StepNumerator < 0;
+      StepOpcode = ISD::SHL;
+      SplatStepVal = Log2_64(std::abs(StepNumerator));
+    }
+
+    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
+    // threshold since it's the immediate value many RVV instructions accept.
+    // There is no vmul.vi instruction so ensure the multiply constant can fit
+    // in a single addi instruction.
+    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
+         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
+        isPowerOf2_32(StepDenominator) &&
+        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
+      MVT VIDVT =
+          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+      MVT VIDContainerVT =
+          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
+      // Convert right out of the scalable type so we can use standard ISD
+      // nodes for the rest of the computation. If we used scalable types with
+      // these, we'd lose the fixed-length vector info and generate worse
+      // vsetvli code.
+      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
+      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
+          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
+        SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
+        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
+      }
+      if (StepDenominator != 1) {
+        SDValue SplatStep =
+            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
+        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
+      }
+      if (Addend != 0 || Negate) {
+        SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
+        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+                          VID);
+      }
+      if (VT.isFloatingPoint()) {
+        // TODO: Use vfwcvt to reduce register pressure.
+        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
+      }
+      return VID;
+    }
+  }
+
+  return SDValue();
+}
+
 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
 /// which constitute a large proportion of the elements. In such cases we can
 /// splat a vector with the dominant element and make up the shortfall with
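The helper above handles any BUILD_VECTOR whose elements form a "simple VID sequence". A closed-form scalar model of the node chain it emits (VID, then MUL or SHL, then SRL, then ADD, or SUB with swapped operands when Negate is set) may help when reading it. This is a sketch with an illustrative name, not part of the patch:

    // Closed form of the value lowerBuildVectorViaVID produces for lane I.
    // The guard (SplatStepVal >= 0 || StepDenominator == 1) keeps the value
    // being shifted non-negative whenever StepDenominator > 1, so plain
    // integer division here agrees with the logical ISD::SRL in the DAG.
    // (Illustrative model, not LLVM code.)
    #include <cstdint>

    int64_t vidLane(int64_t I, int64_t StepNumerator, int64_t StepDenominator,
                    int64_t Addend) {
      return (I * StepNumerator) / StepDenominator + Addend;
    }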
@@ -3839,64 +3914,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   // Try and match index sequences, which we can lower to the vid instruction
   // with optional modifications. An all-undef vector is matched by
   // getSplatValue, above.
-  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
-    int64_t StepNumerator = SimpleVID->StepNumerator;
-    unsigned StepDenominator = SimpleVID->StepDenominator;
-    int64_t Addend = SimpleVID->Addend;
-
-    assert(StepNumerator != 0 && "Invalid step");
-    bool Negate = false;
-    int64_t SplatStepVal = StepNumerator;
-    unsigned StepOpcode = ISD::MUL;
-    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
-    // anyway as the shift of 63 won't fit in uimm5.
-    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
-        isPowerOf2_64(std::abs(StepNumerator))) {
-      Negate = StepNumerator < 0;
-      StepOpcode = ISD::SHL;
-      SplatStepVal = Log2_64(std::abs(StepNumerator));
-    }
-
-    // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
-    // threshold since it's the immediate value many RVV instructions accept.
-    // There is no vmul.vi instruction so ensure multiply constant can fit in
-    // a single addi instruction.
-    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
-         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
-        isPowerOf2_32(StepDenominator) &&
-        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
-      MVT VIDVT =
-          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
-      MVT VIDContainerVT =
-          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
-      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
-      // Convert right out of the scalable type so we can use standard ISD
-      // nodes for the rest of the computation. If we used scalable types with
-      // these, we'd lose the fixed-length vector info and generate worse
-      // vsetvli code.
-      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
-      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
-          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
-        SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
-        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
-      }
-      if (StepDenominator != 1) {
-        SDValue SplatStep =
-            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
-        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
-      }
-      if (Addend != 0 || Negate) {
-        SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
-        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
-                          VID);
-      }
-      if (VT.isFloatingPoint()) {
-        // TODO: Use vfwcvt to reduce register pressure.
-        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
-      }
-      return VID;
-    }
-  }
+  if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
+    return Res;
 
   // For very small build_vectors, use a single scalar insert of a constant.
   // TODO: Base this on constant rematerialization cost, not size.
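The hunk above is a pure refactor: the inline sequence becomes a call to the new helper, using the common LLVM fall-through idiom in which an empty SDValue converts to false, so the remaining BUILD_VECTOR strategies still get a chance. A self-contained sketch of that idiom with stand-in types (not the real LLVM API):

    // Stand-in for SDValue's boolean conversion: empty means "no match".
    // (Illustrative types and names, not LLVM code.)
    struct Lowered {
      const char *Strategy = nullptr;
      explicit operator bool() const { return Strategy != nullptr; }
    };

    Lowered tryVID(bool Matches) { return {Matches ? "vid" : nullptr}; }

    Lowered lowerConstants(bool VIDMatches) {
      if (Lowered L = tryVID(VIDMatches))
        return L;                  // VID sequence matched
      return {"other-strategy"};   // otherwise fall through to other lowerings
    }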
@@ -7586,8 +7605,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerVECTOR_REVERSE(Op, DAG);
   case ISD::VECTOR_SPLICE:
     return lowerVECTOR_SPLICE(Op, DAG);
-  case ISD::BUILD_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    MVT VT = Op.getSimpleValueType();
+    MVT EltVT = VT.getVectorElementType();
+    if (!Subtarget.is64Bit() && EltVT == MVT::i64)
+      return lowerBuildVectorViaVID(Op, DAG, Subtarget);
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
+  }
   case ISD::SPLAT_VECTOR: {
     MVT VT = Op.getSimpleValueType();
     MVT EltVT = VT.getVectorElementType();
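A worked instance of the whole path, with concrete but hypothetical values: on RV32, the i64 constant vector {3, 1, -1, -3} matches StepNumerator == -2, StepDenominator == 1, Addend == 3. The negative power-of-two step becomes a left shift by 1 with Negate set, and the result is an ISD::SUB with the splatted addend as the left operand, so no 64-bit scalar is ever built from GPR pairs. A runnable check of the arithmetic (assumed example, not from the patch's tests):

    // Verifies that lane I of {3, 1, -1, -3} equals
    // (I * StepNumerator) / StepDenominator + Addend.
    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t StepNumerator = -2, StepDenominator = 1, Addend = 3;
      const int64_t Expected[] = {3, 1, -1, -3};
      for (int64_t I = 0; I < 4; ++I)
        assert((I * StepNumerator) / StepDenominator + Addend == Expected[I]);
      return 0;
    }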