@@ -1281,6 +1281,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
         setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
         setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+
+        // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
+        setOperationAction(ISD::BUILD_VECTOR, MVT::i64, Custom);
       }

       setOperationAction(
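Note, not part of the patch: a hypothetical RV32 example (using Clang's `vector_size` extension) of the node shape this registration exposes to the custom hook. Without it, type legalization would typically split each illegal i64 element into two i32 halves before the RISC-V lowering could recognize the sequence.

```cpp
#include <cstdint>

// Hypothetical example: this constant reaches SelectionDAG as a BUILD_VECTOR
// of i64 constants {0, 1, 2, 3} - exactly the index sequence a single vid.v
// materializes.
typedef std::int64_t V4I64 __attribute__((vector_size(32)));

V4I64 indices() { return V4I64{0, 1, 2, 3}; }
```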
@@ -3601,6 +3604,78 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
   return Gather;
 }

+static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
+                                      const RISCVSubtarget &Subtarget) {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+  SDLoc DL(Op);
+  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
+    int64_t StepNumerator = SimpleVID->StepNumerator;
+    unsigned StepDenominator = SimpleVID->StepDenominator;
+    int64_t Addend = SimpleVID->Addend;
+
+    assert(StepNumerator != 0 && "Invalid step");
+    bool Negate = false;
+    int64_t SplatStepVal = StepNumerator;
+    unsigned StepOpcode = ISD::MUL;
+    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
+    // anyway as the shift of 63 won't fit in uimm5.
+    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
+        isPowerOf2_64(std::abs(StepNumerator))) {
+      Negate = StepNumerator < 0;
+      StepOpcode = ISD::SHL;
+      SplatStepVal = Log2_64(std::abs(StepNumerator));
+    }
+
+    // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
+    // threshold since it's the immediate value many RVV instructions accept.
+    // There is no vmul.vi instruction so ensure the multiply constant can fit
+    // in a single addi instruction.
+    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
+         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
+        isPowerOf2_32(StepDenominator) &&
+        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
+      MVT VIDVT =
+          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+      MVT VIDContainerVT =
+          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
+      // Convert right out of the scalable type so we can use standard ISD
+      // nodes for the rest of the computation. If we used scalable types with
+      // these, we'd lose the fixed-length vector info and generate worse
+      // vsetvli code.
+      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
+      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
+          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
+        SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
+        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
+      }
+      if (StepDenominator != 1) {
+        SDValue SplatStep =
+            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
+        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
+      }
+      if (Addend != 0 || Negate) {
+        SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
+        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+                          VID);
+      }
+      if (VT.isFloatingPoint()) {
+        // TODO: Use vfwcvt to reduce register pressure.
+        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
+      }
+      return VID;
+    }
+  }
+
+  return SDValue();
+}
+
 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
 /// which constitute a large proportion of the elements. In such cases we can
 /// splat a vector with the dominant element and make up the shortfall with
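To make the arithmetic concrete, here is a standalone scalar model of the expression the new function builds, assuming, per the fields isSimpleVIDSequence returns, that lane i of the matched BUILD_VECTOR equals (i * StepNumerator) / StepDenominator + Addend. The name `vidElement` is invented for illustration; comments name the RVV instruction each step corresponds to in the real lowering.

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar model (not LLVM code) of lowerBuildVectorViaVID's
// output: computes the value lane I of the lowered vector holds.
int64_t vidElement(int64_t I, int64_t StepNumerator, unsigned StepDenominator,
                   int64_t Addend) {
  assert(StepNumerator != 0 && "Invalid step");
  bool Negate = false;
  int64_t V = I; // vid.v seeds lane I with the index I.
  uint64_t AbsStep =
      StepNumerator < 0 ? -uint64_t(StepNumerator) : uint64_t(StepNumerator);
  if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
      (AbsStep & (AbsStep - 1)) == 0) {
    // Power-of-two step: shift left (vsll.vi) instead of multiplying, and
    // fold a negative sign into a final reverse-subtract.
    Negate = StepNumerator < 0;
    V <<= __builtin_ctzll(AbsStep);
  } else {
    V *= StepNumerator; // vmul.vx; the matcher caps this at a 12-bit imm.
  }
  if (StepDenominator != 1) // power of two by construction
    V = int64_t(uint64_t(V) >> __builtin_ctz(StepDenominator)); // vsrl.vi
  // vadd.vi, or vrsub.vi when the step was negated; Addend must fit imm5.
  return Negate ? Addend - V : Addend + V;
}
```

A design point worth noting, taken from the comment in the patch: vid.v is emitted on the scalable container type, but the result is converted back to the fixed-length type immediately so the follow-up mul/shift/add use standard ISD nodes and keep the fixed-length VL information.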
@@ -3818,64 +3893,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   // Try and match index sequences, which we can lower to the vid instruction
   // with optional modifications. An all-undef vector is matched by
   // getSplatValue, above.
-  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
-    int64_t StepNumerator = SimpleVID->StepNumerator;
-    unsigned StepDenominator = SimpleVID->StepDenominator;
-    int64_t Addend = SimpleVID->Addend;
-
-    assert(StepNumerator != 0 && "Invalid step");
-    bool Negate = false;
-    int64_t SplatStepVal = StepNumerator;
-    unsigned StepOpcode = ISD::MUL;
-    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
-    // anyway as the shift of 63 won't fit in uimm5.
-    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
-        isPowerOf2_64(std::abs(StepNumerator))) {
-      Negate = StepNumerator < 0;
-      StepOpcode = ISD::SHL;
-      SplatStepVal = Log2_64(std::abs(StepNumerator));
-    }
-
-    // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
-    // threshold since it's the immediate value many RVV instructions accept.
-    // There is no vmul.vi instruction so ensure multiply constant can fit in
-    // a single addi instruction.
-    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
-         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
-        isPowerOf2_32(StepDenominator) &&
-        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
-      MVT VIDVT =
-          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
-      MVT VIDContainerVT =
-          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
-      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
-      // Convert right out of the scalable type so we can use standard ISD
-      // nodes for the rest of the computation. If we used scalable types with
-      // these, we'd lose the fixed-length vector info and generate worse
-      // vsetvli code.
-      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
-      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
-          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
-        SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
-        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
-      }
-      if (StepDenominator != 1) {
-        SDValue SplatStep =
-            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
-        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
-      }
-      if (Addend != 0 || Negate) {
-        SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
-        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
-                          VID);
-      }
-      if (VT.isFloatingPoint()) {
-        // TODO: Use vfwcvt to reduce register pressure.
-        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
-      }
-      return VID;
-    }
-  }
+  if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
+    return Res;

   // For very small build_vectors, use a single scalar insert of a constant.
   // TODO: Base this on constant rematerialization cost, not size.
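A quick check of the scalar model from above against two sequences of the kind the matcher accepts. The (step, denominator, addend) triples are assumptions about what isSimpleVIDSequence would return for these lanes, not values taken from the patch:

```cpp
#include <cassert>
#include <cstdint>

int64_t vidElement(int64_t I, int64_t StepNumerator, unsigned StepDenominator,
                   int64_t Addend); // from the sketch above

int main() {
  // {5, 3, 1, -1}: step -2 (power of two, so vsll.vi + vrsub.vi), addend 5.
  for (int64_t I = 0; I < 4; ++I)
    assert(vidElement(I, /*Step=*/-2, /*Denom=*/1, /*Addend=*/5) == 5 - 2 * I);
  // {0, 0, 1, 1}: fractional step 1/2, lowered with vsrl.vi.
  for (int64_t I = 0; I < 4; ++I)
    assert(vidElement(I, 1, 2, 0) == I / 2);
  return 0;
}
```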
@@ -7473,8 +7492,15 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerVECTOR_REVERSE(Op, DAG);
   case ISD::VECTOR_SPLICE:
     return lowerVECTOR_SPLICE(Op, DAG);
-  case ISD::BUILD_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    MVT VT = Op.getSimpleValueType();
+    MVT EltVT = VT.getVectorElementType();
+    // Scalar i64 is not legal on RV32, so only attempt the VID lowering
+    // here; returning SDValue() lets legalization take its default path.
+    if (!Subtarget.is64Bit() && EltVT == MVT::i64)
+      return lowerBuildVectorViaVID(Op, DAG, Subtarget);
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
+  }
   case ISD::SPLAT_VECTOR: {
     MVT VT = Op.getSimpleValueType();
     MVT EltVT = VT.getVectorElementType();