@@ -9816,12 +9816,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     }
   }
 
-  // If the subvector vector is a fixed-length type, we cannot use subregister
-  // manipulation to simplify the codegen; we don't know which register of a
-  // LMUL group contains the specific subvector as we only know the minimum
-  // register size. Therefore we must slide the vector group up the full
-  // amount.
-  if (SubVecVT.isFixedLengthVector()) {
+  // If the subvector vector is a fixed-length type and we don't know VLEN
+  // exactly, we cannot use subregister manipulation to simplify the codegen; we
+  // don't know which register of a LMUL group contains the specific subvector
+  // as we only know the minimum register size. Therefore we must slide the
+  // vector group up the full amount.
+  const auto VLen = Subtarget.getRealVLen();
+  if (SubVecVT.isFixedLengthVector() && !VLen) {
     if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
       return Op;
     MVT ContainerVT = VecVT;
@@ -9869,41 +9870,90 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     return DAG.getBitcast(Op.getValueType(), SubVec);
   }
 
-  unsigned SubRegIdx, RemIdx;
-  std::tie(SubRegIdx, RemIdx) =
-      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
-          VecVT, SubVecVT, OrigIdx, TRI);
+  MVT ContainerVecVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVecVT = getContainerForFixedLengthVector(VecVT);
+    Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
+  }
+
+  MVT ContainerSubVecVT = SubVecVT;
+  if (SubVecVT.isFixedLengthVector()) {
+    ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
+    SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
+  }
+
+  unsigned SubRegIdx;
+  ElementCount RemIdx;
+  // insert_subvector scales the index by vscale if the subvector is scalable,
+  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
+  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
+  if (SubVecVT.isFixedLengthVector()) {
+    assert(VLen);
+    unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+    auto Decompose =
+        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
+    SubRegIdx = Decompose.first;
+    RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
+                                    (OrigIdx % Vscale));
+  } else {
+    auto Decompose =
+        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
+    SubRegIdx = Decompose.first;
+    RemIdx = ElementCount::getScalable(Decompose.second);
+  }
 
-  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
-  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
-                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
-                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
+  assert(isPowerOf2_64(
+      Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
+  bool ExactlyVecRegSized =
+      Subtarget.expandVScale(SubVecVT.getSizeInBits())
+          .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
 
   // 1. If the Idx has been completely eliminated and this subvector's size is
   // a vector register or a multiple thereof, or the surrounding elements are
   // undef, then this is a subvector insert which naturally aligns to a vector
   // register. These can easily be handled using subregister manipulation.
-  // 2. If the subvector is smaller than a vector register, then the insertion
-  // must preserve the undisturbed elements of the register. We do this by
-  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
-  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
-  // subvector within the vector register, and an INSERT_SUBVECTOR of that
-  // LMUL=1 type back into the larger vector (resolving to another subregister
-  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
-  // to avoid allocating a large register group to hold our subvector.
-  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
+  // 2. If the subvector isn't an exact multiple of a valid register group size,
+  // then the insertion must preserve the undisturbed elements of the register.
+  // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
+  // vector type (which resolves to a subregister copy), performing a VSLIDEUP
+  // to place the subvector within the vector register, and an INSERT_SUBVECTOR
+  // of that LMUL=1 type back into the larger vector (resolving to another
+  // subregister operation). See below for how our VSLIDEUP works. We go via a
+  // LMUL=1 type to avoid allocating a large register group to hold our
+  // subvector.
+  if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
+    if (SubVecVT.isFixedLengthVector()) {
+      // We may get NoSubRegister if inserting at index 0 and the subvec
+      // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
+      if (SubRegIdx == RISCV::NoSubRegister) {
+        assert(OrigIdx == 0);
+        return Op;
+      }
+
+      SDValue Insert =
+          DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
+      if (VecVT.isFixedLengthVector())
+        Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
+      return Insert;
+    }
     return Op;
+  }
 
   // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
   // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
   // (in our case undisturbed). This means we can set up a subvector insertion
   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
   // size of the subvector.
-  MVT InterSubVT = VecVT;
+  MVT InterSubVT = ContainerVecVT;
   SDValue AlignedExtract = Vec;
-  unsigned AlignedIdx = OrigIdx - RemIdx;
-  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
-    InterSubVT = getLMUL1VT(VecVT);
+  unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
+  if (SubVecVT.isFixedLengthVector())
+    AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
+  if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
+    InterSubVT = getLMUL1VT(ContainerVecVT);
     // Extract a subvector equal to the nearest full vector register type. This
     // should resolve to a EXTRACT_SUBREG instruction.
     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9914,25 +9964,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                        DAG.getUNDEF(InterSubVT), SubVec,
                        DAG.getVectorIdxConstant(0, DL));
 
-  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
+  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
 
-  ElementCount EndIndex =
-      ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
-  VL = computeVLMax(SubVecVT, DL, DAG);
+  ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
+  VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
 
   // Use tail agnostic policy if we're inserting over InterSubVT's tail.
   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
-  if (EndIndex == InterSubVT.getVectorElementCount())
+  if (Subtarget.expandVScale(EndIndex) ==
+      Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
     Policy = RISCVII::TAIL_AGNOSTIC;
 
   // If we're inserting into the lowest elements, use a tail undisturbed
   // vmv.v.v.
-  if (RemIdx == 0) {
+  if (RemIdx.isZero()) {
     SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
                          SubVec, VL);
   } else {
-    SDValue SlideupAmt =
-        DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
+    SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
 
     // Construct the vector length corresponding to RemIdx + length(SubVecVT).
     VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
@@ -9943,10 +9992,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
 
   // If required, insert this subvector back into the correct vector register.
   // This should resolve to an INSERT_SUBREG instruction.
-  if (VecVT.bitsGT(InterSubVT))
-    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
+  if (ContainerVecVT.bitsGT(InterSubVT))
+    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
                          DAG.getVectorIdxConstant(AlignedIdx, DL));
 
+  if (VecVT.isFixedLengthVector())
+    SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+
   // We might have bitcast from a mask type: cast back to the original type if
   // required.
   return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
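
A note on the index bookkeeping above, for readers following along: when the subvector is a fixed-length type, OrigIdx counts real elements, while decomposeSubvectorInsertExtractToSubRegs reasons in vscale-scaled units, so the patch divides the index by vscale before decomposing and scales the remainder back up afterwards. The standalone sketch below mirrors that arithmetic with plain integers. It is not part of the patch and calls no LLVM APIs; the concrete values (VLEN = 256, OrigIdx = 10, and the stand-in remainder RemScalable) are assumptions chosen only to make the round-trip visible.

// Standalone sketch of the fixed-index <-> scalable-index conversion done in
// lowerINSERT_SUBVECTOR when the exact VLEN is known. Illustrative only.
#include <cassert>
#include <cstdio>

int main() {
  const unsigned VLEN = 256;           // assumed exact VLEN (*getRealVLen())
  const unsigned RVVBitsPerBlock = 64; // value of RISCV::RVVBitsPerBlock
  const unsigned Vscale = VLEN / RVVBitsPerBlock; // = 4 for this VLEN

  // A fixed-length insert index is in real elements; the subregister
  // decomposition works in vscale-scaled units, so divide before decomposing.
  const unsigned OrigIdx = 10;
  unsigned ScalableIdx = OrigIdx / Vscale; // what the decompose helper sees
  unsigned Leftover = OrigIdx % Vscale;    // part no subregister can cover

  // Stand-in for Decompose.second: the remainder (in scalable elements) that
  // the decomposition could not fold into a subregister index.
  unsigned RemScalable = 1;

  // Convert the remainder back into real elements, as the patch does with
  // RemIdx = Decompose.second * Vscale + OrigIdx % Vscale.
  unsigned RemIdxFixed = RemScalable * Vscale + Leftover;

  // AlignedIdx is the register-aligned starting element, converted back to
  // scalable units before being used with the scalable container type.
  unsigned AlignedIdxFixed = OrigIdx - RemIdxFixed;
  unsigned AlignedIdxScalable = AlignedIdxFixed / Vscale;

  assert(AlignedIdxFixed + RemIdxFixed == OrigIdx);
  std::printf("vscale=%u decomposeIdx=%u remFixed=%u alignedScalable=%u\n",
              Vscale, ScalableIdx, RemIdxFixed, AlignedIdxScalable);
  return 0;
}

With these assumed numbers the leftover slide-up offset comes out as 6 real elements and the aligned extract starts at scalable index 1, which is the kind of split the lowering then hands to the VSLIDEUP sequence.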
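
The VSLIDEUP comment in the second hunk can also be modelled directly. The sketch below (again not part of the patch; the array sizes and values are invented for illustration) shows the tail-undisturbed slide-up the lowering relies on: elements below OFFSET keep their old values, elements OFFSET <= i < VL take the slid-up subvector, and elements at or past VL are left undisturbed, so setting VL = OFFSET + subvector-length touches exactly the destination slots of the subvector.

// Scalar model of a tail-undisturbed vslideup used for subvector insertion.
#include <array>
#include <cstddef>
#include <cstdio>

template <std::size_t N, std::size_t M>
std::array<int, N> slideupTU(std::array<int, N> Dest,
                             const std::array<int, M> &Src, std::size_t Offset,
                             std::size_t VL) {
  // Body region: OFFSET <= i < VL receives the slid-up source elements.
  for (std::size_t I = Offset; I < VL && I < N; ++I)
    Dest[I] = Src[I - Offset];
  // i < OFFSET and i >= VL are returned unchanged (tail undisturbed).
  return Dest;
}

int main() {
  std::array<int, 8> Reg{0, 1, 2, 3, 4, 5, 6, 7}; // an LMUL=1 register's lanes
  std::array<int, 2> Sub{40, 41};                 // subvector being inserted
  std::size_t RemIdx = 3;                         // leftover insertion offset
  auto Res = slideupTU(Reg, Sub, RemIdx, RemIdx + Sub.size());
  for (int V : Res)
    std::printf("%d ", V); // prints: 0 1 2 40 41 5 6 7
  std::printf("\n");
  return 0;
}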