
Commit 2bf65c0

[RISCV] Handle fixed length vectors with exact VLEN in lowerINSERT_SUBVECTOR
This is the insert_subvector equivalent to llvm#79949, where we can avoid sliding up by the full LMUL amount if we know the exact subregister the subvector will be inserted into. This mirrors the lowerEXTRACT_SUBVECTOR changes in that we handle this in two parts:

- We handle fixed length subvector types by converting the subvector to a scalable vector. But unlike EXTRACT_SUBVECTOR, we may also need to convert the vector being inserted into.
- Whenever we don't need a vslideup because either the subvector aligns to a vector register group *or* the vector is undef, we need to emit an insert_subreg ourselves because RISCVISelDAGToDAG::Select doesn't correctly handle fixed length subvectors yet: see d7a28f7.

I've left RISCVISelDAGToDAG::Select untouched for now (minus relaxing an invariant), so that the insert_subvector and extract_subvector code paths are the same. We should teach it to properly handle fixed length subvectors in a follow-up patch, so that the "exact subregister" logic is handled in one place instead of being spread across both RISCVISelDAGToDAG.cpp and RISCVISelLowering.cpp.
1 parent 96fc548 commit 2bf65c0
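To illustrate the idea, here is a minimal standalone sketch (not code from this patch; the helper name, VLEN and types are assumptions for the example): once VLEN is known exactly, an insert of a fixed-length subvector can map onto a plain subregister insert whenever the insertion point and the subvector size both fall on vector register boundaries; only otherwise is a vslideup needed.

// Standalone sketch, assuming RISCV::RVVBitsPerBlock == 64 and an exactly
// known VLEN; insertAlignsToVecReg is a made-up helper for illustration only.
#include <cassert>
#include <cstdio>

constexpr unsigned RVVBitsPerBlock = 64;

// Returns true if a SubVecBits-wide fixed subvector inserted at element index
// Idx (elements of EltBits bits) starts on a VLEN boundary and spans whole
// vector registers, so the insert maps onto a subregister insert.
bool insertAlignsToVecReg(unsigned VLen, unsigned SubVecBits, unsigned EltBits,
                          unsigned Idx) {
  assert(VLen % RVVBitsPerBlock == 0 && "VLEN is a multiple of the block size");
  unsigned StartBit = Idx * EltBits;
  return StartBit % VLen == 0 && SubVecBits % VLen == 0;
}

int main() {
  // With VLEN=128, a 128-bit v4i32 inserted at element 4 of a v8i32 starts
  // exactly at the second vector register of the group: no vslideup needed.
  std::printf("%d\n", insertAlignsToVecReg(128, 128, 32, 4)); // prints 1
  // A 64-bit v2i32 inserted at element 2 is not register-aligned and still
  // needs the slide path.
  std::printf("%d\n", insertAlignsToVecReg(128, 64, 32, 2));  // prints 0
}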

4 files changed: +359 −214 lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 7 additions & 1 deletion
@@ -2063,8 +2063,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     MVT SubVecContainerVT = SubVecVT;
     // Establish the correct scalable-vector types for any fixed-length type.
     if (SubVecVT.isFixedLengthVector()) {
-      assert(Idx == 0 && V.isUndef());
       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
+      bool AlignedToVecReg = false;
+      if (auto VLen = Subtarget->getRealVLen();
+          VLen && SubVecVT.getSizeInBits() ==
+                      SubVecContainerVT.getSizeInBits().getKnownMinValue() *
+                          (*VLen / RISCV::RVVBitsPerBlock))
+        AlignedToVecReg = true;
+      assert(Idx == 0 && (AlignedToVecReg || V.isUndef()));
     }
     MVT ContainerVT = VT;
     if (VT.isFixedLengthVector())

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 99 additions & 31 deletions
@@ -9596,6 +9596,21 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                             Vec, Mask, VL, DL, DAG, Subtarget);
 }
 
+/// Returns true if \p LHS is known to be equal to \p RHS, taking into account
+/// if VLEN is exactly known by \p Subtarget and thus vscale when handling
+/// scalable quantities.
+static bool isKnownEQ(ElementCount LHS, ElementCount RHS,
+                      const RISCVSubtarget &Subtarget) {
+  if (auto VLen = Subtarget.getRealVLen()) {
+    const unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+    if (LHS.isScalable())
+      LHS = ElementCount::getFixed(LHS.getKnownMinValue() * Vscale);
+    if (RHS.isScalable())
+      RHS = ElementCount::getFixed(RHS.getKnownMinValue() * Vscale);
+  }
+  return LHS == RHS;
+}
+
 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
   SDValue Vec = Op.getOperand(0);
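As a rough illustration of what the new isKnownEQ helper computes, here is a standalone sketch with plain integers (the Count struct and values are assumptions for the example, not LLVM's ElementCount API): once VLEN is known, scalable counts are rescaled to fixed counts before comparing.

#include <cassert>

// Count stands in for LLVM's ElementCount in this sketch.
struct Count { unsigned Min; bool Scalable; };

// Mirrors the helper: when VLEN is known, rescale scalable counts by vscale
// (VLEN / 64, since RISCV::RVVBitsPerBlock is 64) before comparing.
bool isKnownEQSketch(Count LHS, Count RHS, unsigned VLen) {
  const unsigned Vscale = VLen / 64;
  auto ToFixed = [&](Count C) { return C.Scalable ? C.Min * Vscale : C.Min; };
  return ToFixed(LHS) == ToFixed(RHS);
}

int main() {
  // With VLEN=128 (vscale 2), a scalable count of 2 equals a fixed count of 4.
  assert(isKnownEQSketch({2, true}, {4, false}, 128));
  return 0;
}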
@@ -9645,12 +9660,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     }
   }
 
-  // If the subvector vector is a fixed-length type, we cannot use subregister
-  // manipulation to simplify the codegen; we don't know which register of a
-  // LMUL group contains the specific subvector as we only know the minimum
-  // register size. Therefore we must slide the vector group up the full
-  // amount.
-  if (SubVecVT.isFixedLengthVector()) {
+  // If the subvector vector is a fixed-length type and we don't know VLEN
+  // exactly, we cannot use subregister manipulation to simplify the codegen; we
+  // don't know which register of a LMUL group contains the specific subvector
+  // as we only know the minimum register size. Therefore we must slide the
+  // vector group up the full amount.
+  const auto VLen = Subtarget.getRealVLen();
+  if (SubVecVT.isFixedLengthVector() && !VLen) {
     if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
       return Op;
     MVT ContainerVT = VecVT;
@@ -9698,41 +9714,92 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     return DAG.getBitcast(Op.getValueType(), SubVec);
   }
 
-  unsigned SubRegIdx, RemIdx;
-  std::tie(SubRegIdx, RemIdx) =
-      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
-          VecVT, SubVecVT, OrigIdx, TRI);
+  MVT ContainerVecVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVecVT = getContainerForFixedLengthVector(VecVT);
+    Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
+  }
 
-  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
+  MVT ContainerSubVecVT = SubVecVT;
+  if (SubVecVT.isFixedLengthVector()) {
+    ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
+    SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
+  }
+
+  unsigned SubRegIdx;
+  ElementCount RemIdx;
+  // insert_subvector scales the index by vscale if the subvector is scalable,
+  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
+  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
+  if (SubVecVT.isFixedLengthVector()) {
+    assert(VLen);
+    unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+    auto Decompose =
+        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
+    SubRegIdx = Decompose.first;
+    RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
+                                    (OrigIdx % Vscale));
+  } else {
+    auto Decompose =
+        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
+    SubRegIdx = Decompose.first;
+    RemIdx = ElementCount::getScalable(Decompose.second);
+  }
+
+  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(ContainerSubVecVT);
   bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                          SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                          SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+  bool AlignedToVecReg = !IsSubVecPartReg;
+  if (SubVecVT.isFixedLengthVector())
+    AlignedToVecReg &= SubVecVT.getSizeInBits() ==
+                       ContainerSubVecVT.getSizeInBits().getKnownMinValue() *
+                           (*VLen / RISCV::RVVBitsPerBlock);
 
   // 1. If the Idx has been completely eliminated and this subvector's size is
   // a vector register or a multiple thereof, or the surrounding elements are
   // undef, then this is a subvector insert which naturally aligns to a vector
   // register. These can easily be handled using subregister manipulation.
-  // 2. If the subvector is smaller than a vector register, then the insertion
-  // must preserve the undisturbed elements of the register. We do this by
-  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
-  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
-  // subvector within the vector register, and an INSERT_SUBVECTOR of that
+  // 2. If the subvector isn't exactly aligned to a vector register group, then
+  // the insertion must preserve the undisturbed elements of the register. We do
+  // this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector
+  // type (which resolves to a subregister copy), performing a VSLIDEUP to place
+  // the subvector within the vector register, and an INSERT_SUBVECTOR of that
   // LMUL=1 type back into the larger vector (resolving to another subregister
   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
   // to avoid allocating a large register group to hold our subvector.
-  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
+  if (RemIdx.isZero() && (AlignedToVecReg || Vec.isUndef())) {
+    if (SubVecVT.isFixedLengthVector()) {
+      // We may get NoSubRegister if inserting at index 0 and the subvec
+      // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
+      if (SubRegIdx == RISCV::NoSubRegister) {
+        assert(OrigIdx == 0);
+        return Op;
+      }
+
+      SDValue Insert =
+          DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
+      if (VecVT.isFixedLengthVector())
+        Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
+      return Insert;
+    }
     return Op;
+  }
 
   // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
   // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
   // (in our case undisturbed). This means we can set up a subvector insertion
   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
   // size of the subvector.
-  MVT InterSubVT = VecVT;
+  MVT InterSubVT = ContainerVecVT;
   SDValue AlignedExtract = Vec;
-  unsigned AlignedIdx = OrigIdx - RemIdx;
-  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
-    InterSubVT = getLMUL1VT(VecVT);
+  unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
+  if (SubVecVT.isFixedLengthVector())
+    AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
+  if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
+    InterSubVT = getLMUL1VT(ContainerVecVT);
     // Extract a subvector equal to the nearest full vector register type. This
     // should resolve to a EXTRACT_SUBREG instruction.
     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
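To make the fixed-length index arithmetic in this hunk concrete, here is a hedged worked example with assumed types and an assumed VLEN (none of these values are taken from the patch or its tests), computed with plain integers rather than the LLVM helpers.

#include <cassert>

int main() {
  // Assumed scenario: VLEN=128, so vscale = 128 / 64 = 2 (RVVBitsPerBlock=64).
  // Vec = v8i32 (container nxv4i32, LMUL=2), SubVec = v4i32 (container
  // nxv2i32, LMUL=1), inserted at OrigIdx = 4.
  const unsigned VLen = 128, Vscale = VLen / 64, OrigIdx = 4;

  // The decomposition is queried with the scaled-down (scalable) index.
  const unsigned ScalableIdx = OrigIdx / Vscale; // 2: start of the second
                                                 // LMUL=1 register of the group
  // For this scenario the decomposition leaves no remainder in that register.
  const unsigned DecomposeRem = 0;

  // RemIdx is rebuilt in fixed elements, adding back the sub-vscale part.
  const unsigned RemIdx = DecomposeRem * Vscale + OrigIdx % Vscale;
  assert(ScalableIdx == 2 && RemIdx == 0);

  // RemIdx == 0 and the 128-bit subvector exactly fills one vector register,
  // so this insert is emitted as a single subregister insert (no vslideup).
  return 0;
}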
@@ -9743,25 +9810,23 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                        DAG.getUNDEF(InterSubVT), SubVec,
                        DAG.getVectorIdxConstant(0, DL));
 
-  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
+  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
 
-  ElementCount EndIndex =
-      ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
-  VL = computeVLMax(SubVecVT, DL, DAG);
+  ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
+  VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
 
   // Use tail agnostic policy if we're inserting over InterSubVT's tail.
   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
-  if (EndIndex == InterSubVT.getVectorElementCount())
+  if (isKnownEQ(EndIndex, InterSubVT.getVectorElementCount(), Subtarget))
     Policy = RISCVII::TAIL_AGNOSTIC;
 
   // If we're inserting into the lowest elements, use a tail undisturbed
   // vmv.v.v.
-  if (RemIdx == 0) {
+  if (RemIdx.isZero()) {
     SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
                          SubVec, VL);
   } else {
-    SDValue SlideupAmt =
-        DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
+    SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
 
     // Construct the vector length corresponding to RemIdx + length(SubVecVT).
     VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
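For the slide path, a hedged worked example of how the VL and tail policy fall out (again with assumed types and an assumed VLEN, not values from the patch's tests): inserting a v2i32 at index 2 of a v8i32 with VLEN=128 gives a fixed RemIdx of 2, so the slide amount is 2, the VL is 2 + 2 = 4, and because that reaches the end of the 4-element LMUL=1 register the tail policy can be agnostic.

#include <cassert>

int main() {
  // Assumed scenario: VLEN=128 (vscale 2), Vec = v8i32 (container nxv4i32),
  // SubVec = v2i32 (2 elements), inserted at OrigIdx = 2, giving RemIdx = 2 in
  // fixed elements (see the decomposition sketch above).
  const unsigned Vscale = 128 / 64, RemIdx = 2, SubVecElts = 2;

  // The slide amount is RemIdx and the VL covers the slid-in elements.
  const unsigned SlideupAmt = RemIdx;
  const unsigned VL = SlideupAmt + SubVecElts; // 4

  // InterSubVT is the LMUL=1 type nxv2i32, i.e. 2 * vscale = 4 elements at
  // VLEN=128, so the insert writes through its tail: the policy can be
  // tail agnostic.
  const unsigned InterSubElts = 2 * Vscale;
  assert(VL == 4 && VL == InterSubElts);
  return 0;
}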
@@ -9772,10 +9837,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
 
   // If required, insert this subvector back into the correct vector register.
   // This should resolve to an INSERT_SUBREG instruction.
-  if (VecVT.bitsGT(InterSubVT))
-    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
+  if (ContainerVecVT.bitsGT(InterSubVT))
+    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
                          DAG.getVectorIdxConstant(AlignedIdx, DL));
 
+  if (VecVT.isFixedLengthVector())
+    SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+
   // We might have bitcast from a mask type: cast back to the original type if
   // required.
   return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
