Skip to content

Commit f565b79

Browse files
authored
[RISCV] Handle fixed length vectors with exact VLEN in lowerINSERT_SUBVECTOR (#84107)
This is the insert_subvector equivalent to #79949, where we can avoid sliding up by the full LMUL amount if we know the exact subregister the subvector will be inserted into.

This mirrors the lowerEXTRACT_SUBVECTOR changes in that we handle this in two parts:

- We handle fixed length subvector types by converting the subvector to a scalable vector. But unlike EXTRACT_SUBVECTOR, we may also need to convert the vector being inserted into.

- Whenever we don't need a vslideup because either the subvector fits exactly into a vector register group *or* the vector is undef, we need to emit an insert_subreg ourselves because RISCVISelDAGToDAG::Select doesn't correctly handle fixed length subvectors yet: see d7a28f7.

A subvector exactly fits into a vector register group if its size is a known multiple of the size of a vector register, and this adds a new overload for TypeSize::isKnownMultipleOf for scalable to scalable comparisons to help reason about this.

I've left RISCVISelDAGToDAG::Select untouched for now (minus relaxing an invariant), so that the insert_subvector and extract_subvector code paths are the same. We should teach it to properly handle fixed length subvectors in a follow-up patch, so that the "exact subregister" logic is handled in one place instead of being spread across both RISCVISelDAGToDAG.cpp and RISCVISelLowering.cpp.
1 parent a754ce0 commit f565b79

File tree

6 files changed

+351
-182
lines changed

6 files changed

+351
-182
lines changed

llvm/include/llvm/Support/TypeSize.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,18 @@ template <typename LeafTy, typename ValueTy> class FixedOrScalableQuantity {
181181
return getKnownMinValue() % RHS == 0;
182182
}
183183

184+
/// Returns whether or not the callee is known to be a multiple of RHS, where RHS may itself be fixed or scalable. NOTE(review): assumes RHS has a non-zero known minimum value, since it is used as the modulo divisor below.
185+
constexpr bool isKnownMultipleOf(const FixedOrScalableQuantity &RHS) const {
186+
// x % y == 0 => x % y == 0 (fixed callee, fixed RHS)
187+
// x % y == 0 => (vscale * x) % y == 0 (scalable callee, fixed RHS)
188+
// x % y == 0 => (vscale * x) % (vscale * y) == 0 (scalable callee, scalable RHS)
189+
// but
190+
// x % y == 0 !=> x % (vscale * y) == 0 (fixed callee, scalable RHS)
191+
if (!isScalable() && RHS.isScalable())
192+
return false; // Fixed callee vs. scalable RHS: never "known" to be a multiple.
193+
return getKnownMinValue() % RHS.getKnownMinValue() == 0; // Remaining cases reduce to the known-minimum values.
194+
}
195+
184196
// Return the minimum value with the assumption that the count is exact.
185197
// Use in places where a scalable count doesn't make sense (e.g. non-vector
186198
// types, or vectors in backends which don't support scalable vectors).

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2099,8 +2099,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
20992099
MVT SubVecContainerVT = SubVecVT;
21002100
// Establish the correct scalable-vector types for any fixed-length type.
21012101
if (SubVecVT.isFixedLengthVector()) {
2102-
assert(Idx == 0 && V.isUndef());
21032102
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2103+
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2104+
[[maybe_unused]] bool ExactlyVecRegSized =
2105+
Subtarget->expandVScale(SubVecVT.getSizeInBits())
2106+
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2107+
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2108+
.getKnownMinValue()));
2109+
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
21042110
}
21052111
MVT ContainerVT = VT;
21062112
if (VT.isFixedLengthVector())

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 89 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9816,12 +9816,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
98169816
}
98179817
}
98189818

9819-
// If the subvector vector is a fixed-length type, we cannot use subregister
9820-
// manipulation to simplify the codegen; we don't know which register of a
9821-
// LMUL group contains the specific subvector as we only know the minimum
9822-
// register size. Therefore we must slide the vector group up the full
9823-
// amount.
9824-
if (SubVecVT.isFixedLengthVector()) {
9819+
// If the subvector is a fixed-length type and we don't know VLEN
9820+
// exactly, we cannot use subregister manipulation to simplify the codegen; we
9821+
// don't know which register of a LMUL group contains the specific subvector
9822+
// as we only know the minimum register size. Therefore we must slide the
9823+
// vector group up the full amount.
9824+
const auto VLen = Subtarget.getRealVLen();
9825+
if (SubVecVT.isFixedLengthVector() && !VLen) {
98259826
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
98269827
return Op;
98279828
MVT ContainerVT = VecVT;
@@ -9869,41 +9870,90 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
98699870
return DAG.getBitcast(Op.getValueType(), SubVec);
98709871
}
98719872

9872-
unsigned SubRegIdx, RemIdx;
9873-
std::tie(SubRegIdx, RemIdx) =
9874-
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9875-
VecVT, SubVecVT, OrigIdx, TRI);
9873+
MVT ContainerVecVT = VecVT;
9874+
if (VecVT.isFixedLengthVector()) {
9875+
ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9876+
Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9877+
}
9878+
9879+
MVT ContainerSubVecVT = SubVecVT;
9880+
if (SubVecVT.isFixedLengthVector()) {
9881+
ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9882+
SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9883+
}
9884+
9885+
unsigned SubRegIdx;
9886+
ElementCount RemIdx;
9887+
// insert_subvector scales the index by vscale if the subvector is scalable,
9888+
// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9889+
// we have a fixed length subvector, we need to adjust the index by 1/vscale.
9890+
if (SubVecVT.isFixedLengthVector()) {
9891+
assert(VLen);
9892+
unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9893+
auto Decompose =
9894+
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9895+
ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9896+
SubRegIdx = Decompose.first;
9897+
RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9898+
(OrigIdx % Vscale));
9899+
} else {
9900+
auto Decompose =
9901+
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9902+
ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9903+
SubRegIdx = Decompose.first;
9904+
RemIdx = ElementCount::getScalable(Decompose.second);
9905+
}
98769906

9877-
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9878-
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9879-
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9880-
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9907+
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9908+
assert(isPowerOf2_64(
9909+
Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9910+
bool ExactlyVecRegSized =
9911+
Subtarget.expandVScale(SubVecVT.getSizeInBits())
9912+
.isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
98819913

98829914
// 1. If the Idx has been completely eliminated and this subvector's size is
98839915
// a vector register or a multiple thereof, or the surrounding elements are
98849916
// undef, then this is a subvector insert which naturally aligns to a vector
98859917
// register. These can easily be handled using subregister manipulation.
9886-
// 2. If the subvector is smaller than a vector register, then the insertion
9887-
// must preserve the undisturbed elements of the register. We do this by
9888-
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9889-
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
9890-
// subvector within the vector register, and an INSERT_SUBVECTOR of that
9891-
// LMUL=1 type back into the larger vector (resolving to another subregister
9892-
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9893-
// to avoid allocating a large register group to hold our subvector.
9894-
if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9918+
// 2. If the subvector isn't an exact multiple of a valid register group size,
9919+
// then the insertion must preserve the undisturbed elements of the register.
9920+
// We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9921+
// vector type (which resolves to a subregister copy), performing a VSLIDEUP
9922+
// to place the subvector within the vector register, and an INSERT_SUBVECTOR
9923+
// of that LMUL=1 type back into the larger vector (resolving to another
9924+
// subregister operation). See below for how our VSLIDEUP works. We go via a
9925+
// LMUL=1 type to avoid allocating a large register group to hold our
9926+
// subvector.
9927+
if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9928+
if (SubVecVT.isFixedLengthVector()) {
9929+
// We may get NoSubRegister if inserting at index 0 and the subvec
9930+
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
9931+
if (SubRegIdx == RISCV::NoSubRegister) {
9932+
assert(OrigIdx == 0);
9933+
return Op;
9934+
}
9935+
9936+
SDValue Insert =
9937+
DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
9938+
if (VecVT.isFixedLengthVector())
9939+
Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
9940+
return Insert;
9941+
}
98959942
return Op;
9943+
}
98969944

98979945
// VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
98989946
// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
98999947
// (in our case undisturbed). This means we can set up a subvector insertion
99009948
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
99019949
// size of the subvector.
9902-
MVT InterSubVT = VecVT;
9950+
MVT InterSubVT = ContainerVecVT;
99039951
SDValue AlignedExtract = Vec;
9904-
unsigned AlignedIdx = OrigIdx - RemIdx;
9905-
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9906-
InterSubVT = getLMUL1VT(VecVT);
9952+
unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
9953+
if (SubVecVT.isFixedLengthVector())
9954+
AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
9955+
if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
9956+
InterSubVT = getLMUL1VT(ContainerVecVT);
99079957
// Extract a subvector equal to the nearest full vector register type. This
99089958
// should resolve to a EXTRACT_SUBREG instruction.
99099959
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9914,25 +9964,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
99149964
DAG.getUNDEF(InterSubVT), SubVec,
99159965
DAG.getVectorIdxConstant(0, DL));
99169966

9917-
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9967+
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
99189968

9919-
ElementCount EndIndex =
9920-
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9921-
VL = computeVLMax(SubVecVT, DL, DAG);
9969+
ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
9970+
VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
99229971

99239972
// Use tail agnostic policy if we're inserting over InterSubVT's tail.
99249973
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9925-
if (EndIndex == InterSubVT.getVectorElementCount())
9974+
if (Subtarget.expandVScale(EndIndex) ==
9975+
Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
99269976
Policy = RISCVII::TAIL_AGNOSTIC;
99279977

99289978
// If we're inserting into the lowest elements, use a tail undisturbed
99299979
// vmv.v.v.
9930-
if (RemIdx == 0) {
9980+
if (RemIdx.isZero()) {
99319981
SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
99329982
SubVec, VL);
99339983
} else {
9934-
SDValue SlideupAmt =
9935-
DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9984+
SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
99369985

99379986
// Construct the vector length corresponding to RemIdx + length(SubVecVT).
99389987
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
@@ -9943,10 +9992,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
99439992

99449993
// If required, insert this subvector back into the correct vector register.
99459994
// This should resolve to an INSERT_SUBREG instruction.
9946-
if (VecVT.bitsGT(InterSubVT))
9947-
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9995+
if (ContainerVecVT.bitsGT(InterSubVT))
9996+
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
99489997
DAG.getVectorIdxConstant(AlignedIdx, DL));
99499998

9999+
if (VecVT.isFixedLengthVector())
10000+
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10001+
995010002
// We might have bitcast from a mask type: cast back to the original type if
995110003
// required.
995210004
return DAG.getBitcast(Op.getSimpleValueType(), SubVec);

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,17 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
200200
return Min;
201201
}
202202

203+
/// If the ElementCount or TypeSize \p X is scalable and VScale (VLEN) is
204+
/// exactly known, returns \p X converted to a fixed quantity. Otherwise
205+
/// returns \p X unmodified. The fixed value is the known minimum value of \p X multiplied by VLEN / RISCV::RVVBitsPerBlock.
206+
template <typename Quantity> Quantity expandVScale(Quantity X) const {
207+
if (auto VLen = getRealVLen(); VLen && X.isScalable()) { // Only rewrite when getRealVLen() yields a value and X is scalable.
208+
const unsigned VScale = *VLen / RISCV::RVVBitsPerBlock; // vscale derived from the exact VLEN.
209+
X = Quantity::getFixed(X.getKnownMinValue() * VScale); // Replace the scalable quantity with its fixed equivalent.
210+
}
211+
return X;
212+
}
213+
203214
RISCVABI::ABI getTargetABI() const { return TargetABI; }
204215
bool isSoftFPABI() const {
205216
return TargetABI == RISCVABI::ABI_LP64 ||

0 commit comments

Comments
 (0)