Skip to content

[RISCV] Handle fixed length vectors with exact VLEN in lowerINSERT_SUBVECTOR #84107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/include/llvm/Support/TypeSize.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,18 @@ template <typename LeafTy, typename ValueTy> class FixedOrScalableQuantity {
return getKnownMinValue() % RHS == 0;
}

/// Returns whether or not the callee is known to be a multiple of RHS.
constexpr bool isKnownMultipleOf(const FixedOrScalableQuantity &RHS) const {
  // x % y == 0 => x % y == 0
  // x % y == 0 => (vscale * x) % y == 0
  // x % y == 0 => (vscale * x) % (vscale * y) == 0
  // but
  // x % y == 0 !=> x % (vscale * y) == 0
  if (!isScalable() && RHS.isScalable())
    return false;
  // Guard against division by zero (UB, and ill-formed in constexpr
  // evaluation): only a zero quantity is a multiple of zero.
  if (RHS.getKnownMinValue() == 0)
    return getKnownMinValue() == 0;
  return getKnownMinValue() % RHS.getKnownMinValue() == 0;
}

// Return the minimum value with the assumption that the count is exact.
// Use in places where a scalable count doesn't make sense (e.g. non-vector
// types, or vectors in backends which don't support scalable vectors).
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2099,8 +2099,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT SubVecContainerVT = SubVecVT;
// Establish the correct scalable-vector types for any fixed-length type.
if (SubVecVT.isFixedLengthVector()) {
assert(Idx == 0 && V.isUndef());
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
[[maybe_unused]] bool ExactlyVecRegSized =
Subtarget->expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
.getKnownMinValue()));
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
}
MVT ContainerVT = VT;
if (VT.isFixedLengthVector())
Expand Down
126 changes: 89 additions & 37 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9772,12 +9772,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
}
}

// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group up the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
// If the subvector vector is a fixed-length type and we don't know VLEN
// exactly, we cannot use subregister manipulation to simplify the codegen; we
// don't know which register of a LMUL group contains the specific subvector
// as we only know the minimum register size. Therefore we must slide the
// vector group up the full amount.
const auto VLen = Subtarget.getRealVLen();
if (SubVecVT.isFixedLengthVector() && !VLen) {
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
Expand Down Expand Up @@ -9825,41 +9826,90 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
return DAG.getBitcast(Op.getValueType(), SubVec);
}

unsigned SubRegIdx, RemIdx;
std::tie(SubRegIdx, RemIdx) =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
VecVT, SubVecVT, OrigIdx, TRI);
MVT ContainerVecVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVecVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
}

MVT ContainerSubVecVT = SubVecVT;
if (SubVecVT.isFixedLengthVector()) {
ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
}

unsigned SubRegIdx;
ElementCount RemIdx;
// insert_subvector scales the index by vscale if the subvector is scalable,
// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
// we have a fixed length subvector, we need to adjust the index by 1/vscale.
if (SubVecVT.isFixedLengthVector()) {
assert(VLen);
unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
auto Decompose =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
SubRegIdx = Decompose.first;
RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
(OrigIdx % Vscale));
} else {
auto Decompose =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
SubRegIdx = Decompose.first;
RemIdx = ElementCount::getScalable(Decompose.second);
}

RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
assert(isPowerOf2_64(
Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
bool ExactlyVecRegSized =
Subtarget.expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));

// 1. If the Idx has been completely eliminated and this subvector's size is
// a vector register or a multiple thereof, or the surrounding elements are
// undef, then this is a subvector insert which naturally aligns to a vector
// register. These can easily be handled using subregister manipulation.
// 2. If the subvector is smaller than a vector register, then the insertion
// must preserve the undisturbed elements of the register. We do this by
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
// subvector within the vector register, and an INSERT_SUBVECTOR of that
// LMUL=1 type back into the larger vector (resolving to another subregister
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
// to avoid allocating a large register group to hold our subvector.
if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
// 2. If the subvector isn't an exact multiple of a valid register group size,
// then the insertion must preserve the undisturbed elements of the register.
// We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
// vector type (which resolves to a subregister copy), performing a VSLIDEUP
// to place the subvector within the vector register, and an INSERT_SUBVECTOR
// of that LMUL=1 type back into the larger vector (resolving to another
// subregister operation). See below for how our VSLIDEUP works. We go via a
// LMUL=1 type to avoid allocating a large register group to hold our
// subvector.
if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
if (SubVecVT.isFixedLengthVector()) {
// We may get NoSubRegister if inserting at index 0 and the subvec
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
if (SubRegIdx == RISCV::NoSubRegister) {
assert(OrigIdx == 0);
return Op;
}

SDValue Insert =
DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
if (VecVT.isFixedLengthVector())
Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
return Insert;
}
return Op;
}

// VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
// (in our case undisturbed). This means we can set up a subvector insertion
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
// size of the subvector.
MVT InterSubVT = VecVT;
MVT InterSubVT = ContainerVecVT;
SDValue AlignedExtract = Vec;
unsigned AlignedIdx = OrigIdx - RemIdx;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
InterSubVT = getLMUL1VT(VecVT);
unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
if (SubVecVT.isFixedLengthVector())
AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
InterSubVT = getLMUL1VT(ContainerVecVT);
// Extract a subvector equal to the nearest full vector register type. This
// should resolve to a EXTRACT_SUBREG instruction.
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
Expand All @@ -9870,25 +9920,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
DAG.getUNDEF(InterSubVT), SubVec,
DAG.getVectorIdxConstant(0, DL));

auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);

ElementCount EndIndex =
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
VL = computeVLMax(SubVecVT, DL, DAG);
ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());

// Use tail agnostic policy if we're inserting over InterSubVT's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
if (EndIndex == InterSubVT.getVectorElementCount())
if (Subtarget.expandVScale(EndIndex) ==
Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
Policy = RISCVII::TAIL_AGNOSTIC;

// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
if (RemIdx == 0) {
if (RemIdx.isZero()) {
SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
SubVec, VL);
} else {
SDValue SlideupAmt =
DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);

// Construct the vector length corresponding to RemIdx + length(SubVecVT).
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
Expand All @@ -9899,10 +9948,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,

// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (VecVT.bitsGT(InterSubVT))
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
if (ContainerVecVT.bitsGT(InterSubVT))
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
DAG.getVectorIdxConstant(AlignedIdx, DL));

if (VecVT.isFixedLengthVector())
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);

// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,17 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
return Min;
}

/// If the ElementCount or TypeSize \p X is scalable and VScale (VLEN) is
/// exactly known, returns \p X converted to a fixed quantity. Otherwise
/// returns \p X unmodified.
template <typename Quantity> Quantity expandVScale(Quantity X) const {
  // Without an exact VLEN, or for an already-fixed quantity, there is
  // nothing to expand.
  const auto VLen = getRealVLen();
  if (!VLen || !X.isScalable())
    return X;
  const unsigned VScale = *VLen / RISCV::RVVBitsPerBlock;
  return Quantity::getFixed(X.getKnownMinValue() * VScale);
}

RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isSoftFPABI() const {
return TargetABI == RISCVABI::ABI_LP64 ||
Expand Down
Loading