Skip to content

Commit 1a1a661

Browse files
committed
Address review comments:
* Rework the subvec size check by introducing a isKnownMultipleOf helper function on TypeSize/ElementCount. We can then reason that the subvector is exactly vector register sized if it is a multiple of the vector register size. * Rename variables to clarify we are checking the subvec size * Update wording of comment
1 parent 0bb35d8 commit 1a1a661

File tree

4 files changed

+46
-41
lines changed

4 files changed

+46
-41
lines changed

llvm/include/llvm/Support/TypeSize.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,18 @@ template <typename LeafTy, typename ValueTy> class FixedOrScalableQuantity {
181181
return getKnownMinValue() % RHS == 0;
182182
}
183183

184+
/// Returns whether or not the callee is known to be a multiple of RHS.
185+
constexpr bool isKnownMultipleOf(const FixedOrScalableQuantity &RHS) const {
186+
// x % y == 0 => x % y == 0
187+
// x % y == 0 => (vscale * x) % y == 0
188+
// x % y == 0 => (vscale * x) % (vscale * y) == 0
189+
// but
190+
// x % y == 0 !=> x % (vscale * y) == 0
191+
if (!isScalable() && RHS.isScalable())
192+
return false;
193+
return getKnownMinValue() % RHS.getKnownMinValue() == 0;
194+
}
195+
184196
// Return the minimum value with the assumption that the count is exact.
185197
// Use in places where a scalable count doesn't make sense (e.g. non-vector
186198
// types, or vectors in backends which don't support scalable vectors).

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2100,13 +2100,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
21002100
// Establish the correct scalable-vector types for any fixed-length type.
21012101
if (SubVecVT.isFixedLengthVector()) {
21022102
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2103-
bool AlignedToVecReg = false;
2104-
if (auto VLen = Subtarget->getRealVLen();
2105-
VLen && SubVecVT.getSizeInBits() ==
2106-
SubVecContainerVT.getSizeInBits().getKnownMinValue() *
2107-
(*VLen / RISCV::RVVBitsPerBlock))
2108-
AlignedToVecReg = true;
2109-
assert(Idx == 0 && (AlignedToVecReg || V.isUndef()));
2103+
#ifndef NDEBUG
2104+
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2105+
bool ExactlyVecRegSized =
2106+
Subtarget->expandVScale(SubVecVT.getSizeInBits())
2107+
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2108+
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2109+
#endif
21102110
}
21112111
MVT ContainerVT = VT;
21122112
if (VT.isFixedLengthVector())

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 16 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9723,21 +9723,6 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
97239723
Vec, Mask, VL, DL, DAG, Subtarget);
97249724
}
97259725

9726-
/// Returns true if \p LHS is known to be equal to \p RHS, taking into account
9727-
/// if VLEN is exactly known by \p Subtarget and thus vscale when handling
9728-
/// scalable quantities.
9729-
static bool isKnownEQ(ElementCount LHS, ElementCount RHS,
9730-
const RISCVSubtarget &Subtarget) {
9731-
if (auto VLen = Subtarget.getRealVLen()) {
9732-
const unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9733-
if (LHS.isScalable())
9734-
LHS = ElementCount::getFixed(LHS.getKnownMinValue() * Vscale);
9735-
if (RHS.isScalable())
9736-
RHS = ElementCount::getFixed(RHS.getKnownMinValue() * Vscale);
9737-
}
9738-
return LHS == RHS;
9739-
}
9740-
97419726
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
97429727
SelectionDAG &DAG) const {
97439728
SDValue Vec = Op.getOperand(0);
@@ -9875,29 +9860,25 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
98759860
RemIdx = ElementCount::getScalable(Decompose.second);
98769861
}
98779862

9878-
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(ContainerSubVecVT);
9879-
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9880-
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9881-
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9882-
bool AlignedToVecReg = !IsSubVecPartReg;
9883-
if (SubVecVT.isFixedLengthVector())
9884-
AlignedToVecReg &= SubVecVT.getSizeInBits() ==
9885-
ContainerSubVecVT.getSizeInBits().getKnownMinValue() *
9886-
(*VLen / RISCV::RVVBitsPerBlock);
9863+
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9864+
bool ExactlyVecRegSized =
9865+
Subtarget.expandVScale(SubVecVT.getSizeInBits())
9866+
.isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
98879867

98889868
// 1. If the Idx has been completely eliminated and this subvector's size is
98899869
// a vector register or a multiple thereof, or the surrounding elements are
98909870
// undef, then this is a subvector insert which naturally aligns to a vector
98919871
// register. These can easily be handled using subregister manipulation.
9892-
// 2. If the subvector isn't exactly aligned to a vector register group, then
9893-
// the insertion must preserve the undisturbed elements of the register. We do
9894-
// this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector
9895-
// type (which resolves to a subregister copy), performing a VSLIDEUP to place
9896-
// the subvector within the vector register, and an INSERT_SUBVECTOR of that
9897-
// LMUL=1 type back into the larger vector (resolving to another subregister
9898-
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9899-
// to avoid allocating a large register group to hold our subvector.
9900-
if (RemIdx.isZero() && (AlignedToVecReg || Vec.isUndef())) {
9872+
// 2. If the subvector isn't an exact multiple of a valid register group size,
9873+
// then the insertion must preserve the undisturbed elements of the register.
9874+
// We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9875+
// vector type (which resolves to a subregister copy), performing a VSLIDEUP
9876+
// to place the subvector within the vector register, and an INSERT_SUBVECTOR
9877+
// of that LMUL=1 type back into the larger vector (resolving to another
9878+
// subregister operation). See below for how our VSLIDEUP works. We go via a
9879+
// LMUL=1 type to avoid allocating a large register group to hold our
9880+
// subvector.
9881+
if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
99019882
if (SubVecVT.isFixedLengthVector()) {
99029883
// We may get NoSubRegister if inserting at index 0 and the subvec
99039884
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
@@ -9944,7 +9925,8 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
99449925

99459926
// Use tail agnostic policy if we're inserting over InterSubVT's tail.
99469927
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9947-
if (isKnownEQ(EndIndex, InterSubVT.getVectorElementCount(), Subtarget))
9928+
if (Subtarget.expandVScale(EndIndex) ==
9929+
Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
99489930
Policy = RISCVII::TAIL_AGNOSTIC;
99499931

99509932
// If we're inserting into the lowest elements, use a tail undisturbed

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,17 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
200200
return Min;
201201
}
202202

203+
/// If the ElementCount or TypeSize \p X is scalable and VScale (VLEN) is
204+
/// exactly known, returns \p X converted to a fixed quantity. Otherwise
205+
/// returns \p X unmodified.
206+
template <typename Quantity> Quantity expandVScale(Quantity X) const {
207+
if (auto VLen = getRealVLen(); VLen && X.isScalable()) {
208+
const unsigned VScale = *VLen / RISCV::RVVBitsPerBlock;
209+
X = Quantity::getFixed(X.getKnownMinValue() * VScale);
210+
}
211+
return X;
212+
}
213+
203214
RISCVABI::ABI getTargetABI() const { return TargetABI; }
204215
bool isSoftFPABI() const {
205216
return TargetABI == RISCVABI::ABI_LP64 ||

0 commit comments

Comments
 (0)