-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Lower non-power-of-2 vector to nearest power-of-2 vector leng… #106092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[RISCV] Lower non-power-of-2 vector to nearest power-of-2 vector leng… #106092
Conversation
…th with VP intrinsic

It's still an early stage for this patch, but I would like to kick this out to demonstrate the possibility of this approach. It is mostly nullified by llvm#104689, but it can get some improvement after adding more patterns, which will be added later.

The idea of this patch is to lower the non-power-of-2 vector to the nearest power-of-2 vector length with VP intrinsics, and to add vector insert and extract operations for converting the type from/to the original vector type.

Example:

```
define void @vls3i8(ptr align 8 %array) {
entry:
  %1 = load <3 x i8>, ptr %array, align 1
  %2 = add<3 x i8> %1, %1
  store <3 x i8> %2, ptr %array, align 1
  ret void
}
```

```
define void @vls3i8(ptr align 8 %array) #0 {
entry:
  %0 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %array, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 3)
  %1 = call <3 x i8> @llvm.vector.extract.v3i8.nxv4i8(<vscale x 4 x i8> %0, i64 0)
  %2 = call <vscale x 4 x i8> @llvm.vector.insert.nxv4i8.v3i8(<vscale x 4 x i8> poison, <3 x i8> %1, i64 0)
  %3 = call <vscale x 4 x i8> @llvm.vector.insert.nxv4i8.v3i8(<vscale x 4 x i8> poison, <3 x i8> %1, i64 0)
  %4 = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %2, <vscale x 4 x i8> %3, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 3)
  %5 = call <3 x i8> @llvm.vector.extract.v3i8.nxv4i8(<vscale x 4 x i8> %4, i64 0)
  %6 = call <vscale x 4 x i8> @llvm.vector.insert.nxv4i8.v3i8(<vscale x 4 x i8> poison, <3 x i8> %5, i64 0)
  call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> %6, ptr %array, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 3)
  ret void
}
```
You can test this locally with the following command:
git-clang-format --diff 1193f7d6487d2d94009f8d8d27da3907136482b9 a7a504426c28ab95b700f34e6bf5c381574bac25 --extensions cpp,h -- llvm/lib/Target/RISCV/RISCVLegalizeNonPowerOf2Vector.cpp llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp llvm/lib/Target/RISCV/RISCV.h llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ca6b600ece..343bac491d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -22,10 +22,10 @@
#include "LegalizeTypes.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
@@ -5688,16 +5688,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
unsigned InNumElts = InVT.getVectorMinNumElements();
unsigned VTNumElts = VT.getVectorMinNumElements();
- if (InVT.isScalableVector())
- {
- unsigned EltSize = InVT.getScalarType ().getFixedSizeInBits ();
+ if (InVT.isScalableVector()) {
+ unsigned EltSize = InVT.getScalarType().getFixedSizeInBits();
- unsigned MinVScale = getVScaleRange(&DAG.getMachineFunction ().getFunction(), 64)
- .getUnsignedMin().getZExtValue ();
- InNumElts = InNumElts * MinVScale;
+ unsigned MinVScale =
+ getVScaleRange(&DAG.getMachineFunction().getFunction(), 64)
+ .getUnsignedMin()
+ .getZExtValue();
+ InNumElts = InNumElts * MinVScale;
}
-
assert(IdxVal % VTNumElts == 0 &&
"Expected Idx to be a multiple of subvector minimum vector length");
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
diff --git a/llvm/lib/Target/RISCV/RISCVLegalizeNonPowerOf2Vector.cpp b/llvm/lib/Target/RISCV/RISCVLegalizeNonPowerOf2Vector.cpp
index 98bca6b96f..d0216fcde1 100644
--- a/llvm/lib/Target/RISCV/RISCVLegalizeNonPowerOf2Vector.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLegalizeNonPowerOf2Vector.cpp
@@ -64,13 +64,13 @@ RISCVLegalizeNonPowerOf2Vector::getContainerForFixedLengthVector(
FixedVectorType *FixedVecTy) {
// TODO: Consider vscale_range to pick a better/smaller type.
//
- uint64_t NumElts =
- std::max<uint64_t>((NextPowerOf2 (FixedVecTy->getNumElements()) / MinVScale), 1);
+ uint64_t NumElts = std::max<uint64_t>(
+ (NextPowerOf2(FixedVecTy->getNumElements()) / MinVScale), 1);
Type *ElementType = FixedVecTy->getElementType();
if (ElementType->isIntegerTy(1))
- NumElts = std::max(NumElts, 8UL);
+ NumElts = std::max(NumElts, 8UL);
return ScalableVectorType::get(ElementType, NumElts);
}
@@ -114,17 +114,16 @@ bool RISCVLegalizeNonPowerOf2Vector::runOnFunction(Function &F) {
auto Attr = F.getFnAttribute(Attribute::VScaleRange);
if (Attr.isValid()) {
- MinVScale = Attr.getVScaleRangeMin ();
+ MinVScale = Attr.getVScaleRangeMin();
} else {
unsigned MinVLen = ST->getRealMinVLen();
if (MinVLen < RISCV::RVVBitsPerBlock)
return false;
MinVScale = MinVLen / RISCV::RVVBitsPerBlock;
AttrBuilder AB(F.getContext());
- AB.addVScaleRangeAttr(MinVScale,
- std::optional<unsigned>());
+ AB.addVScaleRangeAttr(MinVScale, std::optional<unsigned>());
- F.addFnAttr (AB.getAttribute(Attribute::VScaleRange));
+ F.addFnAttr(AB.getAttribute(Attribute::VScaleRange));
}
bool Modified = false;
|
The current version appears to be hitting some pretty terrible scalarization, but at a high level this doesn't sound unreasonable. If we do take this approach for 2^N-1 vectors, I'd want us to be able to move our entire fixed vector lowering to this strategy. Consistency here is fairly important - for confidence in correctness if nothing else. |
…th with VP intrinsic
It's still an early stage for this patch, but I would like to kick this out to demonstrate the possibility of this approach,
although it's mostly nullified by #104689; it can still get some improvement after adding more patterns, which will be added later.
The idea of this patch is to lower the non-power-of-2 vector to the nearest power-of-2 vector length with VP intrinsics, and to add vector insert and extract operations for converting the type from/to the original vector type.
Example: