[SLP] Provide a universal interface for FixedVectorType::get. NFC. #96845
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Han-Kuan Chen (HanKuanChen)

Changes

SLP vectorizes a scalar type into a vector type. In the future, we will try to make SLP vectorize a vector type into a wider vector type. We add getWidenedType as a helper function. For example, SLP will turn the following code

%v0 = load i32, ptr %in0, align 4
%v1 = load i32, ptr %in1, align 4
%v2 = load i32, ptr %in2, align 4
%v3 = load i32, ptr %in3, align 4

into a load <4 x i32>; the ScalarTy is i32 and the VF is 4. In the future, SLP will turn the following code

%v0 = load <4 x i32>, ptr %in0, align 4
%v1 = load <4 x i32>, ptr %in1, align 4
%v2 = load <4 x i32>, ptr %in2, align 4
%v3 = load <4 x i32>, ptr %in3, align 4

into a load <16 x i32>; the ScalarTy is <4 x i32> and the VF is 4.

Reference: https://discourse.llvm.org/t/rfc-make-slp-vectorizer-revectorize-vector-instructions/79436

Patch is 29.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96845.diff

1 file affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 974f966d46e81..32f264535bccb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -231,6 +231,11 @@ static bool isValidElementType(Type *Ty) {
!Ty->isPPC_FP128Ty();
}
+/// \returns the vector type of ScalarTy based on vectorization factor.
+static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
+ return FixedVectorType::get(ScalarTy, VF);
+}
+
/// \returns True if the value is a constant (but not globals/constant
/// expressions).
static bool isConstant(Value *V) {
@@ -1457,8 +1462,7 @@ class BoUpSLP {
if (getUnderlyingObject(LI1->getPointerOperand()) ==
getUnderlyingObject(LI2->getPointerOperand()) &&
R.TTI->isLegalMaskedGather(
- FixedVectorType::get(LI1->getType(), NumLanes),
- LI1->getAlign()))
+ getWidenedType(LI1->getType(), NumLanes), LI1->getAlign()))
return LookAheadHeuristics::ScoreMaskedGatherCandidate;
return CheckSameEntryOrFail();
}
@@ -4059,7 +4063,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
int NumScalars = GatheredScalars.size();
if (!isValidElementType(ScalarTy))
return std::nullopt;
- auto *VecTy = FixedVectorType::get(ScalarTy, NumScalars);
+ auto *VecTy = getWidenedType(ScalarTy, NumScalars);
int NumParts = TTI->getNumberOfParts(VecTy);
if (NumParts == 0 || NumParts >= NumScalars)
NumParts = 1;
@@ -4403,7 +4407,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
}
Order.clear();
- auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
+ auto *VecTy = getWidenedType(ScalarTy, Sz);
// Check the order of pointer operands or that all pointers are the same.
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
@@ -4522,7 +4526,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
InstructionCost VecLdCost = 0;
- auto *SubVecTy = FixedVectorType::get(ScalarTy, VF);
+ auto *SubVecTy = getWidenedType(ScalarTy, VF);
for (auto [I, LS] : enumerate(States)) {
auto *LI0 = cast<LoadInst>(VL[I * VF]);
switch (LS) {
@@ -4792,8 +4796,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
}
}
if (Sz == 2 && TE.getVectorFactor() == 4 &&
- TTI->getNumberOfParts(FixedVectorType::get(
- TE.Scalars.front()->getType(), 2 * TE.getVectorFactor())) == 1)
+ TTI->getNumberOfParts(getWidenedType(TE.Scalars.front()->getType(),
+ 2 * TE.getVectorFactor())) == 1)
return std::nullopt;
if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
Sz)) {
@@ -4965,7 +4969,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
find_if(TE.Scalars, [](Value *V) { return !isConstant(V); });
if (It == TE.Scalars.begin())
return OrdersType();
- auto *Ty = FixedVectorType::get(TE.Scalars.front()->getType(), Sz);
+ auto *Ty = getWidenedType(TE.Scalars.front()->getType(), Sz);
if (It != TE.Scalars.end()) {
OrdersType Order(Sz, Sz);
unsigned Idx = std::distance(TE.Scalars.begin(), It);
@@ -5101,7 +5105,7 @@ void BoUpSLP::reorderTopToBottom() {
// to take into account their order when looking for the most used order.
if (TE->isAltShuffle()) {
VectorType *VecTy =
- FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
+ getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size());
unsigned Opcode0 = TE->getOpcode();
unsigned Opcode1 = TE->getAltOpcode();
SmallBitVector OpcodeMask(getAltInstrMask(TE->Scalars, Opcode0, Opcode1));
@@ -6018,7 +6022,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
unsigned Opcode1 = S.getAltOpcode();
SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
// If this pattern is supported by the target then consider it profitable.
- if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()),
+ if (TTI->isLegalAltInstr(getWidenedType(S.MainOp->getType(), VL.size()),
Opcode0, Opcode1, OpcodeMask))
return true;
SmallVector<ValueList> Operands;
@@ -7326,7 +7330,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const {
if (!isValidElementType(EltTy))
return 0;
- uint64_t VTSize = DL->getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
+ uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
VTSize != DL->getTypeStoreSizeInBits(T))
return 0;
@@ -7990,7 +7994,7 @@ void BoUpSLP::transformNodes() {
if (E.State != TreeEntry::Vectorize)
break;
Type *ScalarTy = E.getMainOp()->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, E.Scalars.size());
+ auto *VecTy = getWidenedType(ScalarTy, E.Scalars.size());
Align CommonAlignment = computeCommonAlignment<LoadInst>(E.Scalars);
// Check if profitable to represent consecutive load + reverse as strided
// load with stride -1.
@@ -8017,7 +8021,7 @@ void BoUpSLP::transformNodes() {
case Instruction::Store: {
Type *ScalarTy =
cast<StoreInst>(E.getMainOp())->getValueOperand()->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, E.Scalars.size());
+ auto *VecTy = getWidenedType(ScalarTy, E.Scalars.size());
Align CommonAlignment = computeCommonAlignment<StoreInst>(E.Scalars);
// Check if profitable to represent consecutive load + reverse as strided
// load with stride -1.
@@ -8087,7 +8091,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
return TTI::TCC_Free;
- auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ auto *VecTy = getWidenedType(ScalarTy, VL.size());
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL.begin(), VL.end());
// Improve gather cost for gather of loads, if we can group some of the
@@ -8185,7 +8189,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
LI->getAlign(), LI->getPointerAddressSpace(),
CostKind, TTI::OperandValueInfo(), LI);
}
- auto *LoadTy = FixedVectorType::get(VL.front()->getType(), VF);
+ auto *LoadTy = getWidenedType(VL.front()->getType(), VF);
for (const std::pair<unsigned, LoadsState> &P : VectorizedStarts) {
auto *LI = cast<LoadInst>(VL[P.first]);
Align Alignment = LI->getAlign();
@@ -8223,7 +8227,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
// TODO: improve checks if GEPs can be vectorized.
Value *Ptr0 = PointerOps.front();
Type *ScalarTy = Ptr0->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, VF);
+ auto *VecTy = getWidenedType(ScalarTy, VF);
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, PointerOps, Ptr0, Instruction::GetElementPtr,
CostKind, ScalarTy, VecTy);
@@ -8356,22 +8360,22 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
if (*ShuffleKinds[Part] != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(
MaskSlice, std::max<unsigned>(NumElts, MaskSlice.size())))
- Cost += ::getShuffleCost(TTI, *ShuffleKinds[Part],
- FixedVectorType::get(ScalarTy, NumElts),
- MaskSlice);
+ Cost +=
+ ::getShuffleCost(TTI, *ShuffleKinds[Part],
+ getWidenedType(ScalarTy, NumElts), MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
- Cost += ::getShuffleCost(TTI, *RegShuffleKind,
- FixedVectorType::get(ScalarTy, EltsPerVector),
- SubMask);
+ Cost +=
+ ::getShuffleCost(TTI, *RegShuffleKind,
+ getWidenedType(ScalarTy, EltsPerVector), SubMask);
}
for (int Idx : Indices) {
Cost += ::getShuffleCost(TTI, TTI::SK_ExtractSubvector,
- FixedVectorType::get(ScalarTy, NumElts),
+ getWidenedType(ScalarTy, NumElts),
std::nullopt, CostKind, Idx,
- FixedVectorType::get(ScalarTy, EltsPerVector));
+ getWidenedType(ScalarTy, EltsPerVector));
}
}
return Cost;
@@ -8505,9 +8509,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
if (DstSz > SrcSz)
CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
- return TTI.getCastInstrCost(CastOpcode,
- FixedVectorType::get(ScalarTy, VF),
- FixedVectorType::get(EScalarTy, VF),
+ return TTI.getCastInstrCost(CastOpcode, getWidenedType(ScalarTy, VF),
+ getWidenedType(EScalarTy, VF),
TTI::CastContextHint::None, CostKind);
}
return TTI::TCC_Free;
@@ -8562,8 +8565,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
ExtraCost += GetNodeMinBWAffectedCost(*E, E->getVectorFactor()) +
GetNodeMinBWAffectedCost(*E2, E2->getVectorFactor());
}
- V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
- V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else if (!V1 && P2.isNull()) {
// Shuffle single entry node.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8583,7 +8586,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
CommonVF = E->Scalars.size();
}
ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF);
- V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
// Not identity/broadcast? Try to see if the original vector is better.
if (!E->ReorderIndices.empty() && CommonVF == E->ReorderIndices.size() &&
CommonVF == CommonMask.size() &&
@@ -8628,10 +8631,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
CommonVF = VF;
}
ExtraCost += GetValueMinBWAffectedCost(V1);
- V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
ExtraCost += GetNodeMinBWAffectedCost(
*E2, std::min(CommonVF, E2->getVectorFactor()));
- V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else if (!V1 && V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
@@ -8657,9 +8660,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
ExtraCost += GetNodeMinBWAffectedCost(
*E1, std::min(CommonVF, E1->getVectorFactor()));
- V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
ExtraCost += GetValueMinBWAffectedCost(V2);
- V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else {
assert(V1 && V2 && "Expected both vectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -8673,17 +8676,17 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
ExtraCost +=
GetValueMinBWAffectedCost(V1) + GetValueMinBWAffectedCost(V2);
if (V1->getType() != V2->getType()) {
- V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
- V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else {
if (cast<VectorType>(V1->getType())->getElementType() != ScalarTy)
- V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
if (cast<VectorType>(V2->getType())->getElementType() != ScalarTy)
- V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
}
}
- InVectors.front() = Constant::getNullValue(
- FixedVectorType::get(ScalarTy, CommonMask.size()));
+ InVectors.front() =
+ Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
InVectors.pop_back();
return ExtraCost + BaseShuffleAnalysis::createShuffle<InstructionCost>(
@@ -8792,8 +8795,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
SameNodesEstimated = false;
if (NumParts != 1 && UniqueBases.size() != 1) {
UseVecBaseAsInput = true;
- VecBase = Constant::getNullValue(
- FixedVectorType::get(ScalarTy, CommonMask.size()));
+ VecBase =
+ Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
}
return VecBase;
}
@@ -8821,7 +8824,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
+ auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8838,7 +8841,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
+ auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -9042,7 +9045,7 @@ static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
continue;
}
}
- ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
+ ArgTys.push_back(getWidenedType(Arg->getType(), VF));
}
return ArgTys;
}
@@ -9071,9 +9074,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Type *OrigScalarTy = ScalarTy;
if (It != MinBWs.end())
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
- auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ auto *VecTy = getWidenedType(ScalarTy, VL.size());
unsigned EntryVF = E->getVectorFactor();
- auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF);
+ auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->State == TreeEntry::NeedToGather) {
@@ -9169,7 +9172,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy);
unsigned VecOpcode;
auto *UserVecTy =
- FixedVectorType::get(UserScalarTy, E->getVectorFactor());
+ getWidenedType(UserScalarTy, E->getVectorFactor());
if (BWSz > SrcBWSz)
VecOpcode = Instruction::Trunc;
else
@@ -9241,7 +9244,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
NumElts = ATy->getNumElements();
else
NumElts = AggregateTy->getStructNumElements();
- SrcVecTy = FixedVectorType::get(OrigScalarTy, NumElts);
+ SrcVecTy = getWidenedType(OrigScalarTy, NumElts);
}
if (I->hasOneUse()) {
Instruction *Ext = I->user_back();
@@ -9335,7 +9338,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// need to shift the vector.
// Do not calculate the cost if the actual size is the register size and
// we can merge this shuffle with the following SK_Select.
- auto *InsertVecTy = FixedVectorType::get(ScalarTy, InsertVecSz);
+ auto *InsertVecTy = getWidenedType(ScalarTy, InsertVecSz);
if (!IsIdentity)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
InsertVecTy, Mask);
@@ -9351,7 +9354,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask));
if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
if (InsertVecSz != VecSz) {
- auto *ActualVecTy = FixedVectorType::get(ScalarTy, VecSz);
+ auto *ActualVecTy = getWidenedType(ScalarTy, VecSz);
Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
std::nullopt, CostKind, OffsetBeg - Offset,
InsertVecTy);
@@ -9385,7 +9388,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case Instruction::BitCast: {
auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
Type *SrcScalarTy = VL0->getOperand(0)->getType();
- auto *SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
+ auto *SrcVecTy = getWidenedType(SrcScalarTy, VL.size());
unsigned Opcode = ShuffleOrOp;
unsigned VecOpcode = Opcode;
if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
@@ -9395,7 +9398,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (SrcIt != MinBWs.end()) {
SrcBWSz = SrcIt->second.first;
SrcScalarTy = IntegerType::get(F->getContext(), SrcBWSz);
- SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
+ SrcVecTy = getWidenedType(SrcScalarTy, VL.size());
}
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
if (BWSz == SrcBWSz) {
@@ -9702,7 +9705,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
E->getAltOp());
} else {
Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType();
- auto *SrcTy = FixedVectorType::get(SrcSclTy, VL.size());
+ auto *SrcTy = getWidenedType(SrcSclTy, VL.size());
if (SrcSclTy->isIntegerTy() && ScalarTy->isIntegerTy()) {
auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
@@ -9711,7 +9714,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (SrcIt != MinBWs.end()) {
SrcBWSz = SrcIt->second.first;
SrcSclTy = IntegerType::get(SrcSclTy->getContext(), SrcBWSz);
- SrcTy = FixedVectorType::get(SrcSclTy, VL.size());
+ SrcTy = getWidenedType(SrcSclTy, VL.size());
}
if (BWSz <= SrcBWSz) {
if (BWSz < SrcBWSz)
@@ -10048,7 +10051,7 @@ Instr...
[truncated]
LG
SLP vectorizes a scalar type into a vector type. In the future, we will try to make SLP vectorize a vector type into a wider vector type. We add getWidenedType as a helper function. For example, SLP will turn the following code
%v0 = load i32, ptr %in0, align 4
%v1 = load i32, ptr %in1, align 4
%v2 = load i32, ptr %in2, align 4
%v3 = load i32, ptr %in3, align 4
into a load <4 x i32>; the ScalarTy is i32 and the VF is 4. In the future, SLP will turn the following code
%v0 = load <4 x i32>, ptr %in0, align 4
%v1 = load <4 x i32>, ptr %in1, align 4
%v2 = load <4 x i32>, ptr %in2, align 4
%v3 = load <4 x i32>, ptr %in3, align 4
into a load <16 x i32>; the ScalarTy is <4 x i32> and the VF is 4.
Reference: https://discourse.llvm.org/t/rfc-make-slp-vectorizer-revectorize-vector-instructions/79436
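For illustration, here is a minimal sketch, not part of this patch and purely hypothetical, of how getWidenedType might later be extended to handle the revectorization case described above. LLVM IR has no vectors of vectors, so FixedVectorType::get cannot take a vector element type; a vector ScalarTy would instead have to be flattened by multiplying element counts:

// Hypothetical extension, not in this patch: when ScalarTy is itself a
// fixed vector, widen it by multiplying its element count, because
// FixedVectorType::get only accepts scalar element types.
static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
  if (auto *VTy = dyn_cast<FixedVectorType>(ScalarTy))
    return FixedVectorType::get(VTy->getElementType(),
                                VTy->getNumElements() * VF);
  return FixedVectorType::get(ScalarTy, VF);
}

Under this sketch, getWidenedType(i32, 4) still yields <4 x i32>, while getWidenedType(<4 x i32>, 4) yields <16 x i32>, matching the examples above; the patch as landed only forwards to FixedVectorType::get.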