@@ -9527,8 +9527,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
9527
9527
// that the costs will be accurate.
9528
9528
auto It = MinBWs.find(E);
9529
9529
Type *OrigScalarTy = ScalarTy;
9530
- if (It != MinBWs.end())
9530
+ if (It != MinBWs.end()) {
9531
+ auto VecTy = dyn_cast<FixedVectorType>(ScalarTy);
9531
9532
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
9533
+ if (VecTy)
9534
+ ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements());
9535
+ }
9532
9536
auto *VecTy = getWidenedType(ScalarTy, VL.size());
9533
9537
unsigned EntryVF = E->getVectorFactor();
9534
9538
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
@@ -13127,8 +13131,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
13127
13131
else if (auto *IE = dyn_cast<InsertElementInst>(V))
13128
13132
ScalarTy = IE->getOperand(1)->getType();
13129
13133
auto It = MinBWs.find(E);
13130
- if (It != MinBWs.end())
13134
+ if (It != MinBWs.end()) {
13135
+ auto VecTy = dyn_cast<FixedVectorType>(ScalarTy);
13131
13136
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
13137
+ if (VecTy)
13138
+ ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements());
13139
+ }
13132
13140
auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
13133
13141
if (E->isGather()) {
13134
13142
// Set insert point for non-reduction initial nodes.
@@ -16003,16 +16011,18 @@ void BoUpSLP::computeMinimumValueSizes() {
16003
16011
}
16004
16012
16005
16013
unsigned VF = E.getVectorFactor();
16006
- auto *TreeRootIT =
16007
- dyn_cast<IntegerType>(E.Scalars.front()->getType()->getScalarType());
16014
+ Type *ScalarTy = E.Scalars.front()->getType();
16015
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
16016
+ auto *TreeRootIT = dyn_cast<IntegerType>(ScalarTy->getScalarType());
16008
16017
if (!TreeRootIT || !Opcode)
16009
16018
return 0u;
16010
16019
16011
16020
if (any_of(E.Scalars,
16012
16021
[&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
16013
16022
return 0u;
16014
16023
16015
- unsigned NumParts = TTI->getNumberOfParts(getWidenedType(TreeRootIT, VF));
16024
+ unsigned NumParts = TTI->getNumberOfParts(
16025
+ getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
16016
16026
16017
16027
// The maximum bit width required to represent all the values that can be
16018
16028
// demoted without loss of precision. It would be safe to truncate the roots
@@ -16034,7 +16044,8 @@ void BoUpSLP::computeMinimumValueSizes() {
16034
16044
// we can truncate the roots to this narrower type.
16035
16045
for (Value *Root : E.Scalars) {
16036
16046
unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT);
16037
- TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType());
16047
+ TypeSize NumTypeBits =
16048
+ DL->getTypeSizeInBits(Root->getType()->getScalarType());
16038
16049
unsigned BitWidth1 = NumTypeBits - NumSignBits;
16039
16050
// If we can't prove that the sign bit is zero, we must add one to the
16040
16051
// maximum bit width to account for the unknown sign bit. This preserves
@@ -16206,7 +16217,8 @@ void BoUpSLP::computeMinimumValueSizes() {
16206
16217
// type, we can proceed with the narrowing. Otherwise, do nothing.
16207
16218
if (MaxBitWidth == 0 ||
16208
16219
MaxBitWidth >=
16209
- cast<IntegerType>(TreeRoot.front()->getType())->getBitWidth()) {
16220
+ cast<IntegerType>(TreeRoot.front()->getType()->getScalarType())
16221
+ ->getBitWidth()) {
16210
16222
if (UserIgnoreList)
16211
16223
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
16212
16224
continue;
0 commit comments