File tree: 2 files changed, +9 -4 lines changed
lines changed Original file line number Diff line number Diff line change @@ -306,16 +306,21 @@ bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
306
306
return !F || !ST->isSingleLaneExecution (*F);
307
307
}
308
308
309
- unsigned GCNTTIImpl::getNumberOfParts (Type *Tp) const {
309
+ unsigned GCNTTIImpl::getNumberOfParts (Type *Tp) {
310
+ // For certain 8-bit ops, we can pack a v4i8 into a single part
311
+ // (e.g. v4i8 shufflevectors -> v_perm v4i8, v4i8). Thus, we
312
+ // do not limit the number of parts for 8-bit vectors to their
313
+ // legalization cost. It is left up to other target
314
+ // queries (e.g. get*InstrCost) to decide the proper handling
315
+ // of 8-bit vectors.
310
316
if (FixedVectorType *VTy = dyn_cast<FixedVectorType>(Tp)) {
311
317
if (DL.getTypeSizeInBits (VTy->getElementType ()) == 8 ) {
312
318
unsigned ElCount = VTy->getElementCount ().getFixedValue ();
313
319
return ElCount / 4 ;
314
320
}
315
321
}
316
322
317
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (Tp);
318
- return LT.first .isValid () ? *LT.first .getValue () : 0 ;
323
+ return BaseT::getNumberOfParts (Tp);
319
324
}
320
325
321
326
unsigned GCNTTIImpl::getNumberOfRegisters (unsigned RCID) const {
Original file line number Diff line number Diff line change @@ -117,7 +117,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
117
117
return TTI::PSK_FastHardware;
118
118
}
119
119
120
- unsigned getNumberOfParts (Type *Tp) const ;
120
+ unsigned getNumberOfParts (Type *Tp);
121
121
unsigned getNumberOfRegisters (unsigned RCID) const ;
122
122
TypeSize getRegisterBitWidth (TargetTransformInfo::RegisterKind Vector) const ;
123
123
unsigned getMinVectorRegisterBitWidth () const ;
You can’t perform that action at this time.
0 commit comments