@@ -343,50 +343,26 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
343
343
/* AddressSpace=*/ 0 , CostKind);
344
344
}
345
345
346
- InstructionCost
347
- RISCVTTIImpl::isMultipleInsertSubvector (VectorType *Tp, ArrayRef<int > Mask,
348
- TTI::TargetCostKind CostKind) {
349
- if (!isa<FixedVectorType>(Tp))
350
- return InstructionCost::getInvalid ();
351
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (Tp);
352
- if (LT.second .getScalarSizeInBits () == 1 )
353
- return InstructionCost::getInvalid ();
346
+ static bool isRepeatedConcatMaskImpl (ArrayRef<int > Mask, int &SubVectorSize) {
354
347
unsigned Size = Mask.size ();
355
348
if (!isPowerOf2_32 (Size))
356
- return InstructionCost::getInvalid ();
357
- // Try to guess subvector size.
358
- unsigned SubVecSize;
349
+ return false ;
359
350
for (unsigned I = 0 ; I != Size; ++I) {
360
351
if (static_cast <unsigned >(Mask[I]) == I)
361
352
continue ;
362
- if (Mask[I] == 0 ) {
363
- SubVecSize = I;
364
- break ;
365
- }
366
- return InstructionCost::getInvalid ();
367
- }
368
- if (Size % SubVecSize != 0 )
369
- return InstructionCost::getInvalid ();
370
- for (unsigned I = 0 ; I != Size; ++I)
371
- if (static_cast <unsigned >(Mask[I]) != I % SubVecSize)
372
- return InstructionCost::getInvalid ();
373
- InstructionCost Cost = 0 ;
374
- unsigned NumSlides = Log2_32 (Size / SubVecSize);
375
- // The cost of extraction from a subvector is 0 if the index is 0.
376
- for (unsigned I = 0 ; I != NumSlides; ++I) {
377
- unsigned InsertIndex = SubVecSize * (1 << I);
378
- FixedVectorType *SubTp =
379
- FixedVectorType::get (Tp->getElementType (), InsertIndex);
380
- FixedVectorType *DestTp =
381
- FixedVectorType::getDoubleElementsVectorType (SubTp);
382
- std::pair<InstructionCost, MVT> DestLT = getTypeLegalizationCost (DestTp);
383
- // Add the cost of whole vector register move because the destination vector
384
- // register group for vslideup cannot overlap the source.
385
- Cost += DestLT.first * TLI->getLMULCost (DestLT.second );
386
- Cost += getShuffleCost (TTI::SK_InsertSubvector, DestTp, {}, CostKind,
387
- InsertIndex, SubTp);
353
+ if (Mask[I] != 0 )
354
+ return false ;
355
+ if (Size % I != 0 )
356
+ return false ;
357
+ for (unsigned J = 0 ; J != Size; ++J)
358
+ // Check the pattern is repeated.
359
+ if (static_cast <unsigned >(Mask[J]) != J % I)
360
+ return false ;
361
+ SubVectorSize = I;
362
+ return true ;
388
363
}
389
- return Cost;
364
+ // That means Mask is <0, 1, 2, 3>. This is not a concatenation.
365
+ return false ;
390
366
}
391
367
392
368
static VectorType *getVRGatherIndexType (MVT DataVT, const RISCVSubtarget &ST,
@@ -440,10 +416,29 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
440
416
LT.second , CostKind);
441
417
}
442
418
}
443
- if (InstructionCost Cost =
444
- isMultipleInsertSubvector (Tp, Mask, CostKind);
445
- Cost.isValid ())
419
+ int SubVectorSize;
420
+ if (LT.second .getScalarSizeInBits () != 1 &&
421
+ isRepeatedConcatMaskImpl (Mask, SubVectorSize)) {
422
+ InstructionCost Cost = 0 ;
423
+ unsigned NumSlides = Log2_32 (Mask.size () / SubVectorSize);
424
+ // The cost of extraction from a subvector is 0 if the index is 0.
425
+ for (unsigned I = 0 ; I != NumSlides; ++I) {
426
+ unsigned InsertIndex = SubVectorSize * (1 << I);
427
+ FixedVectorType *SubTp =
428
+ FixedVectorType::get (Tp->getElementType (), InsertIndex);
429
+ FixedVectorType *DestTp =
430
+ FixedVectorType::getDoubleElementsVectorType (SubTp);
431
+ std::pair<InstructionCost, MVT> DestLT =
432
+ getTypeLegalizationCost (DestTp);
433
+ // Add the cost of whole vector register move because the
434
+ // destination vector register group for vslideup cannot overlap the
435
+ // source.
436
+ Cost += DestLT.first * TLI->getLMULCost (DestLT.second );
437
+ Cost += getShuffleCost (TTI::SK_InsertSubvector, DestTp, {},
438
+ CostKind, InsertIndex, SubTp);
439
+ }
446
440
return Cost;
441
+ }
447
442
}
448
443
// vrgather + cost of generating the mask constant.
449
444
// We model this for an unknown mask with a single vrgather.
0 commit comments