@@ -324,33 +324,36 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
324
324
return LT.first * getLMULCost (LT.second );
325
325
}
326
326
}
327
- }
328
- // vrgather + cost of generating the mask constant.
329
- // We model this for an unknown mask with a single vrgather.
330
- if (LT.second .isFixedLengthVector () && LT.first == 1 &&
331
- (LT.second .getScalarSizeInBits () != 8 ||
332
- LT.second .getVectorNumElements () <= 256 )) {
333
- VectorType *IdxTy = getVRGatherIndexType (LT.second , *ST, Tp->getContext ());
334
- InstructionCost IndexCost = getConstantPoolLoadCost (IdxTy, CostKind);
335
- return IndexCost + getVRGatherVVCost (LT.second );
327
+
328
+ // vrgather + cost of generating the mask constant.
329
+ // We model this for an unknown mask with a single vrgather.
330
+ if (LT.first == 1 &&
331
+ (LT.second .getScalarSizeInBits () != 8 ||
332
+ LT.second .getVectorNumElements () <= 256 )) {
333
+ VectorType *IdxTy = getVRGatherIndexType (LT.second , *ST, Tp->getContext ());
334
+ InstructionCost IndexCost = getConstantPoolLoadCost (IdxTy, CostKind);
335
+ return IndexCost + getVRGatherVVCost (LT.second );
336
+ }
336
337
}
337
338
[[fallthrough]];
338
339
}
339
340
case TTI::SK_Transpose:
340
341
case TTI::SK_PermuteTwoSrc: {
341
- // 2 x (vrgather + cost of generating the mask constant) + cost of mask
342
- // register for the second vrgather. We model this for an unknown
343
- // (shuffle) mask.
344
- if (LT.second .isFixedLengthVector () && LT.first == 1 &&
345
- (LT.second .getScalarSizeInBits () != 8 ||
346
- LT.second .getVectorNumElements () <= 256 )) {
347
- auto &C = Tp->getContext ();
348
- auto EC = Tp->getElementCount ();
349
- VectorType *IdxTy = getVRGatherIndexType (LT.second , *ST, C);
350
- VectorType *MaskTy = VectorType::get (IntegerType::getInt1Ty (C), EC);
351
- InstructionCost IndexCost = getConstantPoolLoadCost (IdxTy, CostKind);
352
- InstructionCost MaskCost = getConstantPoolLoadCost (MaskTy, CostKind);
353
- return 2 * IndexCost + 2 * getVRGatherVVCost (LT.second ) + MaskCost;
342
+ if (Mask.size () >= 2 && LT.second .isFixedLengthVector ()) {
343
+ // 2 x (vrgather + cost of generating the mask constant) + cost of mask
344
+ // register for the second vrgather. We model this for an unknown
345
+ // (shuffle) mask.
346
+ if (LT.first == 1 &&
347
+ (LT.second .getScalarSizeInBits () != 8 ||
348
+ LT.second .getVectorNumElements () <= 256 )) {
349
+ auto &C = Tp->getContext ();
350
+ auto EC = Tp->getElementCount ();
351
+ VectorType *IdxTy = getVRGatherIndexType (LT.second , *ST, C);
352
+ VectorType *MaskTy = VectorType::get (IntegerType::getInt1Ty (C), EC);
353
+ InstructionCost IndexCost = getConstantPoolLoadCost (IdxTy, CostKind);
354
+ InstructionCost MaskCost = getConstantPoolLoadCost (MaskTy, CostKind);
355
+ return 2 * IndexCost + 2 * getVRGatherVVCost (LT.second ) + MaskCost;
356
+ }
354
357
}
355
358
[[fallthrough]];
356
359
}
0 commit comments