Skip to content

Commit 2246700

Browse files
committed
[RISCV] Remove mask size restriction on single source and dual src shuffle costing
Some callers pass in an empty mask to represent "unknown". We should use the generic costs for these cases. We can add VL=1 costing separately if desired.
1 parent 9d7c7f6 commit 2246700

File tree

1 file changed

+22
-25
lines changed

1 file changed

+22
-25
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -324,36 +324,33 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
324324
return LT.first * getLMULCost(LT.second);
325325
}
326326
}
327-
328-
// vrgather + cost of generating the mask constant.
329-
// We model this for an unknown mask with a single vrgather.
330-
if (LT.first == 1 &&
331-
(LT.second.getScalarSizeInBits() != 8 ||
332-
LT.second.getVectorNumElements() <= 256)) {
333-
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
334-
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
335-
return IndexCost + getVRGatherVVCost(LT.second);
336-
}
327+
}
328+
// vrgather + cost of generating the mask constant.
329+
// We model this for an unknown mask with a single vrgather.
330+
if (LT.second.isFixedLengthVector() && LT.first == 1 &&
331+
(LT.second.getScalarSizeInBits() != 8 ||
332+
LT.second.getVectorNumElements() <= 256)) {
333+
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
334+
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
335+
return IndexCost + getVRGatherVVCost(LT.second);
337336
}
338337
[[fallthrough]];
339338
}
340339
case TTI::SK_Transpose:
341340
case TTI::SK_PermuteTwoSrc: {
342-
if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
343-
// 2 x (vrgather + cost of generating the mask constant) + cost of mask
344-
// register for the second vrgather. We model this for an unknown
345-
// (shuffle) mask.
346-
if (LT.first == 1 &&
347-
(LT.second.getScalarSizeInBits() != 8 ||
348-
LT.second.getVectorNumElements() <= 256)) {
349-
auto &C = Tp->getContext();
350-
auto EC = Tp->getElementCount();
351-
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
352-
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
353-
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
354-
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
355-
return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
356-
}
341+
// 2 x (vrgather + cost of generating the mask constant) + cost of mask
342+
// register for the second vrgather. We model this for an unknown
343+
// (shuffle) mask.
344+
if (LT.second.isFixedLengthVector() && LT.first == 1 &&
345+
(LT.second.getScalarSizeInBits() != 8 ||
346+
LT.second.getVectorNumElements() <= 256)) {
347+
auto &C = Tp->getContext();
348+
auto EC = Tp->getElementCount();
349+
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
350+
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
351+
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
352+
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
353+
return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
357354
}
358355
[[fallthrough]];
359356
}

0 commit comments

Comments
 (0)