Skip to content

Commit 84e5278

Browse files
committed
Revert "[RISCV] Remove mask size restriction on single source and dual src shuffle costing"
This reverts commit 2246700. Seeing buildbot failures; it looks like I rebased over a new test which is affected by the change.
1 parent b328396 commit 84e5278

File tree

1 file changed

+25
-22
lines changed

1 file changed

+25
-22
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 25 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -324,33 +324,36 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
324324
return LT.first * getLMULCost(LT.second);
325325
}
326326
}
327-
}
328-
// vrgather + cost of generating the mask constant.
329-
// We model this for an unknown mask with a single vrgather.
330-
if (LT.second.isFixedLengthVector() && LT.first == 1 &&
331-
(LT.second.getScalarSizeInBits() != 8 ||
332-
LT.second.getVectorNumElements() <= 256)) {
333-
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
334-
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
335-
return IndexCost + getVRGatherVVCost(LT.second);
327+
328+
// vrgather + cost of generating the mask constant.
329+
// We model this for an unknown mask with a single vrgather.
330+
if (LT.first == 1 &&
331+
(LT.second.getScalarSizeInBits() != 8 ||
332+
LT.second.getVectorNumElements() <= 256)) {
333+
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
334+
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
335+
return IndexCost + getVRGatherVVCost(LT.second);
336+
}
336337
}
337338
[[fallthrough]];
338339
}
339340
case TTI::SK_Transpose:
340341
case TTI::SK_PermuteTwoSrc: {
341-
// 2 x (vrgather + cost of generating the mask constant) + cost of mask
342-
// register for the second vrgather. We model this for an unknown
343-
// (shuffle) mask.
344-
if (LT.second.isFixedLengthVector() && LT.first == 1 &&
345-
(LT.second.getScalarSizeInBits() != 8 ||
346-
LT.second.getVectorNumElements() <= 256)) {
347-
auto &C = Tp->getContext();
348-
auto EC = Tp->getElementCount();
349-
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
350-
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
351-
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
352-
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
353-
return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
342+
if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
343+
// 2 x (vrgather + cost of generating the mask constant) + cost of mask
344+
// register for the second vrgather. We model this for an unknown
345+
// (shuffle) mask.
346+
if (LT.first == 1 &&
347+
(LT.second.getScalarSizeInBits() != 8 ||
348+
LT.second.getVectorNumElements() <= 256)) {
349+
auto &C = Tp->getContext();
350+
auto EC = Tp->getElementCount();
351+
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
352+
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
353+
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
354+
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
355+
return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
356+
}
354357
}
355358
[[fallthrough]];
356359
}

0 commit comments

Comments
 (0)