Skip to content

Commit d7994bf

Browse files
committed
[CostModel] getInstructionCost - improve estimation of costs for length changing shuffles
Fix a gap in the cost estimation for length-changing shuffles by widening/narrowing the shuffle mask and then either widening the shuffle inputs or extracting the lower elements of the result. This is a small step towards moving some of this implementation inside improveShuffleKindFromMask and/or the target getShuffleCost handlers (and towards reducing the differences in cost estimation depending on whether the shuffle comes from a ShuffleVectorInst or from the raw operands / mask components).
1 parent 3711329 commit d7994bf

12 files changed

+756
-507
lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
13271327
ArrayRef<int> Mask = Shuffle->getShuffleMask();
13281328
int NumSubElts, SubIndex;
13291329

1330+
// TODO: move more of this inside improveShuffleKindFromMask.
13301331
if (Shuffle->changesLength()) {
13311332
// Treat a 'subvector widening' as a free shuffle.
13321333
if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
@@ -1355,7 +1356,35 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
13551356
DemandedDstElts, CostKind);
13561357
}
13571358

1358-
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
1359+
bool IsUnary = isa<UndefValue>(Operands[1]);
1360+
NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1361+
SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());
1362+
1363+
// Widening shuffle - widening the source(s) to the new length
1364+
// (treated as free - see above), and then perform the adjusted
1365+
// shuffle at that width.
1366+
if (Shuffle->increasesLength()) {
1367+
for (int &M : AdjustMask)
1368+
M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1369+
1370+
return TargetTTI->getShuffleCost(
1371+
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1372+
AdjustMask, CostKind, 0, nullptr);
1373+
}
1374+
1375+
// Narrowing shuffle - perform shuffle at original wider width and
1376+
// then extract the lower elements.
1377+
AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1378+
1379+
InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
1380+
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1381+
VecSrcTy, AdjustMask, CostKind, 0, nullptr);
1382+
1383+
SmallVector<int, 16> ExtractMask(Mask.size());
1384+
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1385+
return ShuffleCost + TargetTTI->getShuffleCost(
1386+
TTI::SK_ExtractSubvector, VecTy, ExtractMask,
1387+
CostKind, 0, VecSrcTy, Operands);
13591388
}
13601389

13611390
if (Shuffle->isIdentity())

llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll

Lines changed: 184 additions & 184 deletions
Large diffs are not rendered by default.

llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) {
1919
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2020
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2121
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
22-
; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
22+
; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
2323
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
2424
;
2525
; VLEN128-LABEL: 'test_vXf64'
@@ -32,7 +32,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) {
3232
; VLEN128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3333
; VLEN128-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
3434
; VLEN128-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
35-
; VLEN128-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
35+
; VLEN128-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 poison>
3636
; VLEN128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
3737
;
3838
%V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>

llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) {
5656
; TODO: getInstructionCost doesn't call getShuffleCost here because the shuffle changes length
5757
define {<4 x i8>, <4 x i8>} @deinterleave_2(<8 x i8> %v) {
5858
; CHECK-LABEL: 'deinterleave_2'
59-
; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
60-
; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
59+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v0 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
60+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v1 = shufflevector <8 x i8> %v, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
6161
; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %res0 = insertvalue { <4 x i8>, <4 x i8> } poison, <4 x i8> %v0, 0
6262
; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %res1 = insertvalue { <4 x i8>, <4 x i8> } %res0, <4 x i8> %v1, 1
6363
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret { <4 x i8>, <4 x i8> } %res1

0 commit comments

Comments
 (0)