Skip to content

Commit b9de62c

Browse files
committed
[X86] Fix the cost of v16i1->v16i16 sext/zext on avx targets.
Previously we were hitting the scalarization case in the default implementation.
1 parent 19cb26f commit b9de62c

File tree

3 files changed

+20
-16
lines changed

3 files changed

+20
-16
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,6 +1623,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
16231623
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
16241624
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
16251625
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1626+
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
1627+
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
16261628
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
16271629
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
16281630
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
@@ -1656,6 +1658,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
16561658
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
16571659
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
16581660
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
1661+
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
1662+
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
16591663
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
16601664
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
16611665
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },

llvm/test/Analysis/CostModel/X86/extend.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,8 @@ define i32 @zext_vXi1() {
374374
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
375375
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
376376
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
377-
; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
378-
; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
377+
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
378+
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
379379
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
380380
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
381381
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
@@ -399,8 +399,8 @@ define i32 @zext_vXi1() {
399399
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
400400
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
401401
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
402-
; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
403-
; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
402+
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
403+
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
404404
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
405405
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
406406
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
@@ -474,8 +474,8 @@ define i32 @zext_vXi1() {
474474
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
475475
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
476476
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
477-
; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
478-
; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
477+
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
478+
; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
479479
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
480480
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
481481
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
@@ -877,8 +877,8 @@ define i32 @sext_vXi1() {
877877
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
878878
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
879879
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
880-
; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
881-
; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
880+
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
881+
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
882882
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
883883
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
884884
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
@@ -902,8 +902,8 @@ define i32 @sext_vXi1() {
902902
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
903903
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
904904
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
905-
; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
906-
; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
905+
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
906+
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
907907
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
908908
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
909909
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
@@ -977,8 +977,8 @@ define i32 @sext_vXi1() {
977977
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
978978
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
979979
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
980-
; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
981-
; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
980+
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
981+
; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
982982
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
983983
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
984984
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>

llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,8 +228,8 @@ define i32 @zext256_vXi1() "min-legal-vector-width"="256" {
228228
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
229229
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
230230
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
231-
; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
232-
; AVX-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
231+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
232+
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
233233
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
234234
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
235235
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>
@@ -362,8 +362,8 @@ define i32 @sext256_vXi1() "min-legal-vector-width"="256" {
362362
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
363363
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
364364
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
365-
; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
366-
; AVX-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
365+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
366+
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
367367
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
368368
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
369369
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>

0 commit comments

Comments
 (0)