Skip to content

Commit 22530e7

Browse files
committed
[CostModel][X86] Update vXi8 mul costs for AVX512BW/AVX2/AVX1/SSE
Later levels were inheriting some of the worst case costs from SSE/AVX1 etc. Based off llvm-mca numbers from the check_cost_tables.py script in https://github.com/RKSimon/llvm-scripts Cleanup prep work for llvm#90748
1 parent dcbe0d4 commit 22530e7

File tree

12 files changed

+95
-102
lines changed

12 files changed

+95
-102
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
851851
{ ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } }, // psubb
852852
{ ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } }, // psubw
853853

854-
{ ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
854+
{ ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } }, // extend/pmullw/trunc
855+
{ ISD::MUL, MVT::v32i8, { 6, 11,10,11 } }, // extend/pmullw/trunc
856+
{ ISD::MUL, MVT::v64i8, { 6, 12,10,11 } }, // unpack/pmullw
855857
{ ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } }, // pmullw
856858

857859
{ ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } }, // psubb
@@ -1117,7 +1119,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11171119
{ ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } }, // paddq
11181120

11191121
{ ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } }, // extend/pmullw/pack
1120-
{ ISD::MUL, MVT::v32i8, { 6, 11,10,19 } }, // unpack/pmullw
1122+
{ ISD::MUL, MVT::v32i8, { 6, 11,10,20 } }, // unpack/pmullw
11211123
{ ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } }, // pmullw
11221124
{ ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } }, // pmulld
11231125
{ ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } }, // pmulld
@@ -1168,7 +1170,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11681170
// We don't have to scalarize unsupported ops. We can issue two half-sized
11691171
// operations and we only need to extract the upper YMM half.
11701172
// Two ops + 1 extract + 1 insert = 4.
1171-
{ ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } }, // unpack/pmullw + split
1173+
{ ISD::MUL, MVT::v32i8, { 12, 12, 22, 23 } }, // unpack/pmullw + split
1174+
{ ISD::MUL, MVT::v16i8, { 5, 6, 10, 12 } }, // unpack/pmullw
11721175
{ ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } }, // pmullw + split
11731176
{ ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } }, // pmulld + split
11741177
{ ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } }, // pmulld
@@ -1308,7 +1311,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
13081311
{ ISD::SRA, MVT::v4i32, { 16, 17,15,19 } }, // Shift each lane + blend.
13091312
{ ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } }, // splat+shuffle sequence.
13101313

1311-
{ ISD::MUL, MVT::v16i8, { 5, 18,10,12 } }, // 2*unpack/2*pmullw/2*and/pack
1314+
{ ISD::MUL, MVT::v16i8, { 6, 18,10,12 } }, // 2*unpack/2*pmullw/2*and/pack
13121315
{ ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } } // pmulld (Nehalem from agner.org)
13131316
};
13141317

@@ -1353,7 +1356,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
13531356
{ ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } }, // paddq
13541357
{ ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } }, // psubq
13551358

1356-
{ ISD::MUL, MVT::v16i8, { 5, 18,12,12 } }, // 2*unpack/2*pmullw/2*and/pack
1359+
{ ISD::MUL, MVT::v16i8, { 6, 18,12,12 } }, // 2*unpack/2*pmullw/2*and/pack
13571360
{ ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } }, // pmullw
13581361
{ ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } }, // 3*pmuludq/4*shuffle
13591362
{ ISD::MUL, MVT::v2i64, { 7, 10,10,10 } }, // 3*pmuludq/3*shift/2*add

llvm/test/Analysis/CostModel/X86/arith-int-codesize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -901,7 +901,7 @@ define i32 @mul(i32 %arg) {
901901
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
902902
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
903903
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
904-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
904+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef
905905
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
906906
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = mul <64 x i8> undef, undef
907907
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef

llvm/test/Analysis/CostModel/X86/arith-int-latency.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -679,9 +679,9 @@ define i32 @mul(i32 %arg) {
679679
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = mul <2 x i8> undef, undef
680680
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
681681
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
682-
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
683-
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = mul <32 x i8> undef, undef
684-
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = mul <64 x i8> undef, undef
682+
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
683+
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
684+
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
685685
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
686686
;
687687
; AVX2-LABEL: 'mul'
@@ -745,9 +745,9 @@ define i32 @mul(i32 %arg) {
745745
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = mul <2 x i8> undef, undef
746746
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
747747
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
748-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
748+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
749749
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
750-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = mul <64 x i8> undef, undef
750+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = mul <64 x i8> undef, undef
751751
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
752752
;
753753
; AVX512DQ-LABEL: 'mul'

llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -702,8 +702,8 @@ define i32 @mul(i32 %arg) {
702702
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
703703
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
704704
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
705-
; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = mul <32 x i8> undef, undef
706-
; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = mul <64 x i8> undef, undef
705+
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
706+
; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = mul <64 x i8> undef, undef
707707
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
708708
;
709709
; AVX512F-LABEL: 'mul'
@@ -724,7 +724,7 @@ define i32 @mul(i32 %arg) {
724724
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
725725
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
726726
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
727-
; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = mul <32 x i8> undef, undef
727+
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
728728
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
729729
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
730730
;
@@ -745,8 +745,8 @@ define i32 @mul(i32 %arg) {
745745
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
746746
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
747747
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
748-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
749-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = mul <32 x i8> undef, undef
748+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
749+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
750750
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = mul <64 x i8> undef, undef
751751
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
752752
;
@@ -768,7 +768,7 @@ define i32 @mul(i32 %arg) {
768768
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
769769
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
770770
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
771-
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = mul <32 x i8> undef, undef
771+
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
772772
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
773773
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
774774
;

llvm/test/Analysis/CostModel/X86/arith-int.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -905,9 +905,9 @@ define i32 @mul(i32 %arg) {
905905
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = mul <2 x i8> undef, undef
906906
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = mul <4 x i8> undef, undef
907907
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = mul <8 x i8> undef, undef
908-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
909-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
910-
; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
908+
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
909+
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
910+
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
911911
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
912912
;
913913
; SSE42-LABEL: 'mul'
@@ -927,9 +927,9 @@ define i32 @mul(i32 %arg) {
927927
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = mul <2 x i8> undef, undef
928928
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = mul <4 x i8> undef, undef
929929
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = mul <8 x i8> undef, undef
930-
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
931-
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
932-
; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
930+
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
931+
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
932+
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
933933
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
934934
;
935935
; AVX1-LABEL: 'mul'
@@ -1015,9 +1015,9 @@ define i32 @mul(i32 %arg) {
10151015
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = mul <2 x i8> undef, undef
10161016
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = mul <4 x i8> undef, undef
10171017
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = mul <8 x i8> undef, undef
1018-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
1018+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef
10191019
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = mul <32 x i8> undef, undef
1020-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = mul <64 x i8> undef, undef
1020+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = mul <64 x i8> undef, undef
10211021
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
10221022
;
10231023
; AVX512DQ-LABEL: 'mul'
@@ -1059,9 +1059,9 @@ define i32 @mul(i32 %arg) {
10591059
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I8 = mul <2 x i8> undef, undef
10601060
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = mul <4 x i8> undef, undef
10611061
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I8 = mul <8 x i8> undef, undef
1062-
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
1063-
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
1064-
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
1062+
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
1063+
; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
1064+
; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
10651065
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
10661066
;
10671067
; GLM-LABEL: 'mul'
@@ -1081,9 +1081,9 @@ define i32 @mul(i32 %arg) {
10811081
; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = mul <2 x i8> undef, undef
10821082
; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = mul <4 x i8> undef, undef
10831083
; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = mul <8 x i8> undef, undef
1084-
; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
1085-
; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
1086-
; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
1084+
; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
1085+
; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
1086+
; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
10871087
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
10881088
;
10891089
%I64 = mul i64 undef, undef

llvm/test/Analysis/CostModel/X86/rem-codesize.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ define i32 @srem_constpow2() {
379379
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
380380
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
381381
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
382-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
382+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
383383
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
384384
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
385385
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
@@ -617,7 +617,7 @@ define i32 @srem_uniformconstpow2() {
617617
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
618618
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
619619
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
620-
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
620+
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
621621
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
622622
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
623623
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef

0 commit comments

Comments
 (0)