Skip to content

Commit 5e5ba14

Browse files
committed
[CostModel][X86] Adjust fp<->int vXi32 SSE legalized costs based on llvm-mca reports.
Building on rG2a1ef8784ad9a, adjust the SSE cost tables to use the legalized types, based on the worst-case costs from the script in D103695. To account for different numbers of src/dst legalized type registers, we must scale the cost by the maximum of the src/dst register counts, not just by the src count.
1 parent 4a361f5 commit 5e5ba14

File tree

10 files changed

+411
-610
lines changed

10 files changed

+411
-610
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2063,9 +2063,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
20632063
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
20642064
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
20652065

2066-
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
20672066
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
2068-
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
20692067
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 },
20702068
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
20712069

@@ -2084,39 +2082,36 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
20842082
// These are somewhat magic numbers justified by looking at the output of
20852083
// Intel's IACA, running some kernels and making sure when we take
20862084
// legalization into account the throughput will be overestimated.
2087-
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
2088-
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
2089-
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
2090-
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
2091-
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
2092-
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
2093-
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
2094-
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
2095-
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
2096-
2097-
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
2098-
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
2099-
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
2100-
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
2101-
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
2102-
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
2103-
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
2104-
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
2085+
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 },
2086+
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 },
2087+
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 },
2088+
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 },
2089+
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 },
2090+
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4 },
2091+
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 8 },
2092+
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 8 },
2093+
2094+
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 8 },
2095+
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 9 },
2096+
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 },
2097+
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 },
2098+
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 },
2099+
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 },
2100+
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 },
2101+
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
2102+
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 },
2103+
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 18 },
21052104

21062105
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
21072106
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
21082107
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
21092108
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
21102109
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
21112110
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
2112-
21132111
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
21142112

2115-
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
2116-
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
2117-
21182113
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
2119-
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
2114+
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 },
21202115
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
21212116
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
21222117
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
@@ -2138,11 +2133,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
21382133
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
21392134
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
21402135
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
2141-
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
2136+
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
21422137
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
21432138
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
21442139
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
2145-
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
2140+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
21462141
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
21472142
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
21482143
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
@@ -2250,12 +2245,12 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
22502245
if (ST->hasSSE41() && !ST->hasAVX())
22512246
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
22522247
LTDest.second, LTSrc.second))
2253-
return AdjustCost(LTSrc.first * Entry->Cost);
2248+
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
22542249

22552250
if (ST->hasSSE2() && !ST->hasAVX())
22562251
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
22572252
LTDest.second, LTSrc.second))
2258-
return AdjustCost(LTSrc.first * Entry->Cost);
2253+
return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
22592254

22602255
return AdjustCost(
22612256
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

llvm/test/Analysis/CostModel/X86/cast.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -373,23 +373,23 @@ define i32 @masks4(<4 x i1> %in) {
373373

374374
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
375375
; SSE2-LABEL: 'sitofp4'
376-
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
377-
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
378-
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
379-
; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
380-
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
381-
; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
382-
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
383-
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
376+
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
377+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
378+
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
379+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
380+
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
381+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
382+
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
383+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
384384
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
385385
;
386386
; SSE41-LABEL: 'sitofp4'
387387
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
388-
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
389-
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
390-
; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
391-
; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
392-
; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
388+
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
389+
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
390+
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
391+
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
392+
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
393393
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
394394
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
395395
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -440,16 +440,16 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
440440

441441
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
442442
; SSE2-LABEL: 'sitofp8'
443-
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
444-
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
445-
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
446-
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
443+
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
444+
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
445+
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
446+
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
447447
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
448448
;
449449
; SSE41-LABEL: 'sitofp8'
450-
; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
451-
; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
452-
; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
450+
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
451+
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
452+
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
453453
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
454454
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
455455
;
@@ -484,13 +484,13 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
484484
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
485485
; SSE-LABEL: 'uitofp4'
486486
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
487-
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
488-
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
489-
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
490-
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
491-
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
487+
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
488+
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
489+
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
490+
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
491+
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
492492
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
493-
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
493+
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
494494
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
495495
;
496496
; AVX1-LABEL: 'uitofp4'
@@ -539,9 +539,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
539539

540540
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
541541
; SSE-LABEL: 'uitofp8'
542-
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
542+
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
543543
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
544-
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
544+
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
545545
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
546546
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
547547
;

llvm/test/Analysis/CostModel/X86/fptoui.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,24 @@
1212

1313
define i32 @fptoui_double_i64(i32 %arg) {
1414
; SSE2-LABEL: 'fptoui_double_i64'
15-
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
16-
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
17-
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
18-
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
15+
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
16+
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
17+
; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
18+
; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
1919
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2020
;
2121
; SSE42-LABEL: 'fptoui_double_i64'
22-
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
23-
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
24-
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
25-
; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
22+
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
23+
; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
24+
; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
25+
; SSE42-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
2626
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2727
;
2828
; AVX-LABEL: 'fptoui_double_i64'
29-
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
30-
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
31-
; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
32-
; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
29+
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
30+
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
31+
; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
32+
; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
3333
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
3434
;
3535
; AVX512F-LABEL: 'fptoui_double_i64'
@@ -47,10 +47,10 @@ define i32 @fptoui_double_i64(i32 %arg) {
4747
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4848
;
4949
; SLM-LABEL: 'fptoui_double_i64'
50-
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
51-
; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
52-
; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
53-
; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
50+
; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
51+
; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
52+
; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
53+
; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
5454
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
5555
;
5656
%I64 = fptoui double undef to i64

0 commit comments

Comments
 (0)