Skip to content

Commit 0a7eced

Browse files
committed
[AArch64] Update costs for scalarizing i64->f32 int_to_fp.
After llvm#130665 these operations will be scalarized to avoid double-rounding. This updates the cost model to match. In the future we might be able to use SVE instructions to help, but for the moment the costs shoule be higher. Costsize and Latency costs are not yet expected to be accurate.
1 parent 77edfbb commit 0a7eced

File tree

4 files changed

+43
-27
lines changed

4 files changed

+43
-27
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,10 +2939,8 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29392939
// Complex: to v2f32
29402940
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
29412941
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
2942-
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
29432942
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
29442943
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
2945-
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
29462944

29472945
// Complex: to v4f32
29482946
{ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4},
@@ -3199,6 +3197,20 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
31993197
FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
32003198
return AdjustCost(Entry->Cost);
32013199

3200+
// INT_TO_FP of i64->f32 will scalarize, which is required to avoid
3201+
// double-rounding issues.
3202+
if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) &&
3203+
DstTy.getScalarType() == MVT::f32 && SrcTy.getScalarSizeInBits() > 32 &&
3204+
isa<FixedVectorType>(Dst) && isa<FixedVectorType>(Src))
3205+
return AdjustCost(
3206+
cast<FixedVectorType>(Dst)->getNumElements() *
3207+
getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(),
3208+
CCH, CostKind) +
3209+
BaseT::getScalarizationOverhead(cast<FixedVectorType>(Src), false, true,
3210+
CostKind) +
3211+
BaseT::getScalarizationOverhead(cast<FixedVectorType>(Dst), true, false,
3212+
CostKind));
3213+
32023214
if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) &&
32033215
CCH == TTI::CastContextHint::Masked &&
32043216
ST->isSVEorStreamingSVEAvailable() &&

llvm/test/Analysis/CostModel/AArch64/cast.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,8 @@ define i32 @casts_no_users() {
391391
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
392392
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
393393
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
394-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
395-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
394+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
395+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
396396
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
397397
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
398398
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
@@ -411,8 +411,8 @@ define i32 @casts_no_users() {
411411
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
412412
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
413413
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
414-
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
415-
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
414+
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
415+
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
416416
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
417417
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
418418
; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
@@ -431,8 +431,8 @@ define i32 @casts_no_users() {
431431
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
432432
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
433433
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
434-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
435-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
434+
; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
435+
; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
436436
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
437437
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
438438
; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
@@ -451,8 +451,8 @@ define i32 @casts_no_users() {
451451
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
452452
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
453453
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
454-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
455-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
454+
; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
455+
; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
456456
; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
457457
; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
458458
; CHECK-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>

llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt -mattr=-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
3-
; RUN: opt -mattr=+sve,-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
2+
; RUN: opt -mattr=-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-NONEON
3+
; RUN: opt -mattr=+sve,-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-WITHSVE
44

55
target triple = "aarch64-unknown-linux-gnu"
66
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
77

88
define void @uitofp() {
9-
; CHECK-LABEL: 'uitofp'
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
9+
; CHECK-NONEON-LABEL: 'uitofp'
10+
; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
11+
; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12+
;
13+
; CHECK-WITHSVE-LABEL: 'uitofp'
14+
; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
15+
; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1216
;
1317
%conv = uitofp <16 x i64> undef to <16 x float>
1418
ret void

llvm/test/Analysis/CostModel/AArch64/sve-cast.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -696,8 +696,8 @@ define i32 @casts_no_users() {
696696
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
697697
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
698698
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
699-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
700-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
699+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
700+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
701701
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
702702
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
703703
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>
@@ -716,8 +716,8 @@ define i32 @casts_no_users() {
716716
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float>
717717
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float>
718718
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float>
719-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
720-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
719+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
720+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
721721
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double>
722722
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double>
723723
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double>
@@ -736,8 +736,8 @@ define i32 @casts_no_users() {
736736
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float>
737737
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float>
738738
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float>
739-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
740-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
739+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
740+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
741741
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double>
742742
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double>
743743
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double>
@@ -756,8 +756,8 @@ define i32 @casts_no_users() {
756756
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float>
757757
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float>
758758
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float>
759-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
760-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
759+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
760+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
761761
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double>
762762
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double>
763763
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double>
@@ -1122,8 +1122,8 @@ define i32 @casts_no_users() {
11221122
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
11231123
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
11241124
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
1125-
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1126-
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
1125+
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1126+
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
11271127
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
11281128
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
11291129
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>
@@ -1335,8 +1335,8 @@ define i32 @casts_no_users() {
13351335
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
13361336
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
13371337
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
1338-
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1339-
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
1338+
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1339+
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
13401340
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
13411341
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
13421342
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>

0 commit comments

Comments
 (0)