Skip to content

Commit e2202b9

Browse files
authored
[AArch64] Update costs for scalarizing i64->f32 int_to_fp. (#132366)
After #130665 these operations are scalarized to avoid double-rounding. This updates the cost model to match. In the future we might be able to use SVE instructions to help, but for the moment the costs should be higher. Costsize and Latency costs are not yet expected to be accurate. The vector insert/extract will use the cost of VectorInsertExtractBaseCost (2 by default).
1 parent 0bc2c5b commit e2202b9

File tree

4 files changed

+43
-27
lines changed

4 files changed

+43
-27
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2986,10 +2986,8 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29862986
// Complex: to v2f32
29872987
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
29882988
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
2989-
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
29902989
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
29912990
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
2992-
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
29932991

29942992
// SVE: to nxv2f32
29952993
{ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
@@ -3320,6 +3318,20 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
33203318
FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
33213319
return AdjustCost(Entry->Cost);
33223320

3321+
// INT_TO_FP of i64->f32 will scalarize, which is required to avoid
3322+
// double-rounding issues.
3323+
if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) &&
3324+
DstTy.getScalarType() == MVT::f32 && SrcTy.getScalarSizeInBits() > 32 &&
3325+
isa<FixedVectorType>(Dst) && isa<FixedVectorType>(Src))
3326+
return AdjustCost(
3327+
cast<FixedVectorType>(Dst)->getNumElements() *
3328+
getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(),
3329+
CCH, CostKind) +
3330+
BaseT::getScalarizationOverhead(cast<FixedVectorType>(Src), false, true,
3331+
CostKind) +
3332+
BaseT::getScalarizationOverhead(cast<FixedVectorType>(Dst), true, false,
3333+
CostKind));
3334+
33233335
if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) &&
33243336
CCH == TTI::CastContextHint::Masked &&
33253337
ST->isSVEorStreamingSVEAvailable() &&

llvm/test/Analysis/CostModel/AArch64/cast.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,8 @@ define i32 @casts_no_users() {
391391
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
392392
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
393393
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
394-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
395-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
394+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
395+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
396396
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
397397
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
398398
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
@@ -411,8 +411,8 @@ define i32 @casts_no_users() {
411411
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
412412
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
413413
; CHECK-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
414-
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
415-
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
414+
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
415+
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
416416
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
417417
; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
418418
; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
@@ -431,8 +431,8 @@ define i32 @casts_no_users() {
431431
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
432432
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
433433
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
434-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
435-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
434+
; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
435+
; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
436436
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
437437
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
438438
; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
@@ -451,8 +451,8 @@ define i32 @casts_no_users() {
451451
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
452452
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
453453
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
454-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
455-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
454+
; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
455+
; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
456456
; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
457457
; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
458458
; CHECK-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>

llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt -mattr=-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
3-
; RUN: opt -mattr=+sve,-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
2+
; RUN: opt -mattr=-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-NONEON
3+
; RUN: opt -mattr=+sve,-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-WITHSVE
44

55
target triple = "aarch64-unknown-linux-gnu"
66
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
77

88
define void @uitofp() {
9-
; CHECK-LABEL: 'uitofp'
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
9+
; CHECK-NONEON-LABEL: 'uitofp'
10+
; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
11+
; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12+
;
13+
; CHECK-WITHSVE-LABEL: 'uitofp'
14+
; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
15+
; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1216
;
1317
%conv = uitofp <16 x i64> undef to <16 x float>
1418
ret void

llvm/test/Analysis/CostModel/AArch64/sve-cast.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -696,8 +696,8 @@ define i32 @casts_no_users() {
696696
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
697697
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
698698
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
699-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
700-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
699+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
700+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
701701
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
702702
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
703703
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>
@@ -716,8 +716,8 @@ define i32 @casts_no_users() {
716716
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float>
717717
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float>
718718
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float>
719-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
720-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
719+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
720+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
721721
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double>
722722
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double>
723723
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double>
@@ -736,8 +736,8 @@ define i32 @casts_no_users() {
736736
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float>
737737
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float>
738738
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float>
739-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
740-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
739+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
740+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
741741
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double>
742742
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double>
743743
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double>
@@ -756,8 +756,8 @@ define i32 @casts_no_users() {
756756
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float>
757757
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float>
758758
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float>
759-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
760-
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
759+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
760+
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
761761
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double>
762762
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double>
763763
; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double>
@@ -1122,8 +1122,8 @@ define i32 @casts_no_users() {
11221122
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
11231123
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
11241124
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
1125-
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1126-
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
1125+
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1126+
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
11271127
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
11281128
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
11291129
; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>
@@ -1335,8 +1335,8 @@ define i32 @casts_no_users() {
13351335
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
13361336
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
13371337
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
1338-
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1339-
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
1338+
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
1339+
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
13401340
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
13411341
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
13421342
; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>

0 commit comments

Comments
 (0)