Skip to content

Commit 8a0073a

Browse files
authored
[CostModel][X86] Treat lrint/llrint as fptosi calls (#90883)
X86 can use the CVTP2SI instructions to lower lrint/llrint calls, which have the same costs as the CVTTP2SI (fptosi) instructions Followup to #90065
1 parent 3563af6 commit 8a0073a

File tree

6 files changed

+508
-484
lines changed

6 files changed

+508
-484
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4153,6 +4153,16 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
41534153
}
41544154
}
41554155
break;
4156+
case Intrinsic::lrint:
4157+
case Intrinsic::llrint:
4158+
// X86 can use the CVTP2SI instructions to lower lrint/llrint calls, which
4159+
// have the same costs as the CVTTP2SI (fptosi) instructions
4160+
if (!ICA.isTypeBasedOnly()) {
4161+
const SmallVectorImpl<Type *> &ArgTys = ICA.getArgTypes();
4162+
return getCastInstrCost(Instruction::FPToSI, RetTy, ArgTys[0],
4163+
TTI::CastContextHint::None, CostKind);
4164+
}
4165+
break;
41564166
case Intrinsic::maxnum:
41574167
case Intrinsic::minnum:
41584168
// FMINNUM has same costs so don't duplicate.

llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll

Lines changed: 39 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -666,35 +666,46 @@ define i32 @rint(i32 %arg) {
666666
define i32 @lrint(i32 %arg) {
667667
; SSE1-LABEL: 'lrint'
668668
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
669-
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
670-
; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
671-
; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
669+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
670+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
671+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
672672
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
673-
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
674-
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
675-
; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
673+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
674+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
675+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
676676
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
677677
;
678+
; SSE2-LABEL: 'lrint'
679+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
680+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
681+
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
682+
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
683+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
684+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
685+
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
686+
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
687+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
688+
;
678689
; AVX-LABEL: 'lrint'
679690
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
680-
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
681-
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
682-
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
691+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
692+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
693+
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
683694
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
684-
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
685-
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
686-
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
695+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
696+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
697+
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
687698
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
688699
;
689700
; AVX512-LABEL: 'lrint'
690701
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
691-
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
692-
; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
693-
; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
702+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
703+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
704+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
694705
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
695-
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
696-
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
697-
; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
706+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
707+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
708+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
698709
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
699710
;
700711
%F32 = call i32 @llvm.lrint.i32.f32(float undef)
@@ -711,38 +722,16 @@ define i32 @lrint(i32 %arg) {
711722
}
712723

713724
define i32 @llrint(i32 %arg) {
714-
; SSE1-LABEL: 'llrint'
715-
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
716-
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
717-
; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
718-
; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
719-
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
720-
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
721-
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
722-
; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
723-
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
724-
;
725-
; AVX-LABEL: 'llrint'
726-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
727-
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
728-
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
729-
; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
730-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
731-
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
732-
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
733-
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
734-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
735-
;
736-
; AVX512-LABEL: 'llrint'
737-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
738-
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
739-
; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
740-
; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
741-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
742-
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
743-
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
744-
; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
745-
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
725+
; CHECK-LABEL: 'llrint'
726+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
727+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
728+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
729+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
730+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
731+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
732+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
733+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
734+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
746735
;
747736
%F32 = call i64 @llvm.llrint.i64.f32(float undef)
748737
%V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)

0 commit comments

Comments
 (0)