Skip to content

Commit 3302bef

Browse files
authored
[X86] Combine FRINT + FP_TO_SINT to LRINT (llvm#126477)
Based on Craig's suggestion on llvm#126217. Alive2: https://alive2.llvm.org/ce/z/9XNpWt
1 parent 43d71ba commit 3302bef

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,6 +2682,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
26822682
ISD::ZERO_EXTEND_VECTOR_INREG,
26832683
ISD::SINT_TO_FP,
26842684
ISD::UINT_TO_FP,
2685+
ISD::FP_TO_SINT,
26852686
ISD::STRICT_SINT_TO_FP,
26862687
ISD::STRICT_UINT_TO_FP,
26872688
ISD::FP_TO_SINT_SAT,
@@ -56494,6 +56495,17 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
5649456495
return SDValue();
5649556496
}
5649656497

56498+
// Try to fold (fp_to_sint (frint X)) into (lrint X).
//
// The fold is only attempted when all of the following hold:
//   - the subtarget has SSE2,
//   - the operand of N is an ISD::FRINT node with a single use,
//   - the scalar type of N's result is i32.
// Returns the new ISD::LRINT node on success and an empty SDValue otherwise.
static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  // The value being converted must come straight from a rint that nobody
  // else consumes, otherwise the FRINT node would have to stay around anyway.
  SDValue Rounded = N->getOperand(0);
  if (Rounded.getOpcode() != ISD::FRINT || !Rounded.hasOneUse())
    return SDValue();

  // Only i32 (scalar or per-element) results are handled here.
  EVT ResultVT = N->getValueType(0);
  if (ResultVT.getScalarType() != MVT::i32)
    return SDValue();

  // Convert directly from the un-rounded input of the FRINT.
  return DAG.getNode(ISD::LRINT, SDLoc(N), ResultVT, Rounded.getOperand(0));
}
56508+
5649756509
// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
5649856510
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
5649956511
const X86Subtarget &Subtarget) {
@@ -59572,6 +59584,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5957259584
case ISD::UINT_TO_FP:
5957359585
case ISD::STRICT_UINT_TO_FP:
5957459586
return combineUIntToFP(N, DAG, Subtarget);
59587+
case ISD::FP_TO_SINT: return combineFPToSInt(N, DAG, Subtarget);
5957559588
case ISD::LRINT:
5957659589
case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget);
5957759590
case ISD::FADD:

llvm/test/CodeGen/X86/rint-conv.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
4+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
5+
6+
; Scalar float case: llvm.rint followed by fptosi to i32. The checks below
; expect one cvtss2si (vcvtss2si with +avx) and no separate rounding step.
define i32 @combine_f32(float %x) nounwind {
7+
; X86-LABEL: combine_f32:
8+
; X86: # %bb.0: # %entry
9+
; X86-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
10+
; X86-NEXT: retl
11+
;
12+
; X64-LABEL: combine_f32:
13+
; X64: # %bb.0: # %entry
14+
; X64-NEXT: cvtss2si %xmm0, %eax
15+
; X64-NEXT: retq
16+
;
17+
; AVX-LABEL: combine_f32:
18+
; AVX: # %bb.0: # %entry
19+
; AVX-NEXT: vcvtss2si %xmm0, %eax
20+
; AVX-NEXT: retq
21+
entry:
22+
%0 = tail call float @llvm.rint.f32(float %x)
23+
%1 = fptosi float %0 to i32
24+
ret i32 %1
25+
}
26+
27+
; Scalar double case: llvm.rint followed by fptosi to i32. The checks below
; expect one cvtsd2si (vcvtsd2si with +avx) and no separate rounding step.
; The intrinsic carries the .f64 suffix so its name matches its double
; signature.
define i32 @combine_f64(double %x) nounwind {
28+
; X86-LABEL: combine_f64:
29+
; X86: # %bb.0: # %entry
30+
; X86-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
31+
; X86-NEXT: retl
32+
;
33+
; X64-LABEL: combine_f64:
34+
; X64: # %bb.0: # %entry
35+
; X64-NEXT: cvtsd2si %xmm0, %eax
36+
; X64-NEXT: retq
37+
;
38+
; AVX-LABEL: combine_f64:
39+
; AVX: # %bb.0: # %entry
40+
; AVX-NEXT: vcvtsd2si %xmm0, %eax
41+
; AVX-NEXT: retq
42+
entry:
43+
%0 = tail call double @llvm.rint.f64(double %x)
44+
%1 = fptosi double %0 to i32
45+
ret i32 %1
46+
}
47+
48+
; Vector case, <4 x float> to <4 x i32>: llvm.rint followed by fptosi. The
; checks below expect one packed cvtps2dq (vcvtps2dq with +avx).
define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
49+
; X86-LABEL: combine_v4f32:
50+
; X86: # %bb.0: # %entry
51+
; X86-NEXT: cvtps2dq %xmm0, %xmm0
52+
; X86-NEXT: retl
53+
;
54+
; X64-LABEL: combine_v4f32:
55+
; X64: # %bb.0: # %entry
56+
; X64-NEXT: cvtps2dq %xmm0, %xmm0
57+
; X64-NEXT: retq
58+
;
59+
; AVX-LABEL: combine_v4f32:
60+
; AVX: # %bb.0: # %entry
61+
; AVX-NEXT: vcvtps2dq %xmm0, %xmm0
62+
; AVX-NEXT: retq
63+
entry:
64+
%0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %x)
65+
%1 = fptosi <4 x float> %0 to <4 x i32>
66+
ret <4 x i32> %1
67+
}
68+
69+
; Vector case, <2 x double> to <2 x i32>: llvm.rint followed by fptosi. The
; checks below expect one packed cvtpd2dq (vcvtpd2dq with +avx).
define <2 x i32> @combine_v2f64(<2 x double> %x) nounwind {
70+
; X86-LABEL: combine_v2f64:
71+
; X86: # %bb.0: # %entry
72+
; X86-NEXT: cvtpd2dq %xmm0, %xmm0
73+
; X86-NEXT: retl
74+
;
75+
; X64-LABEL: combine_v2f64:
76+
; X64: # %bb.0: # %entry
77+
; X64-NEXT: cvtpd2dq %xmm0, %xmm0
78+
; X64-NEXT: retq
79+
;
80+
; AVX-LABEL: combine_v2f64:
81+
; AVX: # %bb.0: # %entry
82+
; AVX-NEXT: vcvtpd2dq %xmm0, %xmm0
83+
; AVX-NEXT: retq
84+
entry:
85+
%0 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %x)
86+
%1 = fptosi <2 x double> %0 to <2 x i32>
87+
ret <2 x i32> %1
88+
}
89+
90+
; Vector case, <4 x double> to <4 x i32>: llvm.rint followed by fptosi.
; Without +avx the checks below expect two cvtpd2dq conversions (one per
; xmm half) joined by movlhps; with +avx they expect a single ymm-source
; vcvtpd2dq followed by vzeroupper.
define <4 x i32> @combine_v4f64(<4 x double> %x) nounwind {
91+
; X86-LABEL: combine_v4f64:
92+
; X86: # %bb.0: # %entry
93+
; X86-NEXT: cvtpd2dq %xmm1, %xmm1
94+
; X86-NEXT: cvtpd2dq %xmm0, %xmm0
95+
; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
96+
; X86-NEXT: retl
97+
;
98+
; X64-LABEL: combine_v4f64:
99+
; X64: # %bb.0: # %entry
100+
; X64-NEXT: cvtpd2dq %xmm1, %xmm1
101+
; X64-NEXT: cvtpd2dq %xmm0, %xmm0
102+
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
103+
; X64-NEXT: retq
104+
;
105+
; AVX-LABEL: combine_v4f64:
106+
; AVX: # %bb.0: # %entry
107+
; AVX-NEXT: vcvtpd2dq %ymm0, %xmm0
108+
; AVX-NEXT: vzeroupper
109+
; AVX-NEXT: retq
110+
entry:
111+
%0 = tail call <4 x double> @llvm.rint.v4f64(<4 x double> %x)
112+
%1 = fptosi <4 x double> %0 to <4 x i32>
113+
ret <4 x i32> %1
114+
}

0 commit comments

Comments
 (0)