Skip to content

Commit cceb630

Browse files
committed
[x86] use vector instructions to lower more FP->int->FP casts
This is an enhancement to D77895 to avoid another round-trip from XMM->GPR->XMM. This time we handle the case of starting/ending with an f64 and casting to signed i32 as the intermediate value.

It's a bit more involved than I initially assumed because we need to use target-specific opcodes to represent the non-standard cast ops.

Differential Revision: https://reviews.llvm.org/D78362
1 parent 8c68de2 commit cceb630

File tree

4 files changed

+35
-29
lines changed

4 files changed

+35
-29
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19178,17 +19178,25 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
1917819178
MVT IntVT = CastToInt.getSimpleValueType();
1917919179
SDValue X = CastToInt.getOperand(0);
1918019180
// TODO: Allow size-changing from source to dest (double -> i32 -> float)
19181-
if (X.getSimpleValueType() != VT ||
19182-
VT.getSizeInBits() != IntVT.getSizeInBits())
19181+
if (X.getSimpleValueType() != VT)
1918319182
return SDValue();
1918419183

19185-
// See if we have a 128-bit vector cast op for this type of cast.
19186-
unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits();
19187-
MVT VecFPVT = MVT::getVectorVT(VT, NumEltsInXMM);
19188-
MVT VecIntVT = MVT::getVectorVT(IntVT, NumEltsInXMM);
19189-
if (!useVectorCast(CastToFP.getOpcode(), VecIntVT, VecFPVT, Subtarget))
19184+
// See if we have 128-bit vector cast instructions for this type of cast.
19185+
// We need cvttps2dq + cvtdq2ps or cvttpd2dq + cvtdq2pd.
19186+
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
19187+
IntVT != MVT::i32)
1919019188
return SDValue();
1919119189

19190+
unsigned NumFPEltsInXMM = 128 / VT.getScalarSizeInBits();
19191+
unsigned NumIntEltsInXMM = 128 / IntVT.getScalarSizeInBits();
19192+
MVT VecFPVT = MVT::getVectorVT(VT, NumFPEltsInXMM);
19193+
MVT VecIntVT = MVT::getVectorVT(IntVT, NumIntEltsInXMM);
19194+
19195+
// We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
19196+
bool NeedX86Opcodes = VT.getSizeInBits() != IntVT.getSizeInBits();
19197+
unsigned ToIntOpcode = NeedX86Opcodes ? X86ISD::CVTTP2SI : ISD::FP_TO_SINT;
19198+
unsigned ToFPOpcode = NeedX86Opcodes ? X86ISD::CVTSI2P : ISD::SINT_TO_FP;
19199+
1919219200
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
1919319201
//
1919419202
// We are not defining the high elements (for example, zero them) because
@@ -19198,8 +19206,8 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
1919819206
SDLoc DL(CastToFP);
1919919207
SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
1920019208
SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X);
19201-
SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, VecIntVT, VecX);
19202-
SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, VecFPVT, VCastToInt);
19209+
SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
19210+
SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecFPVT, VCastToInt);
1920319211
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
1920419212
}
1920519213

llvm/test/CodeGen/X86/ftrunc.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -263,15 +263,14 @@ define float @trunc_signed_f32_nsz(float %x) #0 {
263263
define double @trunc_signed32_f64_no_fast_math(double %x) {
264264
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
265265
; SSE: # %bb.0:
266-
; SSE-NEXT: cvttsd2si %xmm0, %eax
267-
; SSE-NEXT: xorps %xmm0, %xmm0
268-
; SSE-NEXT: cvtsi2sd %eax, %xmm0
266+
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
267+
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
269268
; SSE-NEXT: retq
270269
;
271270
; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
272271
; AVX1: # %bb.0:
273-
; AVX1-NEXT: vcvttsd2si %xmm0, %eax
274-
; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0
272+
; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
273+
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
275274
; AVX1-NEXT: retq
276275
%i = fptosi double %x to i32
277276
%r = sitofp i32 %i to double
@@ -281,9 +280,8 @@ define double @trunc_signed32_f64_no_fast_math(double %x) {
281280
define double @trunc_signed32_f64_nsz(double %x) #0 {
282281
; SSE2-LABEL: trunc_signed32_f64_nsz:
283282
; SSE2: # %bb.0:
284-
; SSE2-NEXT: cvttsd2si %xmm0, %eax
285-
; SSE2-NEXT: xorps %xmm0, %xmm0
286-
; SSE2-NEXT: cvtsi2sd %eax, %xmm0
283+
; SSE2-NEXT: cvttpd2dq %xmm0, %xmm0
284+
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
287285
; SSE2-NEXT: retq
288286
;
289287
; SSE41-LABEL: trunc_signed32_f64_nsz:

llvm/test/CodeGen/X86/isint.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
define i32 @isint_return(double %d) nounwind {
88
; CHECK64-LABEL: isint_return:
99
; CHECK64: # %bb.0:
10-
; CHECK64-NEXT: cvttsd2si %xmm0, %eax
11-
; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
10+
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
11+
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
1212
; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1
1313
; CHECK64-NEXT: movq %xmm1, %rax
1414
; CHECK64-NEXT: andl $1, %eax
@@ -18,8 +18,8 @@ define i32 @isint_return(double %d) nounwind {
1818
; CHECK32-LABEL: isint_return:
1919
; CHECK32: # %bb.0:
2020
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
21-
; CHECK32-NEXT: cvttsd2si %xmm0, %eax
22-
; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
21+
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
22+
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
2323
; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1
2424
; CHECK32-NEXT: movd %xmm1, %eax
2525
; CHECK32-NEXT: andl $1, %eax
@@ -62,8 +62,8 @@ declare void @foo()
6262
define void @isint_branch(double %d) nounwind {
6363
; CHECK64-LABEL: isint_branch:
6464
; CHECK64: # %bb.0:
65-
; CHECK64-NEXT: cvttsd2si %xmm0, %eax
66-
; CHECK64-NEXT: cvtsi2sd %eax, %xmm1
65+
; CHECK64-NEXT: cvttpd2dq %xmm0, %xmm1
66+
; CHECK64-NEXT: cvtdq2pd %xmm1, %xmm1
6767
; CHECK64-NEXT: ucomisd %xmm1, %xmm0
6868
; CHECK64-NEXT: jne .LBB2_2
6969
; CHECK64-NEXT: jp .LBB2_2
@@ -77,8 +77,8 @@ define void @isint_branch(double %d) nounwind {
7777
; CHECK32-LABEL: isint_branch:
7878
; CHECK32: # %bb.0:
7979
; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
80-
; CHECK32-NEXT: cvttsd2si %xmm0, %eax
81-
; CHECK32-NEXT: cvtsi2sd %eax, %xmm1
80+
; CHECK32-NEXT: cvttpd2dq %xmm0, %xmm1
81+
; CHECK32-NEXT: cvtdq2pd %xmm1, %xmm1
8282
; CHECK32-NEXT: ucomisd %xmm1, %xmm0
8383
; CHECK32-NEXT: jne .LBB2_2
8484
; CHECK32-NEXT: jp .LBB2_2

llvm/test/CodeGen/X86/setoeq.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ define zeroext i8 @t(double %x) nounwind readnone {
55
; CHECK-LABEL: t:
66
; CHECK: # %bb.0: # %entry
77
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
8-
; CHECK-NEXT: cvttsd2si %xmm0, %eax
9-
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
8+
; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
9+
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
1010
; CHECK-NEXT: cmpeqsd %xmm0, %xmm1
1111
; CHECK-NEXT: movd %xmm1, %eax
1212
; CHECK-NEXT: andl $1, %eax
@@ -24,8 +24,8 @@ define zeroext i8 @u(double %x) nounwind readnone {
2424
; CHECK-LABEL: u:
2525
; CHECK: # %bb.0: # %entry
2626
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
27-
; CHECK-NEXT: cvttsd2si %xmm0, %eax
28-
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
27+
; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
28+
; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
2929
; CHECK-NEXT: cmpneqsd %xmm0, %xmm1
3030
; CHECK-NEXT: movd %xmm1, %eax
3131
; CHECK-NEXT: andl $1, %eax

0 commit comments

Comments
 (0)