[LegalizeTypes][X86] Improve ExpandIntRes_FP_TO_SINT/ExpandIntRes_FP_TO_UINT when input is SoftPromoteHalf.

topperc · compnerd · commit e20ffeb85cbd · 2021-08-31T12:27:34.000-07:00
Instead of splitting off the fp16 to float conversion and generating a libcall, we should split the operation into fp16 to float and float to integer operations. This will allow the float to integer conversion to go through any custom handling the target has. If the target doesn't have custom handling then we should come back to ExpandIntRes_FP_TO_SINT/ ExpandIntRes_FP_TO_UINT automatically to create the libcall. This avoids generating libcalls on 32-bit X86. These library functions may not exist in 32-bit libgcc. At least for LLVM, we never generate them when hardware floating point instructions are available. Differential Revision: https://reviews.llvm.org/D108933 (cherry picked from commit 201f644)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3087,6 +3087,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
     EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
     Op = GetSoftPromotedHalf(Op);
     Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+    Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+    SplitInteger(Op, Lo, Hi);
+    return;
   }
 
   RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3116,6 +3119,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
     EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
     Op = GetSoftPromotedHalf(Op);
     Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+    Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+    SplitInteger(Op, Lo, Hi);
+    return;
   }
 
   RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
@@ -210,14 +210,23 @@ define i64 @test_fptosi_i64(half* %p) #0 {
 ;
 ; CHECK-I686-LABEL: test_fptosi_i64:
 ; CHECK-I686:       # %bb.0:
-; CHECK-I686-NEXT:    subl $12, %esp
+; CHECK-I686-NEXT:    subl $28, %esp
 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
 ; CHECK-I686-NEXT:    movl %eax, (%esp)
 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
-; CHECK-I686-NEXT:    fstps (%esp)
-; CHECK-I686-NEXT:    calll __fixsfdi
-; CHECK-I686-NEXT:    addl $12, %esp
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-I686-NEXT:    orl $3072, %eax # imm = 0xC00
+; CHECK-I686-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-I686-NEXT:    addl $28, %esp
 ; CHECK-I686-NEXT:    retl
   %a = load half, half* %p, align 2
   %r = fptosi half %a to i64
@@ -295,14 +304,35 @@ define i64 @test_fptoui_i64(half* %p) #0 {
 ;
 ; CHECK-I686-LABEL: test_fptoui_i64:
 ; CHECK-I686:       # %bb.0:
-; CHECK-I686-NEXT:    subl $12, %esp
+; CHECK-I686-NEXT:    subl $28, %esp
 ; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-I686-NEXT:    movzwl (%eax), %eax
 ; CHECK-I686-NEXT:    movl %eax, (%esp)
 ; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
-; CHECK-I686-NEXT:    fstps (%esp)
-; CHECK-I686-NEXT:    calll __fixunssfdi
-; CHECK-I686-NEXT:    addl $12, %esp
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
+; CHECK-I686-NEXT:    jae .LBB9_2
+; CHECK-I686-NEXT:  # %bb.1:
+; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
+; CHECK-I686-NEXT:  .LBB9_2:
+; CHECK-I686-NEXT:    subss %xmm1, %xmm0
+; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    setae %al
+; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-I686-NEXT:    orl $3072, %ecx # imm = 0xC00
+; CHECK-I686-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movzbl %al, %edx
+; CHECK-I686-NEXT:    shll $31, %edx
+; CHECK-I686-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-I686-NEXT:    addl $28, %esp
 ; CHECK-I686-NEXT:    retl
   %a = load half, half* %p, align 2
   %r = fptoui half %a to i64