Skip to content

Commit 3f6dd2c

Browse files
topperc authored and compnerd committed
[LegalizeTypes][X86] Improve ExpandIntRes_FP_TO_SINT/ExpandIntRes_FP_TO_UINT when input is SoftPromoteHalf.
Instead of splitting off the fp16-to-float conversion and generating a libcall, we should split the operation into fp16-to-float and float-to-integer operations. This will allow the float-to-integer conversion to go through any custom handling the target has. If the target doesn't have custom handling, then we should come back to ExpandIntRes_FP_TO_SINT/ExpandIntRes_FP_TO_UINT automatically to create the libcall.

This avoids generating libcalls on 32-bit X86. These library functions may not exist in 32-bit libgcc. At least for LLVM, we never generate them when hardware floating-point instructions are available.

Differential Revision: https://reviews.llvm.org/D108933

(cherry picked from commit 201f644)
1 parent 1fccc83 commit 3f6dd2c

File tree

2 files changed

+44
-8
lines changed

2 files changed

+44
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3022,6 +3022,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
30223022
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
30233023
Op = GetSoftPromotedHalf(Op);
30243024
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
3025+
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
3026+
SplitInteger(Op, Lo, Hi);
3027+
return;
30253028
}
30263029

30273030
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3051,6 +3054,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
30513054
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
30523055
Op = GetSoftPromotedHalf(Op);
30533056
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
3057+
Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
3058+
SplitInteger(Op, Lo, Hi);
3059+
return;
30543060
}
30553061

30563062
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);

llvm/test/CodeGen/X86/half.ll

Lines changed: 38 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -210,14 +210,23 @@ define i64 @test_fptosi_i64(half* %p) #0 {
210210
;
211211
; CHECK-I686-LABEL: test_fptosi_i64:
212212
; CHECK-I686: # %bb.0:
213-
; CHECK-I686-NEXT: subl $12, %esp
213+
; CHECK-I686-NEXT: subl $28, %esp
214214
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
215215
; CHECK-I686-NEXT: movzwl (%eax), %eax
216216
; CHECK-I686-NEXT: movl %eax, (%esp)
217217
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
218-
; CHECK-I686-NEXT: fstps (%esp)
219-
; CHECK-I686-NEXT: calll __fixsfdi
220-
; CHECK-I686-NEXT: addl $12, %esp
218+
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
219+
; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
220+
; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
221+
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax
222+
; CHECK-I686-NEXT: orl $3072, %eax # imm = 0xC00
223+
; CHECK-I686-NEXT: movw %ax, {{[0-9]+}}(%esp)
224+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
225+
; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
226+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
227+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
228+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %edx
229+
; CHECK-I686-NEXT: addl $28, %esp
221230
; CHECK-I686-NEXT: retl
222231
%a = load half, half* %p, align 2
223232
%r = fptosi half %a to i64
@@ -298,14 +307,35 @@ define i64 @test_fptoui_i64(half* %p) #0 {
298307
;
299308
; CHECK-I686-LABEL: test_fptoui_i64:
300309
; CHECK-I686: # %bb.0:
301-
; CHECK-I686-NEXT: subl $12, %esp
310+
; CHECK-I686-NEXT: subl $28, %esp
302311
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
303312
; CHECK-I686-NEXT: movzwl (%eax), %eax
304313
; CHECK-I686-NEXT: movl %eax, (%esp)
305314
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
306-
; CHECK-I686-NEXT: fstps (%esp)
307-
; CHECK-I686-NEXT: calll __fixunssfdi
308-
; CHECK-I686-NEXT: addl $12, %esp
315+
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
316+
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
317+
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
318+
; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
319+
; CHECK-I686-NEXT: jae .LBB9_2
320+
; CHECK-I686-NEXT: # %bb.1:
321+
; CHECK-I686-NEXT: xorps %xmm1, %xmm1
322+
; CHECK-I686-NEXT: .LBB9_2:
323+
; CHECK-I686-NEXT: subss %xmm1, %xmm0
324+
; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
325+
; CHECK-I686-NEXT: setae %al
326+
; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
327+
; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
328+
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
329+
; CHECK-I686-NEXT: orl $3072, %ecx # imm = 0xC00
330+
; CHECK-I686-NEXT: movw %cx, {{[0-9]+}}(%esp)
331+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
332+
; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
333+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
334+
; CHECK-I686-NEXT: movzbl %al, %edx
335+
; CHECK-I686-NEXT: shll $31, %edx
336+
; CHECK-I686-NEXT: xorl {{[0-9]+}}(%esp), %edx
337+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
338+
; CHECK-I686-NEXT: addl $28, %esp
309339
; CHECK-I686-NEXT: retl
310340
%a = load half, half* %p, align 2
311341
%r = fptoui half %a to i64

0 commit comments

Comments
 (0)