Skip to content

Commit e20ffeb

Browse files
topperc authored and compnerd committed
[LegalizeTypes][X86] Improve ExpandIntRes_FP_TO_SINT/ExpandIntRes_FP_TO_UINT when input is SoftPromoteHalf.
Instead of splitting off the fp16-to-float conversion and generating a libcall, we should split the operation into fp16-to-float and float-to-integer operations. This will allow the float-to-integer conversion to go through any custom handling the target has. If the target doesn't have custom handling, then we should come back to ExpandIntRes_FP_TO_SINT/ExpandIntRes_FP_TO_UINT automatically to create the libcall. This avoids generating libcalls on 32-bit X86. These library functions may not exist in 32-bit libgcc. At least for LLVM, we never generate them when hardware floating-point instructions are available. Differential Revision: https://reviews.llvm.org/D108933 (cherry picked from commit 201f644)
1 parent 2b65f5e commit e20ffeb

File tree

2 files changed

+44
-8
lines changed

2 files changed

+44
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3087,6 +3087,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
30873087
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
30883088
Op = GetSoftPromotedHalf(Op);
30893089
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
3090+
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
3091+
SplitInteger(Op, Lo, Hi);
3092+
return;
30903093
}
30913094

30923095
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3116,6 +3119,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
31163119
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
31173120
Op = GetSoftPromotedHalf(Op);
31183121
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
3122+
Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
3123+
SplitInteger(Op, Lo, Hi);
3124+
return;
31193125
}
31203126

31213127
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);

llvm/test/CodeGen/X86/half.ll

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -210,14 +210,23 @@ define i64 @test_fptosi_i64(half* %p) #0 {
210210
;
211211
; CHECK-I686-LABEL: test_fptosi_i64:
212212
; CHECK-I686: # %bb.0:
213-
; CHECK-I686-NEXT: subl $12, %esp
213+
; CHECK-I686-NEXT: subl $28, %esp
214214
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
215215
; CHECK-I686-NEXT: movzwl (%eax), %eax
216216
; CHECK-I686-NEXT: movl %eax, (%esp)
217217
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
218-
; CHECK-I686-NEXT: fstps (%esp)
219-
; CHECK-I686-NEXT: calll __fixsfdi
220-
; CHECK-I686-NEXT: addl $12, %esp
218+
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
219+
; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
220+
; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
221+
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax
222+
; CHECK-I686-NEXT: orl $3072, %eax # imm = 0xC00
223+
; CHECK-I686-NEXT: movw %ax, {{[0-9]+}}(%esp)
224+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
225+
; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
226+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
227+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
228+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %edx
229+
; CHECK-I686-NEXT: addl $28, %esp
221230
; CHECK-I686-NEXT: retl
222231
%a = load half, half* %p, align 2
223232
%r = fptosi half %a to i64
@@ -295,14 +304,35 @@ define i64 @test_fptoui_i64(half* %p) #0 {
295304
;
296305
; CHECK-I686-LABEL: test_fptoui_i64:
297306
; CHECK-I686: # %bb.0:
298-
; CHECK-I686-NEXT: subl $12, %esp
307+
; CHECK-I686-NEXT: subl $28, %esp
299308
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
300309
; CHECK-I686-NEXT: movzwl (%eax), %eax
301310
; CHECK-I686-NEXT: movl %eax, (%esp)
302311
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
303-
; CHECK-I686-NEXT: fstps (%esp)
304-
; CHECK-I686-NEXT: calll __fixunssfdi
305-
; CHECK-I686-NEXT: addl $12, %esp
312+
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
313+
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
314+
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
315+
; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
316+
; CHECK-I686-NEXT: jae .LBB9_2
317+
; CHECK-I686-NEXT: # %bb.1:
318+
; CHECK-I686-NEXT: xorps %xmm1, %xmm1
319+
; CHECK-I686-NEXT: .LBB9_2:
320+
; CHECK-I686-NEXT: subss %xmm1, %xmm0
321+
; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
322+
; CHECK-I686-NEXT: setae %al
323+
; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
324+
; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
325+
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
326+
; CHECK-I686-NEXT: orl $3072, %ecx # imm = 0xC00
327+
; CHECK-I686-NEXT: movw %cx, {{[0-9]+}}(%esp)
328+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
329+
; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
330+
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
331+
; CHECK-I686-NEXT: movzbl %al, %edx
332+
; CHECK-I686-NEXT: shll $31, %edx
333+
; CHECK-I686-NEXT: xorl {{[0-9]+}}(%esp), %edx
334+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
335+
; CHECK-I686-NEXT: addl $28, %esp
306336
; CHECK-I686-NEXT: retl
307337
%a = load half, half* %p, align 2
308338
%r = fptoui half %a to i64

0 commit comments

Comments
 (0)