-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAGCombiner] Simplifying {si|ui}tofp
when only signbit is needed
#85445
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/85445.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57f8fc409de453..88a4c8124981ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -816,6 +816,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
break;
}
+ case ISD::SINT_TO_FP: {
+ EVT InnerVT = Op.getOperand(0).getValueType();
+ if (DemandedBits.isSignMask() &&
+ VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+ return DAG.getBitcast(VT, Op.getOperand(0));
+ break;
+ }
+ case ISD::UINT_TO_FP: {
+ if (DemandedBits.isSignMask())
+ return DAG.getConstant(0, SDLoc(Op), VT);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
@@ -2313,6 +2325,22 @@ bool TargetLowering::SimplifyDemandedBits(
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
+ case ISD::SINT_TO_FP: {
+ EVT InnerVT = Op.getOperand(0).getValueType();
+ if (DemandedBits.isSignMask() &&
+ VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Op.getOperand(0)));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
+ case ISD::UINT_TO_FP: {
+ if (DemandedBits.isSignMask())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index cbb5bd09c2399a..a332b3e8908003 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -164,14 +164,13 @@ define <4 x float> @demandedbits_sitofp_blendvps(<4 x float> %a0, <4 x float> %a
; SSE-LABEL: demandedbits_sitofp_blendvps:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm3
-; SSE-NEXT: cvtdq2ps %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: demandedbits_sitofp_blendvps:
; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp <4 x i32> %a2 to <4 x float>
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
new file mode 100644
index 00000000000000..0735d48027196b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -0,0 +1,413 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+define i32 @sitofp_signbit_only(i32 %i_in) {
+; X86-LABEL: sitofp_signbit_only:
+; X86: # %bb.0:
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only:
+; X64: # %bb.0:
+; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: shll $31, %eax
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_okay_width(i16 %i_in) {
+; X86-LABEL: sitofp_signbit_only_okay_width:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only_okay_width:
+; X64: # %bb.0:
+; X64-NEXT: shll $16, %edi
+; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: shll $31, %eax
+; X64-NEXT: retq
+ %f = sitofp i16 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_fail_bad_width1(i64 %i_in) {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl (%esp), %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %rdi, %xmm0
+; X64-NEXT: movmskps %xmm0, %eax
+; X64-NEXT: shll $31, %eax
+; X64-NEXT: retq
+ %f = sitofp i64 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: fildl (%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: shrl $16, %edx
+; X86-NEXT: andl $32768, %edx # imm = 0x8000
+; X86-NEXT: movl $32768, %eax # imm = 0x8000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: # kill: def $dx killed $dx killed $edx
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i2xi16 = bitcast float %f to <2 x i16>
+ %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+ ret <2 x i16> %r
+}
+
+define i32 @sitofp_many_bits_fail(i32 %i_in) {
+; X86-LABEL: sitofp_many_bits_fail:
+; X86: # %bb.0:
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: fildl (%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-2147483647, %eax # imm = 0x80000001
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_many_bits_fail:
+; X64: # %bb.0:
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $-2147483647, %eax # imm = 0x80000001
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483649
+ ret i32 %r
+}
+
+define i32 @sitofp_multiuse_fail(i32 %i_in) {
+; X86-LABEL: sitofp_multiuse_fail:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: fildl {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i32@PLT
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_multiuse_fail:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: .cfi_offset %rbx, -16
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %xmm0, %ebx
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq use.i32@PLT
+; X64-NEXT: andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ call void @use.i32(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @sitofp_multiuse_okay(i32 %i_in) {
+; X86-LABEL: sitofp_multiuse_okay:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: fildl {{[0-9]+}}(%esp)
+; X86-NEXT: fsts {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i1@PLT
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_multiuse_okay:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: .cfi_offset %rbx, -16
+; X64-NEXT: cvtsi2ss %edi, %xmm0
+; X64-NEXT: movd %xmm0, %ebx
+; X64-NEXT: movl %ebx, %edi
+; X64-NEXT: callq use.i1@PLT
+; X64-NEXT: andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = sitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %cmp = icmp slt i32 %i, 0
+ call void @use.i1(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_signbit_only(i32 %i_in) {
+; X86-LABEL: uitofp_signbit_only:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width(i16 %i_in) {
+; X86-LABEL: uitofp_signbit_only_okay_width:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %f = uitofp i16 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width1(i64 %i_in) {
+; X86-LABEL: uitofp_signbit_only_okay_width1:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width1:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %f = uitofp i64 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define <2 x i16> @uitofp_signbit_only_fail_bad_width2(i32 %i_in) {
+; X86-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl $32768, %eax # imm = 0x8000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i2xi16 = bitcast float %f to <2 x i16>
+ %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+ ret <2 x i16> %r
+}
+
+define i32 @uitofp_many_bits_fail(i32 %i_in) {
+; X86-LABEL: uitofp_many_bits_fail:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_many_bits_fail:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %r = and i32 %i, 2147483649
+ ret i32 %r
+}
+
+define i32 @uitofp_multiuse_fail(i32 %i_in) {
+; X86-LABEL: uitofp_multiuse_fail:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i32@PLT
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_multiuse_fail:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: movd %xmm0, %edi
+; X64-NEXT: callq use.i32@PLT
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ call void @use.i32(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
+
+define i32 @uitofp_multiuse_okay(i32 %i_in) {
+; X86-LABEL: uitofp_multiuse_okay:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll use.i1@PLT
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_multiuse_okay:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cvtsi2ss %rax, %xmm0
+; X64-NEXT: movd %xmm0, %edi
+; X64-NEXT: callq use.i1@PLT
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %f = uitofp i32 %i_in to float
+ %i = bitcast float %f to i32
+ %cmp = icmp slt i32 %i, 0
+ call void @use.i1(i32 %i)
+ %r = and i32 %i, 2147483648
+ ret i32 %r
+}
|
{si|ui}tofp
when only signbit is needed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
; X86-LABEL: sitofp_signbit_only_okay_width: | ||
; X86: # %bb.0: | ||
; X86-NEXT: subl $8, %esp | ||
; X86-NEXT: .cfi_def_cfa_offset 12 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add `nounwind` to the test functions to suppress the CFI directives.
Do you really have to duplicate the same logic in both `SimplifyDemandedBits` and `SimplifyMultipleUseDemandedBits`?
I can make it a helper. My feeling was that it's only 1-2 lines of code, so a helper isn't really needed.
If we only need the sign bit, `uitofp` simplifies to 0, and `sitofp` simplifies to a `bitcast` of its operand (when the integer and floating-point scalar widths match).
78ac07f
to
b8b3e10
Compare
It's more of a question about the design of `SimplifyDemandedBits` and `SimplifyMultipleUseDemandedBits`.
`SimplifyMultipleUseDemandedBits` could be enough, but that would need testing. Often `SimplifyDemandedBits` doesn't make fallback calls to `SimplifyMultipleUseDemandedBits` when it could.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
@goldsteinn The b60cf84 commit doesn't appear to match this PR? |
Actual commit was 353fbeb |
Bah, sorry, committed both at once, got messed up :/ rebasing the other two PRs shortly. |
… y)` This cleans up basically all the regressions associated with llvm#84688. Proof of all new cases: https://alive2.llvm.org/ce/z/5yYWLb Closes llvm#85445
{si|ui}tofp
; NFC{si|ui}tofp
when only signbit is needed
If we only need the sign bit, `uitofp` simplifies to 0 and `sitofp` simplifies to a `bitcast`.