
[DAGCombiner] Simplifying {si|ui}tofp when only signbit is needed #85445


Closed

Conversation

@goldsteinn (Contributor) commented Mar 15, 2024

  • [DAGCombiner] Add tests for simplifying {si|ui}tofp; NFC
  • [DAGCombiner] Simplifying {si|ui}tofp when only signbit is needed

If only the sign bit is needed, `uitofp` simplifies to 0, and `sitofp`
simplifies to a `bitcast` of its input.
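As a standalone sanity check (plain Python, not part of the patch), the bit-level facts the combine relies on can be verified directly, assuming IEEE-754 binary32 and a same-width integer-to-float bitcast:

```python
# Standalone sanity check of the bit-level facts behind the combine.
# Assumption: IEEE-754 binary32, same-width i32 <-> float bitcast.
import struct

SIGN = 1 << 31

def float_bits(f: float) -> int:
    """Bit pattern of a float32 as an unsigned 32-bit integer."""
    return struct.unpack("<I", struct.pack("<f", f))[0]

for x in (-2**31, -1, 0, 1, 2**31 - 1):
    # sitofp: the float result is negative exactly when the i32 input is,
    # so the sign bit of bitcast(sitofp(x)) equals the sign bit of x.
    assert (float_bits(float(x)) & SIGN) == (x & SIGN)
    # uitofp: the result is always non-negative, so its sign bit is 0.
    u = x & 0xFFFFFFFF
    assert float_bits(float(u)) & SIGN == 0
```

This is why `sitofp` can become a bitcast only when input and output widths match: at different widths, the demanded sign bit of the result no longer lines up with the input's sign bit.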

@llvmbot added the backend:X86 and llvm:SelectionDAG labels Mar 15, 2024
@llvmbot (Member) commented Mar 15, 2024

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: None (goldsteinn)

Changes
  • [DAGCombiner] Add tests for simplifying {si|ui}tofp; NFC
  • [DAGCombiner] Simplifying {si|ui}tofp when only signbit is needed

Full diff: https://github.com/llvm/llvm-project/pull/85445.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+28)
  • (modified) llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll (+1-2)
  • (added) llvm/test/CodeGen/X86/int-to-fp-demanded.ll (+413)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57f8fc409de453..88a4c8124981ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -816,6 +816,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
     }
     break;
   }
+  case ISD::SINT_TO_FP: {
+    EVT InnerVT = Op.getOperand(0).getValueType();
+    if (DemandedBits.isSignMask() &&
+        VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+      return DAG.getBitcast(VT, Op.getOperand(0));
+    break;
+  }
+  case ISD::UINT_TO_FP: {
+    if (DemandedBits.isSignMask())
+      return DAG.getConstant(0, SDLoc(Op), VT);
+    break;
+  }
   case ISD::SIGN_EXTEND_INREG: {
     // If none of the extended bits are demanded, eliminate the sextinreg.
     SDValue Op0 = Op.getOperand(0);
@@ -2313,6 +2325,22 @@ bool TargetLowering::SimplifyDemandedBits(
     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
     break;
   }
+  case ISD::SINT_TO_FP: {
+    EVT InnerVT = Op.getOperand(0).getValueType();
+    if (DemandedBits.isSignMask() &&
+        VT.getScalarSizeInBits() == InnerVT.getScalarSizeInBits())
+      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Op.getOperand(0)));
+
+    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+    break;
+  }
+  case ISD::UINT_TO_FP: {
+    if (DemandedBits.isSignMask())
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+
+    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+    break;
+  }
   case ISD::SIGN_EXTEND_INREG: {
     SDValue Op0 = Op.getOperand(0);
     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index cbb5bd09c2399a..a332b3e8908003 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -164,14 +164,13 @@ define <4 x float> @demandedbits_sitofp_blendvps(<4 x float> %a0, <4 x float> %a
 ; SSE-LABEL: demandedbits_sitofp_blendvps:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movaps %xmm0, %xmm3
-; SSE-NEXT:    cvtdq2ps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3
 ; SSE-NEXT:    movaps %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: demandedbits_sitofp_blendvps:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2ps %xmm2, %xmm2
 ; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %cvt = sitofp <4 x i32> %a2 to <4 x float>
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
new file mode 100644
index 00000000000000..0735d48027196b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -0,0 +1,413 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+define i32 @sitofp_signbit_only(i32 %i_in) {
+; X86-LABEL: sitofp_signbit_only:
+; X86:       # %bb.0:
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only:
+; X64:       # %bb.0:
+; X64-NEXT:    movd %edi, %xmm0
+; X64-NEXT:    movmskps %xmm0, %eax
+; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_okay_width(i16 %i_in) {
+; X86-LABEL: sitofp_signbit_only_okay_width:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-NEXT:    filds {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only_okay_width:
+; X64:       # %bb.0:
+; X64-NEXT:    shll $16, %edi
+; X64-NEXT:    movd %edi, %xmm0
+; X64-NEXT:    movmskps %xmm0, %eax
+; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    retq
+  %f = sitofp i16 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @sitofp_signbit_only_fail_bad_width1(i64 %i_in) {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl (%esp), %eax
+; X86-NEXT:    popl %ecx
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width1:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %rdi, %xmm0
+; X64-NEXT:    movmskps %xmm0, %eax
+; X64-NEXT:    shll $31, %eax
+; X64-NEXT:    retq
+  %f = sitofp i64 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define <2 x i16> @sitofp_signbit_only_fail_bad_width2(i32 %i_in) {
+; X86-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, (%esp)
+; X86-NEXT:    fildl (%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    shrl $16, %edx
+; X86-NEXT:    andl $32768, %edx # imm = 0x8000
+; X86-NEXT:    movl $32768, %eax # imm = 0x8000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    # kill: def $dx killed $dx killed $edx
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_signbit_only_fail_bad_width2:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i2xi16 = bitcast float %f to <2 x i16>
+  %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+  ret <2 x i16> %r
+}
+
+define i32 @sitofp_many_bits_fail(i32 %i_in) {
+; X86-LABEL: sitofp_many_bits_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fildl (%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-2147483647, %eax # imm = 0x80000001
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_many_bits_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    andl $-2147483647, %eax # imm = 0x80000001
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483649
+  ret i32 %r
+}
+
+define i32 @sitofp_multiuse_fail(i32 %i_in) {
+; X86-LABEL: sitofp_multiuse_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildl {{[0-9]+}}(%esp)
+; X86-NEXT:    fsts {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i32@PLT
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_multiuse_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %xmm0, %ebx
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq use.i32@PLT
+; X64-NEXT:    andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  call void @use.i32(i32 %i)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @sitofp_multiuse_okay(i32 %i_in) {
+; X86-LABEL: sitofp_multiuse_okay:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildl {{[0-9]+}}(%esp)
+; X86-NEXT:    fsts {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i1@PLT
+; X86-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_multiuse_okay:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    cvtsi2ss %edi, %xmm0
+; X64-NEXT:    movd %xmm0, %ebx
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq use.i1@PLT
+; X64-NEXT:    andl $-2147483648, %ebx # imm = 0x80000000
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %f = sitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %cmp = icmp slt i32 %i, 0
+  call void @use.i1(i1 %cmp)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_signbit_only(i32 %i_in) {
+; X86-LABEL: uitofp_signbit_only:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width(i16 %i_in) {
+; X86-LABEL: uitofp_signbit_only_okay_width:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %f = uitofp i16 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_signbit_only_okay_width1(i64 %i_in) {
+; X86-LABEL: uitofp_signbit_only_okay_width1:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only_okay_width1:
+; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
+  %f = uitofp i64 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define <2 x i16> @uitofp_signbit_only_fail_bad_width2(i32 %i_in) {
+; X86-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $32768, %eax # imm = 0x8000
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_signbit_only_fail_bad_width2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i2xi16 = bitcast float %f to <2 x i16>
+  %r = and <2 x i16> %i2xi16, <i16 32768, i16 32768>
+  ret <2 x i16> %r
+}
+
+define i32 @uitofp_many_bits_fail(i32 %i_in) {
+; X86-LABEL: uitofp_many_bits_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_many_bits_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %r = and i32 %i, 2147483649
+  ret i32 %r
+}
+
+define i32 @uitofp_multiuse_fail(i32 %i_in) {
+; X86-LABEL: uitofp_multiuse_fail:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i32@PLT
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_multiuse_fail:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    movd %xmm0, %edi
+; X64-NEXT:    callq use.i32@PLT
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  call void @use.i32(i32 %i)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}
+
+define i32 @uitofp_multiuse_okay(i32 %i_in) {
+; X86-LABEL: uitofp_multiuse_okay:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    fildll {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll use.i1@PLT
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_multiuse_okay:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cvtsi2ss %rax, %xmm0
+; X64-NEXT:    movd %xmm0, %edi
+; X64-NEXT:    callq use.i1@PLT
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %f = uitofp i32 %i_in to float
+  %i = bitcast float %f to i32
+  %cmp = icmp slt i32 %i, 0
+  call void @use.i1(i1 %cmp)
+  %r = and i32 %i, 2147483648
+  ret i32 %r
+}

@goldsteinn goldsteinn changed the title goldsteinn/simplify int to fp [DAGCombiner] Simplifying {si|ui}tofp when only signbit is needed Mar 15, 2024
@phoebewang (Contributor) left a comment:

LGTM.

; X86-LABEL: sitofp_signbit_only_okay_width:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
Add nounwind to suppress the CFI directives.

@jayfoad (Contributor) commented Mar 17, 2024

Do you really have to duplicate the same logic in SimplifyMultipleUseDemandedBits and SimplifyDemandedBits?

@goldsteinn (Contributor, Author):

> Do you really have to duplicate the same logic in SimplifyMultipleUseDemandedBits and SimplifyDemandedBits?

I can make it a helper. My feeling was it's only 1-2 lines of code, so not really needed.

If only the sign bit is needed, `uitofp` simplifies to 0, and `sitofp`
simplifies to a `bitcast` of its input.
@goldsteinn force-pushed the goldsteinn/simplify-int-to-fp branch from 78ac07f to b8b3e10 on March 17, 2024 at 18:09
@jayfoad (Contributor) commented Mar 18, 2024

> Do you really have to duplicate the same logic in SimplifyMultipleUseDemandedBits and SimplifyDemandedBits?

> I can make it a helper. My feeling was it's only 1-2 lines of code, so not really needed.

It's more of a question about the design of SimplifyMultipleUseDemandedBits vs SimplifyDemandedBits - surely we shouldn't need to add the same cases to both of them? @RKSimon?

@RKSimon (Collaborator) commented Mar 18, 2024

SimplifyMultipleUseDemandedBits alone could be enough, though that would need testing. Often SimplifyDemandedBits doesn't make fallback calls to SimplifyMultipleUseDemandedBits when it could.
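The design question being discussed can be illustrated with a hypothetical toy model (plain Python, nothing like LLVM's real classes or APIs): keep the simplification logic in one place, let the multiple-use variant return a replacement for a single use without touching the node, and let the in-place variant fall back to it and rewrite the node for all uses.

```python
# Hypothetical toy model (NOT LLVM's real API) of the two entry points:
# simplify_multiple_use returns a replacement valid for one use,
# simplify_demanded_bits rewrites the node in place, reusing the same logic.

SIGN_MASK_32 = 1 << 31

class Node:
    def __init__(self, op, *operands):
        self.op = op
        self.operands = list(operands)

def simplify_multiple_use(node, demanded_bits):
    """Return a cheaper value valid for ONE use of `node`, or None."""
    if demanded_bits == SIGN_MASK_32:
        if node.op == "uint_to_fp":
            return Node("const0")                     # sign bit is always 0
        if node.op == "sint_to_fp":
            return Node("bitcast", node.operands[0])  # sign bits agree
    return None

def simplify_demanded_bits(node, demanded_bits):
    """Rewrite `node` in place (for all of its uses), reusing the logic."""
    repl = simplify_multiple_use(node, demanded_bits)
    if repl is None:
        return False
    node.op, node.operands = repl.op, repl.operands
    return True
```

In this sketch the duplicate case statements disappear because the in-place path delegates to the single-use path; whether the real SimplifyDemandedBits can always do that is exactly the open question in the thread.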

@RKSimon (Collaborator) left a comment:

LGTM - cheers

@RKSimon (Collaborator) commented Mar 19, 2024

@goldsteinn The b60cf84 commit doesn't appear to match this PR?

@RKSimon RKSimon reopened this Mar 19, 2024
@RKSimon (Collaborator) commented Mar 19, 2024

Actual commit was 353fbeb

@RKSimon RKSimon closed this Mar 19, 2024
@goldsteinn (Contributor, Author):

Bah, sorry, committed both at once, got messed up :/ rebasing the other two PRs shortly.

chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024
… y)`

This cleans up basically all the regressions associated with llvm#84688

Proof of all new cases: https://alive2.llvm.org/ce/z/5yYWLb

Closes llvm#85445