[X86] Combine FRINT + FP_TO_SINT to LRINT #126477

phoebewang · 2025-02-10T07:16:26Z

Based on Craig's suggestion on #126217

Alive2: https://alive2.llvm.org/ce/z/9XNpWt

Based on Craig's suggestion on llvm#126217

llvmbot · 2025-02-10T07:16:58Z

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

Based on Craig's suggestion on #126217

Full diff: https://github.com/llvm/llvm-project/pull/126477.diff

2 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+14)
(added) llvm/test/CodeGen/X86/rint-conv.ll (+105)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb2102..615832d6f787cea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2684,6 +2684,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::ZERO_EXTEND_VECTOR_INREG,
                        ISD::SINT_TO_FP,
                        ISD::UINT_TO_FP,
+                       ISD::FP_TO_SINT,
                        ISD::STRICT_SINT_TO_FP,
                        ISD::STRICT_UINT_TO_FP,
                        ISD::FP_TO_SINT_SAT,
@@ -56380,6 +56381,18 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
+                               const X86Subtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+  if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
+      Src->getFlags().hasNoNaNs() && Src->getFlags().hasNoInfs() &&
+      Src.hasOneUse())
+    return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
+
+  return SDValue();
+}
+
 // Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
 static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
@@ -59405,6 +59418,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UINT_TO_FP:
   case ISD::STRICT_UINT_TO_FP:
     return combineUIntToFP(N, DAG, Subtarget);
+  case ISD::FP_TO_SINT:     return combineFPToSInt(N, DAG, Subtarget);
   case ISD::LRINT:
   case ISD::LLRINT:         return combineLRINT_LLRINT(N, DAG, Subtarget);
   case ISD::FADD:
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
new file mode 100644
index 000000000000000..90698ef3ecd03f5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+define i32 @no_combine_f32(float %x) nounwind {
+; X86-LABEL: no_combine_f32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll rintf
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: no_combine_f32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    callq rintf@PLT
+; X64-NEXT:    cvttss2si %xmm0, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+entry:
+  %0 = tail call float @llvm.rint.f32(float %x)
+  %1 = fptosi float %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f32(float %x) nounwind {
+; X86-LABEL: combine_f32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_f32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtss2si %xmm0, %eax
+; X64-NEXT:    retq
+entry:
+  %0 = tail call nnan ninf float @llvm.rint.f32(float %x)
+  %1 = fptosi float %0 to i32
+  ret i32 %1
+}
+
+define i32 @no_combine_f64(double %x) nounwind {
+; X86-LABEL: no_combine_f64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    movsd %xmm0, (%esp)
+; X86-NEXT:    calll rint
+; X86-NEXT:    fstpl {{[0-9]+}}(%esp)
+; X86-NEXT:    cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: no_combine_f64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    callq rint@PLT
+; X64-NEXT:    cvttsd2si %xmm0, %eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+entry:
+  %0 = tail call double @llvm.rint.f64(double %x)
+  %1 = fptosi double %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f64(double %x) nounwind {
+; X86-LABEL: combine_f64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_f64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtsd2si %xmm0, %eax
+; X64-NEXT:    retq
+entry:
+  %0 = tail call nnan ninf double @llvm.rint.f32(double %x)
+  %1 = fptosi double %0 to i32
+  ret i32 %1
+}
+
+define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: combine_v4f32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtps2dq %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_v4f32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtps2dq %xmm0, %xmm0
+; X64-NEXT:    retq
+entry:
+  %0 = tail call nnan ninf <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+  %1 = fptosi <4 x float> %0 to <4 x i32>
+  ret <4 x i32> %1
+}

llvm/test/CodeGen/X86/rint-conv.ll

Found when addressing comment on llvm#126477

RKSimon · 2025-02-10T13:25:10Z

llvm/lib/Target/X86/X86ISelLowering.cpp

+  SDValue Src = N->getOperand(0);
+  if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
+      Src.hasOneUse())
+    return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));


I'm assuming this is OK if it fires for vcvtph2dq?

Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?

Do we need to check f16 is legal?

I'm assuming this is OK if it fires for vcvtph2dq?

Good question! We need to support LRINT for FP16 first: #127382

Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?

I don't think so. It is different with #126217, 1) we don't combine TRUNC this time, so we won't eliminate ZEXT; 2) we based on the fact the out of range result is UB. We don't care the result in this case.

Do we need to check f16 is legal?

I think it doesn't matter. 1) if the f16 is not legal, the ABI is broken, it's UB to the compiler; 2) we don't have f16 math library so far, the lowering for f16 FRINT/LRINT is not supported for now.

Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?

I don't think so. It is different with #126217, 1) we don't combine TRUNC this time, so we won't eliminate ZEXT; 2) we based on the fact the out of range result is UB. We don't care the result in this case.

I think creating libcalls with the wrong register class is in general a bad idea even we can't find a functional issue with it.

Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?

I don't think so. It is different with #126217, 1) we don't combine TRUNC this time, so we won't eliminate ZEXT; 2) we based on the fact the out of range result is UB. We don't care the result in this case.

I think creating libcalls with the wrong register class is in general a bad idea even we can't find a functional issue with it.

Add checking for SSE2.

Found when addressing comment on #126477

Found when addressing comment on llvm#126477

Address comment in llvm#126477

topperc

LGTM

Zentrik · 2025-02-22T12:07:09Z

I'm seeing a crash on x86 windows due to one of the commits in 08c69b2...af64f0a, I suspect it's this one. Let me know if you need a reproducer or any other info and I'll see what I can do.

LLVM ERROR: Cannot select: t58: i32 = lrint t9, float.jl:420 @[ float.jl:996 @[ number.jl:7 @[ rounding.jl:480 @[ rounding.jl:479 @[ rounding.jl:477 @[ twiceprecision.jl:409 ] ] ] ] ] ]
  t9: f16,ch = CopyFromReg t0, Register:f16 %39, float.jl:469 @[ rounding.jl:479 @[ rounding.jl:477 @[ twiceprecision.jl:409 ] ] ]
    t8: f16 = Register %39
In function: julia_Colon_113403

[5492] signal 22: SIGABRT
in expression starting at none:0
crt_sig_handler at D:/a/llvm_julia_tester/llvm_julia_tester/julia/src\signals-win.c:104
raise at C:\Windows\System32\msvcrt.dll (unknown line)
abort at C:\Windows\System32\msvcrt.dll (unknown line)
_ZN4llvm18report_fatal_errorERKNS_5TwineEb at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm18raw_string_ostream10write_implEPKcj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm11raw_ostream5writeEPKcj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm16SelectionDAGISel15CannotYetSelectEPNS_6SDNodeE at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm16SelectionDAGISel16SelectCodeCommonEPNS_6SDNodeEPKhj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNSt17_Function_handlerIFvPN4llvm6SDNodeES2_EZNS0_16SelectionDAGISel16SelectCodeCommonES2_PKhjE3$_1E9_M_invokeERKSt9_Any_dataOS2_SC_ at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm12SelectionDAG18ComputeNumSignBitsENS_7SDValueERKNS_5APIntEj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm9KnownBits3mulERKS0_S2_b at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm7SDValue24getScalarValueSizeInBitsEv at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm12SelectionDAG16computeKnownBitsENS_7SDValueERKNS_5APIntEj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm12SelectionDAG18ComputeNumSignBitsENS_7SDValueERKNS_5APIntEj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm16SelectionDAGISel22ComputeLiveOutVRegInfoEv at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm12SelectionDAG24AddModifiedNodeToCSEMapsEPNS_6SDNodeE at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)

phoebewang · 2025-02-22T12:13:53Z

I'm seeing a crash on x86 windows due to one of the commits in 08c69b2...af64f0a, I suspect it's this one. Let me know if you need a reproducer or any other info and I'll see what I can do.

LLVM ERROR: Cannot select: t58: i32 = lrint t9, float.jl:420 @[ float.jl:996 @[ number.jl:7 @[ rounding.jl:480 @[ rounding.jl:479 @[ rounding.jl:477 @[ twiceprecision.jl:409 ] ] ] ] ] ]
  t9: f16,ch = CopyFromReg t0, Register:f16 %39, float.jl:469 @[ rounding.jl:479 @[ rounding.jl:477 @[ twiceprecision.jl:409 ] ] ]
    t8: f16 = Register %39
In function: julia_Colon_113403

[5492] signal 22: SIGABRT
in expression starting at none:0
crt_sig_handler at D:/a/llvm_julia_tester/llvm_julia_tester/julia/src\signals-win.c:104
raise at C:\Windows\System32\msvcrt.dll (unknown line)
abort at C:\Windows\System32\msvcrt.dll (unknown line)
_ZN4llvm18report_fatal_errorERKNS_5TwineEb at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm18raw_string_ostream10write_implEPKcj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm11raw_ostream5writeEPKcj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm16SelectionDAGISel15CannotYetSelectEPNS_6SDNodeE at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm16SelectionDAGISel16SelectCodeCommonEPNS_6SDNodeEPKhj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNSt17_Function_handlerIFvPN4llvm6SDNodeES2_EZNS0_16SelectionDAGISel16SelectCodeCommonES2_PKhjE3$_1E9_M_invokeERKSt9_Any_dataOS2_SC_ at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm12SelectionDAG18ComputeNumSignBitsENS_7SDValueERKNS_5APIntEj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm9KnownBits3mulERKS0_S2_b at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm7SDValue24getScalarValueSizeInBitsEv at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm12SelectionDAG16computeKnownBitsENS_7SDValueERKNS_5APIntEj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZNK4llvm12SelectionDAG18ComputeNumSignBitsENS_7SDValueERKNS_5APIntEj at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm16SelectionDAGISel22ComputeLiveOutVRegInfoEv at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)
_ZN4llvm12SelectionDAG24AddModifiedNodeToCSEMapsEPNS_6SDNodeE at D:\a\llvm_julia_tester\llvm_julia_tester\julia\usr\bin\libLLVM-21jl.dll (unknown line)

Oh, yes. I should have landed #127382 before landing this, or limit the type to f32/f64 instead.

…7382) Address comment in #126477

phoebewang · 2025-02-22T13:19:03Z

@Zentrik #127382 has landed, please let me know if it doesn't fix the problem.

Found when addressing comment on llvm#126477

[X86] Combine FRINT + FP_TO_SINT to LRINT

2225596

Based on Craig's suggestion on llvm#126217

phoebewang requested review from RKSimon and topperc February 10, 2025 07:16

llvmbot added the backend:X86 label Feb 10, 2025

phoebewang mentioned this pull request Feb 10, 2025

[X86] Combine LRINT/LLRINT and TRUNC when TRUNC has nsw flag #126217

Closed

Remove fast math constraint

9a70a3e

RKSimon reviewed Feb 10, 2025

View reviewed changes

llvm/test/CodeGen/X86/rint-conv.ll Show resolved Hide resolved

phoebewang added a commit to phoebewang/llvm-project that referenced this pull request Feb 10, 2025

[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64))

3460b2b

Found when addressing comment on llvm#126477

phoebewang mentioned this pull request Feb 10, 2025

[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) #126508

Merged

Add v2f64 / v4f64 cases and AVX test coverage

f7eb84b

RKSimon reviewed Feb 10, 2025

View reviewed changes

phoebewang added a commit that referenced this pull request Feb 11, 2025

[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (#126508)

0c95516

Found when addressing comment on #126477

Merge branch 'main' into rint

d6a1fa3

Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Feb 11, 2025

[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (llvm#126508)

aa1bb43

Found when addressing comment on llvm#126477

joaosaffran pushed a commit to joaosaffran/llvm-project that referenced this pull request Feb 14, 2025

[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (llvm#126508)

f8ee1e0

Found when addressing comment on llvm#126477

phoebewang added a commit to phoebewang/llvm-project that referenced this pull request Feb 16, 2025

[X86][FP16] Adding lowerings for FP16 ISD::LRINT and ISD::LLRINT

26ff681

Address comment in llvm#126477

phoebewang mentioned this pull request Feb 16, 2025

[X86][FP16] Adding lowerings for FP16 ISD::LRINT and ISD::LLRINT #127382

Merged

Check SSE2

114d435

topperc approved these changes Feb 20, 2025

View reviewed changes

phoebewang merged commit 3302bef into llvm:main Feb 21, 2025
8 checks passed

phoebewang deleted the rint branch February 21, 2025 06:44

phoebewang added a commit that referenced this pull request Feb 22, 2025

[X86][FP16] Adding lowerings for FP16 ISD::LRINT and ISD::LLRINT (#12…

fa64a21

…7382) Address comment in #126477

sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Feb 24, 2025

[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (llvm#126508)

fb6e338

Found when addressing comment on llvm#126477

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[X86] Combine FRINT + FP_TO_SINT to LRINT #126477

[X86] Combine FRINT + FP_TO_SINT to LRINT #126477

Uh oh!

phoebewang commented Feb 10, 2025 •

edited

Loading

Uh oh!

llvmbot commented Feb 10, 2025

Uh oh!

Uh oh!

RKSimon Feb 10, 2025

Uh oh!

topperc Feb 10, 2025

Uh oh!

phoebewang Feb 16, 2025

Uh oh!

phoebewang Feb 16, 2025

Uh oh!

topperc Feb 16, 2025

Uh oh!

phoebewang Feb 17, 2025

Uh oh!

topperc left a comment

Uh oh!

Uh oh!

Zentrik commented Feb 22, 2025

Uh oh!

phoebewang commented Feb 22, 2025

Uh oh!

phoebewang commented Feb 22, 2025

Uh oh!

Uh oh!

[X86] Combine FRINT + FP_TO_SINT to LRINT #126477

[X86] Combine FRINT + FP_TO_SINT to LRINT #126477

Uh oh!

Conversation

phoebewang commented Feb 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Feb 10, 2025

Uh oh!

Uh oh!

RKSimon Feb 10, 2025

Choose a reason for hiding this comment

Uh oh!

topperc Feb 10, 2025

Choose a reason for hiding this comment

Uh oh!

phoebewang Feb 16, 2025

Choose a reason for hiding this comment

Uh oh!

phoebewang Feb 16, 2025

Choose a reason for hiding this comment

Uh oh!

topperc Feb 16, 2025

Choose a reason for hiding this comment

Uh oh!

phoebewang Feb 17, 2025

Choose a reason for hiding this comment

Uh oh!

topperc left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Zentrik commented Feb 22, 2025

Uh oh!

phoebewang commented Feb 22, 2025

Uh oh!

phoebewang commented Feb 22, 2025

Uh oh!

Uh oh!

phoebewang commented Feb 10, 2025 •

edited

Loading