-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Combine FRINT + FP_TO_SINT to LRINT #126477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Based on Craig's suggestion on llvm#126217
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) ChangesBased on Craig's suggestion on #126217 Full diff: https://github.com/llvm/llvm-project/pull/126477.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb2102..615832d6f787cea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2684,6 +2684,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
+ ISD::FP_TO_SINT,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::FP_TO_SINT_SAT,
@@ -56380,6 +56381,18 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
+ Src->getFlags().hasNoNaNs() && Src->getFlags().hasNoInfs() &&
+ Src.hasOneUse())
+ return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
+
+ return SDValue();
+}
+
// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -59405,6 +59418,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return combineUIntToFP(N, DAG, Subtarget);
+ case ISD::FP_TO_SINT: return combineFPToSInt(N, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget);
case ISD::FADD:
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
new file mode 100644
index 000000000000000..90698ef3ecd03f5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+define i32 @no_combine_f32(float %x) nounwind {
+; X86-LABEL: no_combine_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: movss %xmm0, (%esp)
+; X86-NEXT: calll rintf
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: no_combine_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq rintf@PLT
+; X64-NEXT: cvttss2si %xmm0, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call float @llvm.rint.f32(float %x)
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @combine_f32(float %x) nounwind {
+; X86-LABEL: combine_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtss2si %xmm0, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf float @llvm.rint.f32(float %x)
+ %1 = fptosi float %0 to i32
+ ret i32 %1
+}
+
+define i32 @no_combine_f64(double %x) nounwind {
+; X86-LABEL: no_combine_f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: movsd %xmm0, (%esp)
+; X86-NEXT: calll rint
+; X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: no_combine_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq rint@PLT
+; X64-NEXT: cvttsd2si %xmm0, %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+entry:
+ %0 = tail call double @llvm.rint.f64(double %x)
+ %1 = fptosi double %0 to i32
+ ret i32 %1
+}
+
+define i32 @combine_f64(double %x) nounwind {
+; X86-LABEL: combine_f64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_f64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtsd2si %xmm0, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf double @llvm.rint.f32(double %x)
+ %1 = fptosi double %0 to i32
+ ret i32 %1
+}
+
+define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: combine_v4f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cvtps2dq %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_v4f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cvtps2dq %xmm0, %xmm0
+; X64-NEXT: retq
+entry:
+ %0 = tail call nnan ninf <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+ %1 = fptosi <4 x float> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
|
Found when addressing comment on llvm#126477
SDValue Src = N->getOperand(0); | ||
if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 && | ||
Src.hasOneUse()) | ||
return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm assuming this is OK if it fires for vcvtph2dq?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?
Do we need to check f16 is legal?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm assuming this is OK if it fires for vcvtph2dq?
Good question! We need to support LRINT for FP16 first: #127382
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?
I don't think so. It is different with #126217, 1) we don't combine TRUNC this time, so we won't eliminate ZEXT; 2) we based on the fact the out of range result is UB. We don't care the result in this case.
Do we need to check f16 is legal?
I think it doesn't matter. 1) if the f16 is not legal, the ABI is broken, it's UB to the compiler; 2) we don't have f16 math library so far, the lowering for f16 FRINT/LRINT is not supported for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?
I don't think so. It is different with #126217, 1) we don't combine TRUNC this time, so we won't eliminate ZEXT; 2) we based on the fact the out of range result is UB. We don't care the result in this case.
I think creating libcalls with the wrong register class is in general a bad idea even we can't find a functional issue with it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to check soft float and no-sse+no-x87 to avoid creating libcalls with the wrong register? Maybe isTypeLegal for the FP type is enough?
I don't think so. It is different with #126217, 1) we don't combine TRUNC this time, so we won't eliminate ZEXT; 2) we based on the fact the out of range result is UB. We don't care the result in this case.
I think creating libcalls with the wrong register class is in general a bad idea even we can't find a functional issue with it.
Add checking for SSE2.
Found when addressing comment on #126477
Found when addressing comment on llvm#126477
Found when addressing comment on llvm#126477
Address comment in llvm#126477
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I'm seeing a crash on x86 windows due to one of the commits in 08c69b2...af64f0a, I suspect it's this one. Let me know if you need a reproducer or any other info and I'll see what I can do.
|
Oh, yes. I should have landed #127382 before landing this, or limit the type to f32/f64 instead. |
Found when addressing comment on llvm#126477
Based on Craig's suggestion on #126217
Alive2: https://alive2.llvm.org/ce/z/9XNpWt