-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAGCombiner] Fold and/or of NaN SETCC #135645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner] Fold and/or of NaN SETCC #135645
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: Alex MacLean (AlexMaclean) Changes: Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine, but adding it here as well can allow for additional folding if more logical operations are exposed. Full diff: https://github.com/llvm/llvm-project/pull/135645.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
}
}
+ if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+ LHS0.getValueType() == RHS0.getValueType() &&
+ ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+ (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+ return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
if (TargetPreference == AndOrSETCCFoldKind::None)
return SDValue();
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT: setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %c = fcmp ord float %a, 0.0
+ %d = fcmp ord float %b, 0.0
+ %e = and i1 %c, %d
+ ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %c = fcmp uno float %a, 0.0
+ %d = fcmp uno float %b, 0.0
+ %e = or i1 %c, %d
+ ret i1 %e
+}
|
@llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) Changes: Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine, but adding it here as well can allow for additional folding if more logical operations are exposed. Full diff: https://github.com/llvm/llvm-project/pull/135645.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
}
}
+ if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+ LHS0.getValueType() == RHS0.getValueType() &&
+ ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+ (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+ return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
if (TargetPreference == AndOrSETCCFoldKind::None)
return SDValue();
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT: setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %c = fcmp ord float %a, 0.0
+ %d = fcmp ord float %b, 0.0
+ %e = and i1 %c, %d
+ ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %c = fcmp uno float %a, 0.0
+ %d = fcmp uno float %b, 0.0
+ %e = or i1 %c, %d
+ ret i1 %e
+}
|
84c760a
to
238ca4b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/3573 Here is the relevant piece of the build log for reference:
|
Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine, but adding it here as well can allow for additional folding if more logical operations are exposed.
%d = fcmp uno float %b, 0.0 | ||
%e = or i1 %c, %d | ||
ret i1 %e | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should also do vector tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apologies, it looks like I committed this change prematurely. I've addressed this comment in a follow-up, #136168. Please take a look when you have a minute.
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s | ||
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %} | ||
|
||
target triple = "nvptx64-nvidia-cuda" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Redundant with the mtriple
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also addressed in #136168
Follow up to #135645 to address test cleanup review.
Follow up to llvm#135645 to address test cleanup review.
Follow up to llvm#135645 to address test cleanup review.
Follow up to llvm#135645 to address test cleanup review.
Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine, but adding it here as well can allow for additional folding if more logical operations are exposed.