[DAGCombiner] Fold and/or of NaN SETCC #135645

AlexMaclean · 2025-04-14T16:48:29Z

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.

llvmbot · 2025-04-14T16:49:03Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-llvm-selectiondag

Author: Alex MacLean (AlexMaclean)

Changes

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.

Full diff: https://github.com/llvm/llvm-project/pull/135645.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6)
(added) llvm/test/CodeGen/NVPTX/and-or-setcc.ll (+45)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
     }
   }
 
+  if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+      LHS0.getValueType() == RHS0.getValueType() &&
+      ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+       (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+    return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
   if (TargetPreference == AndOrSETCCFoldKind::None)
     return SDValue();
 
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT:    setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp ord float %a, 0.0
+  %d = fcmp ord float %b, 0.0
+  %e = and i1 %c, %d
+  ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp uno float %a, 0.0
+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}

llvmbot · 2025-04-14T16:49:04Z

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.

Full diff: https://github.com/llvm/llvm-project/pull/135645.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6)
(added) llvm/test/CodeGen/NVPTX/and-or-setcc.ll (+45)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8136f1794775e..8eb3f95a30989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6427,6 +6427,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
     }
   }
 
+  if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+      LHS0.getValueType() == RHS0.getValueType() &&
+      ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+       (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+    return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
   if (TargetPreference == AndOrSETCCFoldKind::None)
     return SDValue();
 
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT:    setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp ord float %a, 0.0
+  %d = fcmp ord float %b, 0.0
+  %e = and i1 %c, %d
+  ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT:    setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT:    selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
+  %c = fcmp uno float %a, 0.0
+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}

llvm/test/CodeGen/NVPTX/and-or-setcc.ll

Artem-B

LGTM.

llvm-ci · 2025-04-16T13:59:10Z

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime-2 running on rocm-worker-hw-02 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/3573

Here is the relevant piece of the build log for the reference

Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp   -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/ompt /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/ompt /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# note: command had no output on stdout or stderr
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************

Fold an AND or OR of two NaN SETCC nodes into a single SETCC where possible. This optimization already exists in InstCombine but adding in here as well can allow for additional folding if more logical operations are exposed.

arsenm · 2025-04-17T11:59:40Z

llvm/test/CodeGen/X86/and-or-setcc.ll

+  %d = fcmp uno float %b, 0.0
+  %e = or i1 %c, %d
+  ret i1 %e
+}


Should also do vector tests

Apologies, looks like I committed this change prematurely. I've addressed this comment in a follow up here #136168. Please take a look when you have a minute.

arsenm · 2025-04-17T12:00:00Z

llvm/test/CodeGen/NVPTX/and-or-setcc.ll

+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"


Redundant with the mtriple

Also addressed in #136168

Follow up to #135645 to address test cleanup review.

Follow up to llvm#135645 to address test cleanup review.

AlexMaclean requested review from arsenm, Artem-B and justinfargnoli April 14, 2025 16:48

AlexMaclean self-assigned this Apr 14, 2025

llvmbot added backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well labels Apr 14, 2025

Artem-B reviewed Apr 14, 2025

View reviewed changes

llvm/test/CodeGen/NVPTX/and-or-setcc.ll Show resolved Hide resolved

AlexMaclean added 2 commits April 15, 2025 18:48

pre-commit tests

88249c6

[DAGCombiner] Fold and/or of NaN SETCC

238ca4b

AlexMaclean force-pushed the dev/amaclean/upstream/and-or-fcmp branch from 84c760a to 238ca4b Compare April 15, 2025 18:49

llvmbot added the backend:X86 label Apr 15, 2025

Artem-B approved these changes Apr 15, 2025

View reviewed changes

AlexMaclean merged commit 1bfd444 into llvm:main Apr 16, 2025
12 checks passed

arsenm reviewed Apr 17, 2025

View reviewed changes

AlexMaclean mentioned this pull request Apr 17, 2025

[DAGCombiner] Fold and/or of NaN SETCC - tests follow up #136168

Merged

AlexMaclean added a commit that referenced this pull request Apr 18, 2025

[DAGCombiner] Fold and/or of NaN SETCC - tests follow up (#136168)

aed4ecc

Follow up to #135645 to address test cleanup review.

IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025

[DAGCombiner] Fold and/or of NaN SETCC - tests follow up (llvm#136168)

9ba19aa

Follow up to llvm#135645 to address test cleanup review.

IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025

[DAGCombiner] Fold and/or of NaN SETCC - tests follow up (llvm#136168)

63036ec

Follow up to llvm#135645 to address test cleanup review.

IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025

[DAGCombiner] Fold and/or of NaN SETCC - tests follow up (llvm#136168)

f731d25

Follow up to llvm#135645 to address test cleanup review.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DAGCombiner] Fold and/or of NaN SETCC #135645

[DAGCombiner] Fold and/or of NaN SETCC #135645

Uh oh!

AlexMaclean commented Apr 14, 2025

Uh oh!

llvmbot commented Apr 14, 2025 •

edited

Loading

Uh oh!

llvmbot commented Apr 14, 2025

Uh oh!

Uh oh!

Artem-B left a comment

Uh oh!

Uh oh!

llvm-ci commented Apr 16, 2025

Uh oh!

arsenm Apr 17, 2025

Uh oh!

AlexMaclean Apr 17, 2025

Uh oh!

arsenm Apr 17, 2025

Uh oh!

AlexMaclean Apr 17, 2025

Uh oh!

Uh oh!

[DAGCombiner] Fold and/or of NaN SETCC #135645

[DAGCombiner] Fold and/or of NaN SETCC #135645

Uh oh!

Conversation

AlexMaclean commented Apr 14, 2025

Uh oh!

llvmbot commented Apr 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Apr 14, 2025

Uh oh!

Uh oh!

Artem-B left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Apr 16, 2025

Uh oh!

arsenm Apr 17, 2025

Choose a reason for hiding this comment

Uh oh!

AlexMaclean Apr 17, 2025

Choose a reason for hiding this comment

Uh oh!

arsenm Apr 17, 2025

Choose a reason for hiding this comment

Uh oh!

AlexMaclean Apr 17, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvmbot commented Apr 14, 2025 •

edited

Loading