Skip to content

Reapply "[DAGCombiner] Fold pattern for srl-shl-zext" #140038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 15, 2025

Conversation

apeskov
Copy link
Contributor

@apeskov apeskov commented May 15, 2025

Original commit: bbc5221

Previously reverted due to a conflict in a LIT test. Mainline changed the default form of the load instruction to the untyped version in #137698. The updated test uses ld.param.b64 instead of ld.param.u64.

Original commit: bbc5221

Previously reverted due to a conflict in a lit test. Mainline changed the default form of the load
instruction to the untyped version, but the test still used the previous one.
@llvmbot llvmbot added backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well labels May 15, 2025
@llvmbot
Copy link
Member

llvmbot commented May 15, 2025

@llvm/pr-subscribers-backend-nvptx

@llvm/pr-subscribers-llvm-selectiondag

Author: Alexander Peskov (apeskov)

Changes

Original commit: bbc5221

Previously reverted due to a conflict in a LIT test. Mainline changed the default form of the load instruction to the untyped version in #137698. The updated test uses ld.param.b64 instead of ld.param.u64.


Full diff: https://github.com/llvm/llvm-project/pull/140038.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+16)
  • (modified) llvm/test/CodeGen/NVPTX/shift-opt.ll (+16-26)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d6e288a59b2ee..8671efcfd2fb1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10972,6 +10972,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
   }
 
+  // fold (srl (logic_op x, (shl (zext y), c1)), c1)
+  //   -> (logic_op (srl x, c1), (zext y))
+  // c1 <= leadingzeros(zext(y))
+  SDValue X, ZExtY;
+  if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
+                              m_Value(X),
+                              m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
+                                                     m_Opc(ISD::ZERO_EXTEND)),
+                                             m_Specific(N1))))))) {
+    unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
+                               ZExtY.getOperand(0).getScalarValueSizeInBits();
+    if (N1C->getZExtValue() <= NumLeadingZeros)
+      return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+                         DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
+  }
+
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
   if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 5f5ad831cb148..65bcbb8e67156 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -6,15 +6,13 @@
 define i64 @test_or(i64 %x, i32 %y) {
 ; CHECK-LABEL: test_or(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<2>;
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_or_param_0];
-; CHECK-NEXT:    ld.param.b32 %r1, [test_or_param_1];
-; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
-; CHECK-NEXT:    or.b64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_or_param_1];
+; CHECK-NEXT:    shr.u64 %rd3, %rd1, 5;
+; CHECK-NEXT:    or.b64 %rd4, %rd3, %rd2;
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %ext = zext i32 %y to i64
@@ -29,15 +27,13 @@ define i64 @test_or(i64 %x, i32 %y) {
 define i64 @test_xor(i64 %x, i32 %y) {
 ; CHECK-LABEL: test_xor(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<2>;
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_xor_param_0];
-; CHECK-NEXT:    ld.param.b32 %r1, [test_xor_param_1];
-; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
-; CHECK-NEXT:    xor.b64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_xor_param_1];
+; CHECK-NEXT:    shr.u64 %rd3, %rd1, 5;
+; CHECK-NEXT:    xor.b64 %rd4, %rd3, %rd2;
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %ext = zext i32 %y to i64
@@ -52,15 +48,13 @@ define i64 @test_xor(i64 %x, i32 %y) {
 define i64 @test_and(i64 %x, i32 %y) {
 ; CHECK-LABEL: test_and(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<2>;
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_and_param_0];
-; CHECK-NEXT:    ld.param.b32 %r1, [test_and_param_1];
-; CHECK-NEXT:    mul.wide.u32 %rd2, %r1, 32;
-; CHECK-NEXT:    and.b64 %rd3, %rd1, %rd2;
-; CHECK-NEXT:    shr.u64 %rd4, %rd3, 5;
+; CHECK-NEXT:    ld.param.b32 %rd2, [test_and_param_1];
+; CHECK-NEXT:    shr.u64 %rd3, %rd1, 5;
+; CHECK-NEXT:    and.b64 %rd4, %rd3, %rd2;
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %ext = zext i32 %y to i64
@@ -76,23 +70,19 @@ define i64 @test_and(i64 %x, i32 %y) {
 define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
 ; CHECK-LABEL: test_vec(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .b16 %rs<9>;
-; CHECK-NEXT:    .reg .b32 %r<7>;
+; CHECK-NEXT:    .reg .b16 %rs<5>;
+; CHECK-NEXT:    .reg .b32 %r<6>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_vec_param_0];
 ; CHECK-NEXT:    ld.param.b32 %r2, [test_vec_param_1];
 ; CHECK-NEXT:    and.b32 %r3, %r2, 16711935;
-; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
-; CHECK-NEXT:    shl.b16 %rs3, %rs2, 5;
-; CHECK-NEXT:    shl.b16 %rs4, %rs1, 5;
+; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
+; CHECK-NEXT:    shr.u16 %rs3, %rs2, 5;
+; CHECK-NEXT:    shr.u16 %rs4, %rs1, 5;
 ; CHECK-NEXT:    mov.b32 %r4, {%rs4, %rs3};
-; CHECK-NEXT:    or.b32 %r5, %r1, %r4;
-; CHECK-NEXT:    mov.b32 {%rs5, %rs6}, %r5;
-; CHECK-NEXT:    shr.u16 %rs7, %rs6, 5;
-; CHECK-NEXT:    shr.u16 %rs8, %rs5, 5;
-; CHECK-NEXT:    mov.b32 %r6, {%rs8, %rs7};
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT:    or.b32 %r5, %r4, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NEXT:    ret;
   %ext = zext <2 x i8> %y to <2 x i16>
   %shl = shl <2 x i16> %ext, splat(i16 5)

@apeskov
Copy link
Contributor Author

apeskov commented May 15, 2025

@RKSimon This is a reapply of #138290.

@RKSimon RKSimon self-requested a review May 15, 2025 10:33
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@RKSimon RKSimon merged commit 2bc9f43 into llvm:main May 15, 2025
14 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants