Skip to content

[LegalizeTypes][RISCV][LoongArch] Optimize promotion of ucmp. #101366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
topperc merged 1 commit into llvm:main from pr/ucmp
Aug 1, 2024

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Jul 31, 2024

ucmp can be promoted with either sext or zext. RISC-V and LoongArch prefer sext for promoting i32 to i64 unless the inputs are known to be zero extended already.

This patch reuses the existing SExtOrZExtPromotedOperands function, which is already used by SETCC promotion, to choose between sign and zero extension for the ucmp operands instead of always zero-extending them.

ucmp can be promoted with either sext or zext. RISC-V prefers sext
for promoting i32 to i64 unless the inputs are known to be zero
extended already.

This patch uses the existing SExtOrZExtPromotedOperands function
that is used by SETCC promotion to intelligently handle this.
@llvmbot
Copy link
Member

llvmbot commented Jul 31, 2024

@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-backend-loongarch

Author: Craig Topper (topperc)

Changes

ucmp can be promoted with either sext or zext. RISC-V and LoongArch prefer sext for promoting i32 to i64 unless the inputs are known to be zero extended already.

This patch uses the existing SExtOrZExtPromotedOperands function that is used by SETCC promotion to intelligently handle this.


Full diff: https://github.com/llvm/llvm-project/pull/101366.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+9-6)
  • (modified) llvm/test/CodeGen/LoongArch/ucmp.ll (+4-4)
  • (modified) llvm/test/CodeGen/RISCV/ucmp.ll (+46-14)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 33a53dfc81379..b1ada66aa9aeb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2294,12 +2294,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_CMP(SDNode *N) {
-  SDValue LHS = N->getOpcode() == ISD::UCMP
-                    ? ZExtPromotedInteger(N->getOperand(0))
-                    : SExtPromotedInteger(N->getOperand(0));
-  SDValue RHS = N->getOpcode() == ISD::UCMP
-                    ? ZExtPromotedInteger(N->getOperand(1))
-                    : SExtPromotedInteger(N->getOperand(1));
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  if (N->getOpcode() == ISD::SCMP) {
+    LHS = SExtPromotedInteger(LHS);
+    RHS = SExtPromotedInteger(RHS);
+  } else {
+    SExtOrZExtPromotedOperands(LHS, RHS);
+  }
 
   return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS), 0);
 }
diff --git a/llvm/test/CodeGen/LoongArch/ucmp.ll b/llvm/test/CodeGen/LoongArch/ucmp.ll
index 548c5bd0db72b..b91d3bf15d812 100644
--- a/llvm/test/CodeGen/LoongArch/ucmp.ll
+++ b/llvm/test/CodeGen/LoongArch/ucmp.ll
@@ -26,8 +26,8 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
 define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ucmp.8.32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.w $a1, $a1, 0
+; CHECK-NEXT:    addi.w $a0, $a0, 0
 ; CHECK-NEXT:    sltu $a2, $a0, $a1
 ; CHECK-NEXT:    sltu $a0, $a1, $a0
 ; CHECK-NEXT:    sub.d $a0, $a0, $a2
@@ -71,8 +71,8 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
 define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ucmp.32.32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.w $a1, $a1, 0
+; CHECK-NEXT:    addi.w $a0, $a0, 0
 ; CHECK-NEXT:    sltu $a2, $a0, $a1
 ; CHECK-NEXT:    sltu $a0, $a1, $a0
 ; CHECK-NEXT:    sub.d $a0, $a0, $a2
diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll
index 026340ede1f90..c74bc6838ff7d 100644
--- a/llvm/test/CodeGen/RISCV/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/ucmp.ll
@@ -48,10 +48,8 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
 ;
 ; RV64I-LABEL: ucmp.8.32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    sltu a2, a0, a1
 ; RV64I-NEXT:    sltu a0, a1, a0
 ; RV64I-NEXT:    sub a0, a0, a2
@@ -164,10 +162,44 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
 ;
 ; RV64I-LABEL: ucmp.32.32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a2, a0, a1
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    ret
+  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+  ret i32 %1
+}
+
+define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
+; RV32I-LABEL: ucmp.32.32_sext:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ucmp.32.32_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltu a2, a0, a1
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    ret
+  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+  ret i32 %1
+}
+
+define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind {
+; RV32I-LABEL: ucmp.32.32_zext:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ucmp.32.32_zext:
+; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a2, a0, a1
 ; RV64I-NEXT:    sltu a0, a1, a0
 ; RV64I-NEXT:    sub a0, a0, a2
@@ -179,13 +211,13 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
 define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: ucmp.32.64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beq a1, a3, .LBB6_2
+; RV32I-NEXT:    beq a1, a3, .LBB8_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a4, a1, a3
 ; RV32I-NEXT:    sltu a0, a3, a1
 ; RV32I-NEXT:    sub a0, a0, a4
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB6_2:
+; RV32I-NEXT:  .LBB8_2:
 ; RV32I-NEXT:    sltu a4, a0, a2
 ; RV32I-NEXT:    sltu a0, a2, a0
 ; RV32I-NEXT:    sub a0, a0, a4
@@ -204,15 +236,15 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
 define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: ucmp.64.64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beq a1, a3, .LBB7_2
+; RV32I-NEXT:    beq a1, a3, .LBB9_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a4, a1, a3
 ; RV32I-NEXT:    sltu a0, a3, a1
-; RV32I-NEXT:    j .LBB7_3
-; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    j .LBB9_3
+; RV32I-NEXT:  .LBB9_2:
 ; RV32I-NEXT:    sltu a4, a0, a2
 ; RV32I-NEXT:    sltu a0, a2, a0
-; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:  .LBB9_3:
 ; RV32I-NEXT:    sub a0, a0, a4
 ; RV32I-NEXT:    srai a1, a0, 31
 ; RV32I-NEXT:    ret

@llvmbot
Copy link
Member

llvmbot commented Jul 31, 2024

@llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

Changes

ucmp can be promoted with either sext or zext. RISC-V and LoongArch prefer sext for promoting i32 to i64 unless the inputs are known to be zero extended already.

This patch uses the existing SExtOrZExtPromotedOperands function that is used by SETCC promotion to intelligently handle this.


Full diff: https://github.com/llvm/llvm-project/pull/101366.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+9-6)
  • (modified) llvm/test/CodeGen/LoongArch/ucmp.ll (+4-4)
  • (modified) llvm/test/CodeGen/RISCV/ucmp.ll (+46-14)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 33a53dfc81379..b1ada66aa9aeb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2294,12 +2294,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_CMP(SDNode *N) {
-  SDValue LHS = N->getOpcode() == ISD::UCMP
-                    ? ZExtPromotedInteger(N->getOperand(0))
-                    : SExtPromotedInteger(N->getOperand(0));
-  SDValue RHS = N->getOpcode() == ISD::UCMP
-                    ? ZExtPromotedInteger(N->getOperand(1))
-                    : SExtPromotedInteger(N->getOperand(1));
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  if (N->getOpcode() == ISD::SCMP) {
+    LHS = SExtPromotedInteger(LHS);
+    RHS = SExtPromotedInteger(RHS);
+  } else {
+    SExtOrZExtPromotedOperands(LHS, RHS);
+  }
 
   return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS), 0);
 }
diff --git a/llvm/test/CodeGen/LoongArch/ucmp.ll b/llvm/test/CodeGen/LoongArch/ucmp.ll
index 548c5bd0db72b..b91d3bf15d812 100644
--- a/llvm/test/CodeGen/LoongArch/ucmp.ll
+++ b/llvm/test/CodeGen/LoongArch/ucmp.ll
@@ -26,8 +26,8 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
 define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ucmp.8.32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.w $a1, $a1, 0
+; CHECK-NEXT:    addi.w $a0, $a0, 0
 ; CHECK-NEXT:    sltu $a2, $a0, $a1
 ; CHECK-NEXT:    sltu $a0, $a1, $a0
 ; CHECK-NEXT:    sub.d $a0, $a0, $a2
@@ -71,8 +71,8 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
 define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ucmp.32.32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a1, $a1, 31, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.w $a1, $a1, 0
+; CHECK-NEXT:    addi.w $a0, $a0, 0
 ; CHECK-NEXT:    sltu $a2, $a0, $a1
 ; CHECK-NEXT:    sltu $a0, $a1, $a0
 ; CHECK-NEXT:    sub.d $a0, $a0, $a2
diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll
index 026340ede1f90..c74bc6838ff7d 100644
--- a/llvm/test/CodeGen/RISCV/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/ucmp.ll
@@ -48,10 +48,8 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
 ;
 ; RV64I-LABEL: ucmp.8.32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    sltu a2, a0, a1
 ; RV64I-NEXT:    sltu a0, a1, a0
 ; RV64I-NEXT:    sub a0, a0, a2
@@ -164,10 +162,44 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
 ;
 ; RV64I-LABEL: ucmp.32.32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    sext.w a1, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a2, a0, a1
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    ret
+  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+  ret i32 %1
+}
+
+define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
+; RV32I-LABEL: ucmp.32.32_sext:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ucmp.32.32_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sltu a2, a0, a1
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    sub a0, a0, a2
+; RV64I-NEXT:    ret
+  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+  ret i32 %1
+}
+
+define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind {
+; RV32I-LABEL: ucmp.32.32_zext:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sltu a2, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ucmp.32.32_zext:
+; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a2, a0, a1
 ; RV64I-NEXT:    sltu a0, a1, a0
 ; RV64I-NEXT:    sub a0, a0, a2
@@ -179,13 +211,13 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
 define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: ucmp.32.64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beq a1, a3, .LBB6_2
+; RV32I-NEXT:    beq a1, a3, .LBB8_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a4, a1, a3
 ; RV32I-NEXT:    sltu a0, a3, a1
 ; RV32I-NEXT:    sub a0, a0, a4
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB6_2:
+; RV32I-NEXT:  .LBB8_2:
 ; RV32I-NEXT:    sltu a4, a0, a2
 ; RV32I-NEXT:    sltu a0, a2, a0
 ; RV32I-NEXT:    sub a0, a0, a4
@@ -204,15 +236,15 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
 define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: ucmp.64.64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beq a1, a3, .LBB7_2
+; RV32I-NEXT:    beq a1, a3, .LBB9_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a4, a1, a3
 ; RV32I-NEXT:    sltu a0, a3, a1
-; RV32I-NEXT:    j .LBB7_3
-; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    j .LBB9_3
+; RV32I-NEXT:  .LBB9_2:
 ; RV32I-NEXT:    sltu a4, a0, a2
 ; RV32I-NEXT:    sltu a0, a2, a0
-; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:  .LBB9_3:
 ; RV32I-NEXT:    sub a0, a0, a4
 ; RV32I-NEXT:    srai a1, a0, 31
 ; RV32I-NEXT:    ret

@topperc topperc requested a review from arsenm July 31, 2024 18:02
@nikic
Copy link
Contributor

nikic commented Jul 31, 2024

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@topperc topperc merged commit 307d124 into llvm:main Aug 1, 2024
11 checks passed
@topperc topperc deleted the pr/ucmp branch August 1, 2024 00:18
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants