Skip to content

[SDAG] Turn umin into smin if the saturation pattern is broken #88505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 15, 2024

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Apr 12, 2024

As we canonicalizes smin with non-negative operands into umin in the middle-end, the saturation pattern will be broken.
This patch reverts the transform in DAGCombine to fix the regression on ARM.

Fixes #85706.

@llvmbot
Copy link
Member

llvmbot commented Apr 12, 2024

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-llvm-selectiondag

Author: Yingwei Zheng (dtcxzyw)

Changes

As we canonicalizes smin with non-negative operands into umin in the middle-end, the saturation pattern will be broken.
This patch reverts the transform in DAGCombine to fix the regression on ARM.

Fixes #85706.


Full diff: https://github.com/llvm/llvm-project/pull/88505.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6-3)
  • (modified) llvm/test/CodeGen/ARM/usat.ll (+35-45)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 24f50b87c4cf2f..2f77ad7fea5f84 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5577,8 +5577,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     return RMINMAX;
 
   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
-  // Only do this if the current op isn't legal and the flipped is.
-  if (!TLI.isOperationLegal(Opcode, VT) &&
+  // Only do this if:
+  // 1. The current op isn't legal and the flipped is.
+  // 2. The saturation pattern is broken by canonicalization in InstCombine.
+  bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
+  if ((IsSatBroken || !TLI.isOperationLegal(Opcode, VT)) &&
       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
     unsigned AltOpcode;
@@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
     default: llvm_unreachable("Unknown MINMAX opcode");
     }
-    if (TLI.isOperationLegal(AltOpcode, VT))
+    if (IsSatBroken || TLI.isOperationLegal(AltOpcode, VT))
       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
   }
 
diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
index 024a98dd29346b..d01aa1520b326b 100644
--- a/llvm/test/CodeGen/ARM/usat.ll
+++ b/llvm/test/CodeGen/ARM/usat.ll
@@ -756,7 +756,7 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
 ; V4T-NEXT:    bic r1, r0, r0, asr #31
 ; V4T-NEXT:    ldr r0, .LCPI20_0
 ; V4T-NEXT:    cmp r1, r0
-; V4T-NEXT:    movlo r0, r1
+; V4T-NEXT:    movlt r0, r1
 ; V4T-NEXT:    bx lr
 ; V4T-NEXT:    .p2align 2
 ; V4T-NEXT:  @ %bb.1:
@@ -765,23 +765,12 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
 ;
 ; V6-LABEL: mm_unsigned_sat_upper_lower_1:
 ; V6:       @ %bb.0: @ %entry
-; V6-NEXT:    bic r1, r0, r0, asr #31
-; V6-NEXT:    ldr r0, .LCPI20_0
-; V6-NEXT:    cmp r1, r0
-; V6-NEXT:    movlo r0, r1
+; V6-NEXT:    usat r0, #23, r0
 ; V6-NEXT:    bx lr
-; V6-NEXT:    .p2align 2
-; V6-NEXT:  @ %bb.1:
-; V6-NEXT:  .LCPI20_0:
-; V6-NEXT:    .long 8388607 @ 0x7fffff
 ;
 ; V6T2-LABEL: mm_unsigned_sat_upper_lower_1:
 ; V6T2:       @ %bb.0: @ %entry
-; V6T2-NEXT:    bic r1, r0, r0, asr #31
-; V6T2-NEXT:    movw r0, #65535
-; V6T2-NEXT:    movt r0, #127
-; V6T2-NEXT:    cmp r1, r0
-; V6T2-NEXT:    movlo r0, r1
+; V6T2-NEXT:    usat r0, #23, r0
 ; V6T2-NEXT:    bx lr
 entry:
   %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -795,7 +784,7 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
 ; V4T-NEXT:    bic r1, r0, r0, asr #31
 ; V4T-NEXT:    ldr r0, .LCPI21_0
 ; V4T-NEXT:    cmp r1, r0
-; V4T-NEXT:    movlo r0, r1
+; V4T-NEXT:    movlt r0, r1
 ; V4T-NEXT:    bx lr
 ; V4T-NEXT:    .p2align 2
 ; V4T-NEXT:  @ %bb.1:
@@ -804,23 +793,12 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
 ;
 ; V6-LABEL: mm_unsigned_sat_upper_lower_2:
 ; V6:       @ %bb.0: @ %entry
-; V6-NEXT:    bic r1, r0, r0, asr #31
-; V6-NEXT:    ldr r0, .LCPI21_0
-; V6-NEXT:    cmp r1, r0
-; V6-NEXT:    movlo r0, r1
+; V6-NEXT:    usat r0, #23, r0
 ; V6-NEXT:    bx lr
-; V6-NEXT:    .p2align 2
-; V6-NEXT:  @ %bb.1:
-; V6-NEXT:  .LCPI21_0:
-; V6-NEXT:    .long 8388607 @ 0x7fffff
 ;
 ; V6T2-LABEL: mm_unsigned_sat_upper_lower_2:
 ; V6T2:       @ %bb.0: @ %entry
-; V6T2-NEXT:    bic r1, r0, r0, asr #31
-; V6T2-NEXT:    movw r0, #65535
-; V6T2-NEXT:    movt r0, #127
-; V6T2-NEXT:    cmp r1, r0
-; V6T2-NEXT:    movlo r0, r1
+; V6T2-NEXT:    usat r0, #23, r0
 ; V6T2-NEXT:    bx lr
 entry:
   %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -834,7 +812,7 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
 ; V4T-NEXT:    bic r1, r0, r0, asr #31
 ; V4T-NEXT:    ldr r0, .LCPI22_0
 ; V4T-NEXT:    cmp r1, r0
-; V4T-NEXT:    movlo r0, r1
+; V4T-NEXT:    movlt r0, r1
 ; V4T-NEXT:    bx lr
 ; V4T-NEXT:    .p2align 2
 ; V4T-NEXT:  @ %bb.1:
@@ -843,23 +821,12 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
 ;
 ; V6-LABEL: mm_unsigned_sat_upper_lower_3:
 ; V6:       @ %bb.0: @ %entry
-; V6-NEXT:    bic r1, r0, r0, asr #31
-; V6-NEXT:    ldr r0, .LCPI22_0
-; V6-NEXT:    cmp r1, r0
-; V6-NEXT:    movlo r0, r1
+; V6-NEXT:    usat r0, #23, r0
 ; V6-NEXT:    bx lr
-; V6-NEXT:    .p2align 2
-; V6-NEXT:  @ %bb.1:
-; V6-NEXT:  .LCPI22_0:
-; V6-NEXT:    .long 8388607 @ 0x7fffff
 ;
 ; V6T2-LABEL: mm_unsigned_sat_upper_lower_3:
 ; V6T2:       @ %bb.0: @ %entry
-; V6T2-NEXT:    bic r1, r0, r0, asr #31
-; V6T2-NEXT:    movw r0, #65535
-; V6T2-NEXT:    movt r0, #127
-; V6T2-NEXT:    cmp r1, r0
-; V6T2-NEXT:    movlo r0, r1
+; V6T2-NEXT:    usat r0, #23, r0
 ; V6T2-NEXT:    bx lr
 entry:
   %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -913,7 +880,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
 ; V4T-NEXT:    mov r0, #1
 ; V4T-NEXT:    orr r0, r0, #8388608
 ; V4T-NEXT:    cmp r1, #8388608
-; V4T-NEXT:    movls r0, r1
+; V4T-NEXT:    movle r0, r1
 ; V4T-NEXT:    bx lr
 ;
 ; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2:
@@ -922,7 +889,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
 ; V6-NEXT:    mov r0, #1
 ; V6-NEXT:    orr r0, r0, #8388608
 ; V6-NEXT:    cmp r1, #8388608
-; V6-NEXT:    movls r0, r1
+; V6-NEXT:    movle r0, r1
 ; V6-NEXT:    bx lr
 ;
 ; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2:
@@ -931,7 +898,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
 ; V6T2-NEXT:    movw r0, #1
 ; V6T2-NEXT:    movt r0, #128
 ; V6T2-NEXT:    cmp r1, #8388608
-; V6T2-NEXT:    movls r0, r1
+; V6T2-NEXT:    movle r0, r1
 ; V6T2-NEXT:    bx lr
 entry:
   %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -981,6 +948,29 @@ entry:
   ret i32 %1
 }
 
+define i32 @test_umin_smax_usat(i32 %x) {
+; V4T-LABEL: test_umin_smax_usat:
+; V4T:       @ %bb.0: @ %entry
+; V4T-NEXT:    bic r0, r0, r0, asr #31
+; V4T-NEXT:    cmp r0, #255
+; V4T-NEXT:    movge r0, #255
+; V4T-NEXT:    bx lr
+;
+; V6-LABEL: test_umin_smax_usat:
+; V6:       @ %bb.0: @ %entry
+; V6-NEXT:    usat r0, #8, r0
+; V6-NEXT:    bx lr
+;
+; V6T2-LABEL: test_umin_smax_usat:
+; V6T2:       @ %bb.0: @ %entry
+; V6T2-NEXT:    usat r0, #8, r0
+; V6T2-NEXT:    bx lr
+entry:
+  %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255)
+  ret i32 %v2
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i16 @llvm.smin.i16(i16, i16)

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Apr 12, 2024

Failed Tests (1):
LLVM :: CodeGen/AMDGPU/umed3.ll

@@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
if (IsSatBroken || TLI.isOperationLegal(AltOpcode, VT))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we limit this to (!LegalOperations && IsSatBroken) || TLI.isOperationLegal(AltOpcode, VT)?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What will happen after legalization on targets which don't use the SAT pattern and ISD::SMIN isn't a legal operation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On risc-v without zbb:

define signext i32 @test_umin_smax_usat(i32 signext %x) {
entry:
  %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
  %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255)
  ret i32 %v2
}
; Before
# %bb.0:                                # %entry
        sgtz    a1, a0
        neg     a1, a1
        and     a0, a1, a0
        li      a1, 255
        bltu    a0, a1, .LBB0_2
# %bb.1:                                # %entry
        li      a0, 255
.LBB0_2:                                # %entry
        ret
; After
# %bb.0:                                # %entry
        sgtz    a1, a0
        neg     a1, a1
        and     a0, a1, a0
        li      a1, 255
        blt     a0, a1, .LBB0_2
# %bb.1:                                # %entry
        li      a0, 255
.LBB0_2:                                # %entry
        ret

It just turns an illegal umin into an illegal smin.

@dtcxzyw dtcxzyw requested a review from RKSimon April 12, 2024 17:29
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Apr 15, 2024

@UsmanNadeem Any comments?

Copy link
Contributor

@UsmanNadeem UsmanNadeem left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

@dtcxzyw dtcxzyw merged commit 4d28d3f into llvm:main Apr 15, 2024
@dtcxzyw dtcxzyw deleted the fix-pr85706 branch April 15, 2024 17:28
aniplcc pushed a commit to aniplcc/llvm-project that referenced this pull request Apr 15, 2024
…88505)

As we canonicalizes smin with non-negative operands into umin in the
middle-end, the saturation pattern will be broken.
This patch reverts the transform in DAGCombine to fix the regression on
ARM.

Fixes llvm#85706.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

ARM USAT instruction not generated after #82478
5 participants