-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SDAG] Turn umin into smin if the saturation pattern is broken #88505
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-selectiondag Author: Yingwei Zheng (dtcxzyw) ChangesAs we canonicalizes smin with non-negative operands into umin in the middle-end, the saturation pattern will be broken. Fixes #85706. Full diff: https://github.com/llvm/llvm-project/pull/88505.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 24f50b87c4cf2f..2f77ad7fea5f84 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5577,8 +5577,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return RMINMAX;
// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
- // Only do this if the current op isn't legal and the flipped is.
- if (!TLI.isOperationLegal(Opcode, VT) &&
+ // Only do this if:
+ // 1. The current op isn't legal and the flipped is.
+ // 2. The saturation pattern is broken by canonicalization in InstCombine.
+ bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
+ if ((IsSatBroken || !TLI.isOperationLegal(Opcode, VT)) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
unsigned AltOpcode;
@@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
- if (TLI.isOperationLegal(AltOpcode, VT))
+ if (IsSatBroken || TLI.isOperationLegal(AltOpcode, VT))
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
index 024a98dd29346b..d01aa1520b326b 100644
--- a/llvm/test/CodeGen/ARM/usat.ll
+++ b/llvm/test/CodeGen/ARM/usat.ll
@@ -756,7 +756,7 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI20_0
; V4T-NEXT: cmp r1, r0
-; V4T-NEXT: movlo r0, r1
+; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
@@ -765,23 +765,12 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_upper_lower_1:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: bic r1, r0, r0, asr #31
-; V6-NEXT: ldr r0, .LCPI20_0
-; V6-NEXT: cmp r1, r0
-; V6-NEXT: movlo r0, r1
+; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
-; V6-NEXT: .p2align 2
-; V6-NEXT: @ %bb.1:
-; V6-NEXT: .LCPI20_0:
-; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: bic r1, r0, r0, asr #31
-; V6T2-NEXT: movw r0, #65535
-; V6T2-NEXT: movt r0, #127
-; V6T2-NEXT: cmp r1, r0
-; V6T2-NEXT: movlo r0, r1
+; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -795,7 +784,7 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI21_0
; V4T-NEXT: cmp r1, r0
-; V4T-NEXT: movlo r0, r1
+; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
@@ -804,23 +793,12 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_upper_lower_2:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: bic r1, r0, r0, asr #31
-; V6-NEXT: ldr r0, .LCPI21_0
-; V6-NEXT: cmp r1, r0
-; V6-NEXT: movlo r0, r1
+; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
-; V6-NEXT: .p2align 2
-; V6-NEXT: @ %bb.1:
-; V6-NEXT: .LCPI21_0:
-; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: bic r1, r0, r0, asr #31
-; V6T2-NEXT: movw r0, #65535
-; V6T2-NEXT: movt r0, #127
-; V6T2-NEXT: cmp r1, r0
-; V6T2-NEXT: movlo r0, r1
+; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -834,7 +812,7 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI22_0
; V4T-NEXT: cmp r1, r0
-; V4T-NEXT: movlo r0, r1
+; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
@@ -843,23 +821,12 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_upper_lower_3:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: bic r1, r0, r0, asr #31
-; V6-NEXT: ldr r0, .LCPI22_0
-; V6-NEXT: cmp r1, r0
-; V6-NEXT: movlo r0, r1
+; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
-; V6-NEXT: .p2align 2
-; V6-NEXT: @ %bb.1:
-; V6-NEXT: .LCPI22_0:
-; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: bic r1, r0, r0, asr #31
-; V6T2-NEXT: movw r0, #65535
-; V6T2-NEXT: movt r0, #127
-; V6T2-NEXT: cmp r1, r0
-; V6T2-NEXT: movlo r0, r1
+; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -913,7 +880,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V4T-NEXT: mov r0, #1
; V4T-NEXT: orr r0, r0, #8388608
; V4T-NEXT: cmp r1, #8388608
-; V4T-NEXT: movls r0, r1
+; V4T-NEXT: movle r0, r1
; V4T-NEXT: bx lr
;
; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2:
@@ -922,7 +889,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V6-NEXT: mov r0, #1
; V6-NEXT: orr r0, r0, #8388608
; V6-NEXT: cmp r1, #8388608
-; V6-NEXT: movls r0, r1
+; V6-NEXT: movle r0, r1
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2:
@@ -931,7 +898,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V6T2-NEXT: movw r0, #1
; V6T2-NEXT: movt r0, #128
; V6T2-NEXT: cmp r1, #8388608
-; V6T2-NEXT: movls r0, r1
+; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -981,6 +948,29 @@ entry:
ret i32 %1
}
+define i32 @test_umin_smax_usat(i32 %x) {
+; V4T-LABEL: test_umin_smax_usat:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: bic r0, r0, r0, asr #31
+; V4T-NEXT: cmp r0, #255
+; V4T-NEXT: movge r0, #255
+; V4T-NEXT: bx lr
+;
+; V6-LABEL: test_umin_smax_usat:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: usat r0, #8, r0
+; V6-NEXT: bx lr
+;
+; V6T2-LABEL: test_umin_smax_usat:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: usat r0, #8, r0
+; V6T2-NEXT: bx lr
+entry:
+ %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255)
+ ret i32 %v2
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i16 @llvm.smin.i16(i16, i16)
|
|
@@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { | |||
case ISD::UMAX: AltOpcode = ISD::SMAX; break; | |||
default: llvm_unreachable("Unknown MINMAX opcode"); | |||
} | |||
if (TLI.isOperationLegal(AltOpcode, VT)) | |||
if (IsSatBroken || TLI.isOperationLegal(AltOpcode, VT)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we limit this to (!LegalOperations && IsSatBroken) || TLI.isOperationLegal(AltOpcode, VT)
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What will happen after legalization on targets which don't use the SAT pattern and ISD::SMIN isn't a legal operation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On risc-v without zbb:
define signext i32 @test_umin_smax_usat(i32 signext %x) {
entry:
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
%v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255)
ret i32 %v2
}
; Before
# %bb.0: # %entry
sgtz a1, a0
neg a1, a1
and a0, a1, a0
li a1, 255
bltu a0, a1, .LBB0_2
# %bb.1: # %entry
li a0, 255
.LBB0_2: # %entry
ret
; After
# %bb.0: # %entry
sgtz a1, a0
neg a1, a1
and a0, a1, a0
li a1, 255
blt a0, a1, .LBB0_2
# %bb.1: # %entry
li a0, 255
.LBB0_2: # %entry
ret
It just turns an illegal umin into an illegal smin.
@UsmanNadeem Any comments? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
…88505) As we canonicalizes smin with non-negative operands into umin in the middle-end, the saturation pattern will be broken. This patch reverts the transform in DAGCombine to fix the regression on ARM. Fixes llvm#85706.
As we canonicalizes smin with non-negative operands into umin in the middle-end, the saturation pattern will be broken.
This patch reverts the transform in DAGCombine to fix the regression on ARM.
Fixes #85706.