Skip to content

Commit 4d28d3f

Browse files
authored
[SDAG] Turn umin into smin if the saturation pattern is broken (#88505)
Because the middle-end canonicalizes smin with non-negative operands into umin, the saturation pattern is broken. This patch reverts that transform in DAGCombine to fix the regression on ARM. Fixes #85706.
1 parent 64dc558 commit 4d28d3f

File tree

3 files changed

+43
-51
lines changed

3 files changed

+43
-51
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5577,9 +5577,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
55775577
return RMINMAX;
55785578

55795579
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5580-
// Only do this if the current op isn't legal and the flipped is.
5581-
if (!TLI.isOperationLegal(Opcode, VT) &&
5582-
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5580+
// Only do this if:
5581+
// 1. The current op isn't legal and the flipped is.
5582+
// 2. The saturation pattern is broken by canonicalization in InstCombine.
5583+
bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5584+
bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5585+
if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
55835586
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
55845587
unsigned AltOpcode;
55855588
switch (Opcode) {
@@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
55895592
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
55905593
default: llvm_unreachable("Unknown MINMAX opcode");
55915594
}
5592-
if (TLI.isOperationLegal(AltOpcode, VT))
5595+
if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
55935596
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
55945597
}
55955598

llvm/test/CodeGen/AMDGPU/umed3.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %ou
4343
}
4444

4545
; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32:
46-
; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
47-
; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
46+
; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
4847
define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
4948
%tid = call i32 @llvm.amdgcn.workitem.id.x()
5049
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid

llvm/test/CodeGen/ARM/usat.ll

Lines changed: 35 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
756756
; V4T-NEXT: bic r1, r0, r0, asr #31
757757
; V4T-NEXT: ldr r0, .LCPI20_0
758758
; V4T-NEXT: cmp r1, r0
759-
; V4T-NEXT: movlo r0, r1
759+
; V4T-NEXT: movlt r0, r1
760760
; V4T-NEXT: bx lr
761761
; V4T-NEXT: .p2align 2
762762
; V4T-NEXT: @ %bb.1:
@@ -765,23 +765,12 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
765765
;
766766
; V6-LABEL: mm_unsigned_sat_upper_lower_1:
767767
; V6: @ %bb.0: @ %entry
768-
; V6-NEXT: bic r1, r0, r0, asr #31
769-
; V6-NEXT: ldr r0, .LCPI20_0
770-
; V6-NEXT: cmp r1, r0
771-
; V6-NEXT: movlo r0, r1
768+
; V6-NEXT: usat r0, #23, r0
772769
; V6-NEXT: bx lr
773-
; V6-NEXT: .p2align 2
774-
; V6-NEXT: @ %bb.1:
775-
; V6-NEXT: .LCPI20_0:
776-
; V6-NEXT: .long 8388607 @ 0x7fffff
777770
;
778771
; V6T2-LABEL: mm_unsigned_sat_upper_lower_1:
779772
; V6T2: @ %bb.0: @ %entry
780-
; V6T2-NEXT: bic r1, r0, r0, asr #31
781-
; V6T2-NEXT: movw r0, #65535
782-
; V6T2-NEXT: movt r0, #127
783-
; V6T2-NEXT: cmp r1, r0
784-
; V6T2-NEXT: movlo r0, r1
773+
; V6T2-NEXT: usat r0, #23, r0
785774
; V6T2-NEXT: bx lr
786775
entry:
787776
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -795,7 +784,7 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
795784
; V4T-NEXT: bic r1, r0, r0, asr #31
796785
; V4T-NEXT: ldr r0, .LCPI21_0
797786
; V4T-NEXT: cmp r1, r0
798-
; V4T-NEXT: movlo r0, r1
787+
; V4T-NEXT: movlt r0, r1
799788
; V4T-NEXT: bx lr
800789
; V4T-NEXT: .p2align 2
801790
; V4T-NEXT: @ %bb.1:
@@ -804,23 +793,12 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
804793
;
805794
; V6-LABEL: mm_unsigned_sat_upper_lower_2:
806795
; V6: @ %bb.0: @ %entry
807-
; V6-NEXT: bic r1, r0, r0, asr #31
808-
; V6-NEXT: ldr r0, .LCPI21_0
809-
; V6-NEXT: cmp r1, r0
810-
; V6-NEXT: movlo r0, r1
796+
; V6-NEXT: usat r0, #23, r0
811797
; V6-NEXT: bx lr
812-
; V6-NEXT: .p2align 2
813-
; V6-NEXT: @ %bb.1:
814-
; V6-NEXT: .LCPI21_0:
815-
; V6-NEXT: .long 8388607 @ 0x7fffff
816798
;
817799
; V6T2-LABEL: mm_unsigned_sat_upper_lower_2:
818800
; V6T2: @ %bb.0: @ %entry
819-
; V6T2-NEXT: bic r1, r0, r0, asr #31
820-
; V6T2-NEXT: movw r0, #65535
821-
; V6T2-NEXT: movt r0, #127
822-
; V6T2-NEXT: cmp r1, r0
823-
; V6T2-NEXT: movlo r0, r1
801+
; V6T2-NEXT: usat r0, #23, r0
824802
; V6T2-NEXT: bx lr
825803
entry:
826804
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -834,7 +812,7 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
834812
; V4T-NEXT: bic r1, r0, r0, asr #31
835813
; V4T-NEXT: ldr r0, .LCPI22_0
836814
; V4T-NEXT: cmp r1, r0
837-
; V4T-NEXT: movlo r0, r1
815+
; V4T-NEXT: movlt r0, r1
838816
; V4T-NEXT: bx lr
839817
; V4T-NEXT: .p2align 2
840818
; V4T-NEXT: @ %bb.1:
@@ -843,23 +821,12 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
843821
;
844822
; V6-LABEL: mm_unsigned_sat_upper_lower_3:
845823
; V6: @ %bb.0: @ %entry
846-
; V6-NEXT: bic r1, r0, r0, asr #31
847-
; V6-NEXT: ldr r0, .LCPI22_0
848-
; V6-NEXT: cmp r1, r0
849-
; V6-NEXT: movlo r0, r1
824+
; V6-NEXT: usat r0, #23, r0
850825
; V6-NEXT: bx lr
851-
; V6-NEXT: .p2align 2
852-
; V6-NEXT: @ %bb.1:
853-
; V6-NEXT: .LCPI22_0:
854-
; V6-NEXT: .long 8388607 @ 0x7fffff
855826
;
856827
; V6T2-LABEL: mm_unsigned_sat_upper_lower_3:
857828
; V6T2: @ %bb.0: @ %entry
858-
; V6T2-NEXT: bic r1, r0, r0, asr #31
859-
; V6T2-NEXT: movw r0, #65535
860-
; V6T2-NEXT: movt r0, #127
861-
; V6T2-NEXT: cmp r1, r0
862-
; V6T2-NEXT: movlo r0, r1
829+
; V6T2-NEXT: usat r0, #23, r0
863830
; V6T2-NEXT: bx lr
864831
entry:
865832
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -913,7 +880,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
913880
; V4T-NEXT: mov r0, #1
914881
; V4T-NEXT: orr r0, r0, #8388608
915882
; V4T-NEXT: cmp r1, #8388608
916-
; V4T-NEXT: movls r0, r1
883+
; V4T-NEXT: movle r0, r1
917884
; V4T-NEXT: bx lr
918885
;
919886
; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2:
@@ -922,7 +889,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
922889
; V6-NEXT: mov r0, #1
923890
; V6-NEXT: orr r0, r0, #8388608
924891
; V6-NEXT: cmp r1, #8388608
925-
; V6-NEXT: movls r0, r1
892+
; V6-NEXT: movle r0, r1
926893
; V6-NEXT: bx lr
927894
;
928895
; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2:
@@ -931,7 +898,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
931898
; V6T2-NEXT: movw r0, #1
932899
; V6T2-NEXT: movt r0, #128
933900
; V6T2-NEXT: cmp r1, #8388608
934-
; V6T2-NEXT: movls r0, r1
901+
; V6T2-NEXT: movle r0, r1
935902
; V6T2-NEXT: bx lr
936903
entry:
937904
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -981,6 +948,29 @@ entry:
981948
ret i32 %1
982949
}
983950

951+
define i32 @test_umin_smax_usat(i32 %x) {
952+
; V4T-LABEL: test_umin_smax_usat:
953+
; V4T: @ %bb.0: @ %entry
954+
; V4T-NEXT: bic r0, r0, r0, asr #31
955+
; V4T-NEXT: cmp r0, #255
956+
; V4T-NEXT: movge r0, #255
957+
; V4T-NEXT: bx lr
958+
;
959+
; V6-LABEL: test_umin_smax_usat:
960+
; V6: @ %bb.0: @ %entry
961+
; V6-NEXT: usat r0, #8, r0
962+
; V6-NEXT: bx lr
963+
;
964+
; V6T2-LABEL: test_umin_smax_usat:
965+
; V6T2: @ %bb.0: @ %entry
966+
; V6T2-NEXT: usat r0, #8, r0
967+
; V6T2-NEXT: bx lr
968+
entry:
969+
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
970+
%v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255)
971+
ret i32 %v2
972+
}
973+
984974
declare i32 @llvm.smin.i32(i32, i32)
985975
declare i32 @llvm.smax.i32(i32, i32)
986976
declare i16 @llvm.smin.i16(i16, i16)

0 commit comments

Comments
 (0)