-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAGCombiner] Fold subtraction if above a constant threshold to umin
#135194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e6f6d07
ac507f5
d92a09b
ee43e66
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -845,6 +845,13 @@ namespace { | |
return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations); | ||
} | ||
|
||
bool hasUMin(EVT VT) const { | ||
auto LK = TLI.getTypeConversion(*DAG.getContext(), VT); | ||
return (LK.first == TargetLoweringBase::TypeLegal || | ||
LK.first == TargetLoweringBase::TypePromoteInteger) && | ||
TLI.isOperationLegal(ISD::UMIN, LK.second); | ||
} | ||
|
||
public: | ||
/// Runs the dag combiner on all nodes in the work list | ||
void Run(CombineLevel AtLevel); | ||
|
@@ -4253,10 +4260,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { | |
|
||
// (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y)) | ||
// (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y)) | ||
auto LK = TLI.getTypeConversion(*DAG.getContext(), VT); | ||
if ((LK.first == TargetLoweringBase::TypeLegal || | ||
LK.first == TargetLoweringBase::TypePromoteInteger) && | ||
TLI.isOperationLegal(ISD::UMIN, LK.second)) { | ||
if (hasUMin(VT)) { | ||
SDValue Y; | ||
if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y), | ||
m_SpecificCondCode(ISD::SETULT)), | ||
|
@@ -12074,6 +12078,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { | |
|
||
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2)) | ||
return NewSel; | ||
|
||
// (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pfusik: What if the add results in poison? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you have a reproducer? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this shows the problem: https://alive2.llvm.org/ce/z/fmKmiZ One either need to freeze the UMIN operands, or make sure that N1 and N2 is guaranteed not to be poison when doing this transform. Maybe it is enough to freeze the ADD operand (as the comparison would be poison if x is poison)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks!
I'm new to alive2, but I think it shows it won't work. The transform relies on an unsigned overflow. I'll prepare a patch recreating the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
// (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) | ||
APInt C; | ||
if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) { | ||
if ((CC == ISD::SETUGT && Cond0 == N2 && | ||
sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) || | ||
(CC == ISD::SETULT && Cond0 == N1 && | ||
sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C))))) | ||
return DAG.getNode(ISD::UMIN, DL, VT, N1, N2); | ||
} | ||
} | ||
|
||
if (!VT.isVector()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1720,45 +1720,67 @@ define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) { | |
} | ||
|
||
define i8 @sub_if_uge_C_i8(i8 zeroext %x) { | ||
; CHECK-LABEL: sub_if_uge_C_i8: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: sltiu a1, a0, 13 | ||
; CHECK-NEXT: addi a1, a1, -1 | ||
; CHECK-NEXT: andi a1, a1, -13 | ||
; CHECK-NEXT: add a0, a0, a1 | ||
; CHECK-NEXT: ret | ||
; RV32I-LABEL: sub_if_uge_C_i8: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: sltiu a1, a0, 13 | ||
; RV32I-NEXT: addi a1, a1, -1 | ||
; RV32I-NEXT: andi a1, a1, -13 | ||
; RV32I-NEXT: add a0, a0, a1 | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_i8: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: addi a1, a0, -13 | ||
; RV32ZBB-NEXT: zext.b a1, a1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you'll probably need a new DAGCombine for it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's address this extra optimization opportunity for |
||
; RV32ZBB-NEXT: minu a0, a1, a0 | ||
; RV32ZBB-NEXT: ret | ||
%cmp = icmp ugt i8 %x, 12 | ||
%sub = add i8 %x, -13 | ||
%conv4 = select i1 %cmp, i8 %sub, i8 %x | ||
ret i8 %conv4 | ||
} | ||
|
||
define i16 @sub_if_uge_C_i16(i16 zeroext %x) { | ||
; CHECK-LABEL: sub_if_uge_C_i16: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: sltiu a1, a0, 251 | ||
; CHECK-NEXT: addi a1, a1, -1 | ||
; CHECK-NEXT: andi a1, a1, -251 | ||
; CHECK-NEXT: add a0, a0, a1 | ||
; CHECK-NEXT: ret | ||
; RV32I-LABEL: sub_if_uge_C_i16: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: sltiu a1, a0, 251 | ||
; RV32I-NEXT: addi a1, a1, -1 | ||
; RV32I-NEXT: andi a1, a1, -251 | ||
; RV32I-NEXT: add a0, a0, a1 | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_i16: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: addi a1, a0, -251 | ||
; RV32ZBB-NEXT: zext.h a1, a1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
; RV32ZBB-NEXT: minu a0, a1, a0 | ||
; RV32ZBB-NEXT: ret | ||
%cmp = icmp ugt i16 %x, 250 | ||
%sub = add i16 %x, -251 | ||
%conv4 = select i1 %cmp, i16 %sub, i16 %x | ||
ret i16 %conv4 | ||
} | ||
|
||
define i32 @sub_if_uge_C_i32(i32 signext %x) { | ||
; CHECK-LABEL: sub_if_uge_C_i32: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: lui a1, 16 | ||
; CHECK-NEXT: lui a2, 1048560 | ||
; CHECK-NEXT: addi a1, a1, -16 | ||
; CHECK-NEXT: sltu a1, a1, a0 | ||
; CHECK-NEXT: neg a1, a1 | ||
; CHECK-NEXT: addi a2, a2, 15 | ||
; CHECK-NEXT: and a1, a1, a2 | ||
; CHECK-NEXT: add a0, a0, a1 | ||
; CHECK-NEXT: ret | ||
; RV32I-LABEL: sub_if_uge_C_i32: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: lui a1, 16 | ||
; RV32I-NEXT: lui a2, 1048560 | ||
; RV32I-NEXT: addi a1, a1, -16 | ||
; RV32I-NEXT: sltu a1, a1, a0 | ||
; RV32I-NEXT: neg a1, a1 | ||
; RV32I-NEXT: addi a2, a2, 15 | ||
; RV32I-NEXT: and a1, a1, a2 | ||
; RV32I-NEXT: add a0, a0, a1 | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_i32: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: lui a1, 1048560 | ||
; RV32ZBB-NEXT: addi a1, a1, 15 | ||
; RV32ZBB-NEXT: add a1, a0, a1 | ||
; RV32ZBB-NEXT: minu a0, a1, a0 | ||
; RV32ZBB-NEXT: ret | ||
%cmp = icmp ugt i32 %x, 65520 | ||
%sub = add i32 %x, -65521 | ||
%cond = select i1 %cmp, i32 %sub, i32 %x | ||
|
@@ -1797,18 +1819,30 @@ define i64 @sub_if_uge_C_i64(i64 %x) { | |
} | ||
|
||
define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) { | ||
; CHECK-LABEL: sub_if_uge_C_multiuse_cmp_i32: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: lui a2, 16 | ||
; CHECK-NEXT: lui a3, 1048560 | ||
; CHECK-NEXT: addi a2, a2, -16 | ||
; CHECK-NEXT: sltu a2, a2, a0 | ||
; CHECK-NEXT: neg a4, a2 | ||
; CHECK-NEXT: addi a3, a3, 15 | ||
; CHECK-NEXT: and a3, a4, a3 | ||
; CHECK-NEXT: add a0, a0, a3 | ||
; CHECK-NEXT: sw a2, 0(a1) | ||
; CHECK-NEXT: ret | ||
; RV32I-LABEL: sub_if_uge_C_multiuse_cmp_i32: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: lui a2, 16 | ||
; RV32I-NEXT: lui a3, 1048560 | ||
; RV32I-NEXT: addi a2, a2, -16 | ||
; RV32I-NEXT: sltu a2, a2, a0 | ||
; RV32I-NEXT: neg a4, a2 | ||
; RV32I-NEXT: addi a3, a3, 15 | ||
; RV32I-NEXT: and a3, a4, a3 | ||
; RV32I-NEXT: add a0, a0, a3 | ||
; RV32I-NEXT: sw a2, 0(a1) | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_multiuse_cmp_i32: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: lui a2, 16 | ||
; RV32ZBB-NEXT: lui a3, 1048560 | ||
; RV32ZBB-NEXT: addi a2, a2, -16 | ||
; RV32ZBB-NEXT: addi a3, a3, 15 | ||
; RV32ZBB-NEXT: sltu a2, a2, a0 | ||
; RV32ZBB-NEXT: add a3, a0, a3 | ||
; RV32ZBB-NEXT: minu a0, a3, a0 | ||
; RV32ZBB-NEXT: sw a2, 0(a1) | ||
; RV32ZBB-NEXT: ret | ||
%cmp = icmp ugt i32 %x, 65520 | ||
%conv = zext i1 %cmp to i32 | ||
store i32 %conv, ptr %z, align 4 | ||
|
@@ -1818,20 +1852,29 @@ define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) { | |
} | ||
|
||
define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) { | ||
; CHECK-LABEL: sub_if_uge_C_multiuse_sub_i32: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: lui a2, 1048560 | ||
; CHECK-NEXT: lui a3, 16 | ||
; CHECK-NEXT: addi a2, a2, 15 | ||
; CHECK-NEXT: add a2, a0, a2 | ||
; CHECK-NEXT: addi a3, a3, -16 | ||
; CHECK-NEXT: sw a2, 0(a1) | ||
; CHECK-NEXT: bltu a3, a0, .LBB62_2 | ||
; CHECK-NEXT: # %bb.1: | ||
; CHECK-NEXT: mv a2, a0 | ||
; CHECK-NEXT: .LBB62_2: | ||
; CHECK-NEXT: mv a0, a2 | ||
; CHECK-NEXT: ret | ||
; RV32I-LABEL: sub_if_uge_C_multiuse_sub_i32: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: lui a2, 1048560 | ||
; RV32I-NEXT: lui a3, 16 | ||
; RV32I-NEXT: addi a2, a2, 15 | ||
; RV32I-NEXT: add a2, a0, a2 | ||
; RV32I-NEXT: addi a3, a3, -16 | ||
; RV32I-NEXT: sw a2, 0(a1) | ||
; RV32I-NEXT: bltu a3, a0, .LBB62_2 | ||
; RV32I-NEXT: # %bb.1: | ||
; RV32I-NEXT: mv a2, a0 | ||
; RV32I-NEXT: .LBB62_2: | ||
; RV32I-NEXT: mv a0, a2 | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_multiuse_sub_i32: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: lui a2, 1048560 | ||
; RV32ZBB-NEXT: addi a2, a2, 15 | ||
; RV32ZBB-NEXT: add a2, a0, a2 | ||
; RV32ZBB-NEXT: minu a0, a2, a0 | ||
; RV32ZBB-NEXT: sw a2, 0(a1) | ||
; RV32ZBB-NEXT: ret | ||
%sub = add i32 %x, -65521 | ||
store i32 %sub, ptr %z, align 4 | ||
%cmp = icmp ugt i32 %x, 65520 | ||
|
@@ -1840,17 +1883,25 @@ define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) { | |
} | ||
|
||
define i32 @sub_if_uge_C_swapped_i32(i32 %x) { | ||
; CHECK-LABEL: sub_if_uge_C_swapped_i32: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: lui a1, 16 | ||
; CHECK-NEXT: lui a2, 1048560 | ||
; CHECK-NEXT: addi a1, a1, -15 | ||
; CHECK-NEXT: sltu a1, a0, a1 | ||
; CHECK-NEXT: addi a1, a1, -1 | ||
; CHECK-NEXT: addi a2, a2, 15 | ||
; CHECK-NEXT: and a1, a1, a2 | ||
; CHECK-NEXT: add a0, a0, a1 | ||
; CHECK-NEXT: ret | ||
; RV32I-LABEL: sub_if_uge_C_swapped_i32: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: lui a1, 16 | ||
; RV32I-NEXT: lui a2, 1048560 | ||
; RV32I-NEXT: addi a1, a1, -15 | ||
; RV32I-NEXT: sltu a1, a0, a1 | ||
; RV32I-NEXT: addi a1, a1, -1 | ||
; RV32I-NEXT: addi a2, a2, 15 | ||
; RV32I-NEXT: and a1, a1, a2 | ||
; RV32I-NEXT: add a0, a0, a1 | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_swapped_i32: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: lui a1, 1048560 | ||
; RV32ZBB-NEXT: addi a1, a1, 15 | ||
; RV32ZBB-NEXT: add a1, a0, a1 | ||
; RV32ZBB-NEXT: minu a0, a0, a1 | ||
; RV32ZBB-NEXT: ret | ||
%cmp = icmp ult i32 %x, 65521 | ||
%sub = add i32 %x, -65521 | ||
%cond = select i1 %cmp, i32 %x, i32 %sub | ||
|
Uh oh!
There was an error while loading. Please reload this page.