Skip to content

Commit 6063e6b

Browse files
committed
[InstCombine] move add after min/max intrinsic
This is another regression noted with the proposal to canonicalize to the min/max intrinsics in D98152.

Here are Alive2 attempts to show correctness without specifying exact constants:
https://alive2.llvm.org/ce/z/bvfCwh (smax)
https://alive2.llvm.org/ce/z/of7eqy (smin)
https://alive2.llvm.org/ce/z/2Xtxoh (umax)
https://alive2.llvm.org/ce/z/Rm4Ad8 (umin)
(if you comment out the assume and/or no-wrap, you should see failures)

The different output for the umin test is due to a fold added with c4fc2cb:
// umin(x, 1) == zext(x != 0)

We probably want to adjust that, so it applies more generally (umax --> sext or patterns where we can fold to select-of-constants). Some folds that were ok when starting with cmp+select may increase instruction count for the equivalent intrinsic, so we have to decide if it's worth altering a min/max.

Differential Revision: https://reviews.llvm.org/D110038
1 parent 3538ee7 commit 6063e6b

File tree

2 files changed

+86
-8
lines changed

2 files changed

+86
-8
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,45 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
754754
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
755755
}
756756

757+
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
758+
/// can trigger other combines.
759+
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
760+
InstCombiner::BuilderTy &Builder) {
761+
Intrinsic::ID MinMaxID = II->getIntrinsicID();
762+
assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
763+
MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
764+
"Expected a min or max intrinsic");
765+
766+
// TODO: Match vectors with undef elements, but undef may not propagate.
767+
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
768+
Value *X;
769+
const APInt *C0, *C1;
770+
if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
771+
!match(Op1, m_APInt(C1)))
772+
return nullptr;
773+
774+
// Check for necessary no-wrap and overflow constraints.
775+
bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
776+
auto *Add = cast<BinaryOperator>(Op0);
777+
if ((IsSigned && !Add->hasNoSignedWrap()) ||
778+
(!IsSigned && !Add->hasNoUnsignedWrap()))
779+
return nullptr;
780+
781+
// If the constant difference overflows, then instsimplify should reduce the
782+
// min/max to the add or C1.
783+
bool Overflow;
784+
APInt CDiff =
785+
IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
786+
assert(!Overflow && "Expected simplify of min/max");
787+
788+
// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
789+
// Note: the "mismatched" no-overflow setting does not propagate.
790+
Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
791+
Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
792+
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
793+
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
794+
}
795+
757796
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
758797
/// can only be one of two possible constant values -- turn that into a select
759798
/// of constants.
@@ -1101,6 +1140,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
11011140
if (Instruction *I = moveNotAfterMinMax(I1, I0))
11021141
return I;
11031142

1143+
if (Instruction *I = moveAddAfterMinMax(II, Builder))
1144+
return I;
1145+
11041146
// smax(X, -X) --> abs(X)
11051147
// smin(X, -X) --> -abs(X)
11061148
// umax(X, -X) --> -abs(X)

llvm/test/Transforms/InstCombine/minmax-intrinsics.ll

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1869,15 +1869,17 @@ define void @cmyk_commute11(i8 %r, i8 %g, i8 %b) {
18691869

18701870
define i8 @smax_offset(i8 %x) {
18711871
; CHECK-LABEL: @smax_offset(
1872-
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
1873-
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 -124)
1872+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 -127)
1873+
; CHECK-NEXT: [[M:%.*]] = add nsw i8 [[TMP1]], 3
18741874
; CHECK-NEXT: ret i8 [[M]]
18751875
;
18761876
%a = add nsw i8 %x, 3
18771877
%m = call i8 @llvm.smax.i8(i8 %a, i8 -124)
18781878
ret i8 %m
18791879
}
18801880

1881+
; This is handled by InstSimplify; testing here to confirm assert.
1882+
18811883
define i8 @smax_offset_limit(i8 %x) {
18821884
; CHECK-LABEL: @smax_offset_limit(
18831885
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@@ -1888,6 +1890,8 @@ define i8 @smax_offset_limit(i8 %x) {
18881890
ret i8 %m
18891891
}
18901892

1893+
; This is handled by InstSimplify; testing here to confirm assert.
1894+
18911895
define i8 @smax_offset_overflow(i8 %x) {
18921896
; CHECK-LABEL: @smax_offset_overflow(
18931897
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@@ -1898,6 +1902,8 @@ define i8 @smax_offset_overflow(i8 %x) {
18981902
ret i8 %m
18991903
}
19001904

1905+
; negative test - require nsw
1906+
19011907
define i8 @smax_offset_may_wrap(i8 %x) {
19021908
; CHECK-LABEL: @smax_offset_may_wrap(
19031909
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 3
@@ -1909,6 +1915,8 @@ define i8 @smax_offset_may_wrap(i8 %x) {
19091915
ret i8 %m
19101916
}
19111917

1918+
; negative test
1919+
19121920
define i8 @smax_offset_uses(i8 %x) {
19131921
; CHECK-LABEL: @smax_offset_uses(
19141922
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@@ -1924,15 +1932,17 @@ define i8 @smax_offset_uses(i8 %x) {
19241932

19251933
define <3 x i8> @smin_offset(<3 x i8> %x) {
19261934
; CHECK-LABEL: @smin_offset(
1927-
; CHECK-NEXT: [[A:%.*]] = add nuw nsw <3 x i8> [[X:%.*]], <i8 124, i8 124, i8 124>
1928-
; CHECK-NEXT: [[M:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[A]], <3 x i8> <i8 -3, i8 -3, i8 -3>)
1935+
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -127, i8 -127, i8 -127>)
1936+
; CHECK-NEXT: [[M:%.*]] = or <3 x i8> [[TMP1]], <i8 124, i8 124, i8 124>
19291937
; CHECK-NEXT: ret <3 x i8> [[M]]
19301938
;
19311939
%a = add nsw nuw <3 x i8> %x, <i8 124, i8 124, i8 124>
19321940
%m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %a, <3 x i8> <i8 -3, i8 -3, i8 -3>)
19331941
ret <3 x i8> %m
19341942
}
19351943

1944+
; This is handled by InstSimplify; testing here to confirm assert.
1945+
19361946
define i8 @smin_offset_limit(i8 %x) {
19371947
; CHECK-LABEL: @smin_offset_limit(
19381948
; CHECK-NEXT: ret i8 -3
@@ -1942,6 +1952,8 @@ define i8 @smin_offset_limit(i8 %x) {
19421952
ret i8 %m
19431953
}
19441954

1955+
; This is handled by InstSimplify; testing here to confirm assert.
1956+
19451957
define i8 @smin_offset_overflow(i8 %x) {
19461958
; CHECK-LABEL: @smin_offset_overflow(
19471959
; CHECK-NEXT: ret i8 -3
@@ -1951,6 +1963,8 @@ define i8 @smin_offset_overflow(i8 %x) {
19511963
ret i8 %m
19521964
}
19531965

1966+
; negative test - require nsw
1967+
19541968
define i8 @smin_offset_may_wrap(i8 %x) {
19551969
; CHECK-LABEL: @smin_offset_may_wrap(
19561970
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 124
@@ -1962,6 +1976,8 @@ define i8 @smin_offset_may_wrap(i8 %x) {
19621976
ret i8 %m
19631977
}
19641978

1979+
; negative test
1980+
19651981
define i8 @smin_offset_uses(i8 %x) {
19661982
; CHECK-LABEL: @smin_offset_uses(
19671983
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 124
@@ -1975,17 +1991,21 @@ define i8 @smin_offset_uses(i8 %x) {
19751991
ret i8 %m
19761992
}
19771993

1994+
; Note: 'nsw' must not propagate here.
1995+
19781996
define <3 x i8> @umax_offset(<3 x i8> %x) {
19791997
; CHECK-LABEL: @umax_offset(
1980-
; CHECK-NEXT: [[A:%.*]] = add nuw nsw <3 x i8> [[X:%.*]], <i8 127, i8 127, i8 127>
1981-
; CHECK-NEXT: [[M:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[A]], <3 x i8> <i8 -126, i8 -126, i8 -126>)
1998+
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 3, i8 3, i8 3>)
1999+
; CHECK-NEXT: [[M:%.*]] = add nuw <3 x i8> [[TMP1]], <i8 127, i8 127, i8 127>
19822000
; CHECK-NEXT: ret <3 x i8> [[M]]
19832001
;
19842002
%a = add nsw nuw <3 x i8> %x, <i8 127, i8 127, i8 127>
19852003
%m = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %a, <3 x i8> <i8 130, i8 130, i8 130>)
19862004
ret <3 x i8> %m
19872005
}
19882006

2007+
; This is handled by InstSimplify; testing here to confirm assert.
2008+
19892009
define i8 @umax_offset_limit(i8 %x) {
19902010
; CHECK-LABEL: @umax_offset_limit(
19912011
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@@ -1996,6 +2016,8 @@ define i8 @umax_offset_limit(i8 %x) {
19962016
ret i8 %m
19972017
}
19982018

2019+
; This is handled by InstSimplify; testing here to confirm assert.
2020+
19992021
define i8 @umax_offset_overflow(i8 %x) {
20002022
; CHECK-LABEL: @umax_offset_overflow(
20012023
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@@ -2006,6 +2028,8 @@ define i8 @umax_offset_overflow(i8 %x) {
20062028
ret i8 %m
20072029
}
20082030

2031+
; negative test - require nuw
2032+
20092033
define i8 @umax_offset_may_wrap(i8 %x) {
20102034
; CHECK-LABEL: @umax_offset_may_wrap(
20112035
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 3
@@ -2017,6 +2041,8 @@ define i8 @umax_offset_may_wrap(i8 %x) {
20172041
ret i8 %m
20182042
}
20192043

2044+
; negative test
2045+
20202046
define i8 @umax_offset_uses(i8 %x) {
20212047
; CHECK-LABEL: @umax_offset_uses(
20222048
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@@ -2032,15 +2058,17 @@ define i8 @umax_offset_uses(i8 %x) {
20322058

20332059
define i8 @umin_offset(i8 %x) {
20342060
; CHECK-LABEL: @umin_offset(
2035-
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], -5
2036-
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[A]], i8 -4)
2061+
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[X:%.*]], 0
2062+
; CHECK-NEXT: [[M:%.*]] = select i1 [[DOTNOT]], i8 -5, i8 -4
20372063
; CHECK-NEXT: ret i8 [[M]]
20382064
;
20392065
%a = add nuw i8 %x, 251
20402066
%m = call i8 @llvm.umin.i8(i8 %a, i8 252)
20412067
ret i8 %m
20422068
}
20432069

2070+
; This is handled by InstSimplify; testing here to confirm assert.
2071+
20442072
define i8 @umin_offset_limit(i8 %x) {
20452073
; CHECK-LABEL: @umin_offset_limit(
20462074
; CHECK-NEXT: ret i8 -4
@@ -2050,6 +2078,8 @@ define i8 @umin_offset_limit(i8 %x) {
20502078
ret i8 %m
20512079
}
20522080

2081+
; This is handled by InstSimplify; testing here to confirm assert.
2082+
20532083
define i8 @umin_offset_overflow(i8 %x) {
20542084
; CHECK-LABEL: @umin_offset_overflow(
20552085
; CHECK-NEXT: ret i8 -4
@@ -2059,6 +2089,8 @@ define i8 @umin_offset_overflow(i8 %x) {
20592089
ret i8 %m
20602090
}
20612091

2092+
; negative test - require nuw
2093+
20622094
define i8 @umin_offset_may_wrap(i8 %x) {
20632095
; CHECK-LABEL: @umin_offset_may_wrap(
20642096
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], -5
@@ -2070,6 +2102,8 @@ define i8 @umin_offset_may_wrap(i8 %x) {
20702102
ret i8 %m
20712103
}
20722104

2105+
; negative test
2106+
20732107
define i8 @umin_offset_uses(i8 %x) {
20742108
; CHECK-LABEL: @umin_offset_uses(
20752109
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], -5
@@ -2083,6 +2117,8 @@ define i8 @umin_offset_uses(i8 %x) {
20832117
ret i8 %m
20842118
}
20852119

2120+
; TODO: This could transform, but undef element must not propagate to the new add.
2121+
20862122
define <3 x i8> @umax_vector_splat_undef(<3 x i8> %x) {
20872123
; CHECK-LABEL: @umax_vector_splat_undef(
20882124
; CHECK-NEXT: [[A:%.*]] = add nuw <3 x i8> [[X:%.*]], <i8 undef, i8 64, i8 64>

0 commit comments

Comments
 (0)