Skip to content

Commit d840391

Browse files
committed
[ValueTracking] Add logic for isKnownNonZero(smin/smax X, Y)
For `smin`, if either `X` or `Y` is negative, the result is non-zero. For `smax`, if either `X` or `Y` is strictly positive, the result is non-zero. For both, if `X != 0` and `Y != 0`, the result is non-zero. Alive2 Links: https://alive2.llvm.org/ce/z/7yvbgN https://alive2.llvm.org/ce/z/zizbvq Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D149417
1 parent e78c30a commit d840391

File tree

6 files changed

+40
-26
lines changed

6 files changed

+40
-26
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2942,6 +2942,26 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
29422942
isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q))
29432943
return true;
29442944
break;
2945+
case Intrinsic::smin:
2946+
case Intrinsic::smax: {
2947+
auto KnownOpImpliesNonZero = [&](const KnownBits &K) {
2948+
return II->getIntrinsicID() == Intrinsic::smin
2949+
? K.isNegative()
2950+
: K.isStrictlyPositive();
2951+
};
2952+
KnownBits XKnown =
2953+
computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
2954+
if (KnownOpImpliesNonZero(XKnown))
2955+
return true;
2956+
KnownBits YKnown =
2957+
computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
2958+
if (KnownOpImpliesNonZero(YKnown))
2959+
return true;
2960+
2961+
if (XKnown.isNonZero() && YKnown.isNonZero())
2962+
return true;
2963+
}
2964+
[[fallthrough]];
29452965
case Intrinsic::umin:
29462966
return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) &&
29472967
isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q);

llvm/test/Analysis/ValueTracking/known-non-zero.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,12 +1074,7 @@ define i1 @smin_nonzero_fail_y_maybe_z(i8 %xx, i8 %yy, i8 %ind) {
10741074

10751075
define i1 @smax_nonzero_pos_arg(i8 %xx, i8 %yy, i8 %ind) {
10761076
; CHECK-LABEL: @smax_nonzero_pos_arg(
1077-
; CHECK-NEXT: [[YA:%.*]] = and i8 [[YY:%.*]], 127
1078-
; CHECK-NEXT: [[YO:%.*]] = or i8 [[YA]], 1
1079-
; CHECK-NEXT: [[X:%.*]] = call i8 @llvm.smax.i8(i8 [[XX:%.*]], i8 [[YO]])
1080-
; CHECK-NEXT: [[Z:%.*]] = or i8 [[X]], [[IND:%.*]]
1081-
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[Z]], 0
1082-
; CHECK-NEXT: ret i1 [[R]]
1077+
; CHECK-NEXT: ret i1 false
10831078
;
10841079
%ya = and i8 %yy, 127
10851080
%yo = or i8 %ya, 1

llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,9 @@ define double @external_use_without_fast_math(ptr %a, i64 %n) {
263263
; AUTO_VEC-LABEL: @external_use_without_fast_math(
264264
; AUTO_VEC-NEXT: entry:
265265
; AUTO_VEC-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
266-
; AUTO_VEC-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
267266
; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[SMAX]], 7
268-
; AUTO_VEC-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
269-
; AUTO_VEC-NEXT: br i1 [[TMP1]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
267+
; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i64 [[SMAX]], 8
268+
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
270269
; AUTO_VEC: entry.new:
271270
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800
272271
; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/induction.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
943943
; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
944944
; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
945945
; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
946-
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
946+
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 9
947947
; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
948948
; IND: vector.ph:
949949
; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2
@@ -1003,7 +1003,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
10031003
; UNROLL-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
10041004
; UNROLL-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
10051005
; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1006-
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24
1006+
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 25
10071007
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
10081008
; UNROLL: vector.ph:
10091009
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
@@ -1160,10 +1160,10 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
11601160
; INTERLEAVE-LABEL: @scalarize_induction_variable_02(
11611161
; INTERLEAVE-NEXT: entry:
11621162
; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1163-
; INTERLEAVE-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1164-
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
1163+
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 65
11651164
; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
11661165
; INTERLEAVE: vector.ph:
1166+
; INTERLEAVE-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
11671167
; INTERLEAVE-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
11681168
; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
11691169
; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7

llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1278,7 +1278,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) {
12781278
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
12791279
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
12801280
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1281-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
1281+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 7
12821282
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
12831283
; CHECK: vector.ph:
12841284
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4

llvm/test/Transforms/LoopVectorize/loop-scalars.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) {
2222
; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8
2323
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2424
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
25-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
26-
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
26+
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2727
; CHECK: middle.block:
2828
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
2929
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -37,7 +37,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) {
3737
; CHECK-NEXT: store ptr [[VAR0]], ptr [[VAR1]], align 8
3838
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
3939
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
40-
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
40+
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
4141
; CHECK: for.end:
4242
; CHECK-NEXT: ret void
4343
;
@@ -65,7 +65,7 @@ define void @scalar_store(ptr %a, ptr %b, i64 %n) {
6565
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
6666
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
6767
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
68-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
68+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 3
6969
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
7070
; CHECK: vector.ph:
7171
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2
@@ -125,7 +125,7 @@ define void @expansion(ptr %a, ptr %b, i64 %n) {
125125
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
126126
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
127127
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
128-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
128+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 3
129129
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
130130
; CHECK: vector.ph:
131131
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -2
@@ -142,8 +142,8 @@ define void @expansion(ptr %a, ptr %b, i64 %n) {
142142
; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8
143143
; CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP7]], align 8
144144
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
145-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
146-
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
145+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
146+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
147147
; CHECK: middle.block:
148148
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
149149
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -191,13 +191,13 @@ define void @no_gep_or_bitcast(ptr noalias %a, i64 %n) {
191191
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
192192
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
193193
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP0]], align 8
194-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 0
194+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 0
195+
; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 8
196+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 1
195197
; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 8
196-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 1
197-
; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 8
198198
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
199-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
200-
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
199+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
200+
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
201201
; CHECK: middle.block:
202202
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
203203
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]

0 commit comments

Comments
 (0)