Skip to content

Commit 4e949bd

Browse files
committed
[ValueTracking] Implement isKnownNonZero for llvm.vector.reduce.{add,mul}
Proof for bespoke non-zero logic: https://alive2.llvm.org/ce/z/P6HRvw
1 parent c20543f commit 4e949bd

File tree

2 files changed

+38 additions, -15 deletions (lines changed)

2 files changed

+38 additions, -15 deletions (lines changed)

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2942,6 +2942,41 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
29422942
case Intrinsic::vector_reduce_smax:
29432943
case Intrinsic::vector_reduce_smin:
29442944
return isKnownNonZero(II->getArgOperand(0), Depth, Q);
2945+
// If we know the reduction doesn't overflow and all elements are
2946+
// non-zero, the reduction is non-zero.
2947+
case Intrinsic::vector_reduce_mul:
2948+
case Intrinsic::vector_reduce_add:
2949+
if (computeKnownBits(I, Depth + 1, Q).isNonZero())
2950+
return true;
2951+
2952+
if (auto *VecTy =
2953+
dyn_cast<FixedVectorType>(I->getOperand(0)->getType())) {
2954+
bool Overflow;
2955+
if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
2956+
APInt NumEle(BitWidth, VecTy->getNumElements());
2957+
// If we can't store num ele in bitwidth, the result is either
2958+
// known-zero or we won't get anything useful.
2959+
if (NumEle.getZExtValue() != VecTy->getNumElements())
2960+
break;
2961+
APInt MaxVal =
2962+
computeKnownBits(II->getArgOperand(0), Depth, Q).getMaxValue();
2963+
MaxVal = MaxVal.umul_ov(NumEle, Overflow);
2964+
} else {
2965+
APInt MaxVal =
2966+
computeKnownBits(II->getArgOperand(0), Depth, Q).getMaxValue();
2967+
APInt SingleVal = MaxVal;
2968+
for (unsigned i = 1, e = VecTy->getNumElements(); i < e; ++i) {
2969+
MaxVal = MaxVal.umul_ov(SingleVal, Overflow);
2970+
if (Overflow)
2971+
break;
2972+
}
2973+
}
2974+
2975+
if (Overflow)
2976+
break;
2977+
return isKnownNonZero(II->getArgOperand(0), Depth, Q);
2978+
}
2979+
break;
29452980
case Intrinsic::umax:
29462981
case Intrinsic::uadd_sat:
29472982
return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||

llvm/test/Transforms/InstSimplify/known-non-zero.ll

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -380,11 +380,7 @@ define <2 x i1> @insert_nonzero_any_idx_fail(<2 x i8> %xx, i8 %yy, i32 %idx) {
380380

381381
define i1 @nonzero_reduce_add(<2 x i8> %xx) {
382382
; CHECK-LABEL: @nonzero_reduce_add(
383-
; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
384-
; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
385-
; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
386-
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
387-
; CHECK-NEXT: ret i1 [[R]]
383+
; CHECK-NEXT: ret i1 false
388384
;
389385
%x0 = and <2 x i8> %xx, <i8 3, i8 3>
390386
%x = add <2 x i8> %x0, <i8 1, i8 1>
@@ -438,11 +434,7 @@ define i1 @nonzero_reduce_add_fail3(<18 x i4> %xx) {
438434

439435
define i1 @nonzero_reduce_mul(<2 x i8> %xx) {
440436
; CHECK-LABEL: @nonzero_reduce_mul(
441-
; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
442-
; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
443-
; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
444-
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
445-
; CHECK-NEXT: ret i1 [[R]]
437+
; CHECK-NEXT: ret i1 false
446438
;
447439
%x0 = and <2 x i8> %xx, <i8 3, i8 3>
448440
%x = add <2 x i8> %x0, <i8 1, i8 1>
@@ -453,11 +445,7 @@ define i1 @nonzero_reduce_mul(<2 x i8> %xx) {
453445

454446
define i1 @nonzero_reduce_mul2(<3 x i16> %xx) {
455447
; CHECK-LABEL: @nonzero_reduce_mul2(
456-
; CHECK-NEXT: [[X0:%.*]] = and <3 x i16> [[XX:%.*]], <i16 3, i16 3, i16 3>
457-
; CHECK-NEXT: [[X:%.*]] = add <3 x i16> [[X0]], <i16 1, i16 1, i16 1>
458-
; CHECK-NEXT: [[V:%.*]] = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> [[X]])
459-
; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[V]], 0
460-
; CHECK-NEXT: ret i1 [[R]]
448+
; CHECK-NEXT: ret i1 false
461449
;
462450
%x0 = and <3 x i16> %xx, <i16 3, i16 3, i16 3>
463451
%x = add <3 x i16> %x0, <i16 1, i16 1, i16 1>

0 commit comments

Comments
 (0)