[InstCombine] Improve folding of icmp pred (and X, Mask/~Mask), Y) #81562

Closed · wants to merge 4 commits
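For orientation (the PR description itself is not captured in this page dump), here is a reduced LLVM IR sketch of the kind of comparison this patch teaches InstCombine to fold. The function name is illustrative, and the expected result is inferred from the updated CHECK lines in the tests further down:

```llvm
; Each constant element is a negated power of two (-4 = -(2^2), -1 = -(2^0)),
; so the vector qualifies per element as a "~Mask or zero".
define <2 x i1> @negated_mask_eq_zero(<2 x i8> %x) {
  %masked = and <2 x i8> %x, <i8 -4, i8 -1>
  %ret = icmp eq <2 x i8> %masked, zeroinitializer
  ret <2 x i1> %ret
}
; With this patch, instcombine is expected to reduce the body to roughly:
;   %ret = icmp ult <2 x i8> %x, <i8 4, i8 1>
;   ret <2 x i1> %ret
```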
25 changes: 25 additions & 0 deletions llvm/include/llvm/IR/PatternMatch.h
@@ -564,6 +564,19 @@ inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
return V;
}

struct is_negated_power2_or_zero {
bool isValue(const APInt &C) { return !C || C.isNegatedPowerOf2(); }
};
/// Match an integer or vector negated power-of-2 or zero.
/// For vectors, this includes constants with undefined elements.
inline cst_pred_ty<is_negated_power2_or_zero> m_NegatedPower2OrZero() {
return cst_pred_ty<is_negated_power2_or_zero>();
}
inline api_pred_ty<is_negated_power2_or_zero>
m_NegatedPower2OrZero(const APInt *&V) {
return V;
}

struct is_power2_or_zero {
bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
};
@@ -595,6 +608,18 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
}
inline api_pred_ty<is_lowbit_mask> m_LowBitMask(const APInt *&V) { return V; }

struct is_lowbit_mask_or_zero {
bool isValue(const APInt &C) { return !C || C.isMask(); }
};
/// Match an integer or vector with only the low bit(s) set, or zero.
/// For vectors, this includes constants with undefined elements.
inline cst_pred_ty<is_lowbit_mask_or_zero> m_LowBitMaskOrZero() {
return cst_pred_ty<is_lowbit_mask_or_zero>();
}
inline api_pred_ty<is_lowbit_mask_or_zero> m_LowBitMaskOrZero(const APInt *&V) {
return V;
}

struct icmp_pred_with_threshold {
ICmpInst::Predicate Pred;
const APInt *Thr;
141 changes: 123 additions & 18 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4069,11 +4069,101 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
return nullptr;
}

// Returns whether V is a Mask ((X + 1) & X == 0) or ~Mask (-Pow2OrZero)
static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
unsigned Depth = 0) {
if (Not ? match(V, m_NegatedPower2OrZero()) : match(V, m_LowBitMaskOrZero()))
return true;
if (V->getType()->getScalarSizeInBits() == 1)
return true;
if (Depth++ >= MaxAnalysisRecursionDepth)
return false;
Value *X;
const Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return false;
switch (I->getOpcode()) {
case Instruction::ZExt:
// ZExt(Mask) is a Mask.
return !Not && isMaskOrZero(I->getOperand(0), Not, Q, Depth);
case Instruction::SExt:
// SExt(Mask) is a Mask.
// SExt(~Mask) is a ~Mask.
return isMaskOrZero(I->getOperand(0), Not, Q, Depth);
case Instruction::And:
case Instruction::Or:
// Mask0 | Mask1 is a Mask.
// Mask0 & Mask1 is a Mask.
// ~Mask0 | ~Mask1 is a ~Mask.
// ~Mask0 & ~Mask1 is a ~Mask.
return isMaskOrZero(I->getOperand(1), Not, Q, Depth) &&
isMaskOrZero(I->getOperand(0), Not, Q, Depth);
case Instruction::Xor:
if (match(V, m_Not(m_Value(X))))
return isMaskOrZero(X, !Not, Q, Depth);

// (X ^ (X - 1)) is a Mask
return !Not &&
match(V, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())));
case Instruction::Select:
// c ? Mask0 : Mask1 is a Mask.
return isMaskOrZero(I->getOperand(1), Not, Q, Depth) &&
isMaskOrZero(I->getOperand(2), Not, Q, Depth);
case Instruction::Shl:
// (~Mask) << X is a ~Mask.
return Not && isMaskOrZero(I->getOperand(0), Not, Q, Depth);
case Instruction::LShr:
// Mask >> X is a Mask.
return !Not && isMaskOrZero(I->getOperand(0), Not, Q, Depth);
case Instruction::AShr:
// Mask s>> X is a Mask.
// ~Mask s>> X is a ~Mask.
return isMaskOrZero(I->getOperand(0), Not, Q, Depth);
case Instruction::Add:
// Pow2 - 1 is a Mask.
if (!Not && match(I->getOperand(1), m_AllOnes()))
return isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
Depth, Q.AC, Q.CxtI, Q.DT);
break;
case Instruction::Sub:
// -Pow2 is a ~Mask.
if (Not && match(I->getOperand(0), m_Zero()))
return isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
Depth, Q.AC, Q.CxtI, Q.DT);
break;
case Instruction::Call: {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
// min/max(Mask0, Mask1) is a Mask.
// min/max(~Mask0, ~Mask1) is a ~Mask.
case Intrinsic::umax:
case Intrinsic::smax:
case Intrinsic::umin:
case Intrinsic::smin:
return isMaskOrZero(II->getArgOperand(1), Not, Q, Depth) &&
isMaskOrZero(II->getArgOperand(0), Not, Q, Depth);

// In the context of masks, bitreverse(Mask) == ~Mask
case Intrinsic::bitreverse:
return isMaskOrZero(II->getArgOperand(0), !Not, Q, Depth);
default:
break;
}
}
break;
}
default:
break;
}
return false;
}

/// Some comparisons can be simplified.
/// In this case, we are looking for comparisons that look like
/// a check for a lossy truncation.
/// Folds:
/// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask
/// icmp eq/ne (x & ~Mask), 0 to icmp DstPred x, Mask
/// Where Mask is some pattern that produces all-ones in low bits:
/// (-1 >> y)
/// ((-1 << y) >> y) <- non-canonical, has extra uses
@@ -4082,23 +4172,35 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
/// The Mask can be a constant, too.
/// For some predicates, the operands are commutative.
/// For others, x can only be on a specific side.
static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate SrcPred;
Value *X, *M, *Y;
auto m_VariableMask = m_CombineOr(
m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
if (!match(&I, m_c_ICmp(SrcPred,
m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
m_Deferred(X))))
static Value *foldICmpWithLowBitMaskedVal(ICmpInst::Predicate Pred, Value *Op0,
Value *Op1, const SimplifyQuery &Q,
InstCombiner &IC) {
Value *X, *M;
bool NeedsNot = false;

auto CheckMask = [&](Value *V, bool Not) {
if (ICmpInst::isSigned(Pred) && !match(V, m_ImmConstant()))
return false;
return isMaskOrZero(V, Not, Q);
};

if (match(Op0, m_c_And(m_Specific(Op1), m_Value(M))) &&
CheckMask(M, /*Not*/ false)) {
X = Op1;
} else if (match(Op1, m_Zero()) && ICmpInst::isEquality(Pred) &&
match(Op0, m_OneUse(m_And(m_Value(X), m_Value(M))))) {
NeedsNot = true;
if (IC.isFreeToInvert(X, X->hasOneUse()) && CheckMask(X, /*Not*/ true))
std::swap(X, M);
else if (!IC.isFreeToInvert(M, M->hasOneUse()) ||
!CheckMask(M, /*Not*/ true))
return nullptr;
} else {
return nullptr;
}

ICmpInst::Predicate DstPred;
switch (SrcPred) {
switch (Pred) {
case ICmpInst::Predicate::ICMP_EQ:
// x & (-1 >> y) == x -> x u<= (-1 >> y)
DstPred = ICmpInst::Predicate::ICMP_ULE;
Expand Down Expand Up @@ -4164,7 +4266,9 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant);
}

return Builder.CreateICmp(DstPred, X, M);
if (NeedsNot)
M = IC.Builder.CreateNot(M);
Review comment (Member): Use IC.getFreelyInverted?

Reply (Author): If it's freely invertible, (not M) will simplify later on. Seems simpler to let the existing pipeline clean it up. No?

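A minimal IR sketch of the point made in the reply (hypothetical values, not taken from the patch): when M is itself an inverted value, the extra `not` emitted by `CreateNot` cancels out in a later visit, so little is gained by calling `getFreelyInverted` here:

```llvm
define i8 @not_of_not(i8 %m) {
  %notm = xor i8 %m, -1     ; M as matched by the compare-with-zero path
  %back = xor i8 %notm, -1  ; what IC.Builder.CreateNot(M) would emit
  ret i8 %back              ; instcombine folds not-of-not back to %m
}
```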
return IC.Builder.CreateICmp(DstPred, X, M);
}

/// Some comparisons can be simplified.
Expand Down Expand Up @@ -5081,9 +5185,6 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (Value *V = foldMultiplicationOverflowCheck(I))
return replaceInstUsesWith(I, V);

if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
return replaceInstUsesWith(I, V);

if (Instruction *R = foldICmpAndXX(I, Q, *this))
return R;

@@ -6984,6 +7085,10 @@ Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred,
}
}

const SimplifyQuery Q = SQ.getWithInstruction(&CxtI);
if (Value *V = foldICmpWithLowBitMaskedVal(Pred, Op0, Op1, Q, *this))
return replaceInstUsesWith(CxtI, V);

return nullptr;
}

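Beyond the constant masks exercised by the tests below, the recursive isMaskOrZero lets the fold see through non-constant mask constructions. A hedged sketch, not taken from the PR's tests, using the `X ^ (X - 1)` pattern handled in the Xor case above:

```llvm
define i1 @variable_mask_ne(i8 %x, i8 %p) {
  %pm1 = add i8 %p, -1
  %mask = xor i8 %p, %pm1        ; p ^ (p - 1) is always a low-bit mask
  %masked = and i8 %x, %mask
  %ret = icmp ne i8 %masked, %x  ; "x has bits above the mask"
  ret i1 %ret
}
; Expected to fold to roughly:
;   %ret = icmp ugt i8 %x, %mask
```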
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {

define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 1>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%tmp0 = and <2 x i8> %x, <i8 3, i8 0>
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {

define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 0>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%tmp0 = and <2 x i8> %x, <i8 3, i8 0>
@@ -50,8 +50,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {

define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 0>
; CHECK-NEXT: [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]]
; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 1>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%tmp0 = and <2 x i8> %x, <i8 3, i8 0>
@@ -63,8 +63,7 @@ define <2 x i1> @p2_vec_nonsplat() {
define <2 x i1> @p2_vec_nonsplat_edgecase() {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 0>
; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], [[TMP0]]
; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 0>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%x = call <2 x i8> @gen2x8()
@@ -63,8 +63,7 @@ define <2 x i1> @p2_vec_nonsplat() {
define <2 x i1> @p2_vec_nonsplat_edgecase() {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 0>
; CHECK-NEXT: [[RET:%.*]] = icmp sle <2 x i8> [[X]], [[TMP0]]
; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 1>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%x = call <2 x i8> @gen2x8()
@@ -50,8 +50,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {

define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase(
; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 0>
; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]]
; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 0>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%tmp0 = and <2 x i8> %x, <i8 3, i8 0>
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {

define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 1>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%tmp0 = and <2 x i8> %x, <i8 3, i8 0>
@@ -75,8 +75,7 @@ define <2 x i1> @p2_vec_nonsplat() {
define <2 x i1> @p2_vec_nonsplat_edgecase0() {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -4, i8 -1>
; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 0>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%x = call <2 x i8> @gen2x8()
@@ -75,8 +75,7 @@ define <2 x i1> @p2_vec_nonsplat() {
define <2 x i1> @p2_vec_nonsplat_edgecase0() {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -4, i8 -1>
; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 1>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%x = call <2 x i8> @gen2x8()
@@ -62,8 +62,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {

define <2 x i1> @p2_vec_nonsplat_edgecase0(<2 x i8> %x) {
; CHECK-LABEL: @p2_vec_nonsplat_edgecase0(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -1>
; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 0>
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
%tmp0 = and <2 x i8> %x, <i8 3, i8 0>