[InstCombine] Support trunc to i1 in foldSelectICmpAnd #127905

andjo403 · 2025-02-19T22:07:29Z

For trunc nuw saves an instruction and otherwise only other instructions without the select, same behavior as for bit test before.
Add test for a regression when pattern is followed by an or instruction

proof: https://alive2.llvm.org/ce/z/Ey6BoT

llvmbot · 2025-02-19T22:08:02Z

@llvm/pr-subscribers-llvm-transforms

Author: Andreas Jonson (andjo403)

Changes

For trunc nuw saves an instruction and otherwise only other instructions without the select, same behavior as for bit test before.
Add test for a regression when pattern is followd by an or instruction

proof: https://alive2.llvm.org/ce/z/Ey6BoT

Full diff: https://github.com/llvm/llvm-project/pull/127905.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+34-25)
(modified) llvm/test/Transforms/InstCombine/select-icmp-and.ll (+13-13)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e621a0b7fe596..91cf8c266ce47 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -119,7 +119,7 @@ static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
 ///  (shl (and (X, C1)), (log2(TC-FC) - log2(C1))) + FC
 /// With some variations depending if FC is larger than TC, or the shift
 /// isn't needed, or the bit widths don't match.
-static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
+static Value *foldSelectICmpAnd(SelectInst &Sel, Value *CondVal,
                                 InstCombiner::BuilderTy &Builder) {
   const APInt *SelTC, *SelFC;
   if (!match(Sel.getTrueValue(), m_APInt(SelTC)) ||
@@ -128,33 +128,42 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
 
   // If this is a vector select, we need a vector compare.
   Type *SelType = Sel.getType();
-  if (SelType->isVectorTy() != Cmp->getType()->isVectorTy())
+  if (SelType->isVectorTy() != CondVal->getType()->isVectorTy())
     return nullptr;
 
   Value *V;
   APInt AndMask;
   bool CreateAnd = false;
-  ICmpInst::Predicate Pred = Cmp->getPredicate();
-  if (ICmpInst::isEquality(Pred)) {
-    if (!match(Cmp->getOperand(1), m_Zero()))
-      return nullptr;
+  CmpPredicate Pred;
+  Value *CmpLHS, *CmpRHS;
 
-    V = Cmp->getOperand(0);
-    const APInt *AndRHS;
-    if (!match(V, m_And(m_Value(), m_Power2(AndRHS))))
-      return nullptr;
+  if (match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
+    if (ICmpInst::isEquality(Pred)) {
+      if (!match(CmpRHS, m_Zero()))
+        return nullptr;
 
-    AndMask = *AndRHS;
-  } else if (auto Res = decomposeBitTestICmp(Cmp->getOperand(0),
-                                             Cmp->getOperand(1), Pred)) {
-    assert(ICmpInst::isEquality(Res->Pred) && "Not equality test?");
-    if (!Res->Mask.isPowerOf2())
-      return nullptr;
+      V = CmpLHS;
+      const APInt *AndRHS;
+      if (!match(V, m_And(m_Value(), m_Power2(AndRHS))))
+        return nullptr;
+
+      AndMask = *AndRHS;
+    } else {
+      auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred);
+      if (!Res || !Res->Mask.isPowerOf2())
+        return nullptr;
+      assert(ICmpInst::isEquality(Res->Pred) && "Not equality test?");
 
-    V = Res->X;
-    AndMask = Res->Mask;
-    Pred = Res->Pred;
-    CreateAnd = true;
+      V = Res->X;
+      AndMask = Res->Mask;
+      Pred = Res->Pred;
+      CreateAnd = true;
+    }
+  } else if (auto *Trunc = dyn_cast<TruncInst>(CondVal)) {
+    V = Trunc->getOperand(0);
+    AndMask = APInt(V->getType()->getScalarSizeInBits(), 1);
+    Pred = ICmpInst::ICMP_NE;
+    CreateAnd = !Trunc->hasNoUnsignedWrap();
   } else {
     return nullptr;
   }
@@ -172,7 +181,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
       return nullptr;
     // If we have to create an 'and', then we must kill the cmp to not
     // increase the instruction count.
-    if (CreateAnd && !Cmp->hasOneUse())
+    if (CreateAnd && !CondVal->hasOneUse())
       return nullptr;
 
     // (V & AndMaskC) == 0 ? TC : FC --> TC | (V & AndMaskC)
@@ -213,7 +222,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
   // a 'select' + 'icmp', then this transformation would result in more
   // instructions and potentially interfere with other folding.
   if (CreateAnd + ShouldNotVal + NeedShift + NeedZExtTrunc >
-      1 + Cmp->hasOneUse())
+      1 + CondVal->hasOneUse())
     return nullptr;
 
   // Insert the 'and' instruction on the input to the truncate.
@@ -1955,9 +1964,6 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
           tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
     return NewSel;
 
-  if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))
-    return replaceInstUsesWith(SI, V);
-
   // NOTE: if we wanted to, this is where to detect integer MIN/MAX
   bool Changed = false;
   Value *TrueVal = SI.getTrueValue();
@@ -3955,6 +3961,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
     if (Instruction *Result = foldSelectInstWithICmp(SI, ICI))
       return Result;
 
+  if (Value *V = foldSelectICmpAnd(SI, CondVal, Builder))
+    return replaceInstUsesWith(SI, V);
+
   if (Value *V = foldSelectICmpAndBinOp(CondVal, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 
diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
index 16fb3f34047ee..f9f87e8030512 100644
--- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll
+++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
@@ -809,8 +809,8 @@ define i8 @select_bittest_to_xor(i8 %x) {
 
 define i8 @select_trunc_bittest_to_sub(i8 %x) {
 ; CHECK-LABEL: @select_trunc_bittest_to_sub(
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[RET:%.*]] = sub nuw nsw i8 4, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %trunc = trunc i8 %x to i1
@@ -820,8 +820,7 @@ define i8 @select_trunc_bittest_to_sub(i8 %x) {
 
 define i8 @select_trunc_nuw_bittest_to_sub(i8 %x) {
 ; CHECK-LABEL: @select_trunc_nuw_bittest_to_sub(
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4
+; CHECK-NEXT:    [[RET:%.*]] = sub i8 4, [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %trunc = trunc nuw i8 %x to i1
@@ -831,8 +830,8 @@ define i8 @select_trunc_nuw_bittest_to_sub(i8 %x) {
 
 define i8 @select_trunc_nsw_bittest_to_sub(i8 %x) {
 ; CHECK-LABEL: @select_trunc_nsw_bittest_to_sub(
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nsw i8 [[X:%.*]] to i1
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[RET:%.*]] = sub nuw nsw i8 4, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %trunc = trunc nsw i8 %x to i1
@@ -844,7 +843,7 @@ define i8 @select_trunc_nuw_bittest_to_sub_extra_use(i8 %x) {
 ; CHECK-LABEL: @select_trunc_nuw_bittest_to_sub_extra_use(
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1
 ; CHECK-NEXT:    call void @use1(i1 [[TRUNC]])
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4
+; CHECK-NEXT:    [[RET:%.*]] = sub i8 4, [[X]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %trunc = trunc nuw i8 %x to i1
@@ -868,8 +867,8 @@ define i8 @neg_select_trunc_bittest_to_sub_extra_use(i8 %x) {
 
 define i8 @select_trunc_nuw_bittest_to_shl_not(i8 %x) {
 ; CHECK-LABEL: @select_trunc_nuw_bittest_to_shl_not(
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TRUNC]], i8 0, i8 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[RET:%.*]] = xor i8 [[TMP1]], 4
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %trunc = trunc nuw i8 %x to i1
@@ -879,8 +878,8 @@ define i8 @select_trunc_nuw_bittest_to_shl_not(i8 %x) {
 
 define i8 @select_trunc_bittest_to_shl(i8 %x) {
 ; CHECK-LABEL: @select_trunc_bittest_to_shl(
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[TRUNC]], i8 4, i8 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 4
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %trunc = trunc i8 %x to i1
@@ -903,8 +902,9 @@ define i8 @neg_select_trunc_bittest_to_shl_extra_use(i8 %x) {
 
 define i16 @select_trunc_nuw_bittest_or(i8 %x) {
 ; CHECK-LABEL: @select_trunc_nuw_bittest_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc nuw i8 [[X:%.*]] to i1
-; CHECK-NEXT:    [[RES:%.*]] = select i1 [[TMP1]], i16 20, i16 4
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[SELECT:%.*]] = shl nuw nsw i16 [[TMP1]], 4
+; CHECK-NEXT:    [[RES:%.*]] = or disjoint i16 [[SELECT]], 4
 ; CHECK-NEXT:    ret i16 [[RES]]
 ;
   %trunc = trunc nuw i8 %x to i1

dtcxzyw · 2025-02-20T07:45:19Z

llvm/test/Transforms/InstCombine/select-icmp-and.ll

-; CHECK-NEXT:    [[RES:%.*]] = select i1 [[TMP1]], i16 20, i16 4
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[SELECT:%.*]] = shl nuw nsw i16 [[TMP1]], 4
+; CHECK-NEXT:    [[RES:%.*]] = or disjoint i16 [[SELECT]], 4


It is not a regression. The codegen without select is generally better: https://godbolt.org/z/1W6ocqdT3

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

dtcxzyw

LGTM. Thank you!
Please wait for additional approval from other reviewers :)

andjo403 · 2025-03-01T16:21:53Z

ping for one more review

andjo403 · 2025-03-11T20:02:24Z

ping

nikic · 2025-03-16T17:28:18Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

@@ -119,48 +119,15 @@ static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
 ///  (shl (and (X, C1)), (log2(TC-FC) - log2(C1))) + FC
 /// With some variations depending if FC is larger than TC, or the shift
 /// isn't needed, or the bit widths don't match.
-static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
+static Value *foldSelectICmpAnd(SelectInst &Sel, Value *CondVal, Value *TrueVal,
+                                Value *FalseVal, Value *V, APInt AndMask,


Suggested change

Value *FalseVal, Value *V, APInt AndMask,

Value *FalseVal, Value *V, const APInt &AndMask,

nikic · 2025-03-16T17:29:09Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

@@ -747,60 +714,26 @@ static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
 /// 2. The select operands are reversed
 /// 3. The magnitude of C2 and C1 are flipped
 static Value *foldSelectICmpAndBinOp(Value *CondVal, Value *TrueVal,
-                                     Value *FalseVal,
+                                     Value *FalseVal, Value *V, APInt AndMask,


Suggested change

Value *FalseVal, Value *V, APInt AndMask,

Value *FalseVal, Value *V, const APInt &AndMask,

andjo403 · 2025-03-17T21:05:50Z

when rebasing I get a merge conflict with the change from #128741 as the second commit that deduplication the code move the code from that commit, it looks like it is possible to use the knownbits for foldSelectICmpAndBinOp also so possible to move to the new foldSelectBitTest function.

what it the preferred way to merge this PR?

add some test for the use of known bits in foldSelectICmpAndBinOp to this PR and make the deduplicate commit not NFC
drop the deduplicate commit from this PR and make a separat PR for that
make a separat PR that use the known bits in foldSelectICmpAndBinOp and then rebase this PR when the other PR is merged

nikic · 2025-03-17T22:16:32Z

I'd go with option 2 (drop the deduplicate commit from this PR and make a separat PR for that).

…ldSelectICmpAnd. (#131902) The commit that was removed from #127905 due to the conflict with #128741. The use of common code results in that the foldSelectICmpAndBinOp also use knownbits in the same way as was added for foldSelectICmpAnd in #128741. proof for the use of knowbits in foldSelectICmpAndBinOp: https://alive2.llvm.org/ce/z/RYXr_k

…inOp and foldSelectICmpAnd. (#131902) The commit that was removed from llvm/llvm-project#127905 due to the conflict with llvm/llvm-project#128741. The use of common code results in that the foldSelectICmpAndBinOp also use knownbits in the same way as was added for foldSelectICmpAnd in llvm/llvm-project#128741. proof for the use of knowbits in foldSelectICmpAndBinOp: https://alive2.llvm.org/ce/z/RYXr_k

andjo403 requested review from nikic, dtcxzyw and goldsteinn February 19, 2025 22:07

llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Feb 19, 2025

This was referenced Feb 20, 2025

Task submission dtcxzyw/llvm-opt-benchmark#1312

Open

pre-commit: PR127905 dtcxzyw/llvm-opt-benchmark#2147

Closed

dtcxzyw reviewed Feb 20, 2025

View reviewed changes

dtcxzyw approved these changes Feb 23, 2025

View reviewed changes

nikic approved these changes Mar 16, 2025

View reviewed changes

[InstCombine] Support trunc to i1 in foldSelectICmpAnd

b4569db

andjo403 force-pushed the selectIcmpAnd branch from c88778a to b4569db Compare March 18, 2025 17:39

andjo403 merged commit b326cb6 into llvm:main Mar 18, 2025
6 of 9 checks passed

andjo403 deleted the selectIcmpAnd branch March 18, 2025 17:46

andjo403 mentioned this pull request Mar 18, 2025

[InstCombine] Reuse common code between foldSelectICmpAndBinOp and foldSelectICmpAnd. #131902

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[InstCombine] Support trunc to i1 in foldSelectICmpAnd #127905

[InstCombine] Support trunc to i1 in foldSelectICmpAnd #127905

Uh oh!

andjo403 commented Feb 19, 2025 •

edited by dtcxzyw

Loading

Uh oh!

llvmbot commented Feb 19, 2025

Uh oh!

dtcxzyw Feb 20, 2025

Uh oh!

Uh oh!

dtcxzyw left a comment

Uh oh!

andjo403 commented Mar 1, 2025

Uh oh!

andjo403 commented Mar 11, 2025

Uh oh!

nikic Mar 16, 2025

Uh oh!

nikic Mar 16, 2025

Uh oh!

andjo403 commented Mar 17, 2025

Uh oh!

nikic commented Mar 17, 2025

Uh oh!

Uh oh!

Uh oh!

	Value FalseVal, Value V, APInt AndMask,
	Value FalseVal, Value V, const APInt &AndMask,

[InstCombine] Support trunc to i1 in foldSelectICmpAnd #127905

[InstCombine] Support trunc to i1 in foldSelectICmpAnd #127905

Uh oh!

Conversation

andjo403 commented Feb 19, 2025 • edited by dtcxzyw Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Feb 19, 2025

Uh oh!

dtcxzyw Feb 20, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

andjo403 commented Mar 1, 2025

Uh oh!

andjo403 commented Mar 11, 2025

Uh oh!

nikic Mar 16, 2025

Choose a reason for hiding this comment

Uh oh!

nikic Mar 16, 2025

Choose a reason for hiding this comment

Uh oh!

andjo403 commented Mar 17, 2025

Uh oh!

nikic commented Mar 17, 2025

Uh oh!

Uh oh!

Uh oh!

andjo403 commented Feb 19, 2025 •

edited by dtcxzyw

Loading