Skip to content

Commit c3e3444

Browse files
committed
[InstCombine] Extend Fold of Zero-extended Bit Test
Previously, (zext (icmp ne (and X, (1 << ShAmt)), 0)) was folded only if the bit width of X and the bit width of the result were equal. Use a trunc or zext instruction to also support other bit widths. This is a follow-up to commit 533190a, which introduced a regression: (zext (icmp ne (and (lshr X ShAmt) 1) 0)) is no longer folded to (zext/trunc (and (lshr X ShAmt) 1)), because that commit introduced the fold of (icmp ne (and (lshr X ShAmt) 1) 0) to (icmp ne (and X (1 << ShAmt)) 0). This commit restores that fold. Alive proof: https://alive2.llvm.org/ce/z/MFkNXs Relates to issue #86813.
1 parent e07bda0 commit c3e3444

File tree

2 files changed

+22
-22
lines changed

2 files changed

+22
-22
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -985,7 +985,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
985985
}
986986
}
987987

988-
if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
988+
if (Cmp->isEquality()) {
989989
// Test if a bit is clear/set using a shifted-one mask:
990990
// zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
991991
// zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
@@ -997,7 +997,8 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
997997
X = Builder.CreateNot(X);
998998
Value *Lshr = Builder.CreateLShr(X, ShAmt);
999999
Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1000-
return replaceInstUsesWith(Zext, And1);
1000+
return replaceInstUsesWith(
1001+
Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
10011002
}
10021003
}
10031004

llvm/test/Transforms/InstCombine/zext.ll

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,10 @@ define i32 @zext_or_masked_bit_test_uses(i32 %a, i32 %b, i32 %x) {
456456

457457
define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) {
458458
; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth(
459-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
460-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
461-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
462-
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
459+
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
460+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], [[B:%.*]]
461+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
462+
; CHECK-NEXT: [[Z:%.*]] = and i16 [[TMP3]], 1
463463
; CHECK-NEXT: ret i16 [[Z]]
464464
;
465465
%shl = shl i32 1, %b
@@ -471,10 +471,10 @@ define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) {
471471

472472
define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
473473
; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(
474-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[B:%.*]]
475-
; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[A:%.*]]
476-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
477-
; CHECK-NEXT: [[Z:%.*]] = zext <4 x i1> [[CMP]] to <4 x i16>
474+
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
475+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]]
476+
; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
477+
; CHECK-NEXT: [[Z:%.*]] = and <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
478478
; CHECK-NEXT: ret <4 x i16> [[Z]]
479479
;
480480
%shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b
@@ -486,10 +486,9 @@ define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <
486486

487487
define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth(i32 %a, i32 %b) {
488488
; CHECK-LABEL: @zext_masked_bit_nonzero_to_smaller_bitwidth(
489-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
490-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
491-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
492-
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
489+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B:%.*]]
490+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
491+
; CHECK-NEXT: [[Z:%.*]] = and i16 [[TMP2]], 1
493492
; CHECK-NEXT: ret i16 [[Z]]
494493
;
495494
%shl = shl i32 1, %b
@@ -501,10 +500,10 @@ define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth(i32 %a, i32 %b) {
501500

502501
define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) {
503502
; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth(
504-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
505-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
506-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
507-
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i64
503+
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
504+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], [[B:%.*]]
505+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
506+
; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[TMP3]] to i64
508507
; CHECK-NEXT: ret i64 [[Z]]
509508
;
510509
%shl = shl i32 1, %b
@@ -516,10 +515,10 @@ define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) {
516515

517516
define <4 x i64> @zext_masked_bit_zero_to_larger_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
518517
; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth_v4i32(
519-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[B:%.*]]
520-
; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[A:%.*]]
521-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
522-
; CHECK-NEXT: [[Z:%.*]] = zext <4 x i1> [[CMP]] to <4 x i64>
518+
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
519+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]]
520+
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
521+
; CHECK-NEXT: [[Z:%.*]] = zext nneg <4 x i32> [[TMP3]] to <4 x i64>
523522
; CHECK-NEXT: ret <4 x i64> [[Z]]
524523
;
525524
%shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b

0 commit comments

Comments
 (0)