Skip to content

Commit 5372160

Browse files
committed
[InstCombine] SimplifyDemandedBits - mul(x,x) - if only bit[1] is demanded then fold to zero
This is a translation of the fold added to codegen with 2d1390e. Part of solving issue llvm#48027.
1 parent 5488021 commit 5372160

File tree

2 files changed

+5
-8
lines changed

2 files changed

+5
-8
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
557557
Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
558558
return InsertNewInstWith(Shl, *I);
559559
}
560+
// 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
561+
if (DemandedMask == 2 && I->getOperand(0) == I->getOperand(1))
562+
return ConstantInt::getNullValue(VTy);
560563
}
561564
computeKnownBits(I, Known, Depth, CxtI);
562565
break;

llvm/test/Transforms/InstCombine/mul-masked-bits.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,7 @@ define i32 @foo(i32 %x, i32 %y) {
2222

2323
define i1 @PR48683(i32 %x) {
2424
; CHECK-LABEL: @PR48683(
25-
; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], [[X]]
26-
; CHECK-NEXT: [[B:%.*]] = and i32 [[A]], 2
27-
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[B]], 0
28-
; CHECK-NEXT: ret i1 [[C]]
25+
; CHECK-NEXT: ret i1 false
2926
;
3027
%a = mul i32 %x, %x
3128
%b = and i32 %a, 2
@@ -35,10 +32,7 @@ define i1 @PR48683(i32 %x) {
3532

3633
define <4 x i1> @PR48683_vec(<4 x i32> %x) {
3734
; CHECK-LABEL: @PR48683_vec(
38-
; CHECK-NEXT: [[A:%.*]] = mul <4 x i32> [[X:%.*]], [[X]]
39-
; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2>
40-
; CHECK-NEXT: [[C:%.*]] = icmp ne <4 x i32> [[B]], zeroinitializer
41-
; CHECK-NEXT: ret <4 x i1> [[C]]
35+
; CHECK-NEXT: ret <4 x i1> zeroinitializer
4236
;
4337
%a = mul <4 x i32> %x, %x
4438
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 2>

0 commit comments

Comments
 (0)