Skip to content

Commit aa250ce

Browse files
committed
Add optimizations for icmp eq/ne (mul(X, Y), 0)
1. Add checks for whether X and/or Y are odd. Odd values are unnecessary to the icmp: isZero(Odd * N) == isZero(N). 2. If neither X nor Y is known to be odd, then if X * Y cannot overflow AND X and/or Y is non-zero, the non-zero values are unnecessary to the icmp. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D140850
1 parent 44977a1 commit aa250ce

File tree

3 files changed

+52
-21
lines changed

3 files changed

+52
-21
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,6 +1295,48 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
12951295
return new ICmpInst(Pred, X, Cmp.getOperand(1));
12961296
}
12971297

1298+
// (icmp eq/ne (mul X, Y), 0) -> (icmp eq/ne X/Y, 0) if the other operand is
1299+
// known odd, or is known non-zero and the multiply cannot overflow.
1300+
if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) &&
1301+
ICmpInst::isEquality(Pred)) {
1302+
1303+
KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
1304+
// if X % 2 != 0
1305+
// (icmp eq/ne Y)
1306+
if (XKnown.countMaxTrailingZeros() == 0)
1307+
return new ICmpInst(Pred, Y, Cmp.getOperand(1));
1308+
1309+
KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
1310+
// if Y % 2 != 0
1311+
// (icmp eq/ne X)
1312+
if (YKnown.countMaxTrailingZeros() == 0)
1313+
return new ICmpInst(Pred, X, Cmp.getOperand(1));
1314+
1315+
auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0));
1316+
if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) {
1317+
const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
1318+
// `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()`
1319+
// but to avoid unnecessary work, first just check if this is an obvious case.
1320+
1321+
// if X non-zero and NoOverflow(X * Y)
1322+
// (icmp eq/ne Y)
1323+
if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
1324+
return new ICmpInst(Pred, Y, Cmp.getOperand(1));
1325+
1326+
// if Y non-zero and NoOverflow(X * Y)
1327+
// (icmp eq/ne X)
1328+
if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT))
1329+
return new ICmpInst(Pred, X, Cmp.getOperand(1));
1330+
}
1331+
// Note, we are skipping cases:
1332+
// if Y % 2 != 0 AND X % 2 != 0
1333+
// (false/true)
1334+
// if X non-zero and Y non-zero and NoOverflow(X * Y)
1335+
// (false/true)
1336+
// Those can be simplified later as we would have already replaced the (icmp
1337+
// eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
1338+
// will fold to a constant elsewhere.
1339+
}
12981340
return nullptr;
12991341
}
13001342

llvm/test/Transforms/InstCombine/icmp-binop.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ declare void @llvm.assume(i1)
66

77
define i1 @mul_unkV_oddC_eq(i32 %v) {
88
; CHECK-LABEL: @mul_unkV_oddC_eq(
9-
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[V:%.*]], 3
10-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MUL]], 0
9+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V:%.*]], 0
1110
; CHECK-NEXT: ret i1 [[CMP]]
1211
;
1312
%mul = mul i32 %v, 3
@@ -28,8 +27,7 @@ define i1 @mul_unkV_oddC_eq_nonzero(i32 %v) {
2827

2928
define <2 x i1> @mul_unkV_oddC_ne_vec(<2 x i64> %v) {
3029
; CHECK-LABEL: @mul_unkV_oddC_ne_vec(
31-
; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[V:%.*]], <i64 3, i64 3>
32-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[MUL]], zeroinitializer
30+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[V:%.*]], zeroinitializer
3331
; CHECK-NEXT: ret <2 x i1> [[CMP]]
3432
;
3533
%mul = mul <2 x i64> %v, <i64 3, i64 3>
@@ -72,7 +70,7 @@ define i1 @mul_unkV_oddC_sge(i8 %v) {
7270
define i1 @mul_reused_unkV_oddC_ne(i64 %v) {
7371
; CHECK-LABEL: @mul_reused_unkV_oddC_ne(
7472
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[V:%.*]], 3
75-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
73+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V]], 0
7674
; CHECK-NEXT: call void @use64(i64 [[MUL]])
7775
; CHECK-NEXT: ret i1 [[CMP]]
7876
;
@@ -87,8 +85,7 @@ define i1 @mul_assumeoddV_unkV_eq(i16 %v, i16 %v2) {
8785
; CHECK-NEXT: [[LB:%.*]] = and i16 [[V2:%.*]], 1
8886
; CHECK-NEXT: [[ODD:%.*]] = icmp ne i16 [[LB]], 0
8987
; CHECK-NEXT: call void @llvm.assume(i1 [[ODD]])
90-
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[V:%.*]], [[V2]]
91-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[MUL]], 0
88+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[V:%.*]], 0
9289
; CHECK-NEXT: ret i1 [[CMP]]
9390
;
9491
%lb = and i16 %v2, 1
@@ -105,7 +102,7 @@ define i1 @mul_reusedassumeoddV_unkV_ne(i64 %v, i64 %v2) {
105102
; CHECK-NEXT: [[ODD:%.*]] = icmp ne i64 [[LB]], 0
106103
; CHECK-NEXT: call void @llvm.assume(i1 [[ODD]])
107104
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[V]], [[V2:%.*]]
108-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
105+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V2]], 0
109106
; CHECK-NEXT: call void @use64(i64 [[MUL]])
110107
; CHECK-NEXT: ret i1 [[CMP]]
111108
;
@@ -120,9 +117,7 @@ define i1 @mul_reusedassumeoddV_unkV_ne(i64 %v, i64 %v2) {
120117

121118
define <2 x i1> @mul_setoddV_unkV_ne(<2 x i32> %v1, <2 x i32> %v2) {
122119
; CHECK-LABEL: @mul_setoddV_unkV_ne(
123-
; CHECK-NEXT: [[V:%.*]] = or <2 x i32> [[V1:%.*]], <i32 1, i32 1>
124-
; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[V]], [[V2:%.*]]
125-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[MUL]], zeroinitializer
120+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V2:%.*]], zeroinitializer
126121
; CHECK-NEXT: ret <2 x i1> [[CMP]]
127122
;
128123
%v = or <2 x i32> %v1, <i32 1, i32 1>
@@ -190,8 +185,7 @@ define i1 @mul_assumenzV_unkV_nsw_ne(i32 %v, i32 %v2) {
190185
; CHECK-LABEL: @mul_assumenzV_unkV_nsw_ne(
191186
; CHECK-NEXT: [[NZ:%.*]] = icmp ne i32 [[V:%.*]], 0
192187
; CHECK-NEXT: call void @llvm.assume(i1 [[NZ]])
193-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[V]], [[V2:%.*]]
194-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MUL]], 0
188+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V2:%.*]], 0
195189
; CHECK-NEXT: ret i1 [[CMP]]
196190
;
197191
%nz = icmp ne i32 %v, 0
@@ -229,9 +223,7 @@ define <2 x i1> @mul_unkV_unkV_nsw_nuw_ne(<2 x i16> %v, <2 x i16> %v2) {
229223

230224
define i1 @mul_setnzV_unkV_nuw_eq(i8 %v1, i8 %v2) {
231225
; CHECK-LABEL: @mul_setnzV_unkV_nuw_eq(
232-
; CHECK-NEXT: [[V:%.*]] = or i8 [[V1:%.*]], 2
233-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i8 [[V]], [[V2:%.*]]
234-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[MUL]], 0
226+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[V2:%.*]], 0
235227
; CHECK-NEXT: ret i1 [[CMP]]
236228
;
237229
%v = or i8 %v1, 2
@@ -245,8 +237,7 @@ define i1 @mul_brnzV_unkV_nuw_eq(i64 %v, i64 %v2) {
245237
; CHECK-NEXT: [[NZ_NOT:%.*]] = icmp eq i64 [[V2:%.*]], 0
246238
; CHECK-NEXT: br i1 [[NZ_NOT]], label [[FALSE:%.*]], label [[TRUE:%.*]]
247239
; CHECK: true:
248-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[V:%.*]], [[V2]]
249-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[MUL]], 0
240+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[V:%.*]], 0
250241
; CHECK-NEXT: ret i1 [[CMP]]
251242
; CHECK: false:
252243
; CHECK-NEXT: call void @use64(i64 [[V]])

llvm/test/Transforms/InstCombine/pr38677.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ define i32 @foo(i1 %which, ptr %dst) {
1212
; CHECK-NEXT: br label [[FINAL]]
1313
; CHECK: final:
1414
; CHECK-NEXT: [[USE2:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ select (i1 icmp eq (ptr @A, ptr @B), i32 2, i32 1), [[DELAY]] ]
15-
; CHECK-NEXT: [[B7:%.*]] = mul i32 [[USE2]], 2147483647
16-
; CHECK-NEXT: [[C3:%.*]] = icmp eq i32 [[B7]], 0
17-
; CHECK-NEXT: store i1 [[C3]], ptr [[DST:%.*]], align 1
15+
; CHECK-NEXT: store i1 false, ptr [[DST:%.*]], align 1
1816
; CHECK-NEXT: ret i32 [[USE2]]
1917
;
2018
entry:

0 commit comments

Comments
 (0)