Skip to content

Commit 6c85e92

Browse files
committed
[InstCombine] Simplify a umul overflow check to a != 0 && b != 0.
This patch adds a simplification if an OR weakens the overflow condition for umul.with.overflow by treating any non-zero result as overflow. In that case, we overflow if both umul.with.overflow operands are != 0, as in that case the result can only be 0 iff the multiplication overflows.

Code like this is generated by code using __builtin_mul_overflow with negative integer constants, e.g.

    bool test(unsigned long long v, unsigned long long *res) {
      return __builtin_mul_overflow(v, -4775807LL, res);
    }

```
----------------------------------------
Name: D74141
  %res = umul_overflow {i8, i1} %a, %b
  %mul = extractvalue {i8, i1} %res, 0
  %overflow = extractvalue {i8, i1} %res, 1
  %cmp = icmp ne %mul, 0
  %ret = or i1 %overflow, %cmp
  ret i1 %ret
=>
  %t0 = icmp ne i8 %a, 0
  %t1 = icmp ne i8 %b, 0
  %ret = and i1 %t0, %t1
  ret i1 %ret
  %res = umul_overflow {i8, i1} %a, %b
  %mul = extractvalue {i8, i1} %res, 0
  %cmp = icmp ne %mul, 0
  %overflow = extractvalue {i8, i1} %res, 1

Done: 1
Optimization is correct!
```

Reviewers: nikic, lebedev.ri, spatel, Bigcheese, dexonsmith, aemerson

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D74141
1 parent 813ca53 commit 6c85e92

File tree

2 files changed

+47
-26
lines changed

2 files changed

+47
-26
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2723,6 +2723,31 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
27232723
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
27242724
return V;
27252725

2726+
CmpInst::Predicate Pred;
2727+
Value *Mul, *Ov, *MulIsNotZero, *UMulWithOv;
2728+
// Check if the OR weakens the overflow condition for umul.with.overflow by
2729+
// treating any non-zero result as overflow. In that case, we overflow if both
2730+
// umul.with.overflow operands are != 0, as in that case the result can only
2731+
// be 0, iff the multiplication overflows.
2732+
if (match(&I,
2733+
m_c_Or(m_CombineAnd(m_ExtractValue<1>(m_Value(UMulWithOv)),
2734+
m_Value(Ov)),
2735+
m_CombineAnd(m_ICmp(Pred,
2736+
m_CombineAnd(m_ExtractValue<0>(
2737+
m_Deferred(UMulWithOv)),
2738+
m_Value(Mul)),
2739+
m_ZeroInt()),
2740+
m_Value(MulIsNotZero)))) &&
2741+
(Ov->hasOneUse() || (MulIsNotZero->hasOneUse() && Mul->hasOneUse())) &&
2742+
Pred == CmpInst::ICMP_NE) {
2743+
Value *A, *B;
2744+
if (match(UMulWithOv, m_Intrinsic<Intrinsic::umul_with_overflow>(
2745+
m_Value(A), m_Value(B))))
2746+
2747+
return BinaryOperator::CreateAnd(Builder.CreateIsNotNull(A),
2748+
Builder.CreateIsNotNull(B));
2749+
}
2750+
27262751
return nullptr;
27272752
}
27282753

llvm/test/Transforms/InstCombine/umul-sign-check.ll

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #0
1313

1414
define i1 @test1(i64 %a, i64 %b, i64* %ptr) {
1515
; CHECK-LABEL: @test1(
16-
; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
17-
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1
18-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0
19-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
20-
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]]
16+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
17+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0
18+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0
19+
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]]
2120
; CHECK-NEXT: store i64 [[MUL]], i64* [[PTR:%.*]], align 8
2221
; CHECK-NEXT: ret i1 [[OVERFLOW_1]]
2322
;
@@ -33,11 +32,10 @@ define i1 @test1(i64 %a, i64 %b, i64* %ptr) {
3332

3433
define i1 @test1_or_ops_swapped(i64 %a, i64 %b, i64* %ptr) {
3534
; CHECK-LABEL: @test1_or_ops_swapped(
36-
; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
37-
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1
38-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0
39-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
40-
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[CMP]], [[OVERFLOW]]
35+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
36+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0
37+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0
38+
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]]
4139
; CHECK-NEXT: store i64 [[MUL]], i64* [[PTR:%.*]], align 8
4240
; CHECK-NEXT: ret i1 [[OVERFLOW_1]]
4341
;
@@ -54,11 +52,10 @@ define i1 @test1_or_ops_swapped(i64 %a, i64 %b, i64* %ptr) {
5452

5553
define i1 @test2(i64 %a, i64 %b, i64* %ptr) {
5654
; CHECK-LABEL: @test2(
57-
; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
58-
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1
59-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0
60-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
61-
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]]
55+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
56+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0
57+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0
58+
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]]
6259
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]]
6360
; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8
6461
; CHECK-NEXT: ret i1 [[OVERFLOW_1]]
@@ -80,9 +77,9 @@ define i1 @test3_multiple_overflow_users(i64 %a, i64 %b, i64* %ptr) {
8077
; CHECK-LABEL: @test3_multiple_overflow_users(
8178
; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
8279
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1
83-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0
84-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
85-
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]]
80+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0
81+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0
82+
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]]
8683
; CHECK-NEXT: call void @use(i1 [[OVERFLOW]])
8784
; CHECK-NEXT: ret i1 [[OVERFLOW_1]]
8885
;
@@ -124,10 +121,10 @@ declare void @use.2({ i64, i1 })
124121
define i1 @test3_multiple_res_users(i64 %a, i64 %b, i64* %ptr) {
125122
; CHECK-LABEL: @test3_multiple_res_users(
126123
; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
127-
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1
128124
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0
129-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
130-
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]]
125+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0
126+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0
127+
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]]
131128
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]]
132129
; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8
133130
; CHECK-NEXT: call void @use.2({ i64, i1 } [[RES]])
@@ -149,11 +146,10 @@ declare void @use.3(i64)
149146
; Simplify if %mul has multiple uses.
150147
define i1 @test3_multiple_mul_users(i64 %a, i64 %b, i64* %ptr) {
151148
; CHECK-LABEL: @test3_multiple_mul_users(
152-
; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
153-
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[RES]], 1
154-
; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[RES]], 0
155-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0
156-
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = or i1 [[OVERFLOW]], [[CMP]]
149+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
150+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[A]], 0
151+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[B]], 0
152+
; CHECK-NEXT: [[OVERFLOW_1:%.*]] = and i1 [[TMP1]], [[TMP2]]
157153
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]]
158154
; CHECK-NEXT: store i64 [[NEG]], i64* [[PTR:%.*]], align 8
159155
; CHECK-NEXT: call void @use.3(i64 [[MUL]])

0 commit comments

Comments
 (0)