Skip to content

Commit cdd9221

Browse files
authored
[InstCombine] Simplify the overflow result of umulov X, X (#80796)
This patch performs the following fold when only the overflow result is used and the bit width N is even:
```
extractvalue (umul.with.overflow iN X, X), 1 -> icmp ugt X, 2^(N/2)-1
```
Alive2: https://alive2.llvm.org/ce/z/a8yPC6
1 parent a8ab830 commit cdd9221

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3459,6 +3459,17 @@ InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
34593459
WO->getLHS()->getType()->isIntOrIntVectorTy(1))
34603460
return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
34613461

3462+
// extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3463+
if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3464+
unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3465+
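// Only handle even bitwidths for performance reasons: for an odd bitwidth
// the threshold is not of the form 2^k-1 and would require computing an
// integer square root.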
3466+
if (BitWidth % 2 == 0)
3467+
return new ICmpInst(
3468+
ICmpInst::ICMP_UGT, WO->getLHS(),
3469+
ConstantInt::get(WO->getLHS()->getType(),
3470+
APInt::getLowBitsSet(BitWidth, BitWidth / 2)));
3471+
}
3472+
34623473
// If only the overflow result is used, and the right hand side is a
34633474
// constant (or constant splat), we can remove the intrinsic by directly
34643475
// checking for overflow.
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=instcombine -S %s | FileCheck %s
3+
4+
; Positive test: only the overflow bit of umul.with.overflow(%x, %x) is used,
; so for i32 it is expected to fold to `icmp ugt %x, 65535` (2^16 - 1).
define i1 @umulov_square_i32(i32 %x) {
5+
; CHECK-LABEL: define i1 @umulov_square_i32(
6+
; CHECK-SAME: i32 [[X:%.*]]) {
7+
; CHECK-NEXT: [[RES:%.*]] = icmp ugt i32 [[X]], 65535
8+
; CHECK-NEXT: ret i1 [[RES]]
9+
;
10+
%ret = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %x)
11+
%res = extractvalue {i32, i1} %ret, 1
12+
ret i1 %res
13+
}
14+
15+
; Positive test: same fold at a different even bit width; for i16 the expected
; threshold is 255 (2^8 - 1).
define i1 @umulov_square_i16(i16 %x) {
16+
; CHECK-LABEL: define i1 @umulov_square_i16(
17+
; CHECK-SAME: i16 [[X:%.*]]) {
18+
; CHECK-NEXT: [[RES:%.*]] = icmp ugt i16 [[X]], 255
19+
; CHECK-NEXT: ret i1 [[RES]]
20+
;
21+
%ret = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 %x)
22+
%res = extractvalue {i16, i1} %ret, 1
23+
ret i1 %res
24+
}
25+
26+
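; Negative tests: the fold must not fire for an odd bit width, when the
; multiplication result is also used, or for the signed intrinsic.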
27+
28+
; Negative test: i13 has an odd bit width, which the fold deliberately does not
; handle, so the intrinsic call and extractvalue are expected to remain.
define i1 @umulov_square_i13(i13 %x) {
29+
; CHECK-LABEL: define i1 @umulov_square_i13(
30+
; CHECK-SAME: i13 [[X:%.*]]) {
31+
; CHECK-NEXT: [[RET:%.*]] = call { i13, i1 } @llvm.umul.with.overflow.i13(i13 [[X]], i13 [[X]])
32+
; CHECK-NEXT: [[RES:%.*]] = extractvalue { i13, i1 } [[RET]], 1
33+
; CHECK-NEXT: ret i1 [[RES]]
34+
;
35+
%ret = call {i13, i1} @llvm.umul.with.overflow.i13(i13 %x, i13 %x)
36+
%res = extractvalue {i13, i1} %ret, 1
37+
ret i1 %res
38+
}
39+
40+
; Negative test: the multiplication result (element 0) is also extracted and
; passed to @use, so the overflow bit is not the only use of the intrinsic.
; The expected output keeps the call and both extractvalues unchanged.
define i1 @umulov_square_i32_multiuse(i32 %x) {
41+
; CHECK-LABEL: define i1 @umulov_square_i32_multiuse(
42+
; CHECK-SAME: i32 [[X:%.*]]) {
43+
; CHECK-NEXT: [[RET:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[X]], i32 [[X]])
44+
; CHECK-NEXT: [[RES:%.*]] = extractvalue { i32, i1 } [[RET]], 1
45+
; CHECK-NEXT: [[VAL:%.*]] = extractvalue { i32, i1 } [[RET]], 0
46+
; CHECK-NEXT: call void @use(i32 [[VAL]])
47+
; CHECK-NEXT: ret i1 [[RES]]
48+
;
49+
%ret = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 %x)
50+
%res = extractvalue {i32, i1} %ret, 1
51+
%val = extractvalue {i32, i1} %ret, 0
52+
call void @use(i32 %val)
53+
ret i1 %res
54+
}
55+
56+
; Negative test: the fold is restricted to umul.with.overflow, so the signed
; variant smul.with.overflow(%x, %x) is expected to be left unchanged.
define i1 @smulov_square_i32(i32 %x) {
57+
; CHECK-LABEL: define i1 @smulov_square_i32(
58+
; CHECK-SAME: i32 [[X:%.*]]) {
59+
; CHECK-NEXT: [[RET:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[X]], i32 [[X]])
60+
; CHECK-NEXT: [[RES:%.*]] = extractvalue { i32, i1 } [[RET]], 1
61+
; CHECK-NEXT: ret i1 [[RES]]
62+
;
63+
%ret = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %x, i32 %x)
64+
%res = extractvalue {i32, i1} %ret, 1
65+
ret i1 %res
66+
}
67+
68+
; Declarations for the overflow intrinsics exercised by the tests in this file,
; plus the external @use function called by the multiuse test.
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
69+
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
70+
declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16)
71+
declare {i13, i1} @llvm.umul.with.overflow.i13(i13, i13)
72+
declare void @use(i32)

0 commit comments

Comments
 (0)