Skip to content

Commit 4e21d4e

Browse files
committed
[InstCombine] Fold usub_sat((sub nuw C1, A), C2) to usub_sat(C1 - C2, A) or 0
1 parent 6c07b2f commit 4e21d4e

File tree

2 files changed

+23
-16
lines changed

2 files changed

+23
-16
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2139,6 +2139,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
21392139
}
21402140
}
21412141

2142+
// usub_sat((sub nuw C1, A), C2) -> usub_sat(C1 - C2, A) if C2 u< C1
2143+
// usub_sat((sub nuw C1, A), C2) -> 0 if C2 u>= C1, because nuw guarantees (C1 - A) u<= C1 u<= C2
2144+
const APInt *C1, *C2;
2145+
Value *A;
2146+
if (IID == Intrinsic::usub_sat &&
2147+
match(Arg0, m_OneUse(m_NUWSub(m_APInt(C1), m_Value(A)))) &&
2148+
match(Arg1, m_APInt(C2))) {
2149+
2150+
if (C2->ult(*C1)) {
2151+
auto *New = Builder.CreateBinaryIntrinsic(
2152+
Intrinsic::usub_sat, ConstantInt::get(Ty, *C1 - *C2), A);
2153+
return replaceInstUsesWith(*SI, New);
2154+
} else {
2155+
return replaceInstUsesWith(*SI, ConstantInt::get(Ty, 0));
2156+
}
2157+
}
2158+
21422159
// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
21432160
Constant *C;
21442161
if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&

llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ declare void @usei1(i1)
1212

1313
define i32 @usub_sat_C1_C2(i32 %a){
1414
; CHECK-LABEL: @usub_sat_C1_C2(
15-
; CHECK-NEXT: [[ADD:%.*]] = sub nuw i32 64, [[A:%.*]]
16-
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
15+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 50, i32 [[A:%.*]])
1716
; CHECK-NEXT: ret i32 [[COND]]
1817
;
1918
%add = sub nuw i32 64, %a
@@ -23,9 +22,7 @@ define i32 @usub_sat_C1_C2(i32 %a){
2322

2423
define i32 @usub_sat_C1_C2_produce_0(i32 %a){
2524
; CHECK-LABEL: @usub_sat_C1_C2_produce_0(
26-
; CHECK-NEXT: [[ADD:%.*]] = sub nuw i32 14, [[A:%.*]]
27-
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
28-
; CHECK-NEXT: ret i32 [[COND]]
25+
; CHECK-NEXT: ret i32 0
2926
;
3027
%add = sub nuw i32 14, %a
3128
%cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
@@ -34,9 +31,7 @@ define i32 @usub_sat_C1_C2_produce_0(i32 %a){
3431

3532
define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){
3633
; CHECK-LABEL: @usub_sat_C1_C2_produce_0_too(
37-
; CHECK-NEXT: [[ADD:%.*]] = sub nuw i32 12, [[A:%.*]]
38-
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
39-
; CHECK-NEXT: ret i32 [[COND]]
34+
; CHECK-NEXT: ret i32 0
4035
;
4136
%add = sub nuw i32 12, %a
4237
%cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
@@ -46,8 +41,7 @@ define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){
4641
; vector tests
4742
define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) {
4843
; CHECK-LABEL: @usub_sat_C1_C2_splat(
49-
; CHECK-NEXT: [[ADD:%.*]] = sub nuw <2 x i16> <i16 64, i16 64>, [[A:%.*]]
50-
; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
44+
; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> <i16 50, i16 50>, <2 x i16> [[A:%.*]])
5145
; CHECK-NEXT: ret <2 x i16> [[COND]]
5246
;
5347
%add = sub nuw <2 x i16> <i16 64, i16 64>, %a
@@ -57,9 +51,7 @@ define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) {
5751

5852
define <2 x i16> @usub_sat_C1_C2_splat_produce_0(<2 x i16> %a){
5953
; CHECK-LABEL: @usub_sat_C1_C2_splat_produce_0(
60-
; CHECK-NEXT: [[ADD:%.*]] = sub nuw <2 x i16> <i16 14, i16 14>, [[A:%.*]]
61-
; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
62-
; CHECK-NEXT: ret <2 x i16> [[COND]]
54+
; CHECK-NEXT: ret <2 x i16> zeroinitializer
6355
;
6456
%add = sub nuw <2 x i16> <i16 14, i16 14>, %a
6557
%cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)
@@ -68,9 +60,7 @@ define <2 x i16> @usub_sat_C1_C2_splat_produce_0(<2 x i16> %a){
6860

6961
define <2 x i16> @usub_sat_C1_C2_splat_produce_0_too(<2 x i16> %a){
7062
; CHECK-LABEL: @usub_sat_C1_C2_splat_produce_0_too(
71-
; CHECK-NEXT: [[ADD:%.*]] = sub nuw <2 x i16> <i16 12, i16 12>, [[A:%.*]]
72-
; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
73-
; CHECK-NEXT: ret <2 x i16> [[COND]]
63+
; CHECK-NEXT: ret <2 x i16> zeroinitializer
7464
;
7565
%add = sub nuw <2 x i16> <i16 12, i16 12>, %a
7666
%cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)

0 commit comments

Comments
 (0)