Skip to content

Commit e15c88e

Browse files
committed
[InstCombine] fold unsigned predicates to signed on srem result
This allows more signed floor-division implementations to be optimized to an arithmetic shift when the divisor is a known power of two. Proof for the implemented optimizations: https://alive2.llvm.org/ce/z/j6C-Nz Proof for the test cases: https://alive2.llvm.org/ce/z/M_PBjw
1 parent 00085c7 commit e15c88e

File tree

3 files changed

+42
-19
lines changed

3 files changed

+42
-19
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2674,10 +2674,41 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
26742674
Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
26752675
BinaryOperator *SRem,
26762676
const APInt &C) {
2677+
const ICmpInst::Predicate Pred = Cmp.getPredicate();
2678+
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT) {
2679+
// Canonicalize unsigned predicates to signed:
2680+
// (X % DivisorC) ugt C -> (X % DivisorC) slt 0
2681+
// iff (C slt 0 ? ~C : C) uge abs(DivisorC)-1
2682+
// (X % DivisorC) ult C+1 -> (X % DivisorC) sgt -1
2683+
// iff (C+1 slt 0 ? ~C : C) uge abs(DivisorC)-1
2684+
2685+
const APInt *DivisorC;
2686+
if (!match(SRem->getOperand(1), m_APInt(DivisorC)))
2687+
return nullptr;
2688+
2689+
APInt NormalizedC = C;
2690+
if (Pred == ICmpInst::ICMP_ULT) {
2691+
assert(!NormalizedC.isZero() &&
2692+
"ult X, 0 should have been simplified already.");
2693+
--NormalizedC;
2694+
}
2695+
if (C.isNegative())
2696+
NormalizedC.flipAllBits();
2697+
assert(!DivisorC->isZero() &&
2698+
"srem X, 0 should have been simplified already.");
2699+
if (!NormalizedC.uge(DivisorC->abs() - 1))
2700+
return nullptr;
2701+
2702+
Type *Ty = SRem->getType();
2703+
if (Pred == ICmpInst::ICMP_UGT)
2704+
return new ICmpInst(ICmpInst::ICMP_SLT, SRem,
2705+
ConstantInt::getNullValue(Ty));
2706+
return new ICmpInst(ICmpInst::ICMP_SGT, SRem,
2707+
ConstantInt::getAllOnesValue(Ty));
2708+
}
26772709
// Match an 'is positive' or 'is negative' comparison of remainder by a
26782710
// constant power-of-2 value:
26792711
// (X % pow2C) sgt/slt 0
2680-
const ICmpInst::Predicate Pred = Cmp.getPredicate();
26812712
if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT &&
26822713
Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
26832714
return nullptr;

llvm/test/Transforms/InstCombine/add.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,11 +3020,7 @@ define i32 @floor_sdiv_wrong_op(i32 %x, i32 %y) {
30203020

30213021
define i32 @floor_sdiv_using_srem_by_8(i32 %x) {
30223022
; CHECK-LABEL: @floor_sdiv_using_srem_by_8(
3023-
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 8
3024-
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 8
3025-
; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[R]], -2147483648
3026-
; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
3027-
; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[D]], [[S]]
3023+
; CHECK-NEXT: [[F:%.*]] = ashr i32 [[X:%.*]], 3
30283024
; CHECK-NEXT: ret i32 [[F]]
30293025
;
30303026
%d = sdiv i32 %x, 8
@@ -3037,11 +3033,7 @@ define i32 @floor_sdiv_using_srem_by_8(i32 %x) {
30373033

30383034
define i32 @floor_sdiv_using_srem_by_2(i32 %x) {
30393035
; CHECK-LABEL: @floor_sdiv_using_srem_by_2(
3040-
; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 2
3041-
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 2
3042-
; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[R]], -2147483648
3043-
; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
3044-
; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[D]], [[S]]
3036+
; CHECK-NEXT: [[F:%.*]] = ashr i32 [[X:%.*]], 1
30453037
; CHECK-NEXT: ret i32 [[F]]
30463038
;
30473039
%d = sdiv i32 %x, 2

llvm/test/Transforms/InstCombine/icmp-srem.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ define i1 @icmp_ugt_srem5_smin(i32 %x) {
55
; CHECK-LABEL: define i1 @icmp_ugt_srem5_smin(
66
; CHECK-SAME: i32 [[X:%.*]]) {
77
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
8-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[R]], -2147483648
8+
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[R]], 0
99
; CHECK-NEXT: ret i1 [[C]]
1010
;
1111
%r = srem i32 %x, 5
@@ -17,7 +17,7 @@ define i1 @icmp_ugt_srem5_m5(i32 %x) {
1717
; CHECK-LABEL: define i1 @icmp_ugt_srem5_m5(
1818
; CHECK-SAME: i32 [[X:%.*]]) {
1919
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
20-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[R]], -5
20+
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[R]], 0
2121
; CHECK-NEXT: ret i1 [[C]]
2222
;
2323
%r = srem i32 %x, 5
@@ -53,7 +53,7 @@ define i1 @icmp_ugt_srem5_4(i32 %x) {
5353
; CHECK-LABEL: define i1 @icmp_ugt_srem5_4(
5454
; CHECK-SAME: i32 [[X:%.*]]) {
5555
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
56-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[R]], 4
56+
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[R]], 0
5757
; CHECK-NEXT: ret i1 [[C]]
5858
;
5959
%r = srem i32 %x, 5
@@ -65,7 +65,7 @@ define i1 @icmp_ugt_srem5_smaxm1(i32 %x) {
6565
; CHECK-LABEL: define i1 @icmp_ugt_srem5_smaxm1(
6666
; CHECK-SAME: i32 [[X:%.*]]) {
6767
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
68-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[R]], 2147483646
68+
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[R]], 0
6969
; CHECK-NEXT: ret i1 [[C]]
7070
;
7171
%r = srem i32 %x, 5
@@ -77,7 +77,7 @@ define i1 @icmp_ult_srem5_sminp1(i32 %x) {
7777
; CHECK-LABEL: define i1 @icmp_ult_srem5_sminp1(
7878
; CHECK-SAME: i32 [[X:%.*]]) {
7979
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
80-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[R]], -2147483647
80+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[R]], -1
8181
; CHECK-NEXT: ret i1 [[C]]
8282
;
8383
%r = srem i32 %x, 5
@@ -89,7 +89,7 @@ define i1 @icmp_ult_srem5_m4(i32 %x) {
8989
; CHECK-LABEL: define i1 @icmp_ult_srem5_m4(
9090
; CHECK-SAME: i32 [[X:%.*]]) {
9191
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
92-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[R]], -4
92+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[R]], -1
9393
; CHECK-NEXT: ret i1 [[C]]
9494
;
9595
%r = srem i32 %x, 5
@@ -125,7 +125,7 @@ define i1 @icmp_ult_srem5_5(i32 %x) {
125125
; CHECK-LABEL: define i1 @icmp_ult_srem5_5(
126126
; CHECK-SAME: i32 [[X:%.*]]) {
127127
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
128-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[R]], 5
128+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[R]], -1
129129
; CHECK-NEXT: ret i1 [[C]]
130130
;
131131
%r = srem i32 %x, 5
@@ -137,7 +137,7 @@ define i1 @icmp_ult_srem5_smax(i32 %x) {
137137
; CHECK-LABEL: define i1 @icmp_ult_srem5_smax(
138138
; CHECK-SAME: i32 [[X:%.*]]) {
139139
; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 5
140-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[R]], 2147483647
140+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[R]], -1
141141
; CHECK-NEXT: ret i1 [[C]]
142142
;
143143
%r = srem i32 %x, 5

0 commit comments

Comments
 (0)