Skip to content

Commit 872932b

Browse files
authored
[InstCombine] Generalize icmp (shl nuw C2, Y), C -> icmp Y, C3 (#104696)
The motivation of this patch is to fold more general patterns such as `icmp ult (shl nuw 16, X), 64 -> icmp ult X, 2`. Alive2 proof: https://alive2.llvm.org/ce/z/gyqjQH
1 parent 4b529f8 commit 872932b

File tree

2 files changed

+120
-8
lines changed

2 files changed

+120
-8
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2227,18 +2227,24 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
22272227
return NewC ? new ICmpInst(Pred, X, NewC) : nullptr;
22282228
}
22292229

2230-
/// Fold icmp (shl 1, Y), C.
2231-
static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
2232-
const APInt &C) {
2230+
/// Fold icmp (shl nuw C2, Y), C.
2231+
static Instruction *foldICmpShlLHSC(ICmpInst &Cmp, Instruction *Shl,
2232+
const APInt &C) {
22332233
Value *Y;
2234-
if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
2234+
const APInt *C2;
2235+
if (!match(Shl, m_NUWShl(m_APInt(C2), m_Value(Y))))
22352236
return nullptr;
22362237

22372238
Type *ShiftType = Shl->getType();
22382239
unsigned TypeBits = C.getBitWidth();
2239-
bool CIsPowerOf2 = C.isPowerOf2();
22402240
ICmpInst::Predicate Pred = Cmp.getPredicate();
22412241
if (Cmp.isUnsigned()) {
2242+
if (C2->isZero() || C2->ugt(C))
2243+
return nullptr;
2244+
APInt Div, Rem;
2245+
APInt::udivrem(C, *C2, Div, Rem);
2246+
bool CIsPowerOf2 = Rem.isZero() && Div.isPowerOf2();
2247+
22422248
// (C2 << Y) pred C -> Y pred Log2(C udiv C2)
22432249
if (!CIsPowerOf2) {
22442250
// (1 << Y) < 30 -> Y <= 4
@@ -2251,9 +2257,9 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
22512257
Pred = ICmpInst::ICMP_UGT;
22522258
}
22532259

2254-
unsigned CLog2 = C.logBase2();
2260+
unsigned CLog2 = Div.logBase2();
22552261
return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
2256-
} else if (Cmp.isSigned()) {
2262+
} else if (Cmp.isSigned() && C2->isOne()) {
22572263
Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
22582264
// (1 << Y) > 0 -> Y != 31
22592265
// (1 << Y) > C -> Y != 31 if C is negative.
@@ -2307,7 +2313,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
23072313

23082314
const APInt *ShiftAmt;
23092315
if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
2310-
return foldICmpShlOne(Cmp, Shl, C);
2316+
return foldICmpShlLHSC(Cmp, Shl, C);
23112317

23122318
// Check that the shift amount is in range. If not, don't perform undefined
23132319
// shifts. When the shift is visited, it will be simplified.

llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,109 @@ define <2 x i1> @icmp_ugt_16x2(<2 x i32>) {
9090
%d = icmp ugt <2 x i32> %c, <i32 1048575, i32 1048575>
9191
ret <2 x i1> %d
9292
}
93+
94+
define i1 @fold_icmp_shl_nuw_c1(i32 %x) {
95+
; CHECK-LABEL: @fold_icmp_shl_nuw_c1(
96+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 61440
97+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
98+
; CHECK-NEXT: ret i1 [[CMP]]
99+
;
100+
%lshr = lshr i32 %x, 12
101+
%and = and i32 %lshr, 15
102+
%shl = shl nuw i32 2, %and
103+
%cmp = icmp ult i32 %shl, 4
104+
ret i1 %cmp
105+
}
106+
107+
define i1 @fold_icmp_shl_nuw_c2(i32 %x) {
108+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2(
109+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2
110+
; CHECK-NEXT: ret i1 [[CMP]]
111+
;
112+
%shl = shl nuw i32 16, %x
113+
%cmp = icmp ult i32 %shl, 64
114+
ret i1 %cmp
115+
}
116+
117+
define i1 @fold_icmp_shl_nuw_c2_non_pow2(i32 %x) {
118+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2_non_pow2(
119+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2
120+
; CHECK-NEXT: ret i1 [[CMP]]
121+
;
122+
%shl = shl nuw i32 48, %x
123+
%cmp = icmp ult i32 %shl, 192
124+
ret i1 %cmp
125+
}
126+
127+
define i1 @fold_icmp_shl_nuw_c2_div_non_pow2(i32 %x) {
128+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2_div_non_pow2(
129+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 5
130+
; CHECK-NEXT: ret i1 [[CMP]]
131+
;
132+
%shl = shl nuw i32 2, %x
133+
%cmp = icmp ult i32 %shl, 60
134+
ret i1 %cmp
135+
}
136+
137+
define i1 @fold_icmp_shl_nuw_c3(i32 %x) {
138+
; CHECK-LABEL: @fold_icmp_shl_nuw_c3(
139+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
140+
; CHECK-NEXT: ret i1 [[CMP]]
141+
;
142+
%shl = shl nuw i32 48, %x
143+
%cmp = icmp uge i32 %shl, 144
144+
ret i1 %cmp
145+
}
146+
147+
define i1 @fold_icmp_shl_nuw_c2_indivisible(i32 %x) {
148+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2_indivisible(
149+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2
150+
; CHECK-NEXT: ret i1 [[CMP]]
151+
;
152+
%shl = shl nuw i32 16, %x
153+
%cmp = icmp ult i32 %shl, 63
154+
ret i1 %cmp
155+
}
156+
157+
; Negative tests
158+
159+
define i1 @fold_icmp_shl_c2_without_nuw(i32 %x) {
160+
; CHECK-LABEL: @fold_icmp_shl_c2_without_nuw(
161+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 16, [[X:%.*]]
162+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SHL]], 64
163+
; CHECK-NEXT: ret i1 [[CMP]]
164+
;
165+
%shl = shl i32 16, %x
166+
%cmp = icmp ult i32 %shl, 64
167+
ret i1 %cmp
168+
}
169+
170+
; Make sure this trivial case is folded by InstSimplify.
171+
define i1 @fold_icmp_shl_nuw_c2_precondition1(i32 %x) {
172+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition1(
173+
; CHECK-NEXT: ret i1 true
174+
;
175+
%shl = shl nuw i32 0, %x
176+
%cmp = icmp ult i32 %shl, 63
177+
ret i1 %cmp
178+
}
179+
180+
; Make sure this trivial case is folded by InstSimplify.
181+
define i1 @fold_icmp_shl_nuw_c2_precondition2(i32 %x) {
182+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition2(
183+
; CHECK-NEXT: ret i1 false
184+
;
185+
%shl = shl nuw i32 127, %x
186+
%cmp = icmp ult i32 %shl, 63
187+
ret i1 %cmp
188+
}
189+
190+
; Make sure we don't crash on this case.
191+
define i1 @fold_icmp_shl_nuw_c2_precondition3(i32 %x) {
192+
; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition3(
193+
; CHECK-NEXT: ret i1 false
194+
;
195+
%shl = shl nuw i32 1, %x
196+
%cmp = icmp ult i32 %shl, 1
197+
ret i1 %cmp
198+
}

0 commit comments

Comments
 (0)