Skip to content

Commit 1220c9b

Browse files
authored
[InstCombine] Fold the log2_ceil idiom (#76661)
This patch folds the `log2_ceil` idiom:
```
(BW - ctlz(A)) + (is_power2(A) ? 0 : 1)
->
zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) (canonical form)
->
BW - ctlz(A - 1, false)
```
Alive2: https://alive2.llvm.org/ce/z/6mSbdi
1 parent 205aa3f commit 1220c9b

File tree

2 files changed

+361
-0
lines changed

2 files changed

+361
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,6 +1723,30 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
17231723
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
17241724
{Builder.CreateOr(A, B)}));
17251725

1726+
// Fold the log2_ceil idiom:
1727+
// zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
1728+
// -->
1729+
// BW - ctlz(A - 1, false)
1730+
const APInt *XorC;
1731+
if (match(&I,
1732+
m_c_Add(
1733+
m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
1734+
m_One())),
1735+
m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
1736+
m_OneUse(m_TruncOrSelf(m_OneUse(
1737+
m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
1738+
m_APInt(XorC))))))) &&
1739+
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
1740+
*XorC == A->getType()->getScalarSizeInBits() - 1) {
1741+
Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
1742+
Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
1743+
{Sub, Builder.getFalse()});
1744+
Value *Ret = Builder.CreateSub(
1745+
ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
1746+
Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
1747+
return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
1748+
}
1749+
17261750
if (Instruction *Res = foldSquareSumInt(I))
17271751
return Res;
17281752

Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
; Positive test: the basic i32 idiom, (ctlz(x, true) ^ 31) + zext(ctpop(x) >u 1), folds to 32 - ctlz(x - 1, false).
define i32 @log2_ceil_idiom(i32 %x) {
5+
; CHECK-LABEL: define i32 @log2_ceil_idiom(
6+
; CHECK-SAME: i32 [[X:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
8+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0:![0-9]+]]
9+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
10+
; CHECK-NEXT: ret i32 [[RET]]
11+
;
12+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
13+
%xor = xor i32 %ctlz, 31
14+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
15+
%cmp = icmp ugt i32 %ctpop, 1
16+
%zext = zext i1 %cmp to i32
17+
%ret = add i32 %xor, %zext
18+
ret i32 %ret
19+
}
20+
21+
; Positive test: the ctlz result is truncated to i5 before the xor with 31; the fold still fires and its i32 result is truncated to i5.
define i5 @log2_ceil_idiom_trunc(i32 %x) {
22+
; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc(
23+
; CHECK-SAME: i32 [[X:%.*]]) {
24+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
25+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
26+
; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 0, [[TMP2]]
27+
; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[TMP3]] to i5
28+
; CHECK-NEXT: ret i5 [[RET]]
29+
;
30+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
31+
%trunc = trunc i32 %ctlz to i5
32+
%xor = xor i5 %trunc, 31
33+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
34+
%cmp = icmp ugt i32 %ctpop, 1
35+
%zext = zext i1 %cmp to i5
36+
%ret = add i5 %xor, %zext
37+
ret i5 %ret
38+
}
39+
40+
; Positive test: the xor is zero-extended to i64 before the add; the fold is performed in i32 and the result is zero-extended to i64.
define i64 @log2_ceil_idiom_zext(i32 %x) {
41+
; CHECK-LABEL: define i64 @log2_ceil_idiom_zext(
42+
; CHECK-SAME: i32 [[X:%.*]]) {
43+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
44+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
45+
; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 32, [[TMP2]]
46+
; CHECK-NEXT: [[RET:%.*]] = zext i32 [[TMP3]] to i64
47+
; CHECK-NEXT: ret i64 [[RET]]
48+
;
49+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
50+
%xor = xor i32 %ctlz, 31
51+
%ext = zext nneg i32 %xor to i64
52+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
53+
%cmp = icmp ugt i32 %ctpop, 1
54+
%zext = zext i1 %cmp to i64
55+
%ret = add i64 %ext, %zext
56+
ret i64 %ret
57+
}
58+
59+
; Positive test: same as the basic idiom, but the not-a-power-of-2 check is written as `icmp ne ctpop, 1` instead of `icmp ugt ctpop, 1`.
define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
60+
; CHECK-LABEL: define i32 @log2_ceil_idiom_power2_test2(
61+
; CHECK-SAME: i32 [[X:%.*]]) {
62+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
63+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
64+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
65+
; CHECK-NEXT: ret i32 [[RET]]
66+
;
67+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
68+
%xor = xor i32 %ctlz, 31
69+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
70+
%cmp = icmp ne i32 %ctpop, 1
71+
%zext = zext i1 %cmp to i32
72+
%ret = add i32 %xor, %zext
73+
ret i32 %ret
74+
}
75+
76+
; Positive test: the add operands are swapped (zext first, xor second); the fold must match either operand order.
define i32 @log2_ceil_idiom_commuted(i32 %x) {
77+
; CHECK-LABEL: define i32 @log2_ceil_idiom_commuted(
78+
; CHECK-SAME: i32 [[X:%.*]]) {
79+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
80+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
81+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
82+
; CHECK-NEXT: ret i32 [[RET]]
83+
;
84+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
85+
%xor = xor i32 %ctlz, 31
86+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
87+
%cmp = icmp ugt i32 %ctpop, 1
88+
%zext = zext i1 %cmp to i32
89+
%ret = add i32 %zext, %xor
90+
ret i32 %ret
91+
}
92+
93+
; Positive test: an extra use of the ctpop result does not block the fold; the ctpop call is kept only to feed @use32.
define i32 @log2_ceil_idiom_multiuse1(i32 %x) {
94+
; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse1(
95+
; CHECK-SAME: i32 [[X:%.*]]) {
96+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
97+
; CHECK-NEXT: call void @use32(i32 [[CTPOP]])
98+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
99+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
100+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
101+
; CHECK-NEXT: ret i32 [[RET]]
102+
;
103+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
104+
%xor = xor i32 %ctlz, 31
105+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
106+
call void @use32(i32 %ctpop)
107+
%cmp = icmp ugt i32 %ctpop, 1
108+
%zext = zext i1 %cmp to i32
109+
%ret = add i32 %xor, %zext
110+
ret i32 %ret
111+
}
112+
113+
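; Negative tests: each function below breaks one precondition of the log2_ceil fold, so the idiom must not be rewritten.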
114+
115+
; Negative test: ctlz is called with is_zero_poison = false (x may be zero), so the idiom is expected to stay unfolded.
define i32 @log2_ceil_idiom_x_may_be_zero(i32 %x) {
116+
; CHECK-LABEL: define i32 @log2_ceil_idiom_x_may_be_zero(
117+
; CHECK-SAME: i32 [[X:%.*]]) {
118+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 false), !range [[RNG0]]
119+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
120+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
121+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
122+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
123+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
124+
; CHECK-NEXT: ret i32 [[RET]]
125+
;
126+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
127+
%xor = xor i32 %ctlz, 31
128+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
129+
%cmp = icmp ugt i32 %ctpop, 1
130+
%zext = zext i1 %cmp to i32
131+
%ret = add i32 %xor, %zext
132+
ret i32 %ret
133+
}
134+
135+
; Negative test: the ctlz result is truncated to i4, which cannot hold the constant 31 (the xor constant appears as -1 in the CHECK lines), so the idiom stays unfolded.
define i4 @log2_ceil_idiom_trunc_too_short(i32 %x) {
136+
; CHECK-LABEL: define i4 @log2_ceil_idiom_trunc_too_short(
137+
; CHECK-SAME: i32 [[X:%.*]]) {
138+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
139+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i4
140+
; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[TRUNC]], -1
141+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
142+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
143+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i4
144+
; CHECK-NEXT: [[RET:%.*]] = add i4 [[XOR]], [[ZEXT]]
145+
; CHECK-NEXT: ret i4 [[RET]]
146+
;
147+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
148+
%trunc = trunc i32 %ctlz to i4
149+
%xor = xor i4 %trunc, 31
150+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
151+
%cmp = icmp ugt i32 %ctpop, 1
152+
%zext = zext i1 %cmp to i4
153+
%ret = add i4 %xor, %zext
154+
ret i4 %ret
155+
}
156+
157+
; Negative test: ctlz is taken of %x but ctpop of %y, so the two halves of the idiom do not refer to the same value.
define i32 @log2_ceil_idiom_mismatched_operands(i32 %x, i32 %y) {
158+
; CHECK-LABEL: define i32 @log2_ceil_idiom_mismatched_operands(
159+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
160+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
161+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
162+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[Y]]), !range [[RNG0]]
163+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
164+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
165+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
166+
; CHECK-NEXT: ret i32 [[RET]]
167+
;
168+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
169+
%xor = xor i32 %ctlz, 31
170+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %y)
171+
%cmp = icmp ugt i32 %ctpop, 1
172+
%zext = zext i1 %cmp to i32
173+
%ret = add i32 %xor, %zext
174+
ret i32 %ret
175+
}
176+
177+
; Negative test: the xor constant is 30 rather than 31 (bit width - 1), so this is not the log2_ceil idiom.
define i32 @log2_ceil_idiom_wrong_constant(i32 %x) {
178+
; CHECK-LABEL: define i32 @log2_ceil_idiom_wrong_constant(
179+
; CHECK-SAME: i32 [[X:%.*]]) {
180+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
181+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 30
182+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
183+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
184+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
185+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
186+
; CHECK-NEXT: ret i32 [[RET]]
187+
;
188+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
189+
%xor = xor i32 %ctlz, 30
190+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
191+
%cmp = icmp ugt i32 %ctpop, 1
192+
%zext = zext i1 %cmp to i32
193+
%ret = add i32 %xor, %zext
194+
ret i32 %ret
195+
}
196+
197+
; Negative test: the compare is `icmp eq ctpop, 1` (an is-power-of-2 check) rather than the ugt/ne form the fold requires.
define i32 @log2_ceil_idiom_not_a_power2_test1(i32 %x) {
198+
; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test1(
199+
; CHECK-SAME: i32 [[X:%.*]]) {
200+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
201+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
202+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
203+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CTPOP]], 1
204+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
205+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
206+
; CHECK-NEXT: ret i32 [[RET]]
207+
;
208+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
209+
%xor = xor i32 %ctlz, 31
210+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
211+
%cmp = icmp eq i32 %ctpop, 1
212+
%zext = zext i1 %cmp to i32
213+
%ret = add i32 %xor, %zext
214+
ret i32 %ret
215+
}
216+
217+
; Negative test: the compare is `icmp ugt ctpop, 2`, i.e. the ctpop result is not compared against 1.
define i32 @log2_ceil_idiom_not_a_power2_test2(i32 %x) {
218+
; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test2(
219+
; CHECK-SAME: i32 [[X:%.*]]) {
220+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
221+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
222+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
223+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 2
224+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
225+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
226+
; CHECK-NEXT: ret i32 [[RET]]
227+
;
228+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
229+
%xor = xor i32 %ctlz, 31
230+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
231+
%cmp = icmp ugt i32 %ctpop, 2
232+
%zext = zext i1 %cmp to i32
233+
%ret = add i32 %xor, %zext
234+
ret i32 %ret
235+
}
236+
237+
; Negative test: the ctlz result has an extra use (@use32), so the idiom is left in place.
define i32 @log2_ceil_idiom_multiuse2(i32 %x) {
238+
; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse2(
239+
; CHECK-SAME: i32 [[X:%.*]]) {
240+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
241+
; CHECK-NEXT: call void @use32(i32 [[CTLZ]])
242+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
243+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
244+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
245+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
246+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
247+
; CHECK-NEXT: ret i32 [[RET]]
248+
;
249+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
250+
call void @use32(i32 %ctlz)
251+
%xor = xor i32 %ctlz, 31
252+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
253+
%cmp = icmp ugt i32 %ctpop, 1
254+
%zext = zext i1 %cmp to i32
255+
%ret = add i32 %xor, %zext
256+
ret i32 %ret
257+
}
258+
259+
; Negative test: the xor result has an extra use (@use32), so the idiom is left in place.
define i32 @log2_ceil_idiom_multiuse3(i32 %x) {
260+
; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse3(
261+
; CHECK-SAME: i32 [[X:%.*]]) {
262+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
263+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
264+
; CHECK-NEXT: call void @use32(i32 [[XOR]])
265+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
266+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
267+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
268+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
269+
; CHECK-NEXT: ret i32 [[RET]]
270+
;
271+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
272+
%xor = xor i32 %ctlz, 31
273+
call void @use32(i32 %xor)
274+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
275+
%cmp = icmp ugt i32 %ctpop, 1
276+
%zext = zext i1 %cmp to i32
277+
%ret = add i32 %xor, %zext
278+
ret i32 %ret
279+
}
280+
281+
; Negative test: the trunc of the ctlz result has an extra use (@use5), so the idiom is left in place.
define i5 @log2_ceil_idiom_trunc_multiuse4(i32 %x) {
282+
; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc_multiuse4(
283+
; CHECK-SAME: i32 [[X:%.*]]) {
284+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
285+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5
286+
; CHECK-NEXT: call void @use5(i5 [[TRUNC]])
287+
; CHECK-NEXT: [[XOR:%.*]] = xor i5 [[TRUNC]], -1
288+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
289+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
290+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i5
291+
; CHECK-NEXT: [[RET:%.*]] = add i5 [[XOR]], [[ZEXT]]
292+
; CHECK-NEXT: ret i5 [[RET]]
293+
;
294+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
295+
%trunc = trunc i32 %ctlz to i5
296+
call void @use5(i5 %trunc)
297+
%xor = xor i5 %trunc, 31
298+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
299+
%cmp = icmp ugt i32 %ctpop, 1
300+
%zext = zext i1 %cmp to i5
301+
%ret = add i5 %xor, %zext
302+
ret i5 %ret
303+
}
304+
305+
; Negative test: the zext of the xor result has an extra use (@use64), so the idiom is left in place.
define i64 @log2_ceil_idiom_zext_multiuse5(i32 %x) {
306+
; CHECK-LABEL: define i64 @log2_ceil_idiom_zext_multiuse5(
307+
; CHECK-SAME: i32 [[X:%.*]]) {
308+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
309+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
310+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[XOR]] to i64
311+
; CHECK-NEXT: call void @use64(i64 [[EXT]])
312+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
313+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
314+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i64
315+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i64 [[EXT]], [[ZEXT]]
316+
; CHECK-NEXT: ret i64 [[RET]]
317+
;
318+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
319+
%xor = xor i32 %ctlz, 31
320+
%ext = zext nneg i32 %xor to i64
321+
call void @use64(i64 %ext)
322+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
323+
%cmp = icmp ugt i32 %ctpop, 1
324+
%zext = zext i1 %cmp to i64
325+
%ret = add i64 %ext, %zext
326+
ret i64 %ret
327+
}
328+
329+
declare void @use5(i5)
330+
declare void @use32(i32)
331+
declare void @use64(i64)
332+
333+
declare i32 @llvm.ctlz.i32(i32, i1)
334+
declare i32 @llvm.ctpop.i32(i32)
335+
;.
336+
; CHECK: [[RNG0]] = !{i32 0, i32 33}
337+
;.

0 commit comments

Comments
 (0)