Skip to content

Commit 9a36cbf

Browse files
committed
[InstCombine] Add one-use constraints for folding of log2-ceil idioms
1 parent 8f79898 commit 9a36cbf

File tree

2 files changed

+127
-9
lines changed

2 files changed

+127
-9
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1724,16 +1724,18 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
17241724
{Builder.CreateOr(A, B)}));
17251725

17261726
// Fold the log2_ceil idiom:
1727-
// zext(ctpop(A, true) >u/!= 1) + (ctlz(A) ^ (BW - 1)) --> BW - ctlz(A - 1)
1728-
// TODO: Add one-use checks?
1727+
// zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
1728+
// -->
1729+
// BW - ctlz(A - 1, false)
17291730
const APInt *XorC;
1730-
if (match(
1731-
&I,
1732-
m_c_Add(m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
1733-
m_One())),
1734-
m_ZExtOrSelf(m_Xor(m_TruncOrSelf(m_Intrinsic<Intrinsic::ctlz>(
1735-
m_Deferred(A), m_One())),
1736-
m_APInt(XorC))))) &&
1731+
if (match(&I,
1732+
m_c_Add(
1733+
m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
1734+
m_One())),
1735+
m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
1736+
m_OneUse(m_TruncOrSelf(m_OneUse(
1737+
m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
1738+
m_APInt(XorC))))))) &&
17371739
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
17381740
*XorC == A->getType()->getScalarSizeInBits() - 1) {
17391741
Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));

llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll

Lines changed: 116 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,26 @@ define i32 @log2_ceil_idiom_commuted(i32 %x) {
9090
ret i32 %ret
9191
}
9292

93+
define i32 @log2_ceil_idiom_multiuse1(i32 %x) {
94+
; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse1(
95+
; CHECK-SAME: i32 [[X:%.*]]) {
96+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
97+
; CHECK-NEXT: call void @use32(i32 [[CTPOP]])
98+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
99+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
100+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
101+
; CHECK-NEXT: ret i32 [[RET]]
102+
;
103+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
104+
%xor = xor i32 %ctlz, 31
105+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
106+
call void @use32(i32 %ctpop)
107+
%cmp = icmp ugt i32 %ctpop, 1
108+
%zext = zext i1 %cmp to i32
109+
%ret = add i32 %xor, %zext
110+
ret i32 %ret
111+
}
112+
93113
; Negative tests
94114

95115
define i32 @log2_ceil_idiom_x_may_be_zero(i32 %x) {
@@ -214,6 +234,102 @@ define i32 @log2_ceil_idiom_not_a_power2_test2(i32 %x) {
214234
ret i32 %ret
215235
}
216236

237+
define i32 @log2_ceil_idiom_multiuse2(i32 %x) {
238+
; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse2(
239+
; CHECK-SAME: i32 [[X:%.*]]) {
240+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
241+
; CHECK-NEXT: call void @use32(i32 [[CTLZ]])
242+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
243+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
244+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
245+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
246+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
247+
; CHECK-NEXT: ret i32 [[RET]]
248+
;
249+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
250+
call void @use32(i32 %ctlz)
251+
%xor = xor i32 %ctlz, 31
252+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
253+
%cmp = icmp ugt i32 %ctpop, 1
254+
%zext = zext i1 %cmp to i32
255+
%ret = add i32 %xor, %zext
256+
ret i32 %ret
257+
}
258+
259+
define i32 @log2_ceil_idiom_multiuse3(i32 %x) {
260+
; CHECK-LABEL: define i32 @log2_ceil_idiom_multiuse3(
261+
; CHECK-SAME: i32 [[X:%.*]]) {
262+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
263+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
264+
; CHECK-NEXT: call void @use32(i32 [[XOR]])
265+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
266+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
267+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
268+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
269+
; CHECK-NEXT: ret i32 [[RET]]
270+
;
271+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
272+
%xor = xor i32 %ctlz, 31
273+
call void @use32(i32 %xor)
274+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
275+
%cmp = icmp ugt i32 %ctpop, 1
276+
%zext = zext i1 %cmp to i32
277+
%ret = add i32 %xor, %zext
278+
ret i32 %ret
279+
}
280+
281+
define i5 @log2_ceil_idiom_trunc_multiuse4(i32 %x) {
282+
; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc_multiuse4(
283+
; CHECK-SAME: i32 [[X:%.*]]) {
284+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
285+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5
286+
; CHECK-NEXT: call void @use5(i5 [[TRUNC]])
287+
; CHECK-NEXT: [[XOR:%.*]] = xor i5 [[TRUNC]], -1
288+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
289+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
290+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i5
291+
; CHECK-NEXT: [[RET:%.*]] = add i5 [[XOR]], [[ZEXT]]
292+
; CHECK-NEXT: ret i5 [[RET]]
293+
;
294+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
295+
%trunc = trunc i32 %ctlz to i5
296+
call void @use5(i5 %trunc)
297+
%xor = xor i5 %trunc, 31
298+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
299+
%cmp = icmp ugt i32 %ctpop, 1
300+
%zext = zext i1 %cmp to i5
301+
%ret = add i5 %xor, %zext
302+
ret i5 %ret
303+
}
304+
305+
define i64 @log2_ceil_idiom_zext_multiuse5(i32 %x) {
306+
; CHECK-LABEL: define i64 @log2_ceil_idiom_zext_multiuse5(
307+
; CHECK-SAME: i32 [[X:%.*]]) {
308+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
309+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
310+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[XOR]] to i64
311+
; CHECK-NEXT: call void @use64(i64 [[EXT]])
312+
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
313+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
314+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i64
315+
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i64 [[EXT]], [[ZEXT]]
316+
; CHECK-NEXT: ret i64 [[RET]]
317+
;
318+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
319+
%xor = xor i32 %ctlz, 31
320+
%ext = zext nneg i32 %xor to i64
321+
call void @use64(i64 %ext)
322+
%ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
323+
%cmp = icmp ugt i32 %ctpop, 1
324+
%zext = zext i1 %cmp to i64
325+
%ret = add i64 %ext, %zext
326+
ret i64 %ret
327+
}
328+
329+
declare void @use5(i5)
330+
declare void @use32(i32)
331+
declare void @use64(i64)
332+
217333
declare i32 @llvm.ctlz.i32(i32, i1)
218334
declare i32 @llvm.ctpop.i32(i32)
219335
;.

0 commit comments

Comments
 (0)