Skip to content

Commit 8f79898

Browse files
committed
[InstCombine] Fold the log2_ceil idiom
1 parent a3404fb commit 8f79898

File tree

2 files changed

+39
-32
lines changed

2 files changed

+39
-32
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1723,6 +1723,28 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
17231723
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
17241724
{Builder.CreateOr(A, B)}));
17251725

1726+
// Fold the log2_ceil idiom:
1727+
// zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) --> BW - ctlz(A - 1, false)
1728+
// TODO: Add one-use checks?
1729+
const APInt *XorC;
1730+
if (match(
1731+
&I,
1732+
m_c_Add(m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
1733+
m_One())),
1734+
m_ZExtOrSelf(m_Xor(m_TruncOrSelf(m_Intrinsic<Intrinsic::ctlz>(
1735+
m_Deferred(A), m_One())),
1736+
m_APInt(XorC))))) &&
1737+
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
1738+
*XorC == A->getType()->getScalarSizeInBits() - 1) {
1739+
Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
1740+
Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
1741+
{Sub, Builder.getFalse()});
1742+
Value *Ret = Builder.CreateSub(
1743+
ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
1744+
Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
1745+
return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
1746+
}
1747+
17261748
if (Instruction *Res = foldSquareSumInt(I))
17271749
return Res;
17281750

llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll

Lines changed: 17 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,9 @@
44
define i32 @log2_ceil_idiom(i32 %x) {
55
; CHECK-LABEL: define i32 @log2_ceil_idiom(
66
; CHECK-SAME: i32 [[X:%.*]]) {
7-
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0:![0-9]+]]
8-
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
9-
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
10-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
11-
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
12-
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
7+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
8+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0:![0-9]+]]
9+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
1310
; CHECK-NEXT: ret i32 [[RET]]
1411
;
1512
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -24,13 +21,10 @@ define i32 @log2_ceil_idiom(i32 %x) {
2421
define i5 @log2_ceil_idiom_trunc(i32 %x) {
2522
; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc(
2623
; CHECK-SAME: i32 [[X:%.*]]) {
27-
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
28-
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i5
29-
; CHECK-NEXT: [[XOR:%.*]] = xor i5 [[TRUNC]], -1
30-
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
31-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
32-
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i5
33-
; CHECK-NEXT: [[RET:%.*]] = add i5 [[XOR]], [[ZEXT]]
24+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
25+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
26+
; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 0, [[TMP2]]
27+
; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[TMP3]] to i5
3428
; CHECK-NEXT: ret i5 [[RET]]
3529
;
3630
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -46,13 +40,10 @@ define i5 @log2_ceil_idiom_trunc(i32 %x) {
4640
define i64 @log2_ceil_idiom_zext(i32 %x) {
4741
; CHECK-LABEL: define i64 @log2_ceil_idiom_zext(
4842
; CHECK-SAME: i32 [[X:%.*]]) {
49-
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
50-
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
51-
; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[XOR]] to i64
52-
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
53-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
54-
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i64
55-
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i64 [[EXT]], [[ZEXT]]
43+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
44+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
45+
; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 32, [[TMP2]]
46+
; CHECK-NEXT: [[RET:%.*]] = zext i32 [[TMP3]] to i64
5647
; CHECK-NEXT: ret i64 [[RET]]
5748
;
5849
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -68,12 +59,9 @@ define i64 @log2_ceil_idiom_zext(i32 %x) {
6859
define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
6960
; CHECK-LABEL: define i32 @log2_ceil_idiom_power2_test2(
7061
; CHECK-SAME: i32 [[X:%.*]]) {
71-
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
72-
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
73-
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
74-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CTPOP]], 1
75-
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
76-
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
62+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
63+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
64+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
7765
; CHECK-NEXT: ret i32 [[RET]]
7866
;
7967
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
@@ -88,12 +76,9 @@ define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
8876
define i32 @log2_ceil_idiom_commuted(i32 %x) {
8977
; CHECK-LABEL: define i32 @log2_ceil_idiom_commuted(
9078
; CHECK-SAME: i32 [[X:%.*]]) {
91-
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
92-
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
93-
; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
94-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
95-
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
96-
; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
79+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
80+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
81+
; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
9782
; CHECK-NEXT: ret i32 [[RET]]
9883
;
9984
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)

0 commit comments

Comments
 (0)