-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[InstCombine] Fold the log2_ceil
idiom
#76661
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) Changes: This patch folds the `log2_ceil` idiom.
Alive2: https://alive2.llvm.org/ce/z/6mSbdi Full diff: https://github.com/llvm/llvm-project/pull/76661.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 556fde37efeb2d..a529d2234f9689 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1730,6 +1730,28 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateOr(A, B)}));
+ // Fold the log2_ceil idiom:
+ // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) --> BW - ctlz(A - 1)
+ // TODO: Add one-use checks?
+ const APInt *XorC;
+ if (match(
+ &I,
+ m_c_Add(m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+ m_One())),
+ m_ZExtOrSelf(m_Xor(m_TruncOrSelf(m_Intrinsic<Intrinsic::ctlz>(
+ m_Deferred(A), m_One())),
+ m_APInt(XorC))))) &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+ *XorC == A->getType()->getScalarSizeInBits() - 1) {
+ Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+ Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+ {Sub, Builder.getFalse()});
+ Value *Ret = Builder.CreateSub(
+ ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+ Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+ return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
+ }
+
if (Instruction *Res = foldSquareSumInt(I))
return Res;
diff --git a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
new file mode 100644
index 00000000000000..7e5977d67195a4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @log2_ceil_idiom(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i5 @log2_ceil_idiom_trunc(i32 %x) {
+; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 0, [[TMP2]]
+; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[TMP3]] to i5
+; CHECK-NEXT: ret i5 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %trunc = trunc i32 %ctlz to i5
+ %xor = xor i5 %trunc, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i5
+ %ret = add i5 %xor, %zext
+ ret i5 %ret
+}
+
+define i64 @log2_ceil_idiom_zext(i32 %x) {
+; CHECK-LABEL: define i64 @log2_ceil_idiom_zext(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: [[RET:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ext = zext nneg i32 %xor to i64
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i64
+ %ret = add i64 %ext, %zext
+ ret i64 %ret
+}
+
+define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_power2_test2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_commuted(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_commuted(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %zext, %xor
+ ret i32 %ret
+}
+
+; Negative tests
+
+define i32 @log2_ceil_idiom_x_may_be_zero(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_x_may_be_zero(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i4 @log2_ceil_idiom_trunc_too_short(i32 %x) {
+; CHECK-LABEL: define i4 @log2_ceil_idiom_trunc_too_short(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i4
+; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[TRUNC]], -1
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i4
+; CHECK-NEXT: [[RET:%.*]] = add i4 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i4 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %trunc = trunc i32 %ctlz to i4
+ %xor = xor i4 %trunc, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i4
+ %ret = add i4 %xor, %zext
+ ret i4 %ret
+}
+
+define i32 @log2_ceil_idiom_mismatched_operands(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_mismatched_operands(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[Y]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %y)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_wrong_constant(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_wrong_constant(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 30
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 30
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_not_a_power2_test1(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_not_a_power2_test2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 2
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 2
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.ctpop.i32(i32)
+;.
+; CHECK: [[RNG0]] = !{i32 0, i32 33}
+;.
|
9fe8fb2
to
9a36cbf
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch folds the `log2_ceil` idiom:
```
(BW - ctlz(A)) + (is_power2(A) ? 0 : 1)
->
zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) (canonical form)
->
BW - ctlz(A - 1, false)
```
Alive2: https://alive2.llvm.org/ce/z/6mSbdi
This patch folds the `log2_ceil` idiom. Alive2: https://alive2.llvm.org/ce/z/6mSbdi