Skip to content

Commit 0ab4458

Browse files
authored
[InstCombine] Fold cttz(lshr(-1, x) + 1) to width - x (#91244)
Fold

```llvm
define i64 @src(i64 %50) {
  %52 = lshr i64 -1, %50
  %53 = add i64 %52, 1
  %54 = call i64 @llvm.cttz.i64(i64 %53, i1 false)
  ret i64 %54
}
```

to

```llvm
define i64 @tgt(i64 %50) {
  %52 = sub i64 64, %50
  ret i64 %52
}
```

as pointed out in #91171 (review). Alive2 proof: https://alive2.llvm.org/ce/z/2aHfYa

Note: the `ctlz` version of this pattern does not seem to occur in dtcxzyw's benchmark, so it is left aside for now.
1 parent fd0ffb7 commit 0ab4458

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,13 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
562562
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
563563
return BinaryOperator::CreateSub(ConstCttz, X);
564564
}
565+
566+
// cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
567+
if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
568+
Value *Width =
569+
ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
570+
return BinaryOperator::CreateSub(Width, X);
571+
}
565572
} else {
566573
// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
567574
if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&

llvm/test/Transforms/InstCombine/cttz.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,3 +215,64 @@ define i32 @cttz_of_lowest_set_bit_wrong_intrinsic(i32 %x) {
215215
%tz = call i32 @llvm.ctlz.i32(i32 %and, i1 false)
216216
ret i32 %tz
217217
}
218+
219+
define i32 @cttz_of_power_of_two(i32 %x) {
220+
; CHECK-LABEL: @cttz_of_power_of_two(
221+
; CHECK-NEXT: [[R:%.*]] = sub i32 32, [[X:%.*]]
222+
; CHECK-NEXT: ret i32 [[R]]
223+
;
224+
%lshr = lshr i32 -1, %x
225+
%add = add i32 %lshr, 1
226+
%r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
227+
ret i32 %r
228+
}
229+
230+
define i32 @cttz_of_power_of_two_zero_poison(i32 %x) {
231+
; CHECK-LABEL: @cttz_of_power_of_two_zero_poison(
232+
; CHECK-NEXT: [[R:%.*]] = sub i32 32, [[X:%.*]]
233+
; CHECK-NEXT: ret i32 [[R]]
234+
;
235+
%lshr = lshr i32 -1, %x
236+
%add = add i32 %lshr, 1
237+
%r = call i32 @llvm.cttz.i32(i32 %add, i1 true)
238+
ret i32 %r
239+
}
240+
241+
define i32 @cttz_of_power_of_two_wrong_intrinsic(i32 %x) {
242+
; CHECK-LABEL: @cttz_of_power_of_two_wrong_intrinsic(
243+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -1, [[X:%.*]]
244+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LSHR]], 1
245+
; CHECK-NEXT: [[R:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[ADD]], i1 false)
246+
; CHECK-NEXT: ret i32 [[R]]
247+
;
248+
%lshr = lshr i32 -1, %x
249+
%add = add i32 %lshr, 1
250+
%r = call i32 @llvm.ctlz.i32(i32 %add, i1 false)
251+
ret i32 %r
252+
}
253+
254+
define i32 @cttz_of_power_of_two_wrong_constant_1(i32 %x) {
255+
; CHECK-LABEL: @cttz_of_power_of_two_wrong_constant_1(
256+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -2, [[X:%.*]]
257+
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[LSHR]], 1
258+
; CHECK-NEXT: [[R:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[ADD]], i1 true)
259+
; CHECK-NEXT: ret i32 [[R]]
260+
;
261+
%lshr = lshr i32 -2, %x
262+
%add = add i32 %lshr, 1
263+
%r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
264+
ret i32 %r
265+
}
266+
267+
define i32 @cttz_of_power_of_two_wrong_constant_2(i32 %x) {
268+
; CHECK-LABEL: @cttz_of_power_of_two_wrong_constant_2(
269+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -1, [[X:%.*]]
270+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LSHR]], -1
271+
; CHECK-NEXT: [[R:%.*]] = call range(i32 1, 33) i32 @llvm.cttz.i32(i32 [[ADD]], i1 false)
272+
; CHECK-NEXT: ret i32 [[R]]
273+
;
274+
%lshr = lshr i32 -1, %x
275+
%add = add i32 %lshr, -1
276+
%r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
277+
ret i32 %r
278+
}

0 commit comments

Comments
 (0)