Skip to content

Commit 0c26921

Browse files
[InstCombine] Fold select with ctlz to cttz
Summary: Handle pattern [0]: int ctz(unsigned int a) { int c = __clz(a & -a); return a ? 31 - c : c; } In reality, the compiler can generate much better code for cttz, so fold away this pattern. https://godbolt.org/z/c5kPtV [0] https://community.arm.com/community-help/f/discussions/2114/count-trailing-zeros Reviewers: spatel, nikic, lebedev.ri, dmgreen, hfinkel Reviewed By: hfinkel Subscribers: hfinkel, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66308 llvm-svn: 370037
1 parent a7f226f commit 0c26921

File tree

2 files changed

+46
-38
lines changed

2 files changed

+46
-38
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,41 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
785785
return nullptr;
786786
}
787787

788+
/// Fold the following code sequence:
789+
/// \code
790+
/// int a = ctlz(x & -x);
791+
// x ? 31 - a : a;
792+
/// \code
793+
///
794+
/// into:
795+
/// cttz(x)
796+
static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
797+
Value *FalseVal,
798+
InstCombiner::BuilderTy &Builder) {
799+
unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
800+
if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero()))
801+
return nullptr;
802+
803+
if (ICI->getPredicate() == ICmpInst::ICMP_NE)
804+
std::swap(TrueVal, FalseVal);
805+
806+
if (!match(FalseVal,
807+
m_Xor(m_Deferred(TrueVal), m_SpecificInt(BitWidth - 1))))
808+
return nullptr;
809+
810+
if (!match(TrueVal, m_Intrinsic<Intrinsic::ctlz>()))
811+
return nullptr;
812+
813+
Value *X = ICI->getOperand(0);
814+
auto *II = cast<IntrinsicInst>(TrueVal);
815+
if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X)))))
816+
return nullptr;
817+
818+
Function *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::cttz,
819+
II->getType());
820+
return CallInst::Create(F, {X, II->getArgOperand(1)});
821+
}
822+
788823
/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
789824
/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
790825
///
@@ -1432,6 +1467,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
14321467
foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
14331468
return V;
14341469

1470+
if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder))
1471+
return V;
1472+
14351473
if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
14361474
return replaceInstUsesWith(SI, V);
14371475

llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll

Lines changed: 8 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,7 @@ declare void @use2(i1)
1616

1717
define i32 @select_clz_to_ctz(i32 %a) {
1818
; CHECK-LABEL: @select_clz_to_ctz(
19-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
20-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
21-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range !0
22-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0
23-
; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31
24-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[LZ]], i32 [[SUB1]]
19+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range !0
2520
; CHECK-NEXT: ret i32 [[COND]]
2621
;
2722
%sub = sub i32 0, %a
@@ -35,12 +30,7 @@ define i32 @select_clz_to_ctz(i32 %a) {
3530

3631
define i32 @select_clz_to_ctz_preserve_flag(i32 %a) {
3732
; CHECK-LABEL: @select_clz_to_ctz_preserve_flag(
38-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
39-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
40-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 false), !range !0
41-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0
42-
; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31
43-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[LZ]], i32 [[SUB1]]
33+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 false), !range !0
4434
; CHECK-NEXT: ret i32 [[COND]]
4535
;
4636
%sub = sub i32 0, %a
@@ -54,12 +44,7 @@ define i32 @select_clz_to_ctz_preserve_flag(i32 %a) {
5444

5545
define <2 x i32> @select_clz_to_ctz_vec(<2 x i32> %a) {
5646
; CHECK-LABEL: @select_clz_to_ctz_vec(
57-
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> zeroinitializer, [[A:%.*]]
58-
; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[SUB]], [[A]]
59-
; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[AND]], i1 true)
60-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq <2 x i32> [[A]], zeroinitializer
61-
; CHECK-NEXT: [[SUB1:%.*]] = xor <2 x i32> [[LZ]], <i32 31, i32 31>
62-
; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[TOBOOL]], <2 x i32> [[LZ]], <2 x i32> [[SUB1]]
47+
; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 true)
6348
; CHECK-NEXT: ret <2 x i32> [[COND]]
6449
;
6550
%sub = sub <2 x i32> zeroinitializer, %a
@@ -76,10 +61,9 @@ define i32 @select_clz_to_ctz_extra_use(i32 %a) {
7661
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
7762
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
7863
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range !0
79-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0
8064
; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31
8165
; CHECK-NEXT: call void @use(i32 [[SUB1]])
82-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[LZ]], i32 [[SUB1]]
66+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range !0
8367
; CHECK-NEXT: ret i32 [[COND]]
8468
;
8569
%sub = sub i32 0, %a
@@ -94,12 +78,7 @@ define i32 @select_clz_to_ctz_extra_use(i32 %a) {
9478

9579
define i32 @select_clz_to_ctz_and_commuted(i32 %a) {
9680
; CHECK-LABEL: @select_clz_to_ctz_and_commuted(
97-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
98-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
99-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range !0
100-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0
101-
; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31
102-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[LZ]], i32 [[SUB1]]
81+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range !0
10382
; CHECK-NEXT: ret i32 [[COND]]
10483
;
10584
%sub = sub i32 0, %a
@@ -113,13 +92,9 @@ define i32 @select_clz_to_ctz_and_commuted(i32 %a) {
11392

11493
define i32 @select_clz_to_ctz_icmp_ne(i32 %a) {
11594
; CHECK-LABEL: @select_clz_to_ctz_icmp_ne(
116-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
117-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
118-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range !0
119-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A]], 0
95+
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
12096
; CHECK-NEXT: call void @use2(i1 [[TOBOOL]])
121-
; CHECK-NEXT: [[SUB1:%.*]] = xor i32 [[LZ]], 31
122-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[SUB1]], i32 [[LZ]]
97+
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range !0
12398
; CHECK-NEXT: ret i32 [[COND]]
12499
;
125100
%sub = sub i32 0, %a
@@ -134,12 +109,7 @@ define i32 @select_clz_to_ctz_icmp_ne(i32 %a) {
134109

135110
define i64 @select_clz_to_ctz_i64(i64 %a) {
136111
; CHECK-LABEL: @select_clz_to_ctz_i64(
137-
; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[A:%.*]]
138-
; CHECK-NEXT: [[AND:%.*]] = and i64 [[SUB]], [[A]]
139-
; CHECK-NEXT: [[LZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[AND]], i1 true), !range !1
140-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A]], 0
141-
; CHECK-NEXT: [[SUB1:%.*]] = xor i64 [[LZ]], 63
142-
; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 [[LZ]], i64 [[SUB1]]
112+
; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.cttz.i64(i64 [[A:%.*]], i1 true), !range !1
143113
; CHECK-NEXT: ret i64 [[COND]]
144114
;
145115
%sub = sub i64 0, %a

0 commit comments

Comments
 (0)