Skip to content

Commit 827f8a7

Browse files
authored
Add opt with ctlz and shifts of power of 2 constants (#74175)
This patch does the following simplifications:
```
cttz(shl(C, X), 1) --> add(cttz(C, 1), X)
cttz(lshr exact(C, X), 1) --> sub(cttz(C, 1), X)
ctlz(lshr(C, X), 1) --> add(ctlz(C, 1), X)
ctlz(shl nuw (C, X), 1) --> sub(ctlz(C, 1), X)
```
Alive2: https://alive2.llvm.org/ce/z/9KHlKc
Closes #41333
1 parent 28a78e2 commit 827f8a7

File tree

2 files changed

+268
-0
lines changed

2 files changed

+268
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
514514
return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
515515
}
516516

517+
Constant *C;
518+
517519
if (IsTZ) {
518520
// cttz(-x) -> cttz(x)
519521
if (match(Op0, m_Neg(m_Value(X))))
@@ -549,6 +551,38 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
549551

550552
if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
551553
return IC.replaceOperand(II, 0, X);
554+
555+
// cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
556+
if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
557+
match(Op1, m_One())) {
558+
Value *ConstCttz =
559+
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
560+
return BinaryOperator::CreateAdd(ConstCttz, X);
561+
}
562+
563+
// cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
564+
if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
565+
match(Op1, m_One())) {
566+
Value *ConstCttz =
567+
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
568+
return BinaryOperator::CreateSub(ConstCttz, X);
569+
}
570+
} else {
571+
// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
572+
if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
573+
match(Op1, m_One())) {
574+
Value *ConstCtlz =
575+
IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
576+
return BinaryOperator::CreateAdd(ConstCtlz, X);
577+
}
578+
579+
// ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
580+
if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
581+
match(Op1, m_One())) {
582+
Value *ConstCtlz =
583+
IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
584+
return BinaryOperator::CreateSub(ConstCtlz, X);
585+
}
552586
}
553587

554588
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
3+
4+
declare i32 @llvm.ctlz.i32(i32, i1)
5+
declare i32 @llvm.cttz.i32(i32, i1)
6+
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
7+
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
8+
9+
; Positive test: ctlz(lshr(C, %x), true) is expected to fold to add(%x, ctlz(C)).
; C = 8387584 = 0x7FFC00 (bits 10..22 set), so its i32 leading-zero count is 9.
define i32 @lshr_ctlz_true(i32) {
10+
; CHECK-LABEL: define i32 @lshr_ctlz_true(
11+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
12+
; CHECK-NEXT: [[CTLZ:%.*]] = add i32 [[TMP0]], 9
13+
; CHECK-NEXT: ret i32 [[CTLZ]]
14+
;
15+
%lshr = lshr i32 8387584, %0
16+
%ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 true)
17+
ret i32 %ctlz
18+
}
19+
20+
; Positive test: ctlz(shl nuw (C, %x), true) is expected to fold to sub(ctlz(C), %x).
; C = 8387584 = 0x7FFC00, whose i32 leading-zero count is 9.
define i32 @shl_nuw_ctlz_true(i32) {
21+
; CHECK-LABEL: define i32 @shl_nuw_ctlz_true(
22+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
23+
; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
24+
; CHECK-NEXT: ret i32 [[CTLZ]]
25+
;
26+
%shl = shl nuw i32 8387584, %0
27+
%ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 true)
28+
ret i32 %ctlz
29+
}
30+
31+
; Positive test: same fold as the plain `shl nuw` case; the extra `nsw` flag on
; the shift must not prevent ctlz(shl nuw nsw (C, %x), true) --> sub(9, %x).
define i32 @shl_nuw_nsw_ctlz_true(i32) {
32+
; CHECK-LABEL: define i32 @shl_nuw_nsw_ctlz_true(
33+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
34+
; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
35+
; CHECK-NEXT: ret i32 [[CTLZ]]
36+
;
37+
%shl = shl nuw nsw i32 8387584, %0
38+
%ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 true)
39+
ret i32 %ctlz
40+
}
41+
42+
; Positive test: cttz(lshr exact (C, %x), true) is expected to fold to sub(cttz(C), %x).
; C = 8387584 = 0x7FFC00 (lowest set bit is bit 10), so its trailing-zero count is 10.
define i32 @lshr_exact_cttz_true(i32) {
43+
; CHECK-LABEL: define i32 @lshr_exact_cttz_true(
44+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
45+
; CHECK-NEXT: [[CTTZ:%.*]] = sub i32 10, [[TMP0]]
46+
; CHECK-NEXT: ret i32 [[CTTZ]]
47+
;
48+
%lshr = lshr exact i32 8387584, %0
49+
%cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 true)
50+
ret i32 %cttz
51+
}
52+
53+
; Positive test: cttz(shl(C, %x), true) is expected to fold to add(%x, cttz(C)).
; No wrap flag is present on the shift here; cttz(8387584) is 10.
define i32 @shl_cttz_true(i32) {
54+
; CHECK-LABEL: define i32 @shl_cttz_true(
55+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
56+
; CHECK-NEXT: [[CTTZ:%.*]] = add i32 [[TMP0]], 10
57+
; CHECK-NEXT: ret i32 [[CTTZ]]
58+
;
59+
%shl = shl i32 8387584, %0
60+
%cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 true)
61+
ret i32 %cttz
62+
}
63+
64+
; Positive vector test: ctlz(lshr(<C0, C1>, %x), true) --> add(%x, <ctlz(C0), ctlz(C1)>).
; Both 8387584 and 4276440 have bit 22 as their highest set bit, so both lanes give 9.
define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) {
65+
; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true(
66+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
67+
; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], <i32 9, i32 9>
68+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
69+
;
70+
%div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
71+
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 true)
72+
ret <2 x i32> %ctlz
73+
}
74+
75+
; Positive vector test: ctlz(shl nuw (<C0, C1>, %x), true) --> sub(<ctlz(C0), ctlz(C1)>, %x).
; Both constant lanes have a leading-zero count of 9.
define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) {
76+
; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_true(
77+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
78+
; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
79+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
80+
;
81+
%shl = shl nuw <2 x i32> <i32 8387584, i32 4276440>, %0
82+
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 true)
83+
ret <2 x i32> %ctlz
84+
}
85+
86+
; Positive vector test: as the `shl nuw` vector case, with an additional `nsw`
; flag on the shift; the fold to sub(<9, 9>, %x) is still expected.
define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(<2 x i32>) {
87+
; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(
88+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
89+
; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
90+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
91+
;
92+
%shl = shl nuw nsw <2 x i32> <i32 8387584, i32 4276440>, %0
93+
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 true)
94+
ret <2 x i32> %ctlz
95+
}
96+
97+
; Positive vector test: cttz(lshr exact (<C0, C1>, %x), true) --> sub(<cttz(C0), cttz(C1)>, %x).
; cttz(8387584) is 10 and cttz(4276440) is 3, so the lanes differ.
define <2 x i32> @vec2_lshr_exact_cttz_true(<2 x i32>) {
98+
; CHECK-LABEL: define <2 x i32> @vec2_lshr_exact_cttz_true(
99+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
100+
; CHECK-NEXT: [[CTTZ:%.*]] = sub <2 x i32> <i32 10, i32 3>, [[TMP0]]
101+
; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
102+
;
103+
%lshr = lshr exact <2 x i32> <i32 8387584, i32 4276440>, %0
104+
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 true)
105+
ret <2 x i32> %cttz
106+
}
107+
108+
; Positive vector test: cttz(shl(<C0, C1>, %x), true) --> add(%x, <cttz(C0), cttz(C1)>).
; The per-lane trailing-zero counts of the constants are 10 and 3.
define <2 x i32> @vec2_shl_cttz_true(<2 x i32>) {
109+
; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_true(
110+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
111+
; CHECK-NEXT: [[CTTZ:%.*]] = add <2 x i32> [[TMP0]], <i32 10, i32 3>
112+
; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
113+
;
114+
%shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
115+
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 true)
116+
ret <2 x i32> %cttz
117+
}
118+
119+
; negative tests:
120+
121+
; Negative test: the ctlz-of-shl fold matches only `shl nuw`. With `nsw` alone
; the shift and the ctlz call are expected to remain (only range metadata is added).
define <2 x i32> @vec2_shl_nsw_ctlz_true_neg(<2 x i32>) {
122+
; CHECK-LABEL: define <2 x i32> @vec2_shl_nsw_ctlz_true_neg(
123+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
124+
; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
125+
; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 true), !range [[RNG0:![0-9]+]]
126+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
127+
;
128+
%shl = shl nsw <2 x i32> <i32 8387584, i32 4276440>, %0
129+
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 true)
130+
ret <2 x i32> %ctlz
131+
}
132+
133+
; Negative test: the second ctlz argument is `i1 false`, and the fold only
; matches when it is true, so the lshr and the ctlz call are expected to remain.
define <2 x i32> @vec2_lshr_ctlz_false_neg(<2 x i32>) {
134+
; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_false_neg(
135+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
136+
; CHECK-NEXT: [[DIV:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
137+
; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[DIV]], i1 false), !range [[RNG1:![0-9]+]]
138+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
139+
;
140+
%div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
141+
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 false)
142+
ret <2 x i32> %ctlz
143+
}
144+
145+
; Negative test: the shift has no `nuw` flag and the second ctlz argument is
; `i1 false`; neither fold precondition holds, so no rewrite is expected.
define <2 x i32> @vec2_shl_ctlz_false_neg(<2 x i32>) {
146+
; CHECK-LABEL: define <2 x i32> @vec2_shl_ctlz_false_neg(
147+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
148+
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
149+
; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG2:![0-9]+]]
150+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
151+
;
152+
%shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
153+
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false)
154+
ret <2 x i32> %ctlz
155+
}
156+
157+
; Negative test: the lshr is not `exact` and the second cttz argument is
; `i1 false`; neither fold precondition holds, so no rewrite is expected.
define <2 x i32> @vec2_lshr_cttz_false_neg(<2 x i32>) {
158+
; CHECK-LABEL: define <2 x i32> @vec2_lshr_cttz_false_neg(
159+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
160+
; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
161+
; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[LSHR]], i1 false), !range [[RNG2]]
162+
; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
163+
;
164+
%lshr = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
165+
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false)
166+
ret <2 x i32> %cttz
167+
}
168+
169+
; Negative test: the second cttz argument is `i1 false`, and the fold only
; matches when it is true, so the shl and the cttz call are expected to remain.
define <2 x i32> @vec2_shl_cttz_false_neg(<2 x i32>) {
170+
; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_false_neg(
171+
; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
172+
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
173+
; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG3:![0-9]+]]
174+
; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
175+
;
176+
%shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
177+
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 false)
178+
ret <2 x i32> %cttz
179+
}
180+
181+
; Negative scalar test: the second ctlz argument is `i1 false`, so the
; ctlz-of-lshr fold must not fire and both instructions are expected to remain.
; NOTE(review): "faslse" in the function name looks like a typo for "false"; it
; is left unchanged here so the symbol and its label line stay in sync.
define i32 @lshr_ctlz_faslse_neg(i32) {
182+
; CHECK-LABEL: define i32 @lshr_ctlz_faslse_neg(
183+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
184+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
185+
; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG1]]
186+
; CHECK-NEXT: ret i32 [[CTLZ]]
187+
;
188+
%lshr = lshr i32 8387584, %0
189+
%ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 false)
190+
ret i32 %ctlz
191+
}
192+
193+
; Negative scalar test: the shift has no `nuw` flag and the second ctlz argument
; is `i1 false`; neither fold precondition holds, so no rewrite is expected.
define i32 @shl_ctlz_false_neg(i32) {
194+
; CHECK-LABEL: define i32 @shl_ctlz_false_neg(
195+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
196+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
197+
; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHL]], i1 false), !range [[RNG2]]
198+
; CHECK-NEXT: ret i32 [[CTLZ]]
199+
;
200+
%shl = shl i32 8387584, %0
201+
%ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false)
202+
ret i32 %ctlz
203+
}
204+
205+
; Negative scalar test: the lshr is not `exact` and the second cttz argument is
; `i1 false`; neither fold precondition holds, so no rewrite is expected.
define i32 @lshr_cttz_false_neg(i32) {
206+
; CHECK-LABEL: define i32 @lshr_cttz_false_neg(
207+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
208+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
209+
; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[LSHR]], i1 false), !range [[RNG2]]
210+
; CHECK-NEXT: ret i32 [[CTTZ]]
211+
;
212+
%lshr = lshr i32 8387584, %0
213+
%cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false)
214+
ret i32 %cttz
215+
}
216+
217+
; Negative scalar test: the second cttz argument is `i1 false`, and the fold only
; matches when it is true, so the shl and the cttz call are expected to remain.
define i32 @shl_cttz_false_neg(i32) {
218+
; CHECK-LABEL: define i32 @shl_cttz_false_neg(
219+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
220+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
221+
; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[SHL]], i1 false), !range [[RNG4:![0-9]+]]
222+
; CHECK-NEXT: ret i32 [[CTTZ]]
223+
;
224+
%shl = shl i32 8387584, %0
225+
%cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 false)
226+
ret i32 %cttz
227+
}
228+
;.
229+
; CHECK: [[RNG0]] = !{i32 1, i32 33}
230+
; CHECK: [[RNG1]] = !{i32 9, i32 33}
231+
; CHECK: [[RNG2]] = !{i32 0, i32 33}
232+
; CHECK: [[RNG3]] = !{i32 3, i32 33}
233+
; CHECK: [[RNG4]] = !{i32 10, i32 33}
234+
;.

0 commit comments

Comments
 (0)