Skip to content

Commit bfdbfe5

Browse files
committed
[BPF] expand cttz, ctlz for i32, i64
Fixes: #62252
1 parent c41b11a commit bfdbfe5

File tree

2 files changed

+308
-5
lines changed

2 files changed

+308
-5
lines changed

llvm/lib/Target/BPF/BPFISelLowering.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
113113
setOperationAction(ISD::SRL_PARTS, VT, Expand);
114114
setOperationAction(ISD::SRA_PARTS, VT, Expand);
115115
setOperationAction(ISD::CTPOP, VT, Expand);
116+
setOperationAction(ISD::CTTZ, VT, Expand);
117+
setOperationAction(ISD::CTLZ, VT, Expand);
118+
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
119+
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
116120

117121
setOperationAction(ISD::SETCC, VT, Expand);
118122
setOperationAction(ISD::SELECT, VT, Expand);
@@ -125,11 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
125129
STI.getHasJmp32() ? Custom : Promote);
126130
}
127131

128-
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
129-
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
130-
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
131-
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
132-
133132
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
134133
if (!STI.hasMovsx()) {
135134
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);

llvm/test/CodeGen/BPF/cttz-ctlz.ll

Lines changed: 304 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,304 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -march=bpf | FileCheck %s
3+
4+
; test that we can expand CTTZ & CTLZ
5+
6+
declare i32 @llvm.cttz.i32(i32, i1)
7+
8+
; cttz on i32 with the second intrinsic argument set to 1 (a zero input yields
; poison per the LLVM LangRef), so the expected code contains no zero test.
; Expected sequence: isolate the lowest set bit (a & -a), multiply by
; 125613361 (0x077CB531), keep the top five bits of the low 32-bit word
; (mask 4160749568 = 0xF8000000, then shift right by 27), and use the result
; as a byte index into a constant-pool table (the LCPI symbol).
define i32 @cttz_i32_zdef(i32 %a) {
9+
; CHECK-LABEL: cttz_i32_zdef:
10+
; CHECK: # %bb.0:
11+
; CHECK-NEXT: r2 = r1
12+
; CHECK-NEXT: r2 = -r2
13+
; CHECK-NEXT: r1 &= r2
14+
; CHECK-NEXT: r1 *= 125613361
15+
; CHECK-NEXT: r2 = 4160749568 ll
16+
; CHECK-NEXT: r1 &= r2
17+
; CHECK-NEXT: r1 >>= 27
18+
; CHECK-NEXT: r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll
19+
; CHECK-NEXT: r2 += r1
20+
; CHECK-NEXT: r0 = *(u8 *)(r2 + 0)
21+
; CHECK-NEXT: exit
22+
%ret = call i32 @llvm.cttz.i32(i32 %a, i1 1)
23+
ret i32 %ret
24+
}
25+
26+
27+
; cttz on i32 with the second intrinsic argument set to 0, so a zero input
; must produce a defined result. The expected code preloads r0 = 32,
; zero-extends the argument (shift left 32, shift right 32) and branches
; straight to the exit when it is 0. Otherwise it runs the lookup sequence:
; a & -a, multiply by 125613361, mask with 4160749568 (0xF8000000), shift
; right by 27, and load a byte from the constant-pool table.
define i32 @cttz_i32(i32 %a) {
28+
; CHECK-LABEL: cttz_i32:
29+
; CHECK: # %bb.0:
30+
; CHECK-NEXT: r0 = 32
31+
; CHECK-NEXT: r2 = r1
32+
; CHECK-NEXT: r2 <<= 32
33+
; CHECK-NEXT: r2 >>= 32
34+
; CHECK-NEXT: if r2 == 0 goto LBB1_2
35+
; CHECK-NEXT: # %bb.1: # %cond.false
36+
; CHECK-NEXT: r2 = r1
37+
; CHECK-NEXT: r2 = -r2
38+
; CHECK-NEXT: r1 &= r2
39+
; CHECK-NEXT: r1 *= 125613361
40+
; CHECK-NEXT: r2 = 4160749568 ll
41+
; CHECK-NEXT: r1 &= r2
42+
; CHECK-NEXT: r1 >>= 27
43+
; CHECK-NEXT: r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll
44+
; CHECK-NEXT: r2 += r1
45+
; CHECK-NEXT: r0 = *(u8 *)(r2 + 0)
46+
; CHECK-NEXT: LBB1_2: # %cond.end
47+
; CHECK-NEXT: exit
48+
%ret = call i32 @llvm.cttz.i32(i32 %a, i1 0)
49+
ret i32 %ret
50+
}
51+
52+
declare i64 @llvm.cttz.i64(i64, i1)
53+
54+
; cttz on i64 with the second intrinsic argument set to 1 (a zero input yields
; poison per the LLVM LangRef), so the expected code contains no zero test.
; Expected sequence: isolate the lowest set bit (a & -a), multiply by the
; 64-bit constant 151050438420815295, shift right by 58 to keep the top six
; bits, and use them as a byte index into a constant-pool table (LCPI symbol).
define i64 @cttz_i64_zdef(i64 %a) {
55+
; CHECK-LABEL: cttz_i64_zdef:
56+
; CHECK: # %bb.0:
57+
; CHECK-NEXT: r2 = r1
58+
; CHECK-NEXT: r2 = -r2
59+
; CHECK-NEXT: r1 &= r2
60+
; CHECK-NEXT: r2 = 151050438420815295 ll
61+
; CHECK-NEXT: r1 *= r2
62+
; CHECK-NEXT: r1 >>= 58
63+
; CHECK-NEXT: r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll
64+
; CHECK-NEXT: r2 += r1
65+
; CHECK-NEXT: r0 = *(u8 *)(r2 + 0)
66+
; CHECK-NEXT: exit
67+
%ret = call i64 @llvm.cttz.i64(i64 %a, i1 1)
68+
ret i64 %ret
69+
}
70+
71+
72+
; cttz on i64 with the second intrinsic argument set to 0, so a zero input
; must produce a defined result. The expected code preloads r0 = 64 and
; branches straight to the exit when r1 is 0. Otherwise it runs the lookup
; sequence: a & -a, multiply by 151050438420815295, shift right by 58, and
; load a byte from the constant-pool table.
define i64 @cttz_i64(i64 %a) {
73+
; CHECK-LABEL: cttz_i64:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: r0 = 64
76+
; CHECK-NEXT: if r1 == 0 goto LBB3_2
77+
; CHECK-NEXT: # %bb.1: # %cond.false
78+
; CHECK-NEXT: r2 = r1
79+
; CHECK-NEXT: r2 = -r2
80+
; CHECK-NEXT: r1 &= r2
81+
; CHECK-NEXT: r2 = 151050438420815295 ll
82+
; CHECK-NEXT: r1 *= r2
83+
; CHECK-NEXT: r1 >>= 58
84+
; CHECK-NEXT: r2 = {{\.?LCPI[0-9]+_[0-9]+}} ll
85+
; CHECK-NEXT: r2 += r1
86+
; CHECK-NEXT: r0 = *(u8 *)(r2 + 0)
87+
; CHECK-NEXT: LBB3_2: # %cond.end
88+
; CHECK-NEXT: exit
89+
%ret = call i64 @llvm.cttz.i64(i64 %a, i1 0)
90+
ret i64 %ret
91+
}
92+
93+
94+
declare i32 @llvm.ctlz.i32(i32, i1)
95+
96+
; ctlz on i32 with the second intrinsic argument set to 1 (a zero input yields
; poison per the LLVM LangRef), so the expected code contains no zero test.
; Expected sequence, in three phases:
;   1. OR the value with itself shifted right by 1, 2, 4, 8 and 16. Before
;      each shift the copy is masked (0xFFFFFFFE, 0xFFFFFFFC, 0xFFFFFFF0,
;      0xFFFFFF00, 0xFFFF0000) so only bits of the low 32-bit word take part.
;   2. Invert the result (xor with -1).
;   3. Count the set bits with the masks 1431655765 (0x55555555),
;      858993459 (0x33333333) and 252645135 (0x0F0F0F0F), a multiply by
;      16843009 (0x01010101), a mask with 4278190080 (0xFF000000) and a
;      shift right by 24.
define i32 @ctlz_i32_zdef(i32 %a) {
97+
; CHECK-LABEL: ctlz_i32_zdef:
98+
; CHECK: # %bb.0:
99+
; CHECK-NEXT: r2 = 4294967294 ll
100+
; CHECK-NEXT: r3 = r1
101+
; CHECK-NEXT: r3 &= r2
102+
; CHECK-NEXT: r3 >>= 1
103+
; CHECK-NEXT: r1 |= r3
104+
; CHECK-NEXT: r2 = 4294967292 ll
105+
; CHECK-NEXT: r3 = r1
106+
; CHECK-NEXT: r3 &= r2
107+
; CHECK-NEXT: r3 >>= 2
108+
; CHECK-NEXT: r1 |= r3
109+
; CHECK-NEXT: r2 = 4294967280 ll
110+
; CHECK-NEXT: r3 = r1
111+
; CHECK-NEXT: r3 &= r2
112+
; CHECK-NEXT: r3 >>= 4
113+
; CHECK-NEXT: r1 |= r3
114+
; CHECK-NEXT: r2 = 4294967040 ll
115+
; CHECK-NEXT: r3 = r1
116+
; CHECK-NEXT: r3 &= r2
117+
; CHECK-NEXT: r3 >>= 8
118+
; CHECK-NEXT: r1 |= r3
119+
; CHECK-NEXT: r2 = 4294901760 ll
120+
; CHECK-NEXT: r3 = r1
121+
; CHECK-NEXT: r3 &= r2
122+
; CHECK-NEXT: r3 >>= 16
123+
; CHECK-NEXT: r1 |= r3
124+
; CHECK-NEXT: r1 ^= -1
125+
; CHECK-NEXT: r2 = r1
126+
; CHECK-NEXT: r2 >>= 1
127+
; CHECK-NEXT: r2 &= 1431655765
128+
; CHECK-NEXT: r1 -= r2
129+
; CHECK-NEXT: r0 = r1
130+
; CHECK-NEXT: r0 &= 858993459
131+
; CHECK-NEXT: r1 >>= 2
132+
; CHECK-NEXT: r1 &= 858993459
133+
; CHECK-NEXT: r0 += r1
134+
; CHECK-NEXT: r1 = r0
135+
; CHECK-NEXT: r1 >>= 4
136+
; CHECK-NEXT: r0 += r1
137+
; CHECK-NEXT: r0 &= 252645135
138+
; CHECK-NEXT: r0 *= 16843009
139+
; CHECK-NEXT: r1 = 4278190080 ll
140+
; CHECK-NEXT: r0 &= r1
141+
; CHECK-NEXT: r0 >>= 24
142+
; CHECK-NEXT: exit
143+
%ret = call i32 @llvm.ctlz.i32(i32 %a, i1 1)
144+
ret i32 %ret
145+
}
146+
147+
148+
; ctlz on i32 with the second intrinsic argument set to 0, so a zero input
; must produce a defined result. The expected code preloads r0 = 32,
; zero-extends the argument (shift left 32, shift right 32) and branches
; straight to the exit when it is 0. Otherwise it runs three phases:
;   1. OR the value with itself shifted right by 1, 2, 4, 8 and 16, masking
;      the copy before each shift (0xFFFFFFFE, 0xFFFFFFFC, 0xFFFFFFF0,
;      0xFFFFFF00, 0xFFFF0000) so only bits of the low 32-bit word take part.
;   2. Invert the result (xor with -1).
;   3. Count the set bits with the masks 1431655765 (0x55555555),
;      858993459 (0x33333333) and 252645135 (0x0F0F0F0F), a multiply by
;      16843009 (0x01010101), a mask with 4278190080 (0xFF000000) and a
;      shift right by 24.
define i32 @ctlz_i32(i32 %a) {
149+
; CHECK-LABEL: ctlz_i32:
150+
; CHECK: # %bb.0:
151+
; CHECK-NEXT: r0 = 32
152+
; CHECK-NEXT: r2 = r1
153+
; CHECK-NEXT: r2 <<= 32
154+
; CHECK-NEXT: r2 >>= 32
155+
; CHECK-NEXT: if r2 == 0 goto LBB5_2
156+
; CHECK-NEXT: # %bb.1: # %cond.false
157+
; CHECK-NEXT: r2 = 4294967294 ll
158+
; CHECK-NEXT: r3 = r1
159+
; CHECK-NEXT: r3 &= r2
160+
; CHECK-NEXT: r3 >>= 1
161+
; CHECK-NEXT: r1 |= r3
162+
; CHECK-NEXT: r2 = 4294967292 ll
163+
; CHECK-NEXT: r3 = r1
164+
; CHECK-NEXT: r3 &= r2
165+
; CHECK-NEXT: r3 >>= 2
166+
; CHECK-NEXT: r1 |= r3
167+
; CHECK-NEXT: r2 = 4294967280 ll
168+
; CHECK-NEXT: r3 = r1
169+
; CHECK-NEXT: r3 &= r2
170+
; CHECK-NEXT: r3 >>= 4
171+
; CHECK-NEXT: r1 |= r3
172+
; CHECK-NEXT: r2 = 4294967040 ll
173+
; CHECK-NEXT: r3 = r1
174+
; CHECK-NEXT: r3 &= r2
175+
; CHECK-NEXT: r3 >>= 8
176+
; CHECK-NEXT: r1 |= r3
177+
; CHECK-NEXT: r2 = 4294901760 ll
178+
; CHECK-NEXT: r3 = r1
179+
; CHECK-NEXT: r3 &= r2
180+
; CHECK-NEXT: r3 >>= 16
181+
; CHECK-NEXT: r1 |= r3
182+
; CHECK-NEXT: r1 ^= -1
183+
; CHECK-NEXT: r2 = r1
184+
; CHECK-NEXT: r2 >>= 1
185+
; CHECK-NEXT: r2 &= 1431655765
186+
; CHECK-NEXT: r1 -= r2
187+
; CHECK-NEXT: r0 = r1
188+
; CHECK-NEXT: r0 &= 858993459
189+
; CHECK-NEXT: r1 >>= 2
190+
; CHECK-NEXT: r1 &= 858993459
191+
; CHECK-NEXT: r0 += r1
192+
; CHECK-NEXT: r1 = r0
193+
; CHECK-NEXT: r1 >>= 4
194+
; CHECK-NEXT: r0 += r1
195+
; CHECK-NEXT: r0 &= 252645135
196+
; CHECK-NEXT: r0 *= 16843009
197+
; CHECK-NEXT: r1 = 4278190080 ll
198+
; CHECK-NEXT: r0 &= r1
199+
; CHECK-NEXT: r0 >>= 24
200+
; CHECK-NEXT: LBB5_2: # %cond.end
201+
; CHECK-NEXT: exit
202+
%ret = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
203+
ret i32 %ret
204+
}
205+
206+
declare i64 @llvm.ctlz.i64(i64, i1)
207+
208+
; ctlz on i64 with the second intrinsic argument set to 1 (a zero input yields
; poison per the LLVM LangRef), so the expected code contains no zero test.
; Expected sequence, in three phases:
;   1. OR the value with itself shifted right by 1, 2, 4, 8, 16 and 32
;      (no masking is needed at full register width).
;   2. Invert the result (xor with -1).
;   3. Count the set bits with the masks 6148914691236517205
;      (0x5555555555555555), 3689348814741910323 (0x3333333333333333) and
;      1085102592571150095 (0x0F0F0F0F0F0F0F0F), a multiply by
;      72340172838076673 (0x0101010101010101) and a shift right by 56.
define i64 @ctlz_i64_zdef(i64 %a) {
209+
; CHECK-LABEL: ctlz_i64_zdef:
210+
; CHECK: # %bb.0:
211+
; CHECK-NEXT: r2 = r1
212+
; CHECK-NEXT: r2 >>= 1
213+
; CHECK-NEXT: r1 |= r2
214+
; CHECK-NEXT: r2 = r1
215+
; CHECK-NEXT: r2 >>= 2
216+
; CHECK-NEXT: r1 |= r2
217+
; CHECK-NEXT: r2 = r1
218+
; CHECK-NEXT: r2 >>= 4
219+
; CHECK-NEXT: r1 |= r2
220+
; CHECK-NEXT: r2 = r1
221+
; CHECK-NEXT: r2 >>= 8
222+
; CHECK-NEXT: r1 |= r2
223+
; CHECK-NEXT: r2 = r1
224+
; CHECK-NEXT: r2 >>= 16
225+
; CHECK-NEXT: r1 |= r2
226+
; CHECK-NEXT: r2 = r1
227+
; CHECK-NEXT: r2 >>= 32
228+
; CHECK-NEXT: r1 |= r2
229+
; CHECK-NEXT: r1 ^= -1
230+
; CHECK-NEXT: r2 = 6148914691236517205 ll
231+
; CHECK-NEXT: r3 = r1
232+
; CHECK-NEXT: r3 >>= 1
233+
; CHECK-NEXT: r3 &= r2
234+
; CHECK-NEXT: r1 -= r3
235+
; CHECK-NEXT: r2 = 3689348814741910323 ll
236+
; CHECK-NEXT: r0 = r1
237+
; CHECK-NEXT: r0 &= r2
238+
; CHECK-NEXT: r1 >>= 2
239+
; CHECK-NEXT: r1 &= r2
240+
; CHECK-NEXT: r0 += r1
241+
; CHECK-NEXT: r1 = r0
242+
; CHECK-NEXT: r1 >>= 4
243+
; CHECK-NEXT: r0 += r1
244+
; CHECK-NEXT: r1 = 1085102592571150095 ll
245+
; CHECK-NEXT: r0 &= r1
246+
; CHECK-NEXT: r1 = 72340172838076673 ll
247+
; CHECK-NEXT: r0 *= r1
248+
; CHECK-NEXT: r0 >>= 56
249+
; CHECK-NEXT: exit
250+
%ret = call i64 @llvm.ctlz.i64(i64 %a, i1 1)
251+
ret i64 %ret
252+
}
253+
254+
255+
; ctlz on i64 with the second intrinsic argument set to 0, so a zero input
; must produce a defined result. The expected code preloads r0 = 64 and
; branches straight to the exit when r1 is 0. Otherwise it runs three phases:
;   1. OR the value with itself shifted right by 1, 2, 4, 8, 16 and 32.
;   2. Invert the result (xor with -1).
;   3. Count the set bits with the masks 6148914691236517205
;      (0x5555555555555555), 3689348814741910323 (0x3333333333333333) and
;      1085102592571150095 (0x0F0F0F0F0F0F0F0F), a multiply by
;      72340172838076673 (0x0101010101010101) and a shift right by 56.
define i64 @ctlz_i64(i64 %a) {
256+
; CHECK-LABEL: ctlz_i64:
257+
; CHECK: # %bb.0:
258+
; CHECK-NEXT: r0 = 64
259+
; CHECK-NEXT: if r1 == 0 goto LBB7_2
260+
; CHECK-NEXT: # %bb.1: # %cond.false
261+
; CHECK-NEXT: r2 = r1
262+
; CHECK-NEXT: r2 >>= 1
263+
; CHECK-NEXT: r1 |= r2
264+
; CHECK-NEXT: r2 = r1
265+
; CHECK-NEXT: r2 >>= 2
266+
; CHECK-NEXT: r1 |= r2
267+
; CHECK-NEXT: r2 = r1
268+
; CHECK-NEXT: r2 >>= 4
269+
; CHECK-NEXT: r1 |= r2
270+
; CHECK-NEXT: r2 = r1
271+
; CHECK-NEXT: r2 >>= 8
272+
; CHECK-NEXT: r1 |= r2
273+
; CHECK-NEXT: r2 = r1
274+
; CHECK-NEXT: r2 >>= 16
275+
; CHECK-NEXT: r1 |= r2
276+
; CHECK-NEXT: r2 = r1
277+
; CHECK-NEXT: r2 >>= 32
278+
; CHECK-NEXT: r1 |= r2
279+
; CHECK-NEXT: r1 ^= -1
280+
; CHECK-NEXT: r2 = 6148914691236517205 ll
281+
; CHECK-NEXT: r3 = r1
282+
; CHECK-NEXT: r3 >>= 1
283+
; CHECK-NEXT: r3 &= r2
284+
; CHECK-NEXT: r1 -= r3
285+
; CHECK-NEXT: r2 = 3689348814741910323 ll
286+
; CHECK-NEXT: r0 = r1
287+
; CHECK-NEXT: r0 &= r2
288+
; CHECK-NEXT: r1 >>= 2
289+
; CHECK-NEXT: r1 &= r2
290+
; CHECK-NEXT: r0 += r1
291+
; CHECK-NEXT: r1 = r0
292+
; CHECK-NEXT: r1 >>= 4
293+
; CHECK-NEXT: r0 += r1
294+
; CHECK-NEXT: r1 = 1085102592571150095 ll
295+
; CHECK-NEXT: r0 &= r1
296+
; CHECK-NEXT: r1 = 72340172838076673 ll
297+
; CHECK-NEXT: r0 *= r1
298+
; CHECK-NEXT: r0 >>= 56
299+
; CHECK-NEXT: LBB7_2: # %cond.end
300+
; CHECK-NEXT: exit
301+
%ret = call i64 @llvm.ctlz.i64(i64 %a, i1 0)
302+
ret i64 %ret
303+
}
304+

0 commit comments

Comments
 (0)