Skip to content

Commit d1b393d

Browse files
committed
AMDGPU/GlobalISel: Select G_CTTZ_ZERO_UNDEF
Directly select this rather than going through the intermediate instruction, which may provide some combine value in the future.
1 parent 045a892 commit d1b393d

File tree

4 files changed

+91
-3
lines changed

4 files changed

+91
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
286286
def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
287287
def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
288288

289-
def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
289+
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
290290

291291
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
292292
// when performing the multiply. The result is a 32-bit value.
@@ -425,6 +425,10 @@ def AMDGPUffbh_u32 : PatFrags<(ops node:$src),
425425
[(ctlz_zero_undef node:$src),
426426
(AMDGPUffbh_u32_impl node:$src)]>;
427427

428+
def AMDGPUffbl_b32 : PatFrags<(ops node:$src),
429+
[(cttz_zero_undef node:$src),
430+
(AMDGPUffbl_b32_impl node:$src)]>;
431+
428432
def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1),
429433
[(int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
430434
(AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>;

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64",
210210

211211
def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">;
212212
def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">;
213-
def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">;
213+
def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64",
214+
[(set i32:$sdst, (AMDGPUffbl_b32 i64:$src0))]
215+
>;
214216

215217
def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32",
216218
[(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))]

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
241241
defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
242242
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
243243
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
244-
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
244+
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
245245
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;
246246

247247
let SchedRW = [WriteDoubleAdd] in {
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: cttz_zero_undef_s32_ss
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
10+
body: |
11+
bb.0:
12+
liveins: $sgpr0
13+
14+
; CHECK-LABEL: name: cttz_zero_undef_s32_ss
15+
; CHECK: liveins: $sgpr0
16+
; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
17+
; CHECK: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]]
18+
; CHECK: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]]
19+
%0:sgpr(s32) = COPY $sgpr0
20+
%1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0
21+
S_ENDPGM 0, implicit %1
22+
...
23+
24+
---
25+
name: cttz_zero_undef_s32_vs
26+
legalized: true
27+
regBankSelected: true
28+
tracksRegLiveness: true
29+
30+
body: |
31+
bb.0:
32+
liveins: $sgpr0
33+
34+
; CHECK-LABEL: name: cttz_zero_undef_s32_vs
35+
; CHECK: liveins: $sgpr0
36+
; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
37+
; CHECK: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec
38+
; CHECK: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]]
39+
%0:sgpr(s32) = COPY $sgpr0
40+
%1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0
41+
S_ENDPGM 0, implicit %1
42+
...
43+
44+
---
45+
name: cttz_zero_undef_s32_vv
46+
legalized: true
47+
regBankSelected: true
48+
tracksRegLiveness: true
49+
50+
body: |
51+
bb.0:
52+
liveins: $vgpr0
53+
54+
; CHECK-LABEL: name: cttz_zero_undef_s32_vv
55+
; CHECK: liveins: $vgpr0
56+
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
57+
; CHECK: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec
58+
; CHECK: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]]
59+
%0:vgpr(s32) = COPY $vgpr0
60+
%1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0
61+
S_ENDPGM 0, implicit %1
62+
...
63+
64+
---
65+
name: cttz_zero_undef_s64_ss
66+
legalized: true
67+
regBankSelected: true
68+
tracksRegLiveness: true
69+
70+
body: |
71+
bb.0:
72+
liveins: $sgpr0_sgpr1
73+
74+
; CHECK-LABEL: name: cttz_zero_undef_s64_ss
75+
; CHECK: liveins: $sgpr0_sgpr1
76+
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
77+
; CHECK: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[COPY]]
78+
; CHECK: S_ENDPGM 0, implicit [[S_FF1_I32_B64_]]
79+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
80+
%1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0
81+
S_ENDPGM 0, implicit %1
82+
...

0 commit comments

Comments
 (0)