Skip to content

Commit 045a892

Browse files
committed
AMDGPU/GlobalISel: Select G_CTLZ_ZERO_UNDEF
Directly select G_CTLZ_ZERO_UNDEF rather than going through the intermediate G_AMDGPU_FFBH_U32 instruction, though the intermediate instruction may still provide some combine value in the future.
1 parent e174c27 commit 045a892

File tree

4 files changed

+92
-4
lines changed

4 files changed

+92
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin_glue>;
140140
def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
141141
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
142142

143-
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
143+
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32_impl>;
144144
def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
145145
def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
146146

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
283283
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
284284
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
285285

286-
def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
287-
def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
286+
def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
287+
def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
288288

289289
def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
290290

@@ -421,6 +421,10 @@ def AMDGPUffbh_i32 : PatFrags<(ops node:$src),
421421
[(int_amdgcn_sffbh node:$src),
422422
(AMDGPUffbh_i32_impl node:$src)]>;
423423

424+
def AMDGPUffbh_u32 : PatFrags<(ops node:$src),
425+
[(ctlz_zero_undef node:$src),
426+
(AMDGPUffbh_u32_impl node:$src)]>;
427+
424428
def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1),
425429
[(int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
426430
(AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>;

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,9 @@ def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32",
220220
[(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
221221
>;
222222

223-
def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">;
223+
def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64",
224+
[(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))]
225+
>;
224226
def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
225227
[(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))]
226228
>;
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: ctlz_zero_undef_s32_ss
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
10+
body: |
11+
bb.0:
12+
liveins: $sgpr0
13+
14+
; CHECK-LABEL: name: ctlz_zero_undef_s32_ss
15+
; CHECK: liveins: $sgpr0
16+
; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
17+
; CHECK: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]]
18+
; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]]
19+
%0:sgpr(s32) = COPY $sgpr0
20+
%1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0
21+
S_ENDPGM 0, implicit %1
22+
...
23+
24+
---
25+
name: ctlz_zero_undef_s32_vs
26+
legalized: true
27+
regBankSelected: true
28+
tracksRegLiveness: true
29+
30+
body: |
31+
bb.0:
32+
liveins: $sgpr0
33+
34+
; CHECK-LABEL: name: ctlz_zero_undef_s32_vs
35+
; CHECK: liveins: $sgpr0
36+
; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
37+
; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec
38+
; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]]
39+
%0:sgpr(s32) = COPY $sgpr0
40+
%1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0
41+
S_ENDPGM 0, implicit %1
42+
...
43+
44+
---
45+
name: ctlz_zero_undef_s32_vv
46+
legalized: true
47+
regBankSelected: true
48+
tracksRegLiveness: true
49+
50+
body: |
51+
bb.0:
52+
liveins: $vgpr0
53+
54+
; CHECK-LABEL: name: ctlz_zero_undef_s32_vv
55+
; CHECK: liveins: $vgpr0
56+
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
57+
; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec
58+
; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]]
59+
%0:vgpr(s32) = COPY $vgpr0
60+
%1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0
61+
S_ENDPGM 0, implicit %1
62+
...
63+
64+
---
65+
name: ctlz_zero_undef_s64_ss
66+
legalized: true
67+
regBankSelected: true
68+
tracksRegLiveness: true
69+
70+
body: |
71+
bb.0:
72+
liveins: $sgpr0_sgpr1
73+
74+
; CHECK-LABEL: name: ctlz_zero_undef_s64_ss
75+
; CHECK: liveins: $sgpr0_sgpr1
76+
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
77+
; CHECK: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[COPY]]
78+
; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B64_]]
79+
%0:sgpr(s64) = COPY $sgpr0_sgpr1
80+
%1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0
81+
S_ENDPGM 0, implicit %1
82+
...

0 commit comments

Comments
 (0)