Skip to content

Commit 0b688f3

Browse files
authored
[AMDGPU][True16][CodeGen] enable true16 for more codegen test patch 1 (#131206)
This is an NFC (no functional change) patch. Enable true16 mode for more CodeGen tests.
1 parent 7598cea commit 0b688f3

18 files changed

+10548
-4307
lines changed

llvm/test/CodeGen/AMDGPU/abs_i16.ll

Lines changed: 42 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,10 @@
44
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
55
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
66
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7-
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
8-
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
7+
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
8+
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
9+
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
10+
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
911

1012
define i16 @abs_i16(i16 %arg) {
1113
; GFX6-LABEL: abs_i16:
@@ -45,25 +47,45 @@ define i16 @abs_i16(i16 %arg) {
4547
; GFX10-NEXT: v_max_i16 v0, v0, v1
4648
; GFX10-NEXT: s_setpc_b64 s[30:31]
4749
;
48-
; GFX11-LABEL: abs_i16:
49-
; GFX11: ; %bb.0:
50-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51-
; GFX11-NEXT: v_sub_nc_u16 v1, 0, v0
52-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
53-
; GFX11-NEXT: v_max_i16 v0, v0, v1
54-
; GFX11-NEXT: s_setpc_b64 s[30:31]
50+
; GFX11-TRUE16-LABEL: abs_i16:
51+
; GFX11-TRUE16: ; %bb.0:
52+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53+
; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.h, 0, v0.l
54+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
55+
; GFX11-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v0.h
56+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
5557
;
56-
; GFX12-LABEL: abs_i16:
57-
; GFX12: ; %bb.0:
58-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
59-
; GFX12-NEXT: s_wait_expcnt 0x0
60-
; GFX12-NEXT: s_wait_samplecnt 0x0
61-
; GFX12-NEXT: s_wait_bvhcnt 0x0
62-
; GFX12-NEXT: s_wait_kmcnt 0x0
63-
; GFX12-NEXT: v_sub_nc_u16 v1, 0, v0
64-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
65-
; GFX12-NEXT: v_max_i16 v0, v0, v1
66-
; GFX12-NEXT: s_setpc_b64 s[30:31]
58+
; GFX11-FAKE16-LABEL: abs_i16:
59+
; GFX11-FAKE16: ; %bb.0:
60+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61+
; GFX11-FAKE16-NEXT: v_sub_nc_u16 v1, 0, v0
62+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
63+
; GFX11-FAKE16-NEXT: v_max_i16 v0, v0, v1
64+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
65+
;
66+
; GFX12-TRUE16-LABEL: abs_i16:
67+
; GFX12-TRUE16: ; %bb.0:
68+
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
69+
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
70+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
71+
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
72+
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
73+
; GFX12-TRUE16-NEXT: v_sub_nc_u16 v0.h, 0, v0.l
74+
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
75+
; GFX12-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v0.h
76+
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
77+
;
78+
; GFX12-FAKE16-LABEL: abs_i16:
79+
; GFX12-FAKE16: ; %bb.0:
80+
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
81+
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
82+
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
83+
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
84+
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
85+
; GFX12-FAKE16-NEXT: v_sub_nc_u16 v1, 0, v0
86+
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
87+
; GFX12-FAKE16-NEXT: v_max_i16 v0, v0, v1
88+
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
6789
%res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
6890
ret i16 %res
6991
}

llvm/test/CodeGen/AMDGPU/add.v2i16.ll

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
44
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
5-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
67

78
; FIXME: Need to handle non-uniform case for function below (load without gep).
89
; FIXME: VI or should be unnecessary
@@ -753,26 +754,49 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(ptr addrspace(1) %out,
753754
; GFX10-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
754755
; GFX10-NEXT: s_endpgm
755756
;
756-
; GFX11-LABEL: v_test_add_v2i16_zext_to_v2i64:
757-
; GFX11: ; %bb.0:
758-
; GFX11-NEXT: s_clause 0x1
759-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
760-
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
761-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
762-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
763-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
764-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
765-
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
766-
; GFX11-NEXT: s_waitcnt vmcnt(0)
767-
; GFX11-NEXT: global_load_b32 v0, v0, s[4:5] glc dlc
768-
; GFX11-NEXT: s_waitcnt vmcnt(0)
769-
; GFX11-NEXT: v_pk_add_u16 v0, v1, v0
770-
; GFX11-NEXT: v_mov_b32_e32 v1, 0
771-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
772-
; GFX11-NEXT: v_alignbit_b32 v2, 0, v0, 16
773-
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
774-
; GFX11-NEXT: global_store_b128 v1, v[0:3], s[0:1]
775-
; GFX11-NEXT: s_endpgm
757+
; GFX11-TRUE16-LABEL: v_test_add_v2i16_zext_to_v2i64:
758+
; GFX11-TRUE16: ; %bb.0:
759+
; GFX11-TRUE16-NEXT: s_clause 0x1
760+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
761+
; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
762+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
763+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
764+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
765+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
766+
; GFX11-TRUE16-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
767+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
768+
; GFX11-TRUE16-NEXT: global_load_b32 v0, v0, s[4:5] glc dlc
769+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
770+
; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v1, v0
771+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
772+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
773+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
774+
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
775+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
776+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, 0, 16, v2
777+
; GFX11-TRUE16-NEXT: global_store_b128 v1, v[0:3], s[0:1]
778+
; GFX11-TRUE16-NEXT: s_endpgm
779+
;
780+
; GFX11-FAKE16-LABEL: v_test_add_v2i16_zext_to_v2i64:
781+
; GFX11-FAKE16: ; %bb.0:
782+
; GFX11-FAKE16-NEXT: s_clause 0x1
783+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
784+
; GFX11-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
785+
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
786+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
787+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
788+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
789+
; GFX11-FAKE16-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
790+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
791+
; GFX11-FAKE16-NEXT: global_load_b32 v0, v0, s[4:5] glc dlc
792+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
793+
; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v1, v0
794+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
795+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
796+
; GFX11-FAKE16-NEXT: v_alignbit_b32 v2, 0, v0, 16
797+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
798+
; GFX11-FAKE16-NEXT: global_store_b128 v1, v[0:3], s[0:1]
799+
; GFX11-FAKE16-NEXT: s_endpgm
776800
%tid = call i32 @llvm.amdgcn.workitem.id.x()
777801
%gep.out = getelementptr inbounds <2 x i64>, ptr addrspace(1) %out, i32 %tid
778802
%gep.in0 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in0, i32 %tid

llvm/test/CodeGen/AMDGPU/br_cc.f16.ll

Lines changed: 140 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
33
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-TRUE16 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-FAKE16 %s
56

67
define amdgpu_kernel void @br_cc_f16(
78
; SI-LABEL: br_cc_f16:
@@ -60,32 +61,62 @@ define amdgpu_kernel void @br_cc_f16(
6061
; VI-NEXT: buffer_store_short v1, off, s[0:3], 0
6162
; VI-NEXT: s_endpgm
6263
;
63-
; GFX11-LABEL: br_cc_f16:
64-
; GFX11: ; %bb.0: ; %entry
65-
; GFX11-NEXT: s_clause 0x1
66-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
67-
; GFX11-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
68-
; GFX11-NEXT: s_mov_b32 s6, -1
69-
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
70-
; GFX11-NEXT: s_mov_b32 s10, s6
71-
; GFX11-NEXT: s_mov_b32 s11, s7
72-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
73-
; GFX11-NEXT: s_mov_b32 s4, s2
74-
; GFX11-NEXT: s_mov_b32 s5, s3
75-
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
76-
; GFX11-NEXT: s_waitcnt vmcnt(0)
77-
; GFX11-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
78-
; GFX11-NEXT: s_waitcnt vmcnt(0)
79-
; GFX11-NEXT: s_mov_b32 s2, s6
80-
; GFX11-NEXT: s_mov_b32 s3, s7
81-
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
82-
; GFX11-NEXT: s_cbranch_vccnz .LBB0_2
83-
; GFX11-NEXT: ; %bb.1: ; %one
84-
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
85-
; GFX11-NEXT: s_endpgm
86-
; GFX11-NEXT: .LBB0_2: ; %two
87-
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
88-
; GFX11-NEXT: s_endpgm
64+
; GFX11-TRUE16-LABEL: br_cc_f16:
65+
; GFX11-TRUE16: ; %bb.0: ; %entry
66+
; GFX11-TRUE16-NEXT: s_clause 0x1
67+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
68+
; GFX11-TRUE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
69+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
70+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
71+
; GFX11-TRUE16-NEXT: s_mov_b32 s10, s6
72+
; GFX11-TRUE16-NEXT: s_mov_b32 s11, s7
73+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
74+
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
75+
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
76+
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
77+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
78+
; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
79+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
80+
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
81+
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
82+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
83+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
84+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
85+
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v2.l, v2.h
86+
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB0_2
87+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
88+
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
89+
; GFX11-TRUE16-NEXT: s_endpgm
90+
; GFX11-TRUE16-NEXT: .LBB0_2: ; %two
91+
; GFX11-TRUE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0
92+
; GFX11-TRUE16-NEXT: s_endpgm
93+
;
94+
; GFX11-FAKE16-LABEL: br_cc_f16:
95+
; GFX11-FAKE16: ; %bb.0: ; %entry
96+
; GFX11-FAKE16-NEXT: s_clause 0x1
97+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
98+
; GFX11-FAKE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
99+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
100+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
101+
; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
102+
; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
103+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
104+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
105+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
106+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
107+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
108+
; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
109+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
110+
; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
111+
; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
112+
; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
113+
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB0_2
114+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %one
115+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
116+
; GFX11-FAKE16-NEXT: s_endpgm
117+
; GFX11-FAKE16-NEXT: .LBB0_2: ; %two
118+
; GFX11-FAKE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0
119+
; GFX11-FAKE16-NEXT: s_endpgm
89120
ptr addrspace(1) %r,
90121
ptr addrspace(1) %a,
91122
ptr addrspace(1) %b) {
@@ -151,25 +182,47 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
151182
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
152183
; VI-NEXT: s_endpgm
153184
;
154-
; GFX11-LABEL: br_cc_f16_imm_a:
155-
; GFX11: ; %bb.0: ; %entry
156-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
157-
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
158-
; GFX11-NEXT: s_mov_b32 s6, -1
159-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
160-
; GFX11-NEXT: s_mov_b32 s4, s2
161-
; GFX11-NEXT: s_mov_b32 s5, s3
162-
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
163-
; GFX11-NEXT: s_waitcnt vmcnt(0)
164-
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
165-
; GFX11-NEXT: s_cbranch_vccnz .LBB1_2
166-
; GFX11-NEXT: ; %bb.1: ; %one
167-
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
168-
; GFX11-NEXT: .LBB1_2: ; %two
169-
; GFX11-NEXT: s_mov_b32 s2, s6
170-
; GFX11-NEXT: s_mov_b32 s3, s7
171-
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
172-
; GFX11-NEXT: s_endpgm
185+
; GFX11-TRUE16-LABEL: br_cc_f16_imm_a:
186+
; GFX11-TRUE16: ; %bb.0: ; %entry
187+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
188+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
189+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
190+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
191+
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
192+
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
193+
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
194+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
195+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
196+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
197+
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l
198+
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2
199+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
200+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
201+
; GFX11-TRUE16-NEXT: .LBB1_2: ; %two
202+
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
203+
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
204+
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
205+
; GFX11-TRUE16-NEXT: s_endpgm
206+
;
207+
; GFX11-FAKE16-LABEL: br_cc_f16_imm_a:
208+
; GFX11-FAKE16: ; %bb.0: ; %entry
209+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
210+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
211+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
212+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
213+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
214+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
215+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
216+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
217+
; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
218+
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB1_2
219+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %one
220+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800
221+
; GFX11-FAKE16-NEXT: .LBB1_2: ; %two
222+
; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
223+
; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
224+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
225+
; GFX11-FAKE16-NEXT: s_endpgm
173226
ptr addrspace(1) %r,
174227
ptr addrspace(1) %b) {
175228
entry:
@@ -235,25 +288,47 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
235288
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
236289
; VI-NEXT: s_endpgm
237290
;
238-
; GFX11-LABEL: br_cc_f16_imm_b:
239-
; GFX11: ; %bb.0: ; %entry
240-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
241-
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
242-
; GFX11-NEXT: s_mov_b32 s6, -1
243-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
244-
; GFX11-NEXT: s_mov_b32 s4, s2
245-
; GFX11-NEXT: s_mov_b32 s5, s3
246-
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
247-
; GFX11-NEXT: s_waitcnt vmcnt(0)
248-
; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
249-
; GFX11-NEXT: s_cbranch_vccz .LBB2_2
250-
; GFX11-NEXT: ; %bb.1: ; %two
251-
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
252-
; GFX11-NEXT: .LBB2_2: ; %one
253-
; GFX11-NEXT: s_mov_b32 s2, s6
254-
; GFX11-NEXT: s_mov_b32 s3, s7
255-
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
256-
; GFX11-NEXT: s_endpgm
291+
; GFX11-TRUE16-LABEL: br_cc_f16_imm_b:
292+
; GFX11-TRUE16: ; %bb.0: ; %entry
293+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
294+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
295+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
296+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
297+
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
298+
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
299+
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
300+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
301+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
302+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
303+
; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l
304+
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2
305+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %two
306+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
307+
; GFX11-TRUE16-NEXT: .LBB2_2: ; %one
308+
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
309+
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
310+
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
311+
; GFX11-TRUE16-NEXT: s_endpgm
312+
;
313+
; GFX11-FAKE16-LABEL: br_cc_f16_imm_b:
314+
; GFX11-FAKE16: ; %bb.0: ; %entry
315+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
316+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
317+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
318+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
319+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
320+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
321+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
322+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
323+
; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
324+
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB2_2
325+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %two
326+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800
327+
; GFX11-FAKE16-NEXT: .LBB2_2: ; %one
328+
; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
329+
; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
330+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
331+
; GFX11-FAKE16-NEXT: s_endpgm
257332
ptr addrspace(1) %r,
258333
ptr addrspace(1) %a) {
259334
entry:

0 commit comments

Comments
 (0)