Skip to content

Commit 0993207

Browse files
committed
update test with true16
1 parent 1aed6ad commit 0993207

File tree

143 files changed

+176992
-84633
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+176992
-84633
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 28219 additions & 14689 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 3740 additions & 1806 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll

Lines changed: 254 additions & 101 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll

Lines changed: 6419 additions & 3187 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll

Lines changed: 4769 additions & 2437 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll

Lines changed: 1354 additions & 677 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll

Lines changed: 177 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
44
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
55
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
6-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
7+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
78

89
define <3 x half> @bitcast_v3bf16_to_v3f16(<3 x bfloat> %a, i32 %b) {
910
; GCN-LABEL: bitcast_v3bf16_to_v3f16:
@@ -134,47 +135,92 @@ define <3 x half> @bitcast_v3bf16_to_v3f16(<3 x bfloat> %a, i32 %b) {
134135
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
135136
; GFX9-NEXT: s_setpc_b64 s[30:31]
136137
;
137-
; GFX11-LABEL: bitcast_v3bf16_to_v3f16:
138-
; GFX11: ; %bb.0:
139-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140-
; GFX11-NEXT: s_mov_b32 s0, exec_lo
141-
; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v2
142-
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
143-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
144-
; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
145-
; GFX11-NEXT: s_cbranch_execz .LBB0_2
146-
; GFX11-NEXT: ; %bb.1: ; %cmp.true
147-
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
148-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
149-
; GFX11-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0
150-
; GFX11-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
151-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
152-
; GFX11-NEXT: v_bfe_u32 v4, v1, 16, 1
153-
; GFX11-NEXT: v_or_b32_e32 v6, 0x400000, v1
154-
; GFX11-NEXT: v_bfe_u32 v3, v2, 16, 1
155-
; GFX11-NEXT: v_or_b32_e32 v7, 0x400000, v2
156-
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
157-
; GFX11-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
158-
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
159-
; GFX11-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
160-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
161-
; GFX11-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
162-
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
163-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
164-
; GFX11-NEXT: v_bfe_u32 v5, v0, 16, 1
165-
; GFX11-NEXT: v_or_b32_e32 v8, 0x400000, v0
166-
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
167-
; GFX11-NEXT: v_add3_u32 v5, v5, v0, 0x7fff
168-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
169-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo
170-
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
171-
; GFX11-NEXT: v_perm_b32 v0, v0, v2, 0x7060302
172-
; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo
173-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
174-
; GFX11-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16
175-
; GFX11-NEXT: .LBB0_2: ; %end
176-
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
177-
; GFX11-NEXT: s_setpc_b64 s[30:31]
138+
; GFX11-TRUE16-LABEL: bitcast_v3bf16_to_v3f16:
139+
; GFX11-TRUE16: ; %bb.0:
140+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141+
; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo
142+
; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v2
143+
; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0
144+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
145+
; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0
146+
; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB0_2
147+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true
148+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
149+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
150+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
151+
; GFX11-TRUE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v2
152+
; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
153+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
154+
; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1
155+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
156+
; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
157+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
158+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
159+
; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
160+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
161+
; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
162+
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v3, v7 :: v_dual_mov_b32 v3, 0x7fc0
163+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
164+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
165+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
166+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
167+
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v1, v4, v5 :: v_dual_add_f32 v0, 0x40c00000, v0
168+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
169+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
170+
; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v0, 16, 1
171+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0
172+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
173+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
174+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
175+
; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v0, 0x7fff
176+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
177+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
178+
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
179+
; GFX11-TRUE16-NEXT: .LBB0_2: ; %end
180+
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
181+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
182+
;
183+
; GFX11-FAKE16-LABEL: bitcast_v3bf16_to_v3f16:
184+
; GFX11-FAKE16: ; %bb.0:
185+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186+
; GFX11-FAKE16-NEXT: s_mov_b32 s0, exec_lo
187+
; GFX11-FAKE16-NEXT: v_cmpx_ne_u32_e32 0, v2
188+
; GFX11-FAKE16-NEXT: s_xor_b32 s0, exec_lo, s0
189+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
190+
; GFX11-FAKE16-NEXT: s_and_not1_saveexec_b32 s0, s0
191+
; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB0_2
192+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.true
193+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
194+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
195+
; GFX11-FAKE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0
196+
; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
197+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
198+
; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1
199+
; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, 0x400000, v1
200+
; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v2, 16, 1
201+
; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
202+
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
203+
; GFX11-FAKE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
204+
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
205+
; GFX11-FAKE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
206+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
207+
; GFX11-FAKE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
208+
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
209+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
210+
; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v0, 16, 1
211+
; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0
212+
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
213+
; GFX11-FAKE16-NEXT: v_add3_u32 v5, v5, v0, 0x7fff
214+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
215+
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo
216+
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
217+
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x7060302
218+
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo
219+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
220+
; GFX11-FAKE16-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16
221+
; GFX11-FAKE16-NEXT: .LBB0_2: ; %end
222+
; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
223+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
178224
%cmp = icmp eq i32 %b, 0
179225
br i1 %cmp, label %cmp.true, label %cmp.false
180226

@@ -426,47 +472,95 @@ define <3 x i16> @bitcast_v3bf16_to_v3i16(<3 x bfloat> %a, i32 %b) {
426472
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
427473
; GFX9-NEXT: s_setpc_b64 s[30:31]
428474
;
429-
; GFX11-LABEL: bitcast_v3bf16_to_v3i16:
430-
; GFX11: ; %bb.0:
431-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432-
; GFX11-NEXT: s_mov_b32 s0, exec_lo
433-
; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v2
434-
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
435-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
436-
; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
437-
; GFX11-NEXT: s_cbranch_execz .LBB2_2
438-
; GFX11-NEXT: ; %bb.1: ; %cmp.true
439-
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
440-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
441-
; GFX11-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0
442-
; GFX11-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
443-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
444-
; GFX11-NEXT: v_bfe_u32 v4, v1, 16, 1
445-
; GFX11-NEXT: v_or_b32_e32 v6, 0x400000, v1
446-
; GFX11-NEXT: v_bfe_u32 v3, v2, 16, 1
447-
; GFX11-NEXT: v_or_b32_e32 v7, 0x400000, v2
448-
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
449-
; GFX11-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
450-
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
451-
; GFX11-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
452-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
453-
; GFX11-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
454-
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
455-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
456-
; GFX11-NEXT: v_bfe_u32 v5, v0, 16, 1
457-
; GFX11-NEXT: v_or_b32_e32 v8, 0x400000, v0
458-
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
459-
; GFX11-NEXT: v_add3_u32 v5, v5, v0, 0x7fff
460-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
461-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo
462-
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
463-
; GFX11-NEXT: v_perm_b32 v0, v0, v2, 0x7060302
464-
; GFX11-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo
465-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
466-
; GFX11-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16
467-
; GFX11-NEXT: .LBB2_2: ; %end
468-
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
469-
; GFX11-NEXT: s_setpc_b64 s[30:31]
475+
; GFX11-TRUE16-LABEL: bitcast_v3bf16_to_v3i16:
476+
; GFX11-TRUE16: ; %bb.0:
477+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478+
; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo
479+
; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v2
480+
; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0
481+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
482+
; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0
483+
; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB2_2
484+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true
485+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
486+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
487+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
488+
; GFX11-TRUE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v2
489+
; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
490+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
491+
; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1
492+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v1
493+
; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v2, 16, 1
494+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
495+
; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
496+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
497+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v2
498+
; GFX11-TRUE16-NEXT: v_add3_u32 v5, v5, v2, 0x7fff
499+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
500+
; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
501+
; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v0, 16, 1
502+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0
503+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
504+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
505+
; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
506+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc_lo
507+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
508+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
509+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
510+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v5, v8, vcc_lo
511+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
512+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v2
513+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo
514+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
515+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v2
516+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
517+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
518+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, 0x7fc0, 16, v1
519+
; GFX11-TRUE16-NEXT: .LBB2_2: ; %end
520+
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
521+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
522+
;
523+
; GFX11-FAKE16-LABEL: bitcast_v3bf16_to_v3i16:
524+
; GFX11-FAKE16: ; %bb.0:
525+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526+
; GFX11-FAKE16-NEXT: s_mov_b32 s0, exec_lo
527+
; GFX11-FAKE16-NEXT: v_cmpx_ne_u32_e32 0, v2
528+
; GFX11-FAKE16-NEXT: s_xor_b32 s0, exec_lo, s0
529+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
530+
; GFX11-FAKE16-NEXT: s_and_not1_saveexec_b32 s0, s0
531+
; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB2_2
532+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.true
533+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1
534+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
535+
; GFX11-FAKE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v0
536+
; GFX11-FAKE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
537+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
538+
; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1
539+
; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, 0x400000, v1
540+
; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v2, 16, 1
541+
; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
542+
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
543+
; GFX11-FAKE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
544+
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
545+
; GFX11-FAKE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
546+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
547+
; GFX11-FAKE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
548+
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v7, vcc_lo
549+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
550+
; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v0, 16, 1
551+
; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0
552+
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
553+
; GFX11-FAKE16-NEXT: v_add3_u32 v5, v5, v0, 0x7fff
554+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
555+
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc_lo
556+
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
557+
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x7060302
558+
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc_lo
559+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
560+
; GFX11-FAKE16-NEXT: v_alignbit_b32 v1, 0x7fc0, v1, 16
561+
; GFX11-FAKE16-NEXT: .LBB2_2: ; %end
562+
; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
563+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
470564
%cmp = icmp eq i32 %b, 0
471565
br i1 %cmp, label %cmp.true, label %cmp.false
472566

0 commit comments

Comments
 (0)