@@ -38819,16 +38819,14 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38819
38819
; GFX11TRUE16-LABEL: s_select_v2bf16:
38820
38820
; GFX11TRUE16: ; %bb.0:
38821
38821
; GFX11TRUE16-NEXT: s_lshr_b32 s2, s0, 16
38822
- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
38823
38822
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
38824
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
38825
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
38826
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
38827
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
38828
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
38829
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, vcc_lo
38830
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v1.h, vcc_lo
38831
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38823
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
38824
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
38825
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s1, 16
38826
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
38827
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v1.l, vcc_lo
38828
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
38829
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
38832
38830
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
38833
38831
; GFX11TRUE16-NEXT: ; return to shader part epilog
38834
38832
;
@@ -38936,19 +38934,17 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38936
38934
;
38937
38935
; GFX11TRUE16-LABEL: s_vselect_v2bf16:
38938
38936
; GFX11TRUE16: ; %bb.0:
38939
- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
38940
- ; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16
38937
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16
38941
38938
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
38942
38939
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
38943
38940
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
38944
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s4
38945
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
38946
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
38947
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
38948
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, s2
38949
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v1.h, vcc_lo
38950
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38951
- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
38941
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
38942
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s1, 16
38943
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
38944
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, s0, v0.l, s2
38945
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
38946
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, s1, v0.h, vcc_lo
38947
+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v1
38952
38948
; GFX11TRUE16-NEXT: ; return to shader part epilog
38953
38949
;
38954
38950
; GFX11FAKE16-LABEL: s_vselect_v2bf16:
@@ -40655,30 +40651,25 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
40655
40651
;
40656
40652
; GFX11TRUE16-LABEL: s_vselect_v4bf16:
40657
40653
; GFX11TRUE16: ; %bb.0:
40658
- ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s3, 16
40654
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s1, 16
40655
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s9, s0, 16
40659
40656
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
40660
40657
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1
40661
- ; GFX11TRUE16-NEXT: s_lshr_b32 s8, s1, 16
40662
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
40663
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s3
40664
- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16
40665
- ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s0, 16
40666
40658
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2
40667
40659
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3
40668
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s8
40669
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
40670
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
40671
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, s2
40672
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s0
40673
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, s1
40674
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, s6
40675
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v1.h, v2.l, s4
40676
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
40677
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v2.h, v3.l, vcc_lo
40678
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.h, s5
40660
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
40661
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s9
40662
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
40663
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
40664
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s8, s3, 16
40665
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s2, 16
40666
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, s8, v0.l, s6
40667
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v0.h, s4
40668
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s2, v1.l, vcc_lo
40669
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, s3, v1.h, s5
40679
40670
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
40680
- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v4
40681
- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v0
40671
+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
40672
+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v2
40682
40673
; GFX11TRUE16-NEXT: ; return to shader part epilog
40683
40674
;
40684
40675
; GFX11FAKE16-LABEL: s_vselect_v4bf16:
0 commit comments