@@ -38259,16 +38259,14 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38259
38259
; GFX11TRUE16-LABEL: s_select_v2bf16:
38260
38260
; GFX11TRUE16: ; %bb.0:
38261
38261
; GFX11TRUE16-NEXT: s_lshr_b32 s2, s0, 16
38262
- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
38263
38262
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
38264
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
38265
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2
38266
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
38267
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
38268
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
38269
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, vcc_lo
38270
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v1.h, vcc_lo
38271
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38263
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
38264
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
38265
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s1, 16
38266
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
38267
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v1.l, vcc_lo
38268
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
38269
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
38272
38270
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
38273
38271
; GFX11TRUE16-NEXT: ; return to shader part epilog
38274
38272
;
@@ -38376,19 +38374,17 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38376
38374
;
38377
38375
; GFX11TRUE16-LABEL: s_vselect_v2bf16:
38378
38376
; GFX11TRUE16: ; %bb.0:
38379
- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16
38380
- ; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16
38377
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16
38381
38378
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
38382
38379
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
38383
38380
; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
38384
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s4
38385
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1
38386
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0
38387
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
38388
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, s2
38389
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v1.h, vcc_lo
38390
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
38391
- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
38381
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
38382
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s1, 16
38383
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
38384
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, s0, v0.l, s2
38385
+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
38386
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, s1, v0.h, vcc_lo
38387
+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v1
38392
38388
; GFX11TRUE16-NEXT: ; return to shader part epilog
38393
38389
;
38394
38390
; GFX11FAKE16-LABEL: s_vselect_v2bf16:
@@ -40095,30 +40091,25 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
40095
40091
;
40096
40092
; GFX11TRUE16-LABEL: s_vselect_v4bf16:
40097
40093
; GFX11TRUE16: ; %bb.0:
40098
- ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s3, 16
40094
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s1, 16
40095
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s9, s0, 16
40099
40096
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
40100
40097
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1
40101
- ; GFX11TRUE16-NEXT: s_lshr_b32 s8, s1, 16
40102
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
40103
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s3
40104
- ; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16
40105
- ; GFX11TRUE16-NEXT: s_lshr_b32 s7, s0, 16
40106
40098
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2
40107
40099
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3
40108
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s8
40109
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
40110
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
40111
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, s2
40112
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s0
40113
- ; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, s1
40114
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v0.h, s6
40115
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v1.h, v2.l, s4
40116
- ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
40117
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v2.h, v3.l, vcc_lo
40118
- ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.h, s5
40100
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
40101
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s9
40102
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
40103
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
40104
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s8, s3, 16
40105
+ ; GFX11TRUE16-NEXT: s_lshr_b32 s0, s2, 16
40106
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, s8, v0.l, s6
40107
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v0.h, s4
40108
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s2, v1.l, vcc_lo
40109
+ ; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, s3, v1.h, s5
40119
40110
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
40120
- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v4
40121
- ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v0
40111
+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
40112
+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v2
40122
40113
; GFX11TRUE16-NEXT: ; return to shader part epilog
40123
40114
;
40124
40115
; GFX11FAKE16-LABEL: s_vselect_v4bf16:
0 commit comments