@@ -38481,7 +38481,10 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
38481
38481
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
38482
38482
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38483
38483
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
38484
- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38484
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1
38485
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38486
+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
38487
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
38485
38488
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38486
38489
; GFX8-NEXT: s_setpc_b64 s[30:31]
38487
38490
;
@@ -38491,7 +38494,9 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
38491
38494
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
38492
38495
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38493
38496
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
38494
- ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38497
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
38498
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38499
+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
38495
38500
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
38496
38501
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
38497
38502
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -38500,9 +38505,11 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
38500
38505
; GFX10: ; %bb.0:
38501
38506
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38502
38507
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38508
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v1
38509
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
38503
38510
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
38504
38511
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
38505
- ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38512
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc_lo
38506
38513
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
38507
38514
; GFX10-NEXT: s_setpc_b64 s[30:31]
38508
38515
;
@@ -38570,37 +38577,44 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
38570
38577
; GFX8-LABEL: v_vselect_v2bf16:
38571
38578
; GFX8: ; %bb.0:
38572
38579
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38573
- ; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
38574
38580
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
38581
+ ; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
38582
+ ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38583
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
38584
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38585
+ ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3
38575
38586
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
38576
- ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
38577
- ; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38578
- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38587
+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
38588
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
38579
38589
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38580
38590
; GFX8-NEXT: s_setpc_b64 s[30:31]
38581
38591
;
38582
38592
; GFX9-LABEL: v_vselect_v2bf16:
38583
38593
; GFX9: ; %bb.0:
38584
38594
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38585
- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
38586
38595
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
38596
+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
38597
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38598
+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
38599
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38600
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
38587
38601
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
38588
- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
38589
- ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38590
- ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38602
+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
38591
38603
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
38592
38604
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
38593
38605
; GFX9-NEXT: s_setpc_b64 s[30:31]
38594
38606
;
38595
38607
; GFX10-LABEL: v_vselect_v2bf16:
38596
38608
; GFX10: ; %bb.0:
38597
38609
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38598
- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
38599
38610
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38611
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
38612
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
38613
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v3
38614
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
38615
+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
38600
38616
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
38601
- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v0
38602
- ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38603
- ; GFX10-NEXT: v_cndmask_b32_e64 v0, v3, v2, s4
38617
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
38604
38618
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
38605
38619
; GFX10-NEXT: s_setpc_b64 s[30:31]
38606
38620
;
@@ -38757,12 +38771,13 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38757
38771
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
38758
38772
; GFX8-NEXT: v_mov_b32_e32 v1, s3
38759
38773
; GFX8-NEXT: v_mov_b32_e32 v2, s2
38760
- ; GFX8-NEXT: v_mov_b32_e32 v3, s1
38761
- ; GFX8-NEXT: v_mov_b32_e32 v4, s0
38762
38774
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
38763
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
38764
- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38765
- ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38775
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
38776
+ ; GFX8-NEXT: v_mov_b32_e32 v1, s1
38777
+ ; GFX8-NEXT: v_mov_b32_e32 v2, s0
38778
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
38779
+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
38780
+ ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38766
38781
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
38767
38782
; GFX8-NEXT: ; return to shader part epilog
38768
38783
;
@@ -38867,13 +38882,14 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38867
38882
; GFX8: ; %bb.0:
38868
38883
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
38869
38884
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
38885
+ ; GFX8-NEXT: v_mov_b32_e32 v2, s3
38886
+ ; GFX8-NEXT: v_mov_b32_e32 v3, s2
38870
38887
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
38871
- ; GFX8-NEXT: v_mov_b32_e32 v1, s3
38872
- ; GFX8-NEXT: v_mov_b32_e32 v2, s2
38873
- ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38888
+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
38874
38889
; GFX8-NEXT: v_mov_b32_e32 v2, s1
38875
38890
; GFX8-NEXT: v_mov_b32_e32 v3, s0
38876
38891
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
38892
+ ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
38877
38893
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
38878
38894
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38879
38895
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -40776,42 +40792,48 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
40776
40792
; GFX9-LABEL: v_vselect_v4bf16:
40777
40793
; GFX9: ; %bb.0:
40778
40794
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40779
- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
40780
- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
40781
- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v3
40795
+ ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
40796
+ ; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
40797
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
40782
40798
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
40783
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40784
- ; GFX9-NEXT: v_and_b32_e32 v1, 1, v2
40785
- ; GFX9-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40799
+ ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v5, vcc
40800
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
40801
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
40802
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
40803
+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
40804
+ ; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
40786
40805
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
40787
- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v1
40788
40806
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
40789
- ; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
40790
- ; GFX9-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[6:7]
40791
- ; GFX9-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40807
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
40808
+ ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v6
40809
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40810
+ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
40792
40811
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
40793
- ; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
40794
- ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s4
40812
+ ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
40813
+ ; GFX9-NEXT: v_perm_b32 v1, v3, v2, s4
40795
40814
; GFX9-NEXT: s_setpc_b64 s[30:31]
40796
40815
;
40797
40816
; GFX10-LABEL: v_vselect_v4bf16:
40798
40817
; GFX10: ; %bb.0:
40799
40818
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40800
- ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
40801
- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
40819
+ ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
40802
40820
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
40803
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
40804
- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
40805
- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v2
40806
- ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v0
40807
- ; GFX10-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40808
- ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
40809
- ; GFX10-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40821
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
40822
+ ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
40823
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v4
40824
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
40825
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v6
40826
+ ; GFX10-NEXT: v_cndmask_b32_e32 v2, v7, v5, vcc_lo
40827
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
40828
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v5
40829
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v7
40830
+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
40810
40831
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
40811
- ; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, v4, s5
40812
- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc_lo
40813
- ; GFX10-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
40814
- ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
40832
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
40833
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
40834
+ ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
40835
+ ; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
40836
+ ; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
40815
40837
; GFX10-NEXT: s_setpc_b64 s[30:31]
40816
40838
;
40817
40839
; GFX11TRUE16-LABEL: v_vselect_v4bf16:
@@ -41059,37 +41081,42 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
41059
41081
; GFX10-LABEL: v_vselect_v8bf16:
41060
41082
; GFX10: ; %bb.0:
41061
41083
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41084
+ ; GFX10-NEXT: v_and_b32_e32 v6, 1, v6
41085
+ ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4
41086
+ ; GFX10-NEXT: v_and_b32_e32 v5, 1, v5
41087
+ ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41088
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 16, v10
41089
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
41090
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v14
41091
+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
41062
41092
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
41063
- ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7
41064
41093
; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
41065
- ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
41066
- ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41067
- ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
41068
- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v5
41069
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41070
- ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v3
41071
- ; GFX10-NEXT: v_and_b32_e32 v3, 1, v6
41072
- ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 1, v1
41073
- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v4
41074
- ; GFX10-NEXT: v_cndmask_b32_sdwa v4, v15, v11, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41075
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41076
- ; GFX10-NEXT: v_cndmask_b32_e32 v5, v14, v10, vcc_lo
41077
- ; GFX10-NEXT: s_mov_b32 vcc_lo, s6
41078
- ; GFX10-NEXT: v_cndmask_b32_sdwa v6, v14, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41079
- ; GFX10-NEXT: s_mov_b32 vcc_lo, s5
41080
- ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v13, v9, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41081
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
41082
- ; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41083
- ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
41084
- ; GFX10-NEXT: v_cndmask_b32_sdwa v7, v12, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41094
+ ; GFX10-NEXT: v_cndmask_b32_e32 v6, v15, v11, vcc_lo
41095
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
41096
+ ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7
41097
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v11
41098
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v15
41099
+ ; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v10, vcc_lo
41100
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
41101
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v8
41102
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v12
41103
+ ; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
41085
41104
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
41086
- ; GFX10-NEXT: v_perm_b32 v0, v7, v0, 0x5040100
41087
41105
; GFX10-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
41106
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
41107
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v9
41108
+ ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 16, v13
41109
+ ; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41110
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41111
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
41088
41112
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
41089
- ; GFX10-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
41090
- ; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v11, vcc_lo
41091
- ; GFX10-NEXT: v_perm_b32 v2, v6, v5, 0x5040100
41092
- ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
41113
+ ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
41114
+ ; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
41115
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41116
+ ; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
41117
+ ; GFX10-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
41118
+ ; GFX10-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
41119
+ ; GFX10-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
41093
41120
; GFX10-NEXT: s_setpc_b64 s[30:31]
41094
41121
;
41095
41122
; GFX11TRUE16-LABEL: v_vselect_v8bf16:
0 commit comments