@@ -1669,40 +1669,38 @@ define amdgpu_kernel void @notdot4_mixedtypes(ptr addrspace(1) %src1,
1669
1669
; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes:
1670
1670
; GFX11-DL-TRUE16: ; %bb.0: ; %entry
1671
1671
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1672
- ; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1672
+ ; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1673
1673
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1674
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1674
+ ; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v6, 0
1675
+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1675
1676
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1676
1677
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1677
1678
; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1678
- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3 , v0, s[0:1]
1679
- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4 , v0, s[2:3]
1680
- ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5 , s[4:5]
1679
+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4 , v0, s[0:1]
1680
+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v5 , v0, s[2:3]
1681
+ ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v6 , s[4:5]
1681
1682
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1682
- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
1683
+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4
1683
1684
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1684
- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
1685
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
1686
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l
1685
+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v5
1686
+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
1687
+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
1687
1688
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
1688
1689
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1689
1690
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l
1690
- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
1691
+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
1691
1692
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1692
- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6 , v7, 0, 8
1693
+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v3.l , v7.l
1693
1694
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1694
1695
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
1695
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1696
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
1697
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
1698
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
1699
- ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v2.l, v0.l
1700
- ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v4, v4, 0xc0c0302
1701
- ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v3, v3, 0xc0c0302
1696
+ ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v5, v5, 0xc0c0302
1697
+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1698
+ ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v3.l, v0.l
1699
+ ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v4, v4, 0xc0c0302
1702
1700
; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
1703
1701
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1704
1702
; GFX11-DL-TRUE16-NEXT: v_dot4_u32_u8 v0, v2, v1, v0
1705
- ; GFX11-DL-TRUE16-NEXT: global_store_b16 v5 , v0, s[4:5]
1703
+ ; GFX11-DL-TRUE16-NEXT: global_store_b16 v6 , v0, s[4:5]
1706
1704
; GFX11-DL-TRUE16-NEXT: s_endpgm
1707
1705
;
1708
1706
; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes:
@@ -1964,44 +1962,41 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1,
1964
1962
; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes2:
1965
1963
; GFX11-DL-TRUE16: ; %bb.0: ; %entry
1966
1964
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1967
- ; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1965
+ ; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1968
1966
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1969
- ; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v4, 0
1970
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1967
+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1971
1968
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1972
1969
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1973
1970
; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1974
- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v2 , v0, s[2:3]
1975
- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3 , v0, s[0:1]
1976
- ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v4 , s[4:5]
1971
+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3 , v0, s[2:3]
1972
+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4 , v0, s[0:1]
1973
+ ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5 , s[4:5]
1977
1974
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1978
- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
1975
+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
1979
1976
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1980
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v3.l
1981
- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v3
1982
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.h
1983
- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3
1977
+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
1978
+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v4, 0, 8
1979
+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h
1984
1980
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
1985
- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
1986
- ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
1987
- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v2
1988
- ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
1981
+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1982
+ ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
1983
+ ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.l
1989
1984
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
1990
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
1985
+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
1986
+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3
1991
1987
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1992
1988
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
1993
- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
1994
- ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.h
1989
+ ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.h
1995
1990
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
1996
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1991
+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v4
1992
+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1997
1993
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v1.h, v0.l
1998
- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
1999
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1994
+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
2000
1995
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.h, v0.l
1996
+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2001
1997
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
2002
- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
2003
1998
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.l, v1.l, v0.l
2004
- ; GFX11-DL-TRUE16-NEXT: global_store_b16 v4 , v0, s[4:5]
1999
+ ; GFX11-DL-TRUE16-NEXT: global_store_b16 v5 , v0, s[4:5]
2005
2000
; GFX11-DL-TRUE16-NEXT: s_endpgm
2006
2001
;
2007
2002
; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes2:
0 commit comments