Skip to content

Commit e1b5438

Browse files
committed
update test
1 parent 89dfa75 commit e1b5438

File tree

6 files changed

+2641
-613
lines changed

6 files changed

+2641
-613
lines changed

llvm/test/CodeGen/AMDGPU/idot4s.ll

Lines changed: 31 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,35 +1165,32 @@ define amdgpu_kernel void @idot4_acc16_vecMul(ptr addrspace(1) %src1,
11651165
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
11661166
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
11671167
; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1168-
; GFX11-DL-TRUE16-NEXT: global_load_b32 v1, v0, s[2:3]
1169-
; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[0:1]
1168+
; GFX11-DL-TRUE16-NEXT: global_load_b32 v1, v0, s[0:1]
1169+
; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3]
11701170
; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v3, s[4:5]
11711171
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1172-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
1172+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
11731173
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1174-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
1175-
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v6.h, 8, v2.l
1176-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.h
1177-
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v8.h, 8, v1.l
1178-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
1179-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
1180-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.h
1181-
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v2.h, 8, v2.h
1174+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8
1175+
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v4.h, 8, v1.l
1176+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
1177+
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v7.h, 8, v2.l
1178+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
1179+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
1180+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
1181+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8
11821182
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v1.h, 8, v1.h
1183-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
1184-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v5.l
1185-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
1186-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v7, 0, 8
1187-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1188-
; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v6, v6, v8
1189-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
1190-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1183+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8
1184+
; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v2.h, 8, v2.h
1185+
; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v4, v4, v7
1186+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l
1187+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
11911188
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
11921189
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1193-
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v6.l, v0.l
1190+
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v0.l
11941191
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1195-
; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v2, v1
1196-
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v6.h
1192+
; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v2
1193+
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v4.h
11971194
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
11981195
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
11991196
; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.h
@@ -3435,35 +3432,31 @@ define amdgpu_kernel void @idot4_nonstandard_signed(ptr addrspace(1) %src1,
34353432
; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[0:1]
34363433
; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3]
34373434
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
3438-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
3439-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
3435+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v2
3436+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8
34403437
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
3441-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v3
3442-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.h
3443-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 24, v2
3444-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v0, 0, 8
3445-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l
34463438
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v3.l
3447-
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
3439+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v3
3440+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v2.h
3441+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
34483442
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.h
3443+
; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v1.l, v0.l
3444+
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v5.l
3445+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8
34493446
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
3450-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
3451-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v7, 0, 8
3447+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 24, v2
34523448
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3
34533449
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3454-
; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v1.l, v0.l
3455-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
3456-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
34573450
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
3458-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
34593451
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
3452+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
34603453
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8
3461-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
34623454
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.h, v2.l, v0.l
3455+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
34633456
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
3464-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
34653457
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v3.l, v0.l
34663458
; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
3459+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
34673460
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
34683461
; GFX11-DL-TRUE16-NEXT: global_store_b32 v1, v0, s[4:5]
34693462
; GFX11-DL-TRUE16-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/idot4u.ll

Lines changed: 37 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,40 +1669,38 @@ define amdgpu_kernel void @notdot4_mixedtypes(ptr addrspace(1) %src1,
16691669
; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes:
16701670
; GFX11-DL-TRUE16: ; %bb.0: ; %entry
16711671
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1672-
; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1672+
; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
16731673
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1674-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1674+
; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v6, 0
1675+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
16751676
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
16761677
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
16771678
; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1678-
; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[0:1]
1679-
; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[2:3]
1680-
; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5, s[4:5]
1679+
; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[0:1]
1680+
; GFX11-DL-TRUE16-NEXT: global_load_b32 v5, v0, s[2:3]
1681+
; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v6, s[4:5]
16811682
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1682-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
1683+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4
16831684
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1684-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
1685-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
1686-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l
1685+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v5
1686+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
1687+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
16871688
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
16881689
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
16891690
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l
1690-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
1691+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
16911692
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1692-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v7, 0, 8
1693+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
16931694
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
16941695
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
1695-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1696-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
1697-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
1698-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
1699-
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v2.l, v0.l
1700-
; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v4, v4, 0xc0c0302
1701-
; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v3, v3, 0xc0c0302
1696+
; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v5, v5, 0xc0c0302
1697+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1698+
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v3.l, v0.l
1699+
; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v4, v4, 0xc0c0302
17021700
; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
17031701
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
17041702
; GFX11-DL-TRUE16-NEXT: v_dot4_u32_u8 v0, v2, v1, v0
1705-
; GFX11-DL-TRUE16-NEXT: global_store_b16 v5, v0, s[4:5]
1703+
; GFX11-DL-TRUE16-NEXT: global_store_b16 v6, v0, s[4:5]
17061704
; GFX11-DL-TRUE16-NEXT: s_endpgm
17071705
;
17081706
; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes:
@@ -1964,44 +1962,41 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1,
19641962
; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes2:
19651963
; GFX11-DL-TRUE16: ; %bb.0: ; %entry
19661964
; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1967-
; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1965+
; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
19681966
; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1969-
; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v4, 0
1970-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1967+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
19711968
; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
19721969
; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
19731970
; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1974-
; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3]
1975-
; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[0:1]
1976-
; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v4, s[4:5]
1971+
; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3]
1972+
; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[0:1]
1973+
; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5, s[4:5]
19771974
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1978-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
1975+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
19791976
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1980-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v3.l
1981-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v3
1982-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.h
1983-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3
1977+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
1978+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v4, 0, 8
1979+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h
19841980
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
1985-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
1986-
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
1987-
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v2
1988-
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
1981+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1982+
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
1983+
; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.l
19891984
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
1990-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
1985+
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
1986+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3
19911987
; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
19921988
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
1993-
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
1994-
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.h
1989+
; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.h
19951990
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
1996-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1991+
; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v4
1992+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
19971993
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v1.h, v0.l
1998-
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
1999-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1994+
; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
20001995
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.h, v0.l
1996+
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
20011997
; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
2002-
; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
20031998
; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.l, v1.l, v0.l
2004-
; GFX11-DL-TRUE16-NEXT: global_store_b16 v4, v0, s[4:5]
1999+
; GFX11-DL-TRUE16-NEXT: global_store_b16 v5, v0, s[4:5]
20052000
; GFX11-DL-TRUE16-NEXT: s_endpgm
20062001
;
20072002
; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes2:

llvm/test/CodeGen/AMDGPU/llvm.frexp.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -424,15 +424,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
424424
; GFX11-SDAG-TRUE16-LABEL: test_frexp_v2f16_v2i32:
425425
; GFX11-SDAG-TRUE16: ; %bb.0:
426426
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427-
; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l
428-
; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h
429-
; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h
430-
; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l
431-
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
432-
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16
433-
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16
427+
; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.h
428+
; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.l
429+
; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l
430+
; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h
431+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
432+
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1.h, v1.l
433+
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16
434434
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
435-
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
435+
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16
436436
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
437437
;
438438
; GFX11-SDAG-FAKE16-LABEL: test_frexp_v2f16_v2i32:
@@ -457,15 +457,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
457457
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
458458
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
459459
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
460-
; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l
461-
; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h
462-
; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h
463-
; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l
464-
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
465-
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16
466-
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16
460+
; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.h
461+
; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.l
462+
; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l
463+
; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h
464+
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
465+
; GFX12-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1.h, v1.l
466+
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16
467467
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
468-
; GFX12-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
468+
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16
469469
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
470470
;
471471
; GFX12-SDAG-FAKE16-LABEL: test_frexp_v2f16_v2i32:
@@ -534,15 +534,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
534534
; GFX11-GISEL-TRUE16-LABEL: test_frexp_v2f16_v2i32:
535535
; GFX11-GISEL-TRUE16: ; %bb.0:
536536
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537-
; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l
538-
; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h
539-
; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l
540-
; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h
541-
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
542-
; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16
543-
; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16
537+
; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.l
538+
; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.h
539+
; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l
540+
; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h
541+
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
542+
; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v1.l, v1.h
543+
; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16
544544
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
545-
; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
545+
; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16
546546
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
547547
;
548548
; GFX11-GISEL-FAKE16-LABEL: test_frexp_v2f16_v2i32:
@@ -567,15 +567,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
567567
; GFX12-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
568568
; GFX12-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
569569
; GFX12-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
570-
; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l
571-
; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h
572-
; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l
573-
; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h
574-
; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
575-
; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16
576-
; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16
570+
; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.l
571+
; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.h
572+
; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l
573+
; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h
574+
; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
575+
; GFX12-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v1.l, v1.h
576+
; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16
577577
; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
578-
; GFX12-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
578+
; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16
579579
; GFX12-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
580580
;
581581
; GFX12-GISEL-FAKE16-LABEL: test_frexp_v2f16_v2i32:

0 commit comments

Comments
 (0)