Skip to content

Commit d3e1954

Browse files
committed
[AMDGPU][MC] Add dpp for V_PK_FMAC_F16 for GFX10
Adds DPP variants of v_pk_fmac_f16 for GFX10 and removes them for GFX11 and GFX12.
1 parent f2d0bba commit d3e1954

File tree

5 files changed

+41
-22
lines changed

5 files changed

+41
-22
lines changed

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,9 @@ multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
16151615
multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
16161616
VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>;
16171617

1618+
multiclass VOP2_V_PK_FMAC_F16_gfx11_gfx12<bits<6> op> :
1619+
VOP2Only_Real_e32<GFX11Gen, op>, VOP2Only_Real_e32<GFX12Gen, op>;
1620+
16181621
multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
16191622
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
16201623

@@ -1661,7 +1664,8 @@ defm V_SUBREV_CO_CI_U32 :
16611664

16621665
defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
16631666
"V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
1664-
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;
1667+
1668+
defm V_PK_FMAC_F16 : VOP2_V_PK_FMAC_F16_gfx11_gfx12<0x03c>;
16651669

16661670
defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
16671671
defm V_ADD_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
@@ -1945,6 +1949,11 @@ multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
19451949
VOP2be_Real_dpp_gfx10<op, opName, asmName>,
19461950
VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
19471951

1952+
multiclass VOP2_FMAC_Real<bits<6> op> :
1953+
VOP2_Real_e32_gfx10<op>,
1954+
VOP2_Real_dpp_gfx10<op>,
1955+
VOP2_Real_dpp8_gfx10<op>;
1956+
19481957
multiclass VOP2_Real_gfx10<bits<6> op> :
19491958
VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
19501959
VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
@@ -1988,9 +1997,7 @@ defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
19881997
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
19891998
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
19901999

1991-
let IsSingle = 1 in {
1992-
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
1993-
}
2000+
defm V_PK_FMAC_F16 : VOP2_FMAC_Real<0x03c>;
19942001

19952002
// VOP2 no carry-in, carry-out.
19962003
defm V_ADD_NC_U32 :

llvm/test/MC/AMDGPU/gfx10_asm_vop2.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13185,3 +13185,9 @@ v_pk_fmac_f16 v5, -4.0, v2
1318513185

1318613186
v_pk_fmac_f16 v5, v1, v255
1318713187
// GFX10: encoding: [0x01,0xff,0x0b,0x78]
13188+
13189+
v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3]
13190+
// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
13191+
13192+
v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3
13193+
// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03]

llvm/test/MC/AMDGPU/literalv216.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,4 +291,4 @@ v_pk_add_u16 v5, v1, 123456.0
291291
// FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid
292292
v_pk_fmac_f16 v5, 0x12345678, v2
293293
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
294-
// GFX10: v_pk_fmac_f16 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
294+
// GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]

llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2.txt

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,54 +1779,60 @@
17791779
# GFX10: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38]
17801780
0x6a,0x04,0x0a,0x38
17811781

1782-
# GFX10: v_pk_fmac_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79]
1782+
# GFX10: v_pk_fmac_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79]
17831783
0x01,0x05,0xfe,0x79
17841784

1785-
# GFX10: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
1785+
# GFX10: v_pk_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
17861786
0xc1,0x04,0x0a,0x78
17871787

1788-
# GFX10: v_pk_fmac_f16 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78]
1788+
# GFX10: v_pk_fmac_f16_e32 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78]
17891789
0xf7,0x04,0x0a,0x78
17901790

1791-
# GFX10: v_pk_fmac_f16 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78]
1791+
# GFX10: v_pk_fmac_f16_e32 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78]
17921792
0x80,0x04,0x0a,0x78
17931793

1794-
# GFX10: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
1794+
# GFX10: v_pk_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
17951795
0xf0,0x04,0x0a,0x78
17961796

1797-
# GFX10: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78]
1797+
# GFX10: v_pk_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78]
17981798
0x7f,0x04,0x0a,0x78
17991799

1800-
# GFX10: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78]
1800+
# GFX10: v_pk_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78]
18011801
0x7e,0x04,0x0a,0x78
18021802

1803-
# GFX10: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78]
1803+
# GFX10: v_pk_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78]
18041804
0x7c,0x04,0x0a,0x78
18051805

1806-
# GFX10: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78]
1806+
# GFX10: v_pk_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78]
18071807
0x01,0x04,0x0a,0x78
18081808

1809-
# GFX10: v_pk_fmac_f16 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78]
1809+
# GFX10: v_pk_fmac_f16_e32 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78]
18101810
0x67,0x04,0x0a,0x78
18111811

1812-
# GFX10: v_pk_fmac_f16 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78]
1812+
# GFX10: v_pk_fmac_f16_e32 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78]
18131813
0x77,0x04,0x0a,0x78
18141814

1815-
# GFX10: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78]
1815+
# GFX10: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78]
18161816
0x01,0x05,0x0a,0x78
18171817

1818-
# GFX10: v_pk_fmac_f16 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78]
1818+
# GFX10: v_pk_fmac_f16_e32 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78]
18191819
0x01,0xff,0x0b,0x78
18201820

1821-
# GFX10: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78]
1821+
# GFX10: v_pk_fmac_f16_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78]
18221822
0xff,0x05,0x0a,0x78
18231823

1824-
# GFX10: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78]
1824+
# GFX10: v_pk_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78]
18251825
0x6b,0x04,0x0a,0x78
18261826

1827-
# GFX10: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78]
1827+
# GFX10: v_pk_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78]
18281828
0x6a,0x04,0x0a,0x78
18291829

1830+
# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
1831+
0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff
1832+
1833+
# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03]
1834+
0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03
1835+
18301836
# W32: v_sub_co_ci_u32_e32 v255, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0xfe,0x53]
18311837
# W64: v_sub_co_ci_u32_e32 v255, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0xfe,0x53]
18321838
0x01,0x05,0xfe,0x53

llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,5 +144,5 @@
144144
# Packed VOP2
145145
#===----------------------------------------------------------------------===//
146146

147-
# GFX10: v_pk_fmac_f16 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
147+
# GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
148148
0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12

0 commit comments

Comments
 (0)