Skip to content

Commit 46d33b6

Browse files
authored
[AMDGPU][MC] Allow dpp in v_pk_fmac_f16 for GFX9 and GFX10 (#144782)
Allows dpp in v_pk_fmac_f16 for GFX9, and both dpp and dpp8 for GFX10.
1 parent 1b71ea4 commit 46d33b6

File tree

7 files changed: 218 additions (+218) and 4 deletions (-4).

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1945,6 +1945,14 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
19451945
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
19461946
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
19471947
}
1948+
multiclass VOP2Only_Real_e32_gfx10<bits<6> op> {
1949+
let IsSingle = 1 in
1950+
defm NAME: VOP2_Real_e32_gfx10<op>;
1951+
}
1952+
multiclass VOP2_Real_e32_dpp_dpp8_gfx10<bits<6> op> :
1953+
VOP2Only_Real_e32_gfx10<op>,
1954+
VOP2_Real_dpp_gfx10<op>,
1955+
VOP2_Real_dpp8_gfx10<op>;
19481956

19491957
//===------------------------- VOP2 (with name) -------------------------===//
19501958
multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
@@ -2168,10 +2176,7 @@ defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
21682176
defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
21692177
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
21702178
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
2171-
2172-
let IsSingle = 1 in {
2173-
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
2174-
}
2179+
defm V_PK_FMAC_F16 : VOP2_Real_e32_dpp_dpp8_gfx10<0x03c>;
21752180

21762181
// VOP2 no carry-in, carry-out.
21772182
defm V_ADD_NC_U32 :
@@ -2560,6 +2565,7 @@ defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_s
25602565
defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
25612566
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
25622567
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
2568+
defm V_PK_FMAC_F16 : VOP2_Real_e32e64_gfx9<0x03c>;
25632569
} // End AssemblerPredicate = isGFX9Only
25642570

25652571
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;

llvm/test/MC/AMDGPU/gfx10_asm_vop2.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13185,3 +13185,15 @@ v_pk_fmac_f16 v5, -4.0, v2
1318513185

1318613186
v_pk_fmac_f16 v5, v1, v255
1318713187
// GFX10: encoding: [0x01,0xff,0x0b,0x78]
13188+
13189+
v_pk_fmac_f16 v5, v1, v2
13190+
// GFX10: encoding: [0x01,0x05,0x0a,0x78]
13191+
13192+
v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3]
13193+
// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
13194+
13195+
v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
13196+
// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
13197+
13198+
v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
13199+
// GFX10: encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05]

llvm/test/MC/AMDGPU/gfx10_unsupported_sdwa.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ v_min_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD s
3232
v_mul_lo_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
3333
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
3434

35+
v_pk_fmac_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
36+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
37+
3538
v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
3639
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
3740

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
3+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
4+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s
5+
6+
v_pk_fmac_f16 v5, v1, v2
7+
// CHECK-MI: [0x01,0x05,0x0a,0x78]
8+
9+
v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3]
10+
// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
11+
12+
v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
13+
// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
14+
15+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
16+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
17+
18+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
19+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
20+
21+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
22+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x00,0x06,0x06]
23+
24+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
25+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x01,0x06,0x06]
26+
27+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
28+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x02,0x06,0x06]
29+
30+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
31+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x03,0x06,0x06]
32+
33+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
34+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x04,0x06,0x06]
35+
36+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
37+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x05,0x06,0x06]
38+
39+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
40+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x0e,0x06,0x06]
41+
42+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
43+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06]
44+
45+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
46+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06]
47+
48+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
49+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
50+
51+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
52+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x00,0x06]
53+
54+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
55+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x01,0x06]
56+
57+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
58+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x02,0x06]
59+
60+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
61+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x03,0x06]
62+
63+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
64+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x04,0x06]
65+
66+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
67+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x05,0x06]
68+
69+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
70+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
71+
72+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
73+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x00]
74+
75+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
76+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x01]
77+
78+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
79+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x02]
80+
81+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
82+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x03]
83+
84+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
85+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x04]
86+
87+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
88+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05]
89+
90+
v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
91+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16]

llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2476,3 +2476,10 @@
24762476
# W32: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff]
24772477
# W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff]
24782478
0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff
2479+
2480+
# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff]
2481+
0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff
2482+
2483+
# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
2484+
0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00
2485+

llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,3 +222,6 @@
222222
# W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa]
223223
# W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa]
224224
0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa
225+
226+
# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05]
227+
0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s
2+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s
3+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s
4+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s
5+
6+
# CHECK-MI: v_pk_fmac_f16_e32 v5, v1, v2
7+
0x01,0x05,0x0a,0x78
8+
9+
# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
10+
0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff
11+
12+
# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
13+
0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00
14+
15+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
16+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06
17+
18+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
19+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06
20+
21+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
22+
0xf9,0x04,0x0a,0x78,0x01,0x00,0x06,0x06
23+
24+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
25+
0xf9,0x04,0x0a,0x78,0x01,0x01,0x06,0x06
26+
27+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
28+
0xf9,0x04,0x0a,0x78,0x01,0x02,0x06,0x06
29+
30+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
31+
0xf9,0x04,0x0a,0x78,0x01,0x03,0x06,0x06
32+
33+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
34+
0xf9,0x04,0x0a,0x78,0x01,0x04,0x06,0x06
35+
36+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
37+
0xf9,0x04,0x0a,0x78,0x01,0x05,0x06,0x06
38+
39+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
40+
0xf9,0x04,0x0a,0x78,0x01,0x0e,0x06,0x06
41+
42+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
43+
0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06
44+
45+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
46+
0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06
47+
48+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
49+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06
50+
51+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
52+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x00,0x06
53+
54+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
55+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x01,0x06
56+
57+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
58+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x02,0x06
59+
60+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
61+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x03,0x06
62+
63+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
64+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x04,0x06
65+
66+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
67+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x05,0x06
68+
69+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
70+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06
71+
72+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
73+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x00
74+
75+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
76+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x01
77+
78+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
79+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x02
80+
81+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
82+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x03
83+
84+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
85+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x04
86+
87+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
88+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05
89+
90+
# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
91+
0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16
92+

0 commit comments

Comments
 (0)