Skip to content

Commit d63c2e5

Browse files
authored
[AMDGPU][MC] Remove incorrect _e32 suffix from v_dot2c_f32_f16 and v_dot4c_i32_i8 (#77993)
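These two VOP2 instructions cannot be encoded as VOP3, so there is no _e64 form for the _e32 suffix to be distinguished from; the suffix is therefore dropped from their GFX10 mnemonics. Fixes #54691.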
1 parent d338d15 commit d63c2e5

File tree

9 files changed

+37
-31
lines changed

9 files changed

+37
-31
lines changed

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2520,16 +2520,22 @@ multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
25202520
VOP2_Real_dpp_gfx10<op>,
25212521
VOP2_Real_dpp8_gfx10<op>;
25222522

2523+
multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> : VOP2_Real_dpp_gfx10<op>,
2524+
VOP2_Real_dpp8_gfx10<op> {
2525+
let IsSingle = 1 in
2526+
defm NAME : VOP2_Real_e32_gfx10<op>;
2527+
}
2528+
25232529
let SubtargetPredicate = HasDot5Insts in {
25242530
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
25252531
// NB: Opcode conflicts with V_DOT8C_I32_I4
25262532
// This opcode exists in gfx 10.1* only
2527-
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
2533+
defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>;
25282534
}
25292535

25302536
let SubtargetPredicate = HasDot6Insts in {
25312537
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
2532-
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
2538+
defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>;
25332539
}
25342540

25352541
let SubtargetPredicate = HasDot4Insts in {

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define i32 @v_sdot4(i32 %a, i32 %b, i32 %c) {
1313
; GFX10-LABEL: v_sdot4:
1414
; GFX10: ; %bb.0:
1515
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16-
; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
16+
; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
1717
; GFX10-NEXT: v_mov_b32_e32 v0, v2
1818
; GFX10-NEXT: s_setpc_b64 s[30:31]
1919
%r = call i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 false)
@@ -78,7 +78,7 @@ define i32 @v_sdot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) {
7878
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v6
7979
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
8080
; GFX10-NEXT: v_or3_b32 v1, v3, v4, v5
81-
; GFX10-NEXT: v_dot4c_i32_i8_e32 v8, v0, v1
81+
; GFX10-NEXT: v_dot4c_i32_i8 v8, v0, v1
8282
; GFX10-NEXT: v_mov_b32_e32 v0, v8
8383
; GFX10-NEXT: s_setpc_b64 s[30:31]
8484
%a.cast = bitcast <4 x i8> %a to i32
@@ -99,7 +99,7 @@ define i32 @v_sdot4_fnegf32_a(float %a, i32 %b, i32 %c) {
9999
; GFX10: ; %bb.0:
100100
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101101
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
102-
; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
102+
; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
103103
; GFX10-NEXT: v_mov_b32_e32 v0, v2
104104
; GFX10-NEXT: s_setpc_b64 s[30:31]
105105
%neg.a = fneg float %a
@@ -120,7 +120,7 @@ define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
120120
; GFX10: ; %bb.0:
121121
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122122
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
123-
; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
123+
; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
124124
; GFX10-NEXT: v_mov_b32_e32 v0, v2
125125
; GFX10-NEXT: s_setpc_b64 s[30:31]
126126
%neg.a = fneg <2 x half> %a

llvm/test/CodeGen/AMDGPU/fdot2.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ entry:
5454
; GFX906: v_mac_f32_e32
5555

5656
; GFX906-DL-UNSAFE: v_dot2_f32_f16
57-
; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
57+
; GFX10-DL-UNSAFE: v_dot2c_f32_f16
5858

5959
; GFX906-CONTRACT: v_dot2_f32_f16
6060

@@ -95,7 +95,7 @@ entry:
9595
; GFX906: v_mac_f32_e32
9696

9797
; GFX906-DL-UNSAFE: v_dot2_f32_f16
98-
; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
98+
; GFX10-DL-UNSAFE: v_dot2c_f32_f16
9999

100100
; GFX906-CONTRACT: v_dot2_f32_f16
101101
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16

llvm/test/CodeGen/AMDGPU/idot2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2855,7 +2855,7 @@ define amdgpu_kernel void @notsdot2_sext8(ptr addrspace(1) %src1,
28552855
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0001
28562856
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
28572857
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
2858-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
2858+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
28592859
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
28602860
; GFX10-DL-NEXT: s_endpgm
28612861
ptr addrspace(1) %src2,

llvm/test/CodeGen/AMDGPU/idot4s.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ define amdgpu_kernel void @idot4_acc32(ptr addrspace(1) %src1,
127127
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
128128
; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
129129
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
130-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
130+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
131131
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
132132
; GFX10-DL-NEXT: s_endpgm
133133
;
@@ -336,7 +336,7 @@ define amdgpu_kernel void @idot4_acc16(ptr addrspace(1) %src1,
336336
; GFX10-DL-NEXT: global_load_dword v3, v0, s[6:7]
337337
; GFX10-DL-NEXT: global_load_sshort v4, v1, s[2:3]
338338
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
339-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v4, v2, v3
339+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v4, v2, v3
340340
; GFX10-DL-NEXT: global_store_short v1, v4, s[2:3]
341341
; GFX10-DL-NEXT: s_endpgm
342342
;
@@ -710,7 +710,7 @@ define amdgpu_kernel void @idot4_multiuse_mul1(ptr addrspace(1) %src1,
710710
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
711711
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v0, v3, s2
712712
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
713-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
713+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
714714
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
715715
; GFX10-DL-NEXT: s_endpgm
716716
;
@@ -906,7 +906,7 @@ define amdgpu_kernel void @idot4_acc32_vecMul(ptr addrspace(1) %src1,
906906
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
907907
; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
908908
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
909-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
909+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
910910
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
911911
; GFX10-DL-NEXT: s_endpgm
912912
;
@@ -1335,7 +1335,7 @@ define amdgpu_kernel void @idot4_acc32_2ele(ptr addrspace(1) %src1,
13351335
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0100
13361336
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
13371337
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
1338-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
1338+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
13391339
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
13401340
; GFX10-DL-NEXT: s_endpgm
13411341
;
@@ -1513,7 +1513,7 @@ define amdgpu_kernel void @idot4_acc32_3ele(ptr addrspace(1) %src1,
15131513
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020100
15141514
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
15151515
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
1516-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
1516+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
15171517
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
15181518
; GFX10-DL-NEXT: s_endpgm
15191519
;
@@ -1698,7 +1698,7 @@ define amdgpu_kernel void @idot4_acc32_3ele_permuted(ptr addrspace(1) %src1,
16981698
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020003
16991699
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
17001700
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
1701-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
1701+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
17021702
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
17031703
; GFX10-DL-NEXT: s_endpgm
17041704
;
@@ -1870,7 +1870,7 @@ define amdgpu_kernel void @idot4_acc32_opt(ptr addrspace(1) %src1,
18701870
; GFX10-DL-NEXT: global_load_dword v2, v0, s[6:7]
18711871
; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
18721872
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
1873-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
1873+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
18741874
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
18751875
; GFX10-DL-NEXT: s_endpgm
18761876
;
@@ -2070,7 +2070,7 @@ define amdgpu_kernel void @idot4_acc32_3src(ptr addrspace(1) %src1,
20702070
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
20712071
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
20722072
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
2073-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v3, v0
2073+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v3, v0
20742074
; GFX10-DL-NEXT: global_store_dword v2, v1, s[6:7]
20752075
; GFX10-DL-NEXT: s_endpgm
20762076
;
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void @idot4_acc32_3src_3ele(ptr addrspace(1) %src1,
22762276
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
22772277
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
22782278
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
2279-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v2, v0
2279+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v2, v0
22802280
; GFX10-DL-NEXT: global_store_dword v3, v1, s[6:7]
22812281
; GFX10-DL-NEXT: s_endpgm
22822282
;
@@ -2479,7 +2479,7 @@ define amdgpu_kernel void @idot4_bad_source(ptr addrspace(1) %src1,
24792479
; GFX10-DL-NEXT: v_perm_b32 v1, v1, v1, 0xc0c0201
24802480
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
24812481
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v0, s2, s3
2482-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
2482+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
24832483
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
24842484
; GFX10-DL-NEXT: s_endpgm
24852485
;
@@ -2674,7 +2674,7 @@ define amdgpu_kernel void @idot4_commutative(ptr addrspace(1) %src1,
26742674
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020100
26752675
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
26762676
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
2677-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
2677+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
26782678
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
26792679
; GFX10-DL-NEXT: s_endpgm
26802680
;
@@ -2874,7 +2874,7 @@ define amdgpu_kernel void @idot4_acc32_3src_3ele_src0(ptr addrspace(1) %src1,
28742874
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
28752875
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
28762876
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
2877-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v2, v0
2877+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v2, v0
28782878
; GFX10-DL-NEXT: global_store_dword v3, v1, s[6:7]
28792879
; GFX10-DL-NEXT: s_endpgm
28802880
;
@@ -3105,7 +3105,7 @@ define amdgpu_kernel void @idot4_4src(ptr addrspace(1) %src1,
31053105
; GFX10-DL-NEXT: v_or_b32_e32 v1, v2, v1
31063106
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
31073107
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
3108-
; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
3108+
; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
31093109
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
31103110
; GFX10-DL-NEXT: s_endpgm
31113111
;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ entry:
2626
; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
2727
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
2828
; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
29-
; GFX10: {{v_dot2c_f32_f16_e32|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
29+
; GFX10: {{v_dot2c_f32_f16|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
3030
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
3131
ptr addrspace(1) %r,
3232
ptr addrspace(1) %a,

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ entry:
2929

3030
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
3131
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
32-
; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
32+
; GFX10: v_dot4c_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
3333
; GF11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} neg_lo:[1,1,0]{{$}}
3434
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
3535
ptr addrspace(1) %r,

llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck %s
22
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck %s
33

4-
# CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
4+
# CHECK: v_dot2c_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
55
0x01,0x05,0x0a,0x04
66

7-
# CHECK: v_dot2c_f32_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
7+
# CHECK: v_dot2c_f32_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
88
0x01,0x05,0xfe,0x05
99

1010
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
@@ -85,10 +85,10 @@
8585
# CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00]
8686
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00
8787

88-
# CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
88+
# CHECK: v_dot4c_i32_i8 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
8989
0x01,0x05,0x0a,0x1a
9090

91-
# CHECK: v_dot4c_i32_i8_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
91+
# CHECK: v_dot4c_i32_i8 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
9292
0x01,0x05,0xfe,0x1b
9393

9494
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00]

llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
# GFX10: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
3030
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c
3131

32-
# GFX10: v_dot2c_f32_f16_e32 v5, v1, v2
32+
# GFX10: v_dot2c_f32_f16 v5, v1, v2
3333
0x01,0x05,0x0a,0x04
3434

3535
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
@@ -44,7 +44,7 @@
4444
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
4545
0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05
4646

47-
# GFX10: v_dot4c_i32_i8_e32 v5, v1, v2
47+
# GFX10: v_dot4c_i32_i8 v5, v1, v2
4848
0x01,0x05,0x0a,0x1a
4949

5050
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0

0 commit comments

Comments
 (0)