Skip to content

Commit 297b09b

Browse files
committed
Allow OPSEL for GFX11 and GFX12.
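Section 7.5.1 of the RDNA3 ISA document apparently indicates that OPSEL is allowed (although ignored) in the affected dot instructions: v_dot4_u32_u8, v_dot8_u32_u4, and the dot-iu instructions defined through VOP3PDOTIUInst. Therefore, we should accept it for GFX11 and GFX12, while continuing to reject it on pre-GFX11 targets.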
1 parent df451f2 commit 297b09b

File tree

3 files changed

+14
-442
lines changed

3 files changed

+14
-442
lines changed

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
383383

384384
let OtherPredicates = [HasDot7Insts] in {
385385
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
386+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
387+
// v_dot4_u32_u8 does not allow op_sel Pre-GFX11
388+
defm V_DOT4_U32_U8_PREGFX11 : VOP3PInst<"v_dot4_u32_u8",
386389
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
390+
387391
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
392+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
393+
// v_dot8_u32_u4 does not allow op_sel Pre-GFX11
394+
defm V_DOT8_U32_U4_PREGFX11 : VOP3PInst<"v_dot8_u32_u4",
388395
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
389396
} // End OtherPredicates = [HasDot7Insts]
390397

@@ -411,7 +418,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
411418

412419
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
413420
let IsDOT = 1 in
414-
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
421+
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
415422
null_frag, 1>;
416423
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
417424
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
@@ -1712,8 +1719,8 @@ defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
17121719
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
17131720

17141721
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
1715-
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
1716-
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
1722+
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_vi <0x29>;
1723+
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_vi <0x2b>;
17171724

17181725
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
17191726
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
@@ -1845,8 +1852,10 @@ defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
18451852
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
18461853

18471854
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
1848-
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
1849-
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;
1855+
defm V_DOT4_U32_U8 : VOP3P_Real_gfx11_gfx12<0x17>;
1856+
defm V_DOT4_U32_U8_PREGFX11 : VOP3P_Real_gfx10<0x17>;
1857+
defm V_DOT8_U32_U4 : VOP3P_Real_gfx11_gfx12<0x19>;
1858+
defm V_DOT8_U32_U4_PREGFX11 : VOP3P_Real_gfx10<0x19>;
18501859

18511860
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
18521861
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s

Lines changed: 0 additions & 219 deletions
This file was deleted.

0 commit comments

Comments
 (0)