Skip to content

[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
break;

// FIXME: DecoderTableGFX125064 is not defined yet.
if (isGFX1250() &&
tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
break;

if (isGFX12() &&
tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS))
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
let HasExtDPP = 0;
}

let HasExt64BitDPP = 1 in {
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;

Expand All @@ -48,10 +49,13 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
let HasExtDPP = 0;
}

def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;

def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
let HasExtVOP3DPP = 0;
let HasExtDPP = 0;
}
} // End HasExt64BitDPP = 1;

//===----------------------------------------------------------------------===//
// VOP3 INTERP
Expand Down Expand Up @@ -722,7 +726,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
// src0 is shifted left by 0-4 (use “0” to get ADD_U64).
let SubtargetPredicate = HasLshlAddU64Inst in
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", V_LSHL_ADD_U64_PROF>;

let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
SchedRW = [WriteFloatCvt] in {
Expand Down Expand Up @@ -1889,6 +1893,9 @@ let AssemblerPredicate = isGFX11Plus in {
def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
}

// These instructions differ from GFX12 variant by supporting DPP:
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
Expand Down
26 changes: 21 additions & 5 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1552,12 +1552,17 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
""));
}

multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
list<Predicate> predicates = []> {
def _e64 : VOP3InstBase<OpName, P, node>;
let SubtargetPredicate = isGFX11Plus in {
if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
} // end SubtargetPredicate = isGFX11Plus
if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
let SubtargetPredicate = isGFX11Plus;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can (or should) we use "let OtherPredicates = [isGFX11Plus]" instead here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isGFX11Plus is really a subtarget.

}
else if P.HasExt64BitDPP then
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not comfortable that VOP3Inst here uses both SubtargetPredicate and OtherPredicates.
What if we define a VOP3Inst under another Predicate later?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is you would get it on all forms, while it is only needed on _64_dpp. I remember I was really struggling, before came to this.

}

class UniformUnaryFragOrOp<SDPatternOperator Op> {
Expand Down Expand Up @@ -1961,6 +1966,17 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;

multiclass VOP3Only_Realtriple_with_name_gfx12_not_gfx1250<bits<10> op, string opName,
string asmName, string pseudo_mnemonic = "",
bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX12Not12_50Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;

multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
VOP3_Real_Base<GFX1250Gen, op, NAME, 1/*IsSingle*/>;

multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
VOP3_Realtriple<GFX1250Gen, op, isSingle>;

multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
Expand Down
17 changes: 17 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s

v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]

v_lshl_add_u64 v[2:3], v[4:5], 0, 1
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]

v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]

v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]

v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
17 changes: 17 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s

v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]

v_lshl_add_u64 v[2:3], v[4:5], 0, 1
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]

v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]

v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]

v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
11 changes: 11 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX125X-ERR,GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s

v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
// GFX125X-ERR-NEXT:{{^}} ^

v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
// GFX125X-ERR-NEXT:{{^}} ^
21 changes: 21 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s

0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04
# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]

0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]

0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]

0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04
# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]

0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
# GFX1250-FAKE16: {{.*}}
# GFX1250-REAL16: {{.*}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess these prefixes will be used as more instructions added.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there are 16-bit instructions there. Just not added yet.

Loading