Skip to content

Commit d06c2ef

Browse files
authored
[AMDGPU] Support v_lshl_add_u64 in gfx1250 (llvm#145591)
It also brings in some DPP changes needed to define it.
1 parent 51d1385 commit d06c2ef

File tree

7 files changed

+100
-6
lines changed

7 files changed

+100
-6
lines changed

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
661661
if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
662662
break;
663663

664+
// FIXME: DecoderTableGFX125064 is not defined yet.
665+
if (isGFX1250() &&
666+
tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
667+
break;
668+
664669
if (isGFX12() &&
665670
tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
666671
Address, CS))

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
3232
let HasExtDPP = 0;
3333
}
3434

35+
let HasExt64BitDPP = 1 in {
3536
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
3637
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
3738

@@ -48,10 +49,13 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
4849
let HasExtDPP = 0;
4950
}
5051

52+
def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
53+
5154
def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
5255
let HasExtVOP3DPP = 0;
5356
let HasExtDPP = 0;
5457
}
58+
} // End HasExt64BitDPP = 1;
5559

5660
//===----------------------------------------------------------------------===//
5761
// VOP3 INTERP
@@ -722,7 +726,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
722726
// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
723727
// src0 is shifted left by 0-4 (use "0" to get ADD_U64).
724728
let SubtargetPredicate = HasLshlAddU64Inst in
725-
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
729+
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", V_LSHL_ADD_U64_PROF>;
726730

727731
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
728732
SchedRW = [WriteFloatCvt] in {
@@ -1889,6 +1893,9 @@ let AssemblerPredicate = isGFX11Plus in {
18891893
def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
18901894
}
18911895

1896+
// These instructions differ from their GFX12 variants by supporting DPP:
1897+
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
1898+
18921899
//===----------------------------------------------------------------------===//
18931900
// GFX10.
18941901
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,12 +1552,17 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
15521552
""));
15531553
}
15541554

1555-
multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
1555+
multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
1556+
list<Predicate> predicates = []> {
15561557
def _e64 : VOP3InstBase<OpName, P, node>;
1557-
let SubtargetPredicate = isGFX11Plus in {
1558-
if P.HasExtVOP3DPP then
1559-
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
1560-
} // end SubtargetPredicate = isGFX11Plus
1558+
if P.HasExtVOP3DPP then
1559+
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
1560+
let SubtargetPredicate = isGFX11Plus;
1561+
}
1562+
else if P.HasExt64BitDPP then
1563+
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
1564+
let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
1565+
}
15611566
}
15621567

15631568
class UniformUnaryFragOrOp<SDPatternOperator Op> {
@@ -1961,6 +1966,17 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
19611966
multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
19621967
VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
19631968

1969+
multiclass VOP3Only_Realtriple_with_name_gfx12_not_gfx1250<bits<10> op, string opName,
1970+
string asmName, string pseudo_mnemonic = "",
1971+
bit isSingle = 0> :
1972+
VOP3_Realtriple_with_name<GFX12Not12_50Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1973+
1974+
multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
1975+
VOP3_Real_Base<GFX1250Gen, op, NAME, 1/*IsSingle*/>;
1976+
1977+
multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
1978+
VOP3_Realtriple<GFX1250Gen, op, isSingle>;
1979+
19641980
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
19651981
string pseudo_mnemonic = "", bit isSingle = 0> :
19661982
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
3+
4+
v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
5+
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
6+
7+
v_lshl_add_u64 v[2:3], v[4:5], 0, 1
8+
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
9+
10+
v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
11+
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
12+
13+
v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
14+
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
15+
16+
v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
17+
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
3+
4+
v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
5+
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
6+
7+
v_lshl_add_u64 v[2:3], v[4:5], 0, 1
8+
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
9+
10+
v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
11+
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
12+
13+
v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
14+
// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
15+
16+
v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
17+
// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX125X-ERR,GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
2+
3+
v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
4+
// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
5+
// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
6+
// GFX125X-ERR-NEXT:{{^}} ^
7+
8+
v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
9+
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
10+
// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
11+
// GFX125X-ERR-NEXT:{{^}} ^
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
3+
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
4+
5+
0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04
6+
# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
7+
8+
0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02
9+
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
10+
11+
0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00
12+
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
13+
14+
0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04
15+
# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
16+
17+
0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
18+
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
19+
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
20+
# GFX1250-FAKE16: {{.*}}
21+
# GFX1250-REAL16: {{.*}}

0 commit comments

Comments
 (0)