Skip to content

Commit 729467a

Browse files
committed
[AMDGPU] gfx11 LDSDIR instructions MC support
Contributors: Carl Ritson <[email protected]>. Patch 8/N for upstreaming of AMDGPU gfx11 architecture. Depends on D125498. Reviewed By: critson, rampitec, #amdgpu. Differential Revision: https://reviews.llvm.org/D125820
1 parent f94a447 commit 729467a

File tree

12 files changed

+369
-2
lines changed

12 files changed

+369
-2
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
161161
ImmTyCBSZ,
162162
ImmTyABID,
163163
ImmTyEndpgm,
164+
ImmTyWaitVDST,
164165
};
165166

166167
enum ImmKindTy {
@@ -835,6 +836,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
835836
bool isS16Imm() const;
836837
bool isU16Imm() const;
837838
bool isEndpgm() const;
839+
bool isWaitVDST() const;
838840

839841
StringRef getExpressionAsToken() const {
840842
assert(isExpr());
@@ -1042,6 +1044,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
10421044
case ImmTyCBSZ: OS << "CBSZ"; break;
10431045
case ImmTyABID: OS << "ABID"; break;
10441046
case ImmTyEndpgm: OS << "Endpgm"; break;
1047+
case ImmTyWaitVDST: OS << "WaitVDST"; break;
10451048
}
10461049
}
10471050

@@ -1758,6 +1761,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17581761

17591762
OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
17601763
AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1764+
1765+
OperandMatchResultTy parseWaitVDST(OperandVector &Operands);
1766+
AMDGPUOperand::Ptr defaultWaitVDST() const;
17611767
};
17621768

17631769
struct OptionalOperand {
@@ -3969,7 +3975,7 @@ Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
39693975
const auto &Src = Inst.getOperand(SrcIdx);
39703976
if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
39713977

3972-
if (isGFX90A())
3978+
if (isGFX90A() || isGFX11Plus())
39733979
return StringRef("lds_direct is not supported on this GPU");
39743980

39753981
if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
@@ -7834,7 +7840,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
78347840
{"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
78357841
{"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
78367842
{"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7837-
{"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7843+
{"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7844+
{"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}
78387845
};
78397846

78407847
void AMDGPUAsmParser::onBeginOfFile() {
@@ -8839,3 +8846,15 @@ OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
88398846
}
88408847

88418848
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8849+
8850+
//===----------------------------------------------------------------------===//
8851+
// LDSDIR
8852+
//===----------------------------------------------------------------------===//
8853+
8854+
// Builds a wait_vdst operand with value 0: an immediate of type
// ImmTyWaitVDST created with an empty SMLoc (no source location).
// NOTE(review): presumably substituted by the matcher when wait_vdst is
// omitted from the assembly text -- confirm against the generated matcher.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
8855+
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
8856+
}
8857+
8858+
// Returns true when this operand is an immediate tagged ImmTyWaitVDST whose
// value fits in an unsigned 4-bit field (0..15); larger values are rejected.
bool AMDGPUOperand::isWaitVDST() const {
8859+
return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8860+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
//===----------------------------------------------------------------------===//
10+
// LDSDIR encoding
11+
//===----------------------------------------------------------------------===//
12+
13+
// 32-bit encoding for LDSDIR instructions.
//   op        - 2-bit opcode placed in Inst{21-20}.
//   is_direct - when set, the attr/attrchan bit ranges are left unset ('?')
//               instead of being bound to operands.
// Bit layout: [31-24] 0xce, [23-22] zero, [21-20] op, [19-16] waitvdst,
// [15-10] attr, [9-8] attrchan, [7-0] vdst.
class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
14+
// encoding fields
15+
bits<2> attrchan;
16+
bits<6> attr;
17+
bits<4> waitvdst;
18+
bits<8> vdst;
19+
20+
// encoding
21+
let Inst{31-24} = 0xce; // encoding
22+
let Inst{23-22} = 0x0; // reserved
23+
let Inst{21-20} = op;
24+
let Inst{19-16} = waitvdst;
25+
// The direct form has no attr/attrchan operands, so these bits stay unset.
let Inst{15-10} = !if(is_direct, ?, attr);
26+
let Inst{9-8} = !if(is_direct, ?, attrchan);
27+
let Inst{7-0} = vdst;
28+
}
29+
30+
//===----------------------------------------------------------------------===//
31+
// LDSDIR Classes
32+
//===----------------------------------------------------------------------===//
33+
34+
// Selects the input operand list for an LDSDIR instruction.
// The direct form takes only $waitvdst; the other form additionally takes
// $attr and $attrchan ahead of $waitvdst. The chosen dag is exposed as 'ret'.
class LDSDIR_getIns<bit direct> {
35+
dag ret = !if(direct,
36+
(ins wait_vdst:$waitvdst),
37+
(ins Attr:$attr, AttrChan:$attrchan, wait_vdst:$waitvdst)
38+
);
39+
}
40+
41+
// Properties shared by the pseudo and real LDSDIR instruction records.
//   opName - mnemonic, stored in the 'Mnemonic' field.
//   asm    - assembly string forwarded to InstSI (empty by default).
//   direct - selects the operand list via LDSDIR_getIns and is recorded in
//            'is_direct' so derived records can read it back.
// Each instruction writes a single VGPR_32 result ($vdst).
class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
42+
(outs VGPR_32:$vdst),
43+
LDSDIR_getIns<direct>.ret,
44+
asm> {
45+
// Instruction-format / counter flags carried into TSFlags by InstSI.
let LDSDIR = 1;
46+
let EXP_CNT = 1;
47+
48+
// Modelled as a load without side effects or stores.
let hasSideEffects = 0;
49+
let mayLoad = 1;
50+
let mayStore = 0;
51+
52+
string Mnemonic = opName;
53+
let UseNamedOperandTable = 1;
54+
55+
// Implicit register reads.
let Uses = [M0, EXEC];
56+
let DisableWQM = 0;
57+
let SchedRW = [WriteLDS];
58+
59+
// Remember which form this is; LDSDIR_Real reads it as lds.is_direct.
bit is_direct;
60+
let is_direct = direct;
61+
}
62+
63+
// Pseudo LDSDIR instruction: LDSDIR_Common with an empty asm string,
// registered under SIEncodingFamily.NONE and marked isPseudo/isCodeGenOnly.
class LDSDIR_Pseudo<string opName, bit direct> :
64+
LDSDIR_Common<opName, "", direct>,
65+
SIMCInstr<opName, SIEncodingFamily.NONE> {
66+
let isPseudo = 1;
67+
let isCodeGenOnly = 1;
68+
}
69+
70+
// Selects the operand portion of the assembly string, exposed as 'ret'.
// There is no separator before $attrchan or $waitvdst in either string.
// NOTE(review): presumably the operand printers emit the leading '.' and
// ' wait_vdst:' themselves -- printWaitVDST does the latter; confirm the
// AttrChan printer.
class LDSDIR_getAsm<bit direct> {
70+
string ret = !if(direct,
71+
" $vdst$waitvdst",
72+
" $vdst, $attr$attrchan$waitvdst"
73+
);
74+
}
76+
77+
// Real (encodable) LDSDIR instruction derived from a pseudo.
//   op        - 2-bit opcode forwarded to the LDSDIRe encoding.
//   lds       - the pseudo whose Mnemonic and is_direct are reused.
//   subtarget - encoding family passed to SIMCInstr.
// The asm string is the pseudo's mnemonic concatenated with the operand
// string chosen by LDSDIR_getAsm.
class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
78+
LDSDIR_Common<lds.Mnemonic,
79+
lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
80+
lds.is_direct>,
81+
SIMCInstr <lds.Mnemonic, subtarget>,
82+
LDSDIRe<op, lds.is_direct> {
83+
let isPseudo = 0;
84+
let isCodeGenOnly = 0;
85+
}
86+
87+
//===----------------------------------------------------------------------===//
88+
// LDS Direct Instructions
89+
//===----------------------------------------------------------------------===//
90+
91+
// Pseudo records; the second argument is the 'direct' bit
// (1 = only $waitvdst as input, 0 = $attr, $attrchan and $waitvdst).
def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
92+
def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
93+
94+
//===----------------------------------------------------------------------===//
95+
// GFX11+
96+
//===----------------------------------------------------------------------===//
97+
98+
// Defines the "<NAME>_gfx11" real instruction for an LDSDIR pseudo.
//   op  - 2-bit opcode for the encoding.
//   lds - the pseudo to realize; defaults to the pseudo record whose name
//         equals the defm name (looked up with !cast on NAME).
// The record uses the GFX11 encoding family, is gated on the isGFX11Plus
// assembler predicate and is placed in the "GFX11" decoder namespace.
multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
99+
def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
100+
let AssemblerPredicate = isGFX11Plus;
101+
let DecoderNamespace = "GFX11";
102+
}
103+
}
104+
105+
// gfx11 real instructions: opcode 0x0 for lds_param_load and 0x1 for
// lds_direct_load (the value lands in Inst{21-20}).
defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
106+
defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,16 @@ void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
595595
O << ", ";
596596
}
597597

598+
// Prints the wait_vdst operand of MI at index OpNo to O.
// A zero value prints nothing; any other value is emitted as " wait_vdst:"
// followed by the operand as printed by printU4ImmDecOperand.
// STI is not read in this body.
void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
599+
const MCSubtargetInfo &STI,
600+
raw_ostream &O) {
601+
// The immediate is narrowed to 8 bits before the zero test.
uint8_t Imm = MI->getOperand(OpNo).getImm();
602+
if (Imm != 0) {
603+
O << " wait_vdst:";
604+
printU4ImmDecOperand(MI, OpNo, O);
605+
}
606+
}
607+
598608
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
599609
const MCSubtargetInfo &STI,
600610
raw_ostream &O) {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
175175
raw_ostream &O);
176176
void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI,
177177
raw_ostream &O);
178+
void printWaitVDST(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
179+
raw_ostream &O);
178180

179181
void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
180182
raw_ostream &O, unsigned N);

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ enum : uint64_t {
6363
VGPRSpill = 1 << 24,
6464
SGPRSpill = 1 << 25,
6565

66+
// LDSDIR instruction format.
67+
LDSDIR = 1 << 26,
68+
6669
// High bits - other information.
6770
VM_CNT = UINT64_C(1) << 32,
6871
EXP_CNT = UINT64_C(1) << 33,

llvm/lib/Target/AMDGPU/SIInstrFormats.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class InstSI <dag outs, dag ins, string asm = "",
4848
field bit VGPRSpill = 0;
4949
field bit SGPRSpill = 0;
5050

51+
// LDSDIR instruction format.
52+
field bit LDSDIR = 0;
53+
5154
// High bits - other information.
5255
field bit VM_CNT = 0;
5356
field bit EXP_CNT = 0;
@@ -173,6 +176,8 @@ class InstSI <dag outs, dag ins, string asm = "",
173176
let TSFlags{24} = VGPRSpill;
174177
let TSFlags{25} = SGPRSpill;
175178

179+
let TSFlags{26} = LDSDIR;
180+
176181
let TSFlags{32} = VM_CNT;
177182
let TSFlags{33} = EXP_CNT;
178183
let TSFlags{34} = LGKM_CNT;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
666666
return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
667667
}
668668

669+
// Returns true when MI's instruction descriptor has the
// SIInstrFlags::LDSDIR bit set in TSFlags.
static bool isLDSDIR(const MachineInstr &MI) {
670+
return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
671+
}
672+
673+
// Opcode-based overload: returns true when the descriptor obtained via
// get(Opcode) has the SIInstrFlags::LDSDIR bit set in TSFlags.
bool isLDSDIR(uint16_t Opcode) const {
674+
return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
675+
}
676+
669677
static bool isScalarUnit(const MachineInstr &MI) {
670678
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
671679
}

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,8 @@ def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
12911291

12921292
}
12931293

1294+
def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>;
1295+
12941296
} // End OperandType = "OPERAND_IMMEDIATE"
12951297

12961298
class KImmMatchClass<int size> : AsmOperandClass {

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ include "SMInstructions.td"
3030
include "FLATInstructions.td"
3131
include "BUFInstructions.td"
3232
include "EXPInstructions.td"
33+
include "LDSDIRInstructions.td"
3334

3435
//===----------------------------------------------------------------------===//
3536
// VINTRP Instructions

llvm/test/MC/AMDGPU/gfx11_err.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,9 @@ s_delay_alu instid0(VALU_DEP_1) | (SALU_CYCLE_1)
2020

2121
s_delay_alu instid0(VALU_DEP_1) | SALU_CYCLE_1)
2222
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: expected a left parenthesis
23+
24+
lds_direct_load v15 wait_vdst:16
25+
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
26+
27+
lds_direct_load v15 wait_vdst
28+
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

llvm/test/MC/AMDGPU/ldsdir.s

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX11 %s
2+
3+
lds_direct_load v1 wait_vdst:15
4+
// GFX11: lds_direct_load v1 wait_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce]
5+
6+
lds_direct_load v2 wait_vdst:14
7+
// GFX11: lds_direct_load v2 wait_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce]
8+
9+
lds_direct_load v3 wait_vdst:13
10+
// GFX11: lds_direct_load v3 wait_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce]
11+
12+
lds_direct_load v4 wait_vdst:12
13+
// GFX11: lds_direct_load v4 wait_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce]
14+
15+
lds_direct_load v5 wait_vdst:11
16+
// GFX11: lds_direct_load v5 wait_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce]
17+
18+
lds_direct_load v6 wait_vdst:10
19+
// GFX11: lds_direct_load v6 wait_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce]
20+
21+
lds_direct_load v7 wait_vdst:9
22+
// GFX11: lds_direct_load v7 wait_vdst:9 ; encoding: [0x07,0x00,0x19,0xce]
23+
24+
lds_direct_load v8 wait_vdst:8
25+
// GFX11: lds_direct_load v8 wait_vdst:8 ; encoding: [0x08,0x00,0x18,0xce]
26+
27+
lds_direct_load v9 wait_vdst:7
28+
// GFX11: lds_direct_load v9 wait_vdst:7 ; encoding: [0x09,0x00,0x17,0xce]
29+
30+
lds_direct_load v10 wait_vdst:6
31+
// GFX11: lds_direct_load v10 wait_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce]
32+
33+
lds_direct_load v11 wait_vdst:5
34+
// GFX11: lds_direct_load v11 wait_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce]
35+
36+
lds_direct_load v12 wait_vdst:4
37+
// GFX11: lds_direct_load v12 wait_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce]
38+
39+
lds_direct_load v13 wait_vdst:3
40+
// GFX11: lds_direct_load v13 wait_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce]
41+
42+
lds_direct_load v14 wait_vdst:2
43+
// GFX11: lds_direct_load v14 wait_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce]
44+
45+
lds_direct_load v15 wait_vdst:1
46+
// GFX11: lds_direct_load v15 wait_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce]
47+
48+
lds_direct_load v16 wait_vdst:0
49+
// GFX11: lds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce]
50+
51+
lds_direct_load v17
52+
// GFX11: lds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce]
53+
54+
lds_param_load v1, attr0.x wait_vdst:15
55+
// GFX11: lds_param_load v1, attr0.x wait_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce]
56+
57+
lds_param_load v2, attr0.y wait_vdst:14
58+
// GFX11: lds_param_load v2, attr0.y wait_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce]
59+
60+
lds_param_load v3, attr0.z wait_vdst:13
61+
// GFX11: lds_param_load v3, attr0.z wait_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce]
62+
63+
lds_param_load v4, attr0.w wait_vdst:12
64+
// GFX11: lds_param_load v4, attr0.w wait_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce]
65+
66+
lds_param_load v5, attr0.x wait_vdst:11
67+
// GFX11: lds_param_load v5, attr0.x wait_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce]
68+
69+
lds_param_load v6, attr1.x wait_vdst:10
70+
// GFX11: lds_param_load v6, attr1.x wait_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce]
71+
72+
lds_param_load v7, attr2.y wait_vdst:9
73+
// GFX11: lds_param_load v7, attr2.y wait_vdst:9 ; encoding: [0x07,0x09,0x09,0xce]
74+
75+
lds_param_load v8, attr3.z wait_vdst:8
76+
// GFX11: lds_param_load v8, attr3.z wait_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce]
77+
78+
lds_param_load v9, attr4.w wait_vdst:7
79+
// GFX11: lds_param_load v9, attr4.w wait_vdst:7 ; encoding: [0x09,0x13,0x07,0xce]
80+
81+
lds_param_load v10, attr11.x wait_vdst:6
82+
// GFX11: lds_param_load v10, attr11.x wait_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce]
83+
84+
lds_param_load v11, attr22.y wait_vdst:5
85+
// GFX11: lds_param_load v11, attr22.y wait_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce]
86+
87+
lds_param_load v12, attr33.z wait_vdst:4
88+
// GFX11: lds_param_load v12, attr33.z wait_vdst:4 ; encoding: [0x0c,0x86,0x04,0xce]
89+
90+
lds_param_load v13, attr63.x wait_vdst:3
91+
// GFX11: lds_param_load v13, attr63.x wait_vdst:3 ; encoding: [0x0d,0xfc,0x03,0xce]
92+
93+
lds_param_load v14, attr63.y wait_vdst:2
94+
// GFX11: lds_param_load v14, attr63.y wait_vdst:2 ; encoding: [0x0e,0xfd,0x02,0xce]
95+
96+
lds_param_load v15, attr63.z wait_vdst:1
97+
// GFX11: lds_param_load v15, attr63.z wait_vdst:1 ; encoding: [0x0f,0xfe,0x01,0xce]
98+
99+
lds_param_load v16, attr63.w wait_vdst:0
100+
// GFX11: lds_param_load v16, attr63.w ; encoding: [0x10,0xff,0x00,0xce]
101+
102+
lds_param_load v17, attr63.w
103+
// GFX11: lds_param_load v17, attr63.w ; encoding: [0x11,0xff,0x00,0xce]

0 commit comments

Comments
 (0)