Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 83873fb

Browse files
committed
[AMDGPU][MC] Added support of lds_direct operand
See bug 39293: https://bugs.llvm.org/show_bug.cgi?id=39293 Reviewers: artem.tamazov, rampitec Differential Revision: https://reviews.llvm.org/D57889 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353524 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent b997233 commit 83873fb

File tree

9 files changed

+301
-1
lines changed

9 files changed

+301
-1
lines changed

lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
686686
case AMDGPU::XNACK_MASK_HI:
687687
llvm_unreachable("xnack_mask registers should not be used");
688688

689+
case AMDGPU::LDS_DIRECT:
690+
llvm_unreachable("lds_direct register should not be used");
691+
689692
case AMDGPU::TBA:
690693
case AMDGPU::TBA_LO:
691694
case AMDGPU::TBA_HI:

lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
10951095
bool validateMIMGGatherDMask(const MCInst &Inst);
10961096
bool validateMIMGDataSize(const MCInst &Inst);
10971097
bool validateMIMGD16(const MCInst &Inst);
1098+
bool validateLdsDirect(const MCInst &Inst);
10981099
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
10991100
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
11001101
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -1599,6 +1600,8 @@ static unsigned getSpecialRegForName(StringRef RegName) {
15991600
.Case("vcc", AMDGPU::VCC)
16001601
.Case("flat_scratch", AMDGPU::FLAT_SCR)
16011602
.Case("xnack_mask", AMDGPU::XNACK_MASK)
1603+
.Case("lds_direct", AMDGPU::LDS_DIRECT)
1604+
.Case("src_lds_direct", AMDGPU::LDS_DIRECT)
16021605
.Case("m0", AMDGPU::M0)
16031606
.Case("scc", AMDGPU::SCC)
16041607
.Case("tba", AMDGPU::TBA)
@@ -2465,6 +2468,86 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
24652468
return true;
24662469
}
24672470

2471+
// Checks whether the lds_direct register, if present among the source
// operands of Inst, is used in a permitted way.
//
// Returns true when the instruction does not use lds_direct or uses it
// validly, and false when the use must be rejected:
//   - lds_direct appears as src1 or src2;
//   - lds_direct is src0 but the src0 register operand size is not 4;
//   - lds_direct is src0 of an SDWA instruction;
//   - lds_direct is src0 of one of the shift-'rev' opcodes listed below.
// Instructions whose TSFlags carry none of the VOP1/VOP2/VOP3/VOPC/VOP3P/SDWA
// flags are not examined here and are always accepted by this check.
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2472+
2473+
using namespace SIInstrFlags;
2474+
const unsigned Opcode = Inst.getOpcode();
2475+
const MCInstrDesc &Desc = MII.get(Opcode);
2476+
2477+
// lds_direct register is defined so that it can be used
2478+
// with 9-bit operands only. Ignore encodings which do not accept these.
2479+
if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2480+
return true;
2481+
2482+
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2483+
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2484+
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2485+
2486+
const int SrcIndices[] = { Src1Idx, Src2Idx };
2487+
2488+
// lds_direct cannot be specified as either src1 or src2.
2489+
for (int SrcIdx : SrcIndices) {
2490+
// An index of -1 is treated as "no such operand"; stop scanning, so src2
// is not inspected when src1 is absent.
if (SrcIdx == -1) break;
2491+
const MCOperand &Src = Inst.getOperand(SrcIdx);
2492+
if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2493+
return false;
2494+
}
2495+
}
2496+
2497+
// Nothing further to check when there is no src0 operand ...
if (Src0Idx == -1)
2498+
return true;
2499+
2500+
// ... or when src0 is not the lds_direct register.
const MCOperand &Src = Inst.getOperand(Src0Idx);
2501+
if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2502+
return true;
2503+
2504+
// lds_direct is specified as src0. Check additional limitations.
2505+
2506+
// FIXME: This is a workaround for bug 37943
2507+
// which allows 64-bit VOP3 opcodes to use 32-bit operands.
2508+
if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4)
2509+
return false;
2510+
2511+
// Documentation does not disable lds_direct for SDWA, but SP3 assembler does.
2512+
// FIXME: This inconsistency needs to be investigated further.
2513+
if (Desc.TSFlags & SIInstrFlags::SDWA)
2514+
return false;
2515+
2516+
// The following opcodes do not accept lds_direct which is explicitly stated
2517+
// in AMD documentation. However SP3 disables lds_direct for most other 'rev'
2518+
// opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32).
2519+
// FIXME: This inconsistency needs to be investigated further.
2520+
switch (Opcode) {
2521+
case AMDGPU::V_LSHLREV_B32_e32_si:
2522+
case AMDGPU::V_LSHLREV_B32_e64_si:
2523+
case AMDGPU::V_LSHLREV_B16_e32_vi:
2524+
case AMDGPU::V_LSHLREV_B16_e64_vi:
2525+
case AMDGPU::V_LSHLREV_B32_e32_vi:
2526+
case AMDGPU::V_LSHLREV_B32_e64_vi:
2527+
case AMDGPU::V_LSHLREV_B64_vi:
2528+
case AMDGPU::V_LSHRREV_B32_e32_si:
2529+
case AMDGPU::V_LSHRREV_B32_e64_si:
2530+
case AMDGPU::V_LSHRREV_B16_e32_vi:
2531+
case AMDGPU::V_LSHRREV_B16_e64_vi:
2532+
case AMDGPU::V_LSHRREV_B32_e32_vi:
2533+
case AMDGPU::V_LSHRREV_B32_e64_vi:
2534+
case AMDGPU::V_LSHRREV_B64_vi:
2535+
case AMDGPU::V_ASHRREV_I32_e64_si:
2536+
case AMDGPU::V_ASHRREV_I32_e32_si:
2537+
case AMDGPU::V_ASHRREV_I16_e32_vi:
2538+
case AMDGPU::V_ASHRREV_I16_e64_vi:
2539+
case AMDGPU::V_ASHRREV_I32_e32_vi:
2540+
case AMDGPU::V_ASHRREV_I32_e64_vi:
2541+
case AMDGPU::V_ASHRREV_I64_vi:
2542+
case AMDGPU::V_PK_LSHLREV_B16_vi:
2543+
case AMDGPU::V_PK_LSHRREV_B16_vi:
2544+
case AMDGPU::V_PK_ASHRREV_I16_vi:
2545+
return false;
2546+
default:
2547+
return true;
2548+
}
2549+
}
2550+
24682551
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
24692552
unsigned Opcode = Inst.getOpcode();
24702553
const MCInstrDesc &Desc = MII.get(Opcode);
@@ -2500,6 +2583,11 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
25002583

25012584
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
25022585
const SMLoc &IDLoc) {
2586+
if (!validateLdsDirect(Inst)) {
2587+
Error(IDLoc,
2588+
"invalid use of lds_direct");
2589+
return false;
2590+
}
25032591
if (!validateSOPLiteral(Inst)) {
25042592
Error(IDLoc,
25052593
"only one literal operand is allowed");

lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
781781
// ToDo: no support for execz register
782782
case 252: break;
783783
case 253: return createRegOperand(SCC);
784+
case 254: return createRegOperand(LDS_DIRECT);
784785
default: break;
785786
}
786787
return errOperand(Val, "unknown operand encoding " + Twine(Val));

lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,9 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
268268
case AMDGPU::XNACK_MASK:
269269
O << "xnack_mask";
270270
return;
271+
case AMDGPU::LDS_DIRECT:
272+
O << "src_lds_direct";
273+
return;
271274
case AMDGPU::VCC_LO:
272275
O << "vcc_lo";
273276
return;

lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
163163
// Reserve xnack_mask registers - support is not implemented in Codegen.
164164
reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
165165

166+
// Reserve lds_direct register - support is not implemented in Codegen.
167+
reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
168+
166169
// Reserve Trap Handler registers - support is not implemented in Codegen.
167170
reserveRegisterTuples(Reserved, AMDGPU::TBA);
168171
reserveRegisterTuples(Reserved, AMDGPU::TMA);

lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
7575
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
7676
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
7777

78+
def LDS_DIRECT : SIReg <"lds_direct", 254>;
79+
7880
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
7981
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
8082

@@ -409,6 +411,12 @@ def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
409411
let CopyCost = -1;
410412
}
411413

414+
// Register class whose only member is the LDS_DIRECT register. It covers the
// same value types as the other 32-bit operand classes and is marked
// non-allocatable with CopyCost = -1.
// NOTE(review): presumably this exists so LDS_DIRECT can be accepted as a
// 32-bit source operand without ever being chosen by the register
// allocator - confirm against the classes that include it.
def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
415+
(add LDS_DIRECT)> {
416+
let isAllocatable = 0;
417+
let CopyCost = -1;
418+
}
419+
412420
// Subset of SReg_32 without M0 for SMRD instructions and alike.
413421
// See comments in SIInstructions.td for more info.
414422
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -545,7 +553,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
545553
}
546554

547555
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
548-
(add VGPR_32, SReg_32)> {
556+
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
549557
let isAllocatable = 0;
550558
}
551559

test/MC/AMDGPU/lds_direct-err.s

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9
2+
3+
//---------------------------------------------------------------------------//
4+
// lds_direct may be used only with vector ALU instructions
5+
//---------------------------------------------------------------------------//
6+
7+
s_and_b32 s2, lds_direct, s1
8+
// NOGFX9: error
9+
10+
//---------------------------------------------------------------------------//
11+
// lds_direct may not be used with V_{LSHL,LSHR,ASHR}REV opcodes
12+
//---------------------------------------------------------------------------//
13+
14+
v_ashrrev_i16 v0, lds_direct, v0
15+
// NOGFX9: error
16+
17+
v_ashrrev_i32 v0, lds_direct, v0
18+
// NOGFX9: error
19+
20+
v_lshlrev_b16 v0, lds_direct, v0
21+
// NOGFX9: error
22+
23+
v_lshlrev_b32 v0, lds_direct, v0
24+
// NOGFX9: error
25+
26+
v_lshrrev_b16 v0, lds_direct, v0
27+
// NOGFX9: error
28+
29+
v_lshrrev_b32 v0, lds_direct, v0
30+
// NOGFX9: error
31+
32+
v_pk_ashrrev_i16 v0, lds_direct, v0
33+
// NOGFX9: error
34+
35+
v_pk_lshlrev_b16 v0, lds_direct, v0
36+
// NOGFX9: error
37+
38+
v_pk_lshrrev_b16 v0, lds_direct, v0
39+
// NOGFX9: error
40+
41+
//---------------------------------------------------------------------------//
42+
// lds_direct cannot be used with 64-bit and larger operands
43+
//---------------------------------------------------------------------------//
44+
45+
v_add_f64 v[0:1], lds_direct, v[0:1]
46+
// NOGFX9: error
47+
48+
//---------------------------------------------------------------------------//
49+
// Only SRC0 may specify lds_direct
50+
//---------------------------------------------------------------------------//
51+
52+
v_add_i32 v0, v0, lds_direct
53+
// NOGFX9: error
54+
55+
v_add_i32 lds_direct, v0, v0
56+
// NOGFX9: error
57+
58+
v_fma_f32 v0, v0, v0, lds_direct
59+
// NOGFX9: error

test/MC/AMDGPU/lds_direct.s

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9
2+
3+
//---------------------------------------------------------------------------//
4+
// VOP1/3
5+
//---------------------------------------------------------------------------//
6+
7+
v_mov_b32 v0, src_lds_direct
8+
// GFX9: v_mov_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x02,0x00,0x7e]
9+
10+
v_mov_b32_e64 v0, src_lds_direct
11+
// GFX9: v_mov_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00]
12+
13+
v_cvt_f64_i32 v[0:1], src_lds_direct
14+
// GFX9: v_cvt_f64_i32_e32 v[0:1], src_lds_direct ; encoding: [0xfe,0x08,0x00,0x7e]
15+
16+
v_cvt_f64_i32_e64 v[0:1], src_lds_direct
17+
// GFX9: v_cvt_f64_i32_e64 v[0:1], src_lds_direct ; encoding: [0x00,0x00,0x44,0xd1,0xfe,0x00,0x00,0x00]
18+
19+
v_mov_fed_b32 v0, src_lds_direct
20+
// GFX9: v_mov_fed_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x12,0x00,0x7e]
21+
22+
v_mov_fed_b32_e64 v0, src_lds_direct
23+
// GFX9: v_mov_fed_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x49,0xd1,0xfe,0x00,0x00,0x00]
24+
25+
v_fract_f32 v0, src_lds_direct
26+
// GFX9: v_fract_f32_e32 v0, src_lds_direct ; encoding: [0xfe,0x36,0x00,0x7e]
27+
28+
v_fract_f32_e64 v0, src_lds_direct
29+
// GFX9: v_fract_f32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x5b,0xd1,0xfe,0x00,0x00,0x00]
30+
31+
v_cvt_f16_u16 v0, src_lds_direct
32+
// GFX9: v_cvt_f16_u16_e32 v0, src_lds_direct ; encoding: [0xfe,0x72,0x00,0x7e]
33+
34+
//---------------------------------------------------------------------------//
35+
// VOP2/3
36+
//---------------------------------------------------------------------------//
37+
38+
v_cndmask_b32 v0, src_lds_direct, v0, vcc
39+
// GFX9: v_cndmask_b32_e32 v0, src_lds_direct, v0, vcc ; encoding: [0xfe,0x00,0x00,0x00]
40+
41+
v_cndmask_b32_e64 v0, src_lds_direct, v0, s[0:1]
42+
// GFX9: v_cndmask_b32_e64 v0, src_lds_direct, v0, s[0:1] ; encoding: [0x00,0x00,0x00,0xd1,0xfe,0x00,0x02,0x00]
43+
44+
v_add_f32 v0, src_lds_direct, v0
45+
// GFX9: v_add_f32_e32 v0, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x02]
46+
47+
v_add_f32_e64 v0, src_lds_direct, v0
48+
// GFX9: v_add_f32_e64 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x01,0xd1,0xfe,0x00,0x02,0x00]
49+
50+
v_mul_i32_i24 v0, src_lds_direct, v0
51+
// GFX9: v_mul_i32_i24_e32 v0, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x0c]
52+
53+
v_add_co_u32 v0, vcc, src_lds_direct, v0
54+
// GFX9: v_add_co_u32_e32 v0, vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x32]
55+
56+
//---------------------------------------------------------------------------//
57+
// VOP3
58+
//---------------------------------------------------------------------------//
59+
60+
v_add_co_u32_e64 v0, s[0:1], src_lds_direct, v0
61+
// GFX9: v_add_co_u32_e64 v0, s[0:1], src_lds_direct, v0 ; encoding: [0x00,0x00,0x19,0xd1,0xfe,0x00,0x02,0x00]
62+
63+
v_madmk_f16 v0, src_lds_direct, 0x1121, v0
64+
// GFX9: v_madmk_f16 v0, src_lds_direct, 0x1121, v0 ; encoding: [0xfe,0x00,0x00,0x48,0x21,0x11,0x00,0x00]
65+
66+
v_madak_f16 v0, src_lds_direct, v0, 0x1121
67+
// GFX9: v_madak_f16 v0, src_lds_direct, v0, 0x1121 ; encoding: [0xfe,0x00,0x00,0x4a,0x21,0x11,0x00,0x00]
68+
69+
v_mad_f32 v0, src_lds_direct, v0, v0
70+
// GFX9: v_mad_f32 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xc1,0xd1,0xfe,0x00,0x02,0x04]
71+
72+
v_fma_f32 v0, src_lds_direct, v0, v0
73+
// GFX9: v_fma_f32 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0xfe,0x00,0x02,0x04]
74+
75+
v_min3_i16 v0, src_lds_direct, v0, v0
76+
// GFX9: v_min3_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf5,0xd1,0xfe,0x00,0x02,0x04]
77+
78+
v_max3_f16 v0, src_lds_direct, v0, v0
79+
// GFX9: v_max3_f16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf7,0xd1,0xfe,0x00,0x02,0x04]
80+
81+
//---------------------------------------------------------------------------//
82+
// VOP3P
83+
//---------------------------------------------------------------------------//
84+
85+
v_pk_mad_i16 v0, src_lds_direct, v0, v0
86+
// GFX9: v_pk_mad_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c]
87+
88+
v_pk_add_i16 v0, src_lds_direct, v0
89+
// GFX9: v_pk_add_i16 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x82,0xd3,0xfe,0x00,0x02,0x18]
90+
91+
//---------------------------------------------------------------------------//
92+
// VOPC
93+
//---------------------------------------------------------------------------//
94+
95+
v_cmp_lt_f16 vcc, src_lds_direct, v0
96+
// GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c]
97+
98+
v_cmp_eq_f32 vcc, src_lds_direct, v0
99+
// GFX9: v_cmp_eq_f32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x84,0x7c]
100+
101+
v_cmpx_neq_f32 vcc, src_lds_direct, v0
102+
// GFX9: v_cmpx_neq_f32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xba,0x7c]
103+
104+
//---------------------------------------------------------------------------//
105+
// lds_direct alias
106+
//---------------------------------------------------------------------------//
107+
108+
v_cmp_lt_f16 vcc, lds_direct, v0
109+
// GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c]
110+
111+
//---------------------------------------------------------------------------//
112+
// FIXME: enable lds_direct for the following opcodes and add tests
113+
//---------------------------------------------------------------------------//
114+
115+
//v_readfirstlane_b32 s0, src_lds_direct
116+
//v_readlane_b32 s0, src_lds_direct, s0
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX9
2+
3+
# GFX9: v_mov_b32_e32 v0, src_lds_direct ; encoding: [0xfe,0x02,0x00,0x7e]
4+
0xfe,0x02,0x00,0x7e
5+
6+
# GFX9: v_mov_b32_e64 v0, src_lds_direct ; encoding: [0x00,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00]
7+
0x00,0x00,0x41,0xd1,0xfe,0x00,0x00,0x00
8+
9+
# GFX9: v_add_f32_e32 v0, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x00,0x02]
10+
0xfe,0x00,0x00,0x02
11+
12+
# GFX9: v_pk_mad_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c]
13+
0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c
14+
15+
# GFX9: v_pk_mul_lo_u16 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x81,0xd3,0xfe,0x00,0x02,0x18]
16+
0x00,0x00,0x81,0xd3,0xfe,0x00,0x02,0x18
17+
18+
# GFX9: v_cmpx_le_i32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xa6,0x7d]
19+
0xfe,0x00,0xa6,0x7d

0 commit comments

Comments
 (0)