Skip to content

Commit 0e868af

Browse files
committed
[AMDGPU][MC][GFX11] Add validation of constant bus limitations for VOPD
Differential Revision: https://reviews.llvm.org/D133881
1 parent c89e60b commit 0e868af

File tree

3 files changed

+195
-66
lines changed

3 files changed

+195
-66
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 63 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -3468,79 +3468,76 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
34683468
}
34693469
}
34703470

3471-
bool
3472-
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3473-
const OperandVector &Operands) {
3471+
bool AMDGPUAsmParser::validateConstantBusLimitations(
3472+
const MCInst &Inst, const OperandVector &Operands) {
34743473
const unsigned Opcode = Inst.getOpcode();
34753474
const MCInstrDesc &Desc = MII.get(Opcode);
34763475
unsigned LastSGPR = AMDGPU::NoRegister;
34773476
unsigned ConstantBusUseCount = 0;
34783477
unsigned NumLiterals = 0;
34793478
unsigned LiteralSize;
34803479

3481-
if (Desc.TSFlags &
3482-
(SIInstrFlags::VOPC |
3483-
SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3484-
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3485-
SIInstrFlags::SDWA)) {
3486-
// Check special imm operands (used by madmk, etc)
3487-
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3488-
++NumLiterals;
3489-
LiteralSize = 4;
3490-
}
3491-
3492-
SmallDenseSet<unsigned> SGPRsUsed;
3493-
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3494-
if (SGPRUsed != AMDGPU::NoRegister) {
3495-
SGPRsUsed.insert(SGPRUsed);
3496-
++ConstantBusUseCount;
3497-
}
3498-
3499-
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3500-
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3501-
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3502-
3503-
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3504-
3505-
for (int OpIdx : OpIndices) {
3506-
if (OpIdx == -1) break;
3507-
3508-
const MCOperand &MO = Inst.getOperand(OpIdx);
3509-
if (usesConstantBus(Inst, OpIdx)) {
3510-
if (MO.isReg()) {
3511-
LastSGPR = mc2PseudoReg(MO.getReg());
3512-
// Pairs of registers with a partial intersections like these
3513-
// s0, s[0:1]
3514-
// flat_scratch_lo, flat_scratch
3515-
// flat_scratch_lo, flat_scratch_hi
3516-
// are theoretically valid but they are disabled anyway.
3517-
// Note that this code mimics SIInstrInfo::verifyInstruction
3518-
if (SGPRsUsed.insert(LastSGPR).second) {
3519-
++ConstantBusUseCount;
3520-
}
3521-
} else { // Expression or a literal
3522-
3523-
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3524-
continue; // special operand like VINTERP attr_chan
3525-
3526-
// An instruction may use only one literal.
3527-
// This has been validated on the previous step.
3528-
// See validateVOPLiteral.
3529-
// This literal may be used as more than one operand.
3530-
// If all these operands are of the same size,
3531-
// this literal counts as one scalar value.
3532-
// Otherwise it counts as 2 scalar values.
3533-
// See "GFX10 Shader Programming", section 3.6.2.3.
3534-
3535-
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3536-
if (Size < 4) Size = 4;
3537-
3538-
if (NumLiterals == 0) {
3539-
NumLiterals = 1;
3540-
LiteralSize = Size;
3541-
} else if (LiteralSize != Size) {
3542-
NumLiterals = 2;
3543-
}
3480+
if (!(Desc.TSFlags &
3481+
(SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3482+
SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3483+
!isVOPD(Opcode))
3484+
return true;
3485+
3486+
// Check special imm operands (used by madmk, etc)
3487+
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3488+
++NumLiterals;
3489+
LiteralSize = 4;
3490+
}
3491+
3492+
SmallDenseSet<unsigned> SGPRsUsed;
3493+
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3494+
if (SGPRUsed != AMDGPU::NoRegister) {
3495+
SGPRsUsed.insert(SGPRUsed);
3496+
++ConstantBusUseCount;
3497+
}
3498+
3499+
OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3500+
3501+
for (int OpIdx : OpIndices) {
3502+
if (OpIdx == -1)
3503+
continue;
3504+
3505+
const MCOperand &MO = Inst.getOperand(OpIdx);
3506+
if (usesConstantBus(Inst, OpIdx)) {
3507+
if (MO.isReg()) {
3508+
LastSGPR = mc2PseudoReg(MO.getReg());
3509+
// Pairs of registers with partial intersections like these
3510+
// s0, s[0:1]
3511+
// flat_scratch_lo, flat_scratch
3512+
// flat_scratch_lo, flat_scratch_hi
3513+
// are theoretically valid but they are disabled anyway.
3514+
// Note that this code mimics SIInstrInfo::verifyInstruction
3515+
if (SGPRsUsed.insert(LastSGPR).second) {
3516+
++ConstantBusUseCount;
3517+
}
3518+
} else { // Expression or a literal
3519+
3520+
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3521+
continue; // special operand like VINTERP attr_chan
3522+
3523+
// An instruction may use only one literal.
3524+
// This has been validated on the previous step.
3525+
// See validateVOPLiteral.
3526+
// This literal may be used as more than one operand.
3527+
// If all these operands are of the same size,
3528+
// this literal counts as one scalar value.
3529+
// Otherwise it counts as 2 scalar values.
3530+
// See "GFX10 Shader Programming", section 3.6.2.3.
3531+
3532+
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3533+
if (Size < 4)
3534+
Size = 4;
3535+
3536+
if (NumLiterals == 0) {
3537+
NumLiterals = 1;
3538+
LiteralSize = Size;
3539+
} else if (LiteralSize != Size) {
3540+
NumLiterals = 2;
35443541
}
35453542
}
35463543
}

llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,3 +72,72 @@ v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0,
7272
// GFX11: error: only one literal operand is allowed
7373
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162
7474
// GFX11-NEXT:{{^}} ^
75+
76+
//===----------------------------------------------------------------------===//
77+
// A VOPD instruction cannot use more than 2 scalar operands
78+
//===----------------------------------------------------------------------===//
79+
80+
// 2 different SGPRs + LITERAL
81+
82+
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
83+
// GFX11: error: invalid operand (violates constant bus restrictions)
84+
// GFX11-NEXT:{{^}}v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s75, v98
85+
// GFX11-NEXT:{{^}} ^
86+
87+
v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
88+
// GFX11: error: invalid operand (violates constant bus restrictions)
89+
// GFX11-NEXT:{{^}}v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
90+
// GFX11-NEXT:{{^}} ^
91+
92+
v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
93+
// GFX11: error: invalid operand (violates constant bus restrictions)
94+
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162
95+
// GFX11-NEXT:{{^}} ^
96+
97+
// 2 different SGPRs + VCC
98+
99+
v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
100+
// GFX11: error: invalid operand (violates constant bus restrictions)
101+
// GFX11-NEXT:{{^}}v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
102+
// GFX11-NEXT:{{^}} ^
103+
104+
v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
105+
// GFX11: error: invalid operand (violates constant bus restrictions)
106+
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2
107+
// GFX11-NEXT:{{^}} ^
108+
109+
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
110+
// GFX11: error: invalid operand (violates constant bus restrictions)
111+
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3
112+
// GFX11-NEXT:{{^}} ^
113+
114+
// SGPR + LITERAL + VCC
115+
116+
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
117+
// GFX11: error: invalid operand (violates constant bus restrictions)
118+
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe
119+
// GFX11-NEXT:{{^}} ^
120+
121+
v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
122+
// GFX11: error: invalid operand (violates constant bus restrictions)
123+
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1
124+
// GFX11-NEXT:{{^}} ^
125+
126+
v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
127+
// GFX11: error: invalid operand (violates constant bus restrictions)
128+
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162
129+
// GFX11-NEXT:{{^}} ^
130+
131+
v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
132+
// GFX11: error: invalid operand (violates constant bus restrictions)
133+
// GFX11-NEXT:{{^}}v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162
134+
// GFX11-NEXT:{{^}} ^
135+
136+
// SGPR + VCC + VCC_LO
137+
// This is a special case because the implicit VCC operand is 64 bits wide.
138+
// SP3 does not accept this instruction either.
139+
140+
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
141+
// GFX11: error: invalid operand (violates constant bus restrictions)
142+
// GFX11-NEXT:{{^}}v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3
143+
// GFX11-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx11_asm_vopd_features.s

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,3 +39,66 @@ v_dual_fmamk_f32 v122, v74, 0xfe0b, v162 :: v_dual_dot2acc_f32_f16 v24
3939

4040
v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
4141
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
42+
43+
//===----------------------------------------------------------------------===//
44+
// A VOPD instruction can use 2 scalar operands,
45+
// but implicit VCC must be counted in.
46+
//===----------------------------------------------------------------------===//
47+
48+
// 2 different SGPRs
49+
50+
v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
51+
// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
52+
53+
// SGPR + LITERAL
54+
55+
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
56+
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
57+
58+
v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
59+
// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
60+
61+
// SGPR*2 + LITERAL
62+
63+
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s74, v98
64+
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
65+
66+
// SGPR + LITERAL*2
67+
68+
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
69+
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
70+
71+
// SGPR*2 + LITERAL*2
72+
73+
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, s74, 2.741, v1
74+
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
75+
76+
// LITERAL + VCC
77+
78+
v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, v2, v3
79+
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
80+
81+
// LITERAL*2 + VCC
82+
83+
v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, 2.741, v3
84+
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
85+
86+
// LITERAL*2 + VCC*2
87+
88+
v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_cndmask_b32 v6, 0xbabe, v3
89+
// GFX11: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]
90+
91+
// SGPR*2 + VCC
92+
93+
v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3
94+
// GFX11: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]
95+
96+
// SGPR*2 + VCC*2
97+
98+
v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3
99+
// GFX11: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]
100+
101+
// VCC*2
102+
103+
v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3
104+
// GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]

0 commit comments

Comments
 (0)