Skip to content

Commit 13e6495

Browse files
authored
[AMDGPU] Fix decoder for BF16 inline constants (#82276)
Fix #82039.
1 parent 35f4592 commit 13e6495

File tree

6 files changed

+232
-115
lines changed

6 files changed

+232
-115
lines changed

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 76 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,12 @@ static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
154154
AMDGPUDisassembler::OpWidthTy OpWidth,
155155
unsigned Imm, unsigned EncImm,
156156
bool MandatoryLiteral, unsigned ImmWidth,
157+
AMDGPU::OperandSemantics Sema,
157158
const MCDisassembler *Decoder) {
158159
assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
159160
auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
160-
return addOperand(
161-
Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral, ImmWidth));
161+
return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
162+
ImmWidth, Sema));
162163
}
163164

164165
// Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp to
@@ -174,15 +175,16 @@ template <AMDGPUDisassembler::OpWidthTy OpWidth>
174175
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
175176
const MCDisassembler *Decoder) {
176177
return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
177-
false, 0, Decoder);
178+
false, 0, AMDGPU::OperandSemantics::INT, Decoder);
178179
}
179180

180181
// Decoder for Src(9-bit encoding) registers only.
181182
template <AMDGPUDisassembler::OpWidthTy OpWidth>
182183
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
183184
uint64_t /* Addr */,
184185
const MCDisassembler *Decoder) {
185-
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0, Decoder);
186+
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
187+
AMDGPU::OperandSemantics::INT, Decoder);
186188
}
187189

188190
// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9 bits, set
@@ -191,7 +193,8 @@ static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
191193
template <AMDGPUDisassembler::OpWidthTy OpWidth>
192194
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
193195
const MCDisassembler *Decoder) {
194-
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0, Decoder);
196+
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
197+
AMDGPU::OperandSemantics::INT, Decoder);
195198
}
196199

197200
// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
@@ -200,36 +203,42 @@ template <AMDGPUDisassembler::OpWidthTy OpWidth>
200203
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
201204
uint64_t /* Addr */,
202205
const MCDisassembler *Decoder) {
203-
return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0, Decoder);
206+
return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
207+
AMDGPU::OperandSemantics::INT, Decoder);
204208
}
205209

206210
// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
207211
// register from RegClass or immediate. Registers that don't belong to RegClass
208212
// will be decoded and InstPrinter will report a warning. Immediate will be
209213
// decoded into a constant of size ImmWidth, which should match the width of the immediate used
210214
// by OperandType (important for floating point types).
211-
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth>
215+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
216+
unsigned OperandSemantics>
212217
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
213218
uint64_t /* Addr */,
214219
const MCDisassembler *Decoder) {
215-
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth, Decoder);
220+
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
221+
(AMDGPU::OperandSemantics)OperandSemantics, Decoder);
216222
}
217223

218224
// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
219225
// and decode using 'enum10' from decodeSrcOp.
220-
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth>
226+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
227+
unsigned OperandSemantics>
221228
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
222229
uint64_t /* Addr */,
223230
const MCDisassembler *Decoder) {
224231
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
225-
Decoder);
232+
(AMDGPU::OperandSemantics)OperandSemantics, Decoder);
226233
}
227234

228-
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth>
235+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
236+
unsigned OperandSemantics>
229237
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
230238
uint64_t /* Addr */,
231239
const MCDisassembler *Decoder) {
232-
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth, Decoder);
240+
return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
241+
(AMDGPU::OperandSemantics)OperandSemantics, Decoder);
233242
}
234243

235244
// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
@@ -394,8 +403,9 @@ static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
394403
const MCDisassembler *Decoder) {
395404
assert(Imm < (1 << 9) && "9-bit encoding");
396405
auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
397-
return addOperand(
398-
Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
406+
return addOperand(Inst,
407+
DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
408+
AMDGPU::OperandSemantics::FP64));
399409
}
400410

401411
#define DECODE_SDWA(DecName) \
@@ -1414,7 +1424,7 @@ static int64_t getInlineImmVal64(unsigned Imm) {
14141424
}
14151425
}
14161426

1417-
static int64_t getInlineImmVal16(unsigned Imm) {
1427+
static int64_t getInlineImmValF16(unsigned Imm) {
14181428
switch (Imm) {
14191429
case 240:
14201430
return 0x3800;
@@ -1439,9 +1449,40 @@ static int64_t getInlineImmVal16(unsigned Imm) {
14391449
}
14401450
}
14411451

1442-
MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
1443-
assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
1444-
&& Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1452+
static int64_t getInlineImmValBF16(unsigned Imm) {
1453+
switch (Imm) {
1454+
case 240:
1455+
return 0x3F00;
1456+
case 241:
1457+
return 0xBF00;
1458+
case 242:
1459+
return 0x3F80;
1460+
case 243:
1461+
return 0xBF80;
1462+
case 244:
1463+
return 0x4000;
1464+
case 245:
1465+
return 0xC000;
1466+
case 246:
1467+
return 0x4080;
1468+
case 247:
1469+
return 0xC080;
1470+
case 248: // 1 / (2 * PI)
1471+
return 0x3E22;
1472+
default:
1473+
llvm_unreachable("invalid fp inline imm");
1474+
}
1475+
}
1476+
1477+
static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1478+
return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
1479+
: getInlineImmValF16(Imm);
1480+
}
1481+
1482+
MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1483+
AMDGPU::OperandSemantics Sema) {
1484+
assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1485+
Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
14451486

14461487
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
14471488
// ImmWidth 0 is the default case, where the operand should not allow immediates.
@@ -1454,7 +1495,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
14541495
case 64:
14551496
return MCOperand::createImm(getInlineImmVal64(Imm));
14561497
case 16:
1457-
return MCOperand::createImm(getInlineImmVal16(Imm));
1498+
return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
14581499
default:
14591500
llvm_unreachable("implement me");
14601501
}
@@ -1568,7 +1609,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
15681609

15691610
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
15701611
bool MandatoryLiteral,
1571-
unsigned ImmWidth, bool IsFP) const {
1612+
unsigned ImmWidth,
1613+
AMDGPU::OperandSemantics Sema) const {
15721614
using namespace AMDGPU::EncValues;
15731615

15741616
assert(Val < 1024); // enum10
@@ -1581,14 +1623,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
15811623
: getVgprClassId(Width), Val - VGPR_MIN);
15821624
}
15831625
return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1584-
IsFP);
1626+
Sema);
15851627
}
15861628

1587-
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
1588-
unsigned Val,
1589-
bool MandatoryLiteral,
1590-
unsigned ImmWidth,
1591-
bool IsFP) const {
1629+
MCOperand
1630+
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
1631+
bool MandatoryLiteral, unsigned ImmWidth,
1632+
AMDGPU::OperandSemantics Sema) const {
15921633
// Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
15931634
// decoded earlier.
15941635
assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
@@ -1609,14 +1650,14 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
16091650
return decodeIntImmed(Val);
16101651

16111652
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1612-
return decodeFPImmed(ImmWidth, Val);
1653+
return decodeFPImmed(ImmWidth, Val, Sema);
16131654

16141655
if (Val == LITERAL_CONST) {
16151656
if (MandatoryLiteral)
16161657
// Keep a sentinel value for deferred setting
16171658
return MCOperand::createImm(LITERAL_CONST);
16181659
else
1619-
return decodeLiteralConstant(IsFP && ImmWidth == 64);
1660+
return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
16201661
}
16211662

16221663
switch (Width) {
@@ -1713,9 +1754,10 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
17131754
return errOperand(Val, "unknown operand encoding " + Twine(Val));
17141755
}
17151756

1716-
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
1717-
const unsigned Val,
1718-
unsigned ImmWidth) const {
1757+
MCOperand
1758+
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1759+
unsigned ImmWidth,
1760+
AMDGPU::OperandSemantics Sema) const {
17191761
using namespace AMDGPU::SDWA;
17201762
using namespace AMDGPU::EncValues;
17211763

@@ -1746,7 +1788,7 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
17461788
return decodeIntImmed(SVal);
17471789

17481790
if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1749-
return decodeFPImmed(ImmWidth, SVal);
1791+
return decodeFPImmed(ImmWidth, SVal, Sema);
17501792

17511793
return decodeSpecialReg32(SVal);
17521794
} else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
@@ -1756,11 +1798,11 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
17561798
}
17571799

17581800
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1759-
return decodeSDWASrc(OPW16, Val, 16);
1801+
return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
17601802
}
17611803

17621804
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1763-
return decodeSDWASrc(OPW32, Val, 32);
1805+
return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
17641806
}
17651807

17661808
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
1616
#define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
1717

18+
#include "SIDefines.h"
1819
#include "llvm/ADT/APInt.h"
1920
#include "llvm/ADT/SmallString.h"
2021
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -231,25 +232,29 @@ class AMDGPUDisassembler : public MCDisassembler {
231232
unsigned getTtmpClassId(const OpWidthTy Width) const;
232233

233234
static MCOperand decodeIntImmed(unsigned Imm);
234-
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm);
235+
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm,
236+
AMDGPU::OperandSemantics Sema);
235237

236238
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
237239
MCOperand decodeLiteralConstant(bool ExtendFP64) const;
238240

239-
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
240-
bool MandatoryLiteral = false, unsigned ImmWidth = 0,
241-
bool IsFP = false) const;
241+
MCOperand decodeSrcOp(
242+
const OpWidthTy Width, unsigned Val, bool MandatoryLiteral = false,
243+
unsigned ImmWidth = 0,
244+
AMDGPU::OperandSemantics Sema = AMDGPU::OperandSemantics::INT) const;
242245

243-
MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
244-
bool MandatoryLiteral = false,
245-
unsigned ImmWidth = 0, bool IsFP = false) const;
246+
MCOperand decodeNonVGPRSrcOp(
247+
const OpWidthTy Width, unsigned Val, bool MandatoryLiteral = false,
248+
unsigned ImmWidth = 0,
249+
AMDGPU::OperandSemantics Sema = AMDGPU::OperandSemantics::INT) const;
246250

247251
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
248252
MCOperand decodeSpecialReg32(unsigned Val) const;
249253
MCOperand decodeSpecialReg64(unsigned Val) const;
250254

251255
MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val,
252-
unsigned ImmWidth = 0) const;
256+
unsigned ImmWidth,
257+
AMDGPU::OperandSemantics Sema) const;
253258
MCOperand decodeSDWASrc16(unsigned Val) const;
254259
MCOperand decodeSDWASrc32(unsigned Val) const;
255260
MCOperand decodeSDWAVopcDst(unsigned Val) const;

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,15 @@ enum OperandType : unsigned {
269269
OPERAND_KIMM_LAST = OPERAND_KIMM16
270270

271271
};
272+
273+
// Should be in sync with the OperandSemantics defined in SIRegisterInfo.td
274+
enum OperandSemantics : unsigned {
275+
INT = 0,
276+
FP16 = 1,
277+
BF16 = 2,
278+
FP32 = 3,
279+
FP64 = 4,
280+
};
272281
}
273282

274283
// Input operand modifiers bit-masks

0 commit comments

Comments
 (0)