Skip to content

Commit ab6c3d5

Browse files
authored
[AMDGPU] Change the representation of double literals in operands (llvm#68740)
A 64-bit literal can be used as a 32-bit zero- or sign-extended operand. In the case of a double, zeroes are added to the low 32 bits. Currently the asm parser stores only the high 32 bits of a double into an operand. To support codegen as requested by llvm#67781, we need to change the representation to store the full 64-bit value so that codegen can simply add immediates to an instruction. There is some code to preserve compatibility with existing tests and asm kernels: we allow a short hex string that represents only the high 32 bits of a double value to be used as a valid literal.
1 parent b3a39a9 commit ab6c3d5

File tree

9 files changed

+71
-22
lines changed

9 files changed

+71
-22
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2141,9 +2141,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
21412141
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
21422142
"Can't encode literal as exact 64-bit floating-point operand. "
21432143
"Low 32-bits will be set to zero");
2144+
Val &= 0xffffffff00000000u;
21442145
}
21452146

2146-
Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2147+
Inst.addOperand(MCOperand::createImm(Val));
21472148
setImmKindLiteral();
21482149
return;
21492150
}
@@ -2242,7 +2243,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22422243
return;
22432244
}
22442245

2245-
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2246+
Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? Val << 32 : Lo_32(Val);
2247+
2248+
Inst.addOperand(MCOperand::createImm(Val));
22462249
setImmKindLiteral();
22472250
return;
22482251

@@ -4309,7 +4312,19 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
43094312
continue;
43104313

43114314
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4312-
uint32_t Value = static_cast<uint32_t>(MO.getImm());
4315+
uint64_t Value = static_cast<uint64_t>(MO.getImm());
4316+
bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4317+
AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4318+
bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4319+
4320+
if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4321+
Error(getLitLoc(Operands), "invalid operand for instruction");
4322+
return false;
4323+
}
4324+
4325+
if (IsFP64 && IsValid32Op)
4326+
Value = Hi_32(Value);
4327+
43134328
if (NumLiterals == 0 || LiteralValue != Value) {
43144329
LiteralValue = Value;
43154330
++NumLiterals;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
378378
return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
379379
}
380380

381+
// Decoder callback for the VSrc_f64 operand: decodes the source encoding
// `Imm` and appends the resulting operand to `Inst`.
//
// Inst    - instruction that receives the decoded operand.
// Imm     - raw source-operand encoding; asserted to fit in 9 bits.
// Addr    - not referenced in this function body.
// Decoder - disassembler instance; cast to AMDGPUDisassembler to reach
//           decodeSrcOp.
//
// Returns whatever addOperand reports for the decoded operand.
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
382+
uint64_t Addr,
383+
const MCDisassembler *Decoder) {
384+
assert(Imm < (1 << 9) && "9-bit encoding");
385+
auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
386+
// decodeSrcOp arguments: Width = OPW64, Val = Imm, MandatoryLiteral = false,
// ImmWidth = 64, IsFP = true. The last two make decodeNonVGPRSrcOp call
// decodeLiteralConstant with ExtendFP64 set, so a 32-bit literal is returned
// shifted into the high half of a 64-bit immediate.
return addOperand(
387+
Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
388+
}
389+
381390
static DecodeStatus
382391
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
383392
const MCDisassembler *Decoder) {
@@ -1219,7 +1228,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
12191228
return MCOperand::createImm(Literal);
12201229
}
12211230

1222-
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
1231+
MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
12231232
// For now all literal constants are supposed to be unsigned integer
12241233
// ToDo: deal with signed/unsigned 64-bit integer constants
12251234
// ToDo: deal with float/double constants
@@ -1229,9 +1238,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
12291238
Twine(Bytes.size()));
12301239
}
12311240
HasLiteral = true;
1232-
Literal = eatBytes<uint32_t>(Bytes);
1241+
Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1242+
if (ExtendFP64)
1243+
Literal64 <<= 32;
12331244
}
1234-
return MCOperand::createImm(Literal);
1245+
return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
12351246
}
12361247

12371248
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1448,7 +1459,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
14481459

14491460
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
14501461
bool MandatoryLiteral,
1451-
unsigned ImmWidth) const {
1462+
unsigned ImmWidth, bool IsFP) const {
14521463
using namespace AMDGPU::EncValues;
14531464

14541465
assert(Val < 1024); // enum10
@@ -1460,13 +1471,15 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
14601471
return createRegOperand(IsAGPR ? getAgprClassId(Width)
14611472
: getVgprClassId(Width), Val - VGPR_MIN);
14621473
}
1463-
return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth);
1474+
return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1475+
IsFP);
14641476
}
14651477

14661478
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
14671479
unsigned Val,
14681480
bool MandatoryLiteral,
1469-
unsigned ImmWidth) const {
1481+
unsigned ImmWidth,
1482+
bool IsFP) const {
14701483
// Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
14711484
// decoded earlier.
14721485
assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
@@ -1494,7 +1507,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
14941507
// Keep a sentinel value for deferred setting
14951508
return MCOperand::createImm(LITERAL_CONST);
14961509
else
1497-
return decodeLiteralConstant();
1510+
return decodeLiteralConstant(IsFP && ImmWidth == 64);
14981511
}
14991512

15001513
switch (Width) {

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class AMDGPUDisassembler : public MCDisassembler {
9797
const unsigned TargetMaxInstBytes;
9898
mutable ArrayRef<uint8_t> Bytes;
9999
mutable uint32_t Literal;
100+
mutable uint64_t Literal64;
100101
mutable bool HasLiteral;
101102
mutable std::optional<bool> EnableWavefrontSize32;
102103

@@ -229,15 +230,15 @@ class AMDGPUDisassembler : public MCDisassembler {
229230
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm);
230231

231232
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
232-
MCOperand decodeLiteralConstant() const;
233+
MCOperand decodeLiteralConstant(bool ExtendFP64) const;
233234

234235
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
235-
bool MandatoryLiteral = false,
236-
unsigned ImmWidth = 0) const;
236+
bool MandatoryLiteral = false, unsigned ImmWidth = 0,
237+
bool IsFP = false) const;
237238

238239
MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
239240
bool MandatoryLiteral = false,
240-
unsigned ImmWidth = 0) const;
241+
unsigned ImmWidth = 0, bool IsFP = false) const;
241242

242243
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
243244
MCOperand decodeSpecialReg32(unsigned Val) const;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
426426

427427
void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
428428
const MCSubtargetInfo &STI,
429-
raw_ostream &O) {
429+
raw_ostream &O, bool IsFP) {
430430
int64_t SImm = static_cast<int64_t>(Imm);
431431
if (SImm >= -16 && SImm <= 64) {
432432
O << SImm;
@@ -454,7 +454,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
454454
else if (Imm == 0x3fc45f306dc9c882 &&
455455
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
456456
O << "0.15915494309189532";
457-
else {
457+
else if (IsFP) {
458+
assert(AMDGPU::isValid32BitLiteral(Imm, true));
459+
O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
460+
} else {
458461
assert(isUInt<32>(Imm) || isInt<32>(Imm));
459462

460463
// In rare situations, we will have a 32-bit literal in a 64-bit
@@ -605,11 +608,13 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
605608
printImmediate32(Op.getImm(), STI, O);
606609
break;
607610
case AMDGPU::OPERAND_REG_IMM_INT64:
608-
case AMDGPU::OPERAND_REG_IMM_FP64:
609611
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
612+
printImmediate64(Op.getImm(), STI, O, false);
613+
break;
614+
case AMDGPU::OPERAND_REG_IMM_FP64:
610615
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
611616
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
612-
printImmediate64(Op.getImm(), STI, O);
617+
printImmediate64(Op.getImm(), STI, O, true);
613618
break;
614619
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
615620
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -671,7 +676,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
671676
if (RCBits == 32)
672677
printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
673678
else if (RCBits == 64)
674-
printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O);
679+
printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
675680
else
676681
llvm_unreachable("Invalid register class size");
677682
}

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ class AMDGPUInstPrinter : public MCInstPrinter {
9191
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
9292
raw_ostream &O);
9393
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
94-
raw_ostream &O);
94+
raw_ostream &O, bool IsFP);
9595
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
9696
raw_ostream &O);
9797
void printRegularOperand(const MCInst *MI, unsigned OpNo,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,10 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
411411
} else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
412412
llvm_unreachable("Must be immediate or expr");
413413

414-
support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little);
414+
if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64)
415+
Imm = Hi_32(Imm);
416+
417+
support::endian::write<uint32_t>(CB, Imm, support::endianness::little);
415418

416419
// Only one literal value allowed
417420
break;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1263,7 +1263,9 @@ def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">;
12631263
def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">;
12641264
def VSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_IMM">;
12651265
def VSrc_b64 : RegOrB64 <"VS_64", "OPERAND_REG_IMM">;
1266-
def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM">;
1266+
def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM"> {
1267+
let DecoderMethod = "decodeOperand_VSrc_f64";
1268+
}
12671269
def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">;
12681270
def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">;
12691271

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2519,6 +2519,13 @@ bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
25192519
return Lo16 == Hi16;
25202520
}
25212521

2522+
// Reports whether the 64-bit value `Val` is acceptable where only a 32-bit
// literal is available.
//
// Val    - the full 64-bit immediate value to test.
// IsFP64 - when true, `Val` is checked as a 64-bit floating-point operand;
//          when false, it is checked as an integer.
//
// Returns true if the value passes the check for the selected mode.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2523+
// FP64 case: valid only when the low 32 bits are all zero, i.e. the value
// is fully described by its high 32 bits.
if (IsFP64)
2524+
return !(Val & 0xffffffffu);
2525+
2526+
// Integer case: valid when Val passes either the unsigned or the signed
// 32-bit range check.
return isUInt<32>(Val) || isInt<32>(Val);
2527+
}
2528+
25222529
bool isArgPassedInSGPR(const Argument *A) {
25232530
const Function *F = A->getParent();
25242531

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,9 @@ bool isInlinableIntLiteralV216(int32_t Literal);
12901290
LLVM_READNONE
12911291
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
12921292

1293+
LLVM_READNONE
1294+
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1295+
12931296
bool isArgPassedInSGPR(const Argument *Arg);
12941297

12951298
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

0 commit comments

Comments
 (0)