-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Fix packed 16-bit inline constants #76522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -460,56 +460,84 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, | |
} | ||
} | ||
|
||
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, | ||
const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
int16_t SImm = static_cast<int16_t>(Imm); | ||
if (isInlinableIntLiteral(SImm)) { | ||
O << SImm; | ||
return; | ||
} | ||
|
||
// This must accept a 32-bit immediate value to correctly handle packed 16-bit | ||
// operations. | ||
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
if (Imm == 0x3C00) | ||
O<< "1.0"; | ||
O << "1.0"; | ||
else if (Imm == 0xBC00) | ||
O<< "-1.0"; | ||
O << "-1.0"; | ||
else if (Imm == 0x3800) | ||
O<< "0.5"; | ||
O << "0.5"; | ||
else if (Imm == 0xB800) | ||
O<< "-0.5"; | ||
O << "-0.5"; | ||
else if (Imm == 0x4000) | ||
O<< "2.0"; | ||
O << "2.0"; | ||
else if (Imm == 0xC000) | ||
O<< "-2.0"; | ||
O << "-2.0"; | ||
else if (Imm == 0x4400) | ||
O<< "4.0"; | ||
O << "4.0"; | ||
else if (Imm == 0xC400) | ||
O<< "-4.0"; | ||
else if (Imm == 0x3118 && | ||
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) { | ||
O << "-4.0"; | ||
else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) | ||
O << "0.15915494"; | ||
} else { | ||
uint64_t Imm16 = static_cast<uint16_t>(Imm); | ||
O << formatHex(Imm16); | ||
} | ||
} | ||
else | ||
return false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just print it here and not handle it in both places below? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For this one, there is actually a very subtle difference between how it's used in
(One might argue that perhaps |
||
|
||
void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, | ||
const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
uint16_t Lo16 = static_cast<uint16_t>(Imm); | ||
printImmediate16(Lo16, STI, O); | ||
return true; | ||
} | ||
|
||
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, | ||
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, | ||
const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
int16_t SImm = static_cast<int16_t>(Imm); | ||
if (isInlinableIntLiteral(SImm)) { | ||
O << SImm; | ||
return; | ||
} | ||
|
||
uint16_t HImm = static_cast<uint16_t>(Imm); | ||
if (printImmediateFloat16(HImm, STI, O)) | ||
return; | ||
|
||
uint64_t Imm16 = static_cast<uint16_t>(Imm); | ||
O << formatHex(Imm16); | ||
} | ||
|
||
void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType, | ||
const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
int32_t SImm = static_cast<int32_t>(Imm); | ||
if (SImm >= -16 && SImm <= 64) { | ||
if (isInlinableIntLiteral(SImm)) { | ||
O << SImm; | ||
return; | ||
} | ||
|
||
switch (OpType) { | ||
case AMDGPU::OPERAND_REG_IMM_V2INT16: | ||
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: | ||
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: | ||
if (printImmediateFloat32(Imm, STI, O)) | ||
return; | ||
break; | ||
case AMDGPU::OPERAND_REG_IMM_V2FP16: | ||
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: | ||
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: | ||
if (isUInt<16>(Imm) && | ||
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O)) | ||
return; | ||
break; | ||
default: | ||
llvm_unreachable("bad operand type"); | ||
} | ||
|
||
O << formatHex(static_cast<uint64_t>(Imm)); | ||
} | ||
|
||
bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm, | ||
const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
if (Imm == llvm::bit_cast<uint32_t>(0.0f)) | ||
O << "0.0"; | ||
else if (Imm == llvm::bit_cast<uint32_t>(1.0f)) | ||
|
@@ -532,7 +560,24 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, | |
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) | ||
O << "0.15915494"; | ||
else | ||
O << formatHex(static_cast<uint64_t>(Imm)); | ||
return false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one doesn't have the casting problem, but I wanted to keep consistency between the two methods. |
||
|
||
return true; | ||
} | ||
|
||
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, | ||
const MCSubtargetInfo &STI, | ||
raw_ostream &O) { | ||
int32_t SImm = static_cast<int32_t>(Imm); | ||
if (isInlinableIntLiteral(SImm)) { | ||
O << SImm; | ||
return; | ||
} | ||
|
||
if (printImmediateFloat32(Imm, STI, O)) | ||
return; | ||
|
||
O << formatHex(static_cast<uint64_t>(Imm)); | ||
} | ||
|
||
void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, | ||
|
@@ -741,25 +786,11 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, | |
break; | ||
case AMDGPU::OPERAND_REG_IMM_V2INT16: | ||
case AMDGPU::OPERAND_REG_IMM_V2FP16: | ||
if (!isUInt<16>(Op.getImm()) && | ||
STI.hasFeature(AMDGPU::FeatureVOP3Literal)) { | ||
printImmediate32(Op.getImm(), STI, O); | ||
break; | ||
} | ||
|
||
// Deal with 16-bit FP inline immediates not working. | ||
if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) { | ||
printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O); | ||
break; | ||
} | ||
[[fallthrough]]; | ||
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: | ||
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: | ||
printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O); | ||
break; | ||
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: | ||
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: | ||
printImmediateV216(Op.getImm(), STI, O); | ||
printImmediateV216(Op.getImm(), OpTy, STI, O); | ||
break; | ||
case MCOI::OPERAND_UNKNOWN: | ||
case MCOI::OPERAND_PCREL: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a separate change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume this is done in this patch to avoid having to special case these instructions in the fold logic. If so, its ok with me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's right.