-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Stop using SDWA DecoderNamespaces. NFCI. #82233
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
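@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: 64-bit SDWA encodings have to be checked first because their first 32 bits are a special case of the corresponding 32-bit non-SDWA encoding of the same instruction. But all 64-bit encodings are checked first, so we don't need special handling for SDWA. Full diff: https://github.com/llvm/llvm-project/pull/82233.diff 5 Files Affected: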
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 98988f881f1b44..014a83b345ffd1 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -447,8 +447,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
raw_ostream &CS) const {
- bool IsSDWA = false;
-
unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -562,15 +560,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
}
- Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
-
- Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
-
- Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
-
if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
if (Res)
@@ -585,6 +574,52 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res)
break;
}
+
+ Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
+ Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
+ if (Res)
+ break;
+ }
+
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
+ if (Res)
+ break;
+ }
+
+ Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI,
+ QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI,
+ QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS);
+ if (Res)
+ break;
}
// Reinitialize Bytes as DPP64 could have eaten too much
@@ -624,49 +659,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Address, CS);
if (Res)
break;
-
- if (Bytes.size() < 4) break;
- const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
-
- if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
- Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
- if (Res)
- break;
- }
-
- if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
- Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
- if (Res)
- break;
- }
-
- Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
- Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
- Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS);
} while (false);
if (Res && AMDGPU::isMAC(MI.getOpcode())) {
@@ -764,7 +756,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
Res = convertVINTERPInst(MI);
- if (Res && IsSDWA)
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA))
Res = convertSDWAInst(MI);
int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1486df04ed950b..0fe2845f8edc31 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -2571,11 +2571,13 @@ multiclass SOPP_Real_32_gfx11_Renamed_gfx12<bits<7> op, string gfx12_name> :
multiclass SOPP_Real_With_Relaxation_gfx12<bits<7> op> {
defm "" : SOPP_Real_32_gfx12<op>;
+ let isCodeGenOnly = 1 in
defm _pad_s_nop : SOPP_Real_64_gfx12<op>;
}
multiclass SOPP_Real_With_Relaxation_gfx11<bits<7> op> {
defm "" : SOPP_Real_32_gfx11<op>;
+ let isCodeGenOnly = 1 in
defm _pad_s_nop : SOPP_Real_64_gfx11<op>;
}
@@ -2697,6 +2699,7 @@ multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> :
//relaxation for insts with no operands not implemented
multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
defm "" : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
+ let isCodeGenOnly = 1 in
defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5461c645e608fe..6081e167fd939e 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -980,15 +980,11 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
- let DecoderNamespace = "SDWA10";
- }
+ VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
- def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
- let DecoderNamespace = "SDWA10";
- }
+ def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>;
}
multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index eba9bf64884ec8..4437d5f2a03338 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1740,15 +1740,11 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
- let DecoderNamespace = "SDWA10";
- }
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
- let DecoderNamespace = "SDWA10";
- }
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>;
}
multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
@@ -1777,35 +1773,33 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let AsmString = asmName # ps.AsmOperands;
}
}
- let DecoderNamespace = "SDWA10" in {
- multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
- string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
- def _sdwa_gfx10 :
- VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
- VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
- VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
- let AsmString = asmName # ps.AsmOperands;
- }
- }
- multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
- string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
- VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP16;
+ multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # ps.AsmOperands;
}
+ }
+ multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16;
}
- multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
- string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
- def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
- VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8";
- }
+ }
+ multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8";
}
- } // End DecoderNamespace = "SDWA10"
+ }
//===------------------------------ VOP2be ------------------------------===//
multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
@@ -1832,7 +1826,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
- let DecoderNamespace = "SDWA10";
}
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_w32_gfx10 :
@@ -1841,9 +1834,8 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
let isAsmParserOnly = 1;
- let DecoderNamespace = "SDWA10";
let WaveSizePredicate = isWave32;
- }
+ }
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_w64_gfx10 :
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
@@ -1851,7 +1843,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
let AsmString = asmName # Ps.AsmOperands;
let isAsmParserOnly = 1;
- let DecoderNamespace = "SDWA10";
let WaveSizePredicate = isWave64;
}
}
@@ -1861,7 +1852,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
let AsmString = asmName # !subst(", vcc", "", AsmDPP);
- let DecoderNamespace = "SDWA10";
}
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp_w32_gfx10 :
@@ -2305,7 +2295,7 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
}
}
@@ -2329,7 +2319,7 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
}
}
@@ -2489,7 +2479,7 @@ let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
def _dpp_gfx90a :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
}
}
} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f2bb58ed4c3b56..4d10f1a8ec9410 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -616,7 +616,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let AssemblerPredicate = HasSDWA;
let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA";
+ let DecoderNamespace = "GFX8";
VOPProfile Pfl = P;
}
@@ -672,7 +672,7 @@ class Base_VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let AssemblerPredicate = HasSDWA9;
let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
// Copy relevant pseudo op flags
let AsmMatchConverter = ps.AsmMatchConverter;
@@ -693,7 +693,7 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
class Base_VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : Base_VOP_SDWA9_Real<ps> {
let SubtargetPredicate = HasSDWA10;
let AssemblerPredicate = HasSDWA10;
- let DecoderNamespace = "SDWA10";
+ let DecoderNamespace = "GFX10";
}
class VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> :
|
This PR also includes the prerequisite #82014 as the first commit. Please only review the second commit.
64-bit SDWA encodings have to be checked first because their first 32 bits are a special case of the corresponding 32-bit non-SDWA encoding of the same instruction. But all 64-bit encodings are checked first, so we don't need special handling for SDWA.
7ad5baa
to
94c6cbc
Compare
Rebased on #82014.
64-bit SDWA encodings have to be checked first because their first 32
bits are a special case of the corresponding 32-bit non-SDWA encoding of
the same instruction. But all 64-bit encodings are checked first, so we
don't need special handling for SDWA.