Skip to content

Commit b34afa8

Browse files
committed
Combined the fp4 and fp8 queries in getFPDstSelType.
1 parent 085035e commit b34afa8

File tree

3 files changed

+15
-13
lines changed

3 files changed

+15
-13
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -920,22 +920,23 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
920920
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
921921
}
922922

923+
AMDGPU::FPType IsFP4OrFP8ConvOpc = AMDGPU::getFPDstSelType(Opcode);
923924
if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
924925
// Type 2: VOP3 instructions which write the hi bits
925926
if (TII->getNamedImmOperand(MI, AMDGPU::OpName::src0_modifiers) &
926927
SISrcMods::DST_OP_SEL)
927928
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
928929

929930
// Type 3: FP8DstSelInst with op_sel[3:2] != 0
930-
if (AMDGPU::isFP8DstSelInst(Opcode) &&
931+
if (IsFP4OrFP8ConvOpc == AMDGPU::FPType::FP8 &&
931932
(TII->getNamedImmOperand(MI, AMDGPU::OpName::src2_modifiers) &
932933
SISrcMods::OP_SEL_0))
933934
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
934935
}
935936

936937
// Special case: nop is required for all the opsel values for fp4 sr variant
937938
// cvt scale instructions
938-
if (AMDGPU::isFP4DstSelInst(Opcode))
939+
if (IsFP4OrFP8ConvOpc == AMDGPU::FPType::FP4)
939940
return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
940941

941942
return nullptr;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -664,14 +664,16 @@ bool isTrue16Inst(unsigned Opc) {
664664
return Info ? Info->IsTrue16 : false;
665665
}
666666

667-
bool isFP8DstSelInst(unsigned Opc) {
668-
const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc);
669-
return Info ? Info->HasFP8DstByteSel : false;
670-
}
667+
FPType getFPDstSelType(unsigned Opc) {
668+
const FP8DstByteSelInfo *Info8 = getFP8DstByteSelHelper(Opc);
669+
if (Info8 && Info8->HasFP8DstByteSel)
670+
return FPType::FP8;
671+
672+
const FP4DstByteSelInfo *Info4 = getFP4DstByteSelHelper(Opc);
673+
if (Info4 && Info4->HasFP4DstByteSel)
674+
return FPType::FP4;
671675

672-
bool isFP4DstSelInst(unsigned Opc) {
673-
const FP4DstByteSelInfo *Info = getFP4DstByteSelHelper(Opc);
674-
return Info ? Info->HasFP4DstByteSel : false;
676+
return FPType::None;
675677
}
676678

677679
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ static constexpr unsigned GFX12 = 1;
5555

5656
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
5757

58+
enum class FPType { None, FP4, FP8 };
59+
5860
/// \returns True if \p STI is AMDHSA.
5961
bool isHsaAbi(const MCSubtargetInfo &STI);
6062

@@ -885,10 +887,7 @@ LLVM_READONLY
885887
bool isTrue16Inst(unsigned Opc);
886888

887889
LLVM_READONLY
888-
bool isFP8DstSelInst(unsigned Opc);
889-
890-
LLVM_READONLY
891-
bool isFP4DstSelInst(unsigned Opc);
890+
FPType getFPDstSelType(unsigned Opc);
892891

893892
LLVM_READONLY
894893
bool isInvalidSingleUseConsumerInst(unsigned Opc);

0 commit comments

Comments
 (0)