Skip to content

Commit c36718f

Browse files
committed
[AMDGPU][MC] Add pseudo scalar transcendental instructions for GFX12
1 parent 295415e commit c36718f

File tree

7 files changed

+1193
-0
lines changed

7 files changed

+1193
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,12 @@ def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint",
822822
"Has single-use VGPR hint instructions"
823823
>;
824824

825+
// Subtarget feature "pseudo-scalar-trans". Enabling it sets the subtarget
// field HasPseudoScalarTrans to "true". The four arguments are, in order:
// the feature name, the field it sets, the value written to that field, and
// the description string.
def FeaturePseudoScalarTrans : SubtargetFeature<"pseudo-scalar-trans",
826+
"HasPseudoScalarTrans",
827+
"true",
828+
"Has Pseudo Scalar Transcendental instructions"
829+
>;
830+
825831
//===------------------------------------------------------------===//
826832
// Subtarget Features (options and debugging)
827833
//===------------------------------------------------------------===//
@@ -1467,6 +1473,7 @@ def FeatureISAVersion12 : FeatureSet<
14671473
FeaturePackedTID,
14681474
FeatureVcmpxPermlaneHazard,
14691475
FeatureSALUFloatInsts,
1476+
FeaturePseudoScalarTrans,
14701477
FeatureVGPRSingleUseHintInsts,
14711478
FeatureMADIntraFwdBug,
14721479
FeatureScalarDwordx3Loads]>;
@@ -2009,6 +2016,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
20092016
def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">,
20102017
AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>;
20112018

2019+
// Predicate guarding the pseudo scalar transcendental instructions. The
// string is the C++ condition (Subtarget->hasPseudoScalarTrans()); the
// AssemblerPredicate additionally requires FeaturePseudoScalarTrans.
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
2020+
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
2021+
20122022
def HasGDS : Predicate<"Subtarget->hasGDS()">;
20132023

20142024
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ DECODE_OPERAND_REG_8(VReg_512)
209209
DECODE_OPERAND_REG_8(VReg_1024)
210210

211211
DECODE_OPERAND_REG_7(SReg_32, OPW32)
212+
// NOTE(review): presumably instantiates the operand decoder for the
// SReg_32_XEXEC register class with 32-bit operand width (OPW32), mirroring
// the neighbouring SReg_32* entries -- confirm against the macro definition.
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
212213
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
213214
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
214215
DECODE_OPERAND_REG_7(SReg_64, OPW64)

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
198198
bool ScalarizeGlobal = false;
199199
bool HasSALUFloatInsts = false;
200200
bool HasVGPRSingleUseHintInsts = false;
201+
bool HasPseudoScalarTrans = false;
201202

202203
bool HasVcmpxPermlaneHazard = false;
203204
bool HasVMEMtoScalarWriteHazard = false;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,33 @@ let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
840840
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
841841
}
842842

843+
// Operand profile shared by the pseudo scalar instructions.
//   Dst    - register class wrapped in VOPDstOperand to form DstRC.
//   SrcOp  - register operand assigned to Src0RC64.
//   dstVt  - value type of the result.
//   srcVt  - value type of the single source; defaults to dstVt.
// The remaining two VOPProfile type slots are 'untyped', i.e. only one
// source type is supplied. The profile also sets HasOMod and HasModifiers.
class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
844+
ValueType dstVt, ValueType srcVt = dstVt>
845+
: VOPProfile<[dstVt, srcVt, untyped, untyped]> {
846+
let DstRC = VOPDstOperand<Dst>;
847+
let Src0RC64 = SrcOp;
848+
849+
let HasOMod = 1;
850+
let HasModifiers = 1;
851+
}
852+
853+
// Concrete profiles: both write an SReg_32_XEXEC destination. The F32
// profile uses f32 for destination and source; the F16 profile takes an f16
// source (SSrc_f16) but declares an f32 destination value type.
// NOTE(review): the f32 destination type for the F16 profile looks
// deliberate (32-bit scalar destination register) -- confirm.
def VOP_Pseudo_Scalar_F32 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f32, f32>;
854+
def VOP_Pseudo_Scalar_F16 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f16, f32, f16>;
855+
856+
// Pseudo definitions of the v_s_* instructions (exp, log, rcp, rsq, sqrt),
// each in an F32 and an F16 flavour. Every record in this block is gated by
// HasPseudoScalarTrans and has TRANS = 1 and isReMaterializable = 1.
let SubtargetPredicate = HasPseudoScalarTrans, TRANS = 1,
857+
isReMaterializable = 1 in {
858+
defm V_S_EXP_F32 : VOP3PseudoScalarInst<"v_s_exp_f32", VOP_Pseudo_Scalar_F32>;
859+
defm V_S_EXP_F16 : VOP3PseudoScalarInst<"v_s_exp_f16", VOP_Pseudo_Scalar_F16>;
860+
defm V_S_LOG_F32 : VOP3PseudoScalarInst<"v_s_log_f32", VOP_Pseudo_Scalar_F32>;
861+
defm V_S_LOG_F16 : VOP3PseudoScalarInst<"v_s_log_f16", VOP_Pseudo_Scalar_F16>;
862+
defm V_S_RCP_F32 : VOP3PseudoScalarInst<"v_s_rcp_f32", VOP_Pseudo_Scalar_F32>;
863+
defm V_S_RCP_F16 : VOP3PseudoScalarInst<"v_s_rcp_f16", VOP_Pseudo_Scalar_F16>;
864+
defm V_S_RSQ_F32 : VOP3PseudoScalarInst<"v_s_rsq_f32", VOP_Pseudo_Scalar_F32>;
865+
defm V_S_RSQ_F16 : VOP3PseudoScalarInst<"v_s_rsq_f16", VOP_Pseudo_Scalar_F16>;
866+
defm V_S_SQRT_F32 : VOP3PseudoScalarInst<"v_s_sqrt_f32", VOP_Pseudo_Scalar_F32>;
867+
defm V_S_SQRT_F16 : VOP3PseudoScalarInst<"v_s_sqrt_f16", VOP_Pseudo_Scalar_F16>;
868+
}
869+
843870
//===----------------------------------------------------------------------===//
844871
// Integer Clamp Patterns
845872
//===----------------------------------------------------------------------===//
@@ -906,6 +933,16 @@ defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
906933
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
907934
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
908935
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
936+
// GFX12 real definitions for the v_s_* pseudo scalar instructions, using
// consecutive values 0x280..0x289 with F32/F16 variants interleaved.
// NOTE(review): the hex argument is presumably the VOP3 opcode -- confirm
// against VOP3Only_Real_Base_gfx12.
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
937+
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
938+
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
939+
defm V_S_LOG_F16 : VOP3Only_Real_Base_gfx12<0x283>;
940+
defm V_S_RCP_F32 : VOP3Only_Real_Base_gfx12<0x284>;
941+
defm V_S_RCP_F16 : VOP3Only_Real_Base_gfx12<0x285>;
942+
defm V_S_RSQ_F32 : VOP3Only_Real_Base_gfx12<0x286>;
943+
defm V_S_RSQ_F16 : VOP3Only_Real_Base_gfx12<0x287>;
944+
defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>;
945+
defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>;
909946
defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
910947
defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
911948
defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,10 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f
13031303
} // end SubtargetPredicate = isGFX11Plus
13041304
}
13051305

1306+
// Defines a single VOP3 pseudo record, suffixed _e64, for a pseudo scalar
// instruction.
//   OpName - assembly mnemonic passed to VOP3_Pseudo.
//   P      - operand profile passed to VOP3_Pseudo.
// Unlike VOP3Inst, it takes no SDPatternOperator parameter.
multiclass VOP3PseudoScalarInst<string OpName, VOPProfile P> {
1307+
def _e64 : VOP3_Pseudo<OpName, P>;
1308+
}
1309+
13061310
//===----------------------------------------------------------------------===//
13071311
// VOP3 DPP
13081312
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)