Skip to content

Commit 569ef8d

Browse files
authored
[AMDGPU] Add pseudo scalar trans instructions for GFX12 (llvm#75204)
1 parent 2812cb0 commit 569ef8d

18 files changed

+2496
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,12 @@ def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint",
822822
"Has single-use VGPR hint instructions"
823823
>;
824824

825+
def FeaturePseudoScalarTrans : SubtargetFeature<"pseudo-scalar-trans",
826+
"HasPseudoScalarTrans",
827+
"true",
828+
"Has Pseudo Scalar Transcendental instructions"
829+
>;
830+
825831
//===------------------------------------------------------------===//
826832
// Subtarget Features (options and debugging)
827833
//===------------------------------------------------------------===//
@@ -1467,6 +1473,7 @@ def FeatureISAVersion12 : FeatureSet<
14671473
FeaturePackedTID,
14681474
FeatureVcmpxPermlaneHazard,
14691475
FeatureSALUFloatInsts,
1476+
FeaturePseudoScalarTrans,
14701477
FeatureVGPRSingleUseHintInsts,
14711478
FeatureMADIntraFwdBug,
14721479
FeatureScalarDwordx3Loads]>;
@@ -2009,6 +2016,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
20092016
def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">,
20102017
AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>;
20112018

2019+
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
2020+
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
2021+
20122022
def HasGDS : Predicate<"Subtarget->hasGDS()">;
20132023

20142024
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3781,14 +3781,20 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
37813781
return getDefaultMappingSOP(MI);
37823782
return getDefaultMappingVOP(MI);
37833783
}
3784+
case AMDGPU::G_FSQRT:
3785+
case AMDGPU::G_FEXP2:
3786+
case AMDGPU::G_FLOG2: {
3787+
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3788+
if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
3789+
isSALUMapping(MI))
3790+
return getDefaultMappingSOP(MI);
3791+
return getDefaultMappingVOP(MI);
3792+
}
37843793
case AMDGPU::G_SADDSAT: // FIXME: Could lower sat ops for SALU
37853794
case AMDGPU::G_SSUBSAT:
37863795
case AMDGPU::G_UADDSAT:
37873796
case AMDGPU::G_USUBSAT:
37883797
case AMDGPU::G_FMAD:
3789-
case AMDGPU::G_FSQRT:
3790-
case AMDGPU::G_FEXP2:
3791-
case AMDGPU::G_FLOG2:
37923798
case AMDGPU::G_FLDEXP:
37933799
case AMDGPU::G_FMINNUM_IEEE:
37943800
case AMDGPU::G_FMAXNUM_IEEE:
@@ -4253,12 +4259,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
42534259
case Intrinsic::amdgcn_sin:
42544260
case Intrinsic::amdgcn_cos:
42554261
case Intrinsic::amdgcn_log_clamp:
4256-
case Intrinsic::amdgcn_log:
4257-
case Intrinsic::amdgcn_exp2:
4258-
case Intrinsic::amdgcn_rcp:
42594262
case Intrinsic::amdgcn_rcp_legacy:
4260-
case Intrinsic::amdgcn_sqrt:
4261-
case Intrinsic::amdgcn_rsq:
42624263
case Intrinsic::amdgcn_rsq_legacy:
42634264
case Intrinsic::amdgcn_rsq_clamp:
42644265
case Intrinsic::amdgcn_fmul_legacy:
@@ -4315,6 +4316,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
43154316
case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
43164317
case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
43174318
return getDefaultMappingVOP(MI);
4319+
case Intrinsic::amdgcn_log:
4320+
case Intrinsic::amdgcn_exp2:
4321+
case Intrinsic::amdgcn_rcp:
4322+
case Intrinsic::amdgcn_rsq:
4323+
case Intrinsic::amdgcn_sqrt: {
4324+
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4325+
if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
4326+
isSALUMapping(MI))
4327+
return getDefaultMappingSOP(MI);
4328+
return getDefaultMappingVOP(MI);
4329+
}
43184330
case Intrinsic::amdgcn_sbfe:
43194331
case Intrinsic::amdgcn_ubfe:
43204332
if (isSALUMapping(MI))

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ DECODE_OPERAND_REG_8(VReg_512)
209209
DECODE_OPERAND_REG_8(VReg_1024)
210210

211211
DECODE_OPERAND_REG_7(SReg_32, OPW32)
212+
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
212213
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
213214
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
214215
DECODE_OPERAND_REG_7(SReg_64, OPW64)

llvm/lib/Target/AMDGPU/GCNProcessors.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,10 +284,10 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
284284
// GCN GFX12.
285285
//===----------------------------------------------------------------------===//
286286

287-
def : ProcessorModel<"gfx1200", GFX11SpeedModel,
287+
def : ProcessorModel<"gfx1200", GFX12SpeedModel,
288288
FeatureISAVersion12.Features
289289
>;
290290

291-
def : ProcessorModel<"gfx1201", GFX11SpeedModel,
291+
def : ProcessorModel<"gfx1201", GFX12SpeedModel,
292292
FeatureISAVersion12.Features
293293
>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
198198
bool ScalarizeGlobal = false;
199199
bool HasSALUFloatInsts = false;
200200
bool HasVGPRSingleUseHintInsts = false;
201+
bool HasPseudoScalarTrans = false;
201202

202203
bool HasVcmpxPermlaneHazard = false;
203204
bool HasVMEMtoScalarWriteHazard = false;
@@ -1160,6 +1161,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
11601161

11611162
bool hasVGPRSingleUseHintInsts() const { return HasVGPRSingleUseHintInsts; }
11621163

1164+
bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; }
1165+
11631166
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
11641167
/// SGPRs
11651168
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5305,6 +5305,16 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
53055305
case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
53065306
case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
53075307
case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
5308+
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5309+
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64;
5310+
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5311+
case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64;
5312+
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5313+
case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64;
5314+
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5315+
case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64;
5316+
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5317+
case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64;
53085318
}
53095319
llvm_unreachable(
53105320
"Unexpected scalar opcode without corresponding vector one!");
@@ -7189,7 +7199,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
71897199
// Use the new VALU Opcode.
71907200
auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode))
71917201
.setMIFlags(Inst.getFlags());
7192-
if (isVOP3(NewOpcode)) {
7202+
if (isVOP3(NewOpcode) && !isVOP3(Opcode)) {
71937203
// Intersperse VOP3 modifiers among the SALU operands.
71947204
NewInstr->addOperand(Inst.getOperand(0));
71957205
if (AMDGPU::getNamedOperandIdx(NewOpcode,

llvm/lib/Target/AMDGPU/SISchedule.td

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ def Write8PassDGEMM : SchedWrite;
6868
// Scalar float instructions
6969
def WriteSFPU : SchedWrite;
7070

71+
// F16 or F32 pseudo scalar transcendental instructions
72+
def WritePseudoScalarTrans : SchedWrite;
73+
7174
// FIXME: Should there be a class for instructions which are VALU
7275
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
7376
// instructions)
@@ -93,6 +96,7 @@ def SIDPFullSpeedModel : SISchedMachineModel;
9396
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
9497
def GFX10SpeedModel : SISchedMachineModel;
9598
def GFX11SpeedModel : SISchedMachineModel;
99+
def GFX12SpeedModel : SISchedMachineModel;
96100

97101
// XXX: Are the resource counts correct?
98102
def HWBranch : ProcResource<1> {
@@ -174,6 +178,7 @@ multiclass SICommonWriteRes {
174178
def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
175179

176180
def : UnsupportedWriteRes<WriteSFPU>;
181+
def : UnsupportedWriteRes<WritePseudoScalarTrans>;
177182
} // End RetireOOO = 1
178183

179184
def : ReadAdvance<MIVGPRRead, -2>;
@@ -318,6 +323,7 @@ def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
318323
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
319324

320325
def : UnsupportedWriteRes<WriteSFPU>;
326+
def : UnsupportedWriteRes<WritePseudoScalarTrans>;
321327
} // End RetireOOO = 1
322328

323329
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -351,6 +357,36 @@ def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
351357
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
352358
} // End RetireOOO = 1
353359

360+
def : UnsupportedWriteRes<WritePseudoScalarTrans>;
361+
354362
def : InstRW<[WriteCopy], (instrs COPY)>;
355363

356364
} // End SchedModel = GFX11SpeedModel
365+
366+
let SchedModel = GFX12SpeedModel in {
367+
368+
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
369+
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
370+
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
371+
def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
372+
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
373+
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
374+
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
375+
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
376+
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
377+
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
378+
def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
379+
def : HWWriteRes<WritePseudoScalarTrans, [HWVALU, HWRC], 7>;
380+
381+
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
382+
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
383+
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
384+
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
385+
def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
386+
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
387+
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
388+
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
389+
390+
def : InstRW<[WriteCopy], (instrs COPY)>;
391+
392+
} // End SchedModel = GFX12SpeedModel

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -675,19 +675,8 @@ let SubtargetPredicate = isGFX12Plus in {
675675

676676
} // End SubtargetPredicate = isGFX12Plus
677677

678-
def SelectPat : PatFrag <
679-
(ops node:$src1, node:$src2),
680-
(select SCC, $src1, $src2),
681-
[{ return !N->isDivergent(); }]
682-
>;
683-
684678
let Uses = [SCC] in {
685-
let AddedComplexity = 20 in {
686-
def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
687-
[(set i32:$sdst, (SelectPat i32:$src0, i32:$src1))]
688-
>;
689-
}
690-
679+
def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32">;
691680
def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
692681
} // End Uses = [SCC]
693682

@@ -1808,6 +1797,27 @@ def : GetFPModePat<fpmode_mask_gfx6plus>;
18081797
// SOP2 Patterns
18091798
//===----------------------------------------------------------------------===//
18101799

1800+
def UniformSelect : PatFrag<
1801+
(ops node:$src0, node:$src1),
1802+
(select SCC, $src0, $src1),
1803+
[{ return !N->isDivergent(); }]
1804+
>;
1805+
1806+
let AddedComplexity = 20 in {
1807+
def : GCNPat<
1808+
(i32 (UniformSelect i32:$src0, i32:$src1)),
1809+
(S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
1810+
>;
1811+
1812+
// TODO: The predicate should not be necessary, but enabling this pattern for
1813+
// all subtargets generates worse code in some cases.
1814+
let OtherPredicates = [HasPseudoScalarTrans] in
1815+
def : GCNPat<
1816+
(f32 (UniformSelect f32:$src0, f32:$src1)),
1817+
(S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
1818+
>;
1819+
}
1820+
18111821
// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
18121822
// case, the sgpr-copies pass will fix this to use the vector version.
18131823
def : GCNPat <

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,49 @@ let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
867867
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
868868
}
869869

870+
class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
871+
ValueType dstVt, ValueType srcVt = dstVt>
872+
: VOPProfile<[dstVt, srcVt, untyped, untyped]> {
873+
let DstRC = VOPDstOperand<Dst>;
874+
let Src0RC64 = SrcOp;
875+
876+
let HasOMod = 1;
877+
let HasModifiers = 1;
878+
}
879+
880+
def VOP_Pseudo_Scalar_F32 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f32, f32>;
881+
def VOP_Pseudo_Scalar_F16 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f16, f32, f16>;
882+
883+
let SubtargetPredicate = HasPseudoScalarTrans, TRANS = 1,
884+
isReMaterializable = 1, SchedRW = [WritePseudoScalarTrans] in {
885+
defm V_S_EXP_F32 : VOP3PseudoScalarInst<"v_s_exp_f32", VOP_Pseudo_Scalar_F32, AMDGPUexp>;
886+
defm V_S_EXP_F16 : VOP3PseudoScalarInst<"v_s_exp_f16", VOP_Pseudo_Scalar_F16>;
887+
defm V_S_LOG_F32 : VOP3PseudoScalarInst<"v_s_log_f32", VOP_Pseudo_Scalar_F32, AMDGPUlog>;
888+
defm V_S_LOG_F16 : VOP3PseudoScalarInst<"v_s_log_f16", VOP_Pseudo_Scalar_F16>;
889+
defm V_S_RCP_F32 : VOP3PseudoScalarInst<"v_s_rcp_f32", VOP_Pseudo_Scalar_F32, AMDGPUrcp>;
890+
defm V_S_RCP_F16 : VOP3PseudoScalarInst<"v_s_rcp_f16", VOP_Pseudo_Scalar_F16>;
891+
defm V_S_RSQ_F32 : VOP3PseudoScalarInst<"v_s_rsq_f32", VOP_Pseudo_Scalar_F32, AMDGPUrsq>;
892+
defm V_S_RSQ_F16 : VOP3PseudoScalarInst<"v_s_rsq_f16", VOP_Pseudo_Scalar_F16>;
893+
defm V_S_SQRT_F32 : VOP3PseudoScalarInst<"v_s_sqrt_f32", VOP_Pseudo_Scalar_F32, any_amdgcn_sqrt>;
894+
defm V_S_SQRT_F16 : VOP3PseudoScalarInst<"v_s_sqrt_f16", VOP_Pseudo_Scalar_F16>;
895+
}
896+
897+
class PseudoScalarPatF16<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat <
898+
(f16 (UniformUnaryFrag<node> (f16 (VOP3Mods0 f16:$src0, i32:$src0_modifiers,
899+
i1:$clamp, i32:$omod)))),
900+
(f16 (COPY_TO_REGCLASS (f32 (inst i32:$src0_modifiers, f16:$src0, i1:$clamp,
901+
i32:$omod)),
902+
SReg_32_XEXEC))
903+
>;
904+
905+
let SubtargetPredicate = HasPseudoScalarTrans in {
906+
def : PseudoScalarPatF16<AMDGPUexpf16, V_S_EXP_F16_e64>;
907+
def : PseudoScalarPatF16<AMDGPUlogf16, V_S_LOG_F16_e64>;
908+
def : PseudoScalarPatF16<AMDGPUrcp, V_S_RCP_F16_e64>;
909+
def : PseudoScalarPatF16<AMDGPUrsq, V_S_RSQ_F16_e64>;
910+
def : PseudoScalarPatF16<any_amdgcn_sqrt, V_S_SQRT_F16_e64>;
911+
}
912+
870913
//===----------------------------------------------------------------------===//
871914
// Integer Clamp Patterns
872915
//===----------------------------------------------------------------------===//
@@ -933,6 +976,16 @@ defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
933976
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
934977
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
935978
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
979+
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
980+
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
981+
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
982+
defm V_S_LOG_F16 : VOP3Only_Real_Base_gfx12<0x283>;
983+
defm V_S_RCP_F32 : VOP3Only_Real_Base_gfx12<0x284>;
984+
defm V_S_RCP_F16 : VOP3Only_Real_Base_gfx12<0x285>;
985+
defm V_S_RSQ_F32 : VOP3Only_Real_Base_gfx12<0x286>;
986+
defm V_S_RSQ_F16 : VOP3Only_Real_Base_gfx12<0x287>;
987+
defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>;
988+
defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>;
936989
defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
937990
defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
938991
defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,19 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f
13031303
} // end SubtargetPredicate = isGFX11Plus
13041304
}
13051305

1306+
class UniformUnaryFragOrOp<SDPatternOperator Op> {
1307+
SDPatternOperator ret = !if(!or(!isa<SDNode>(Op), !isa<PatFrags>(Op)),
1308+
UniformUnaryFrag<Op>, Op);
1309+
}
1310+
1311+
multiclass VOP3PseudoScalarInst<string OpName, VOPProfile P,
1312+
SDPatternOperator node = null_frag> {
1313+
def _e64 : VOP3_Pseudo<OpName, P, [(set P.DstVT:$vdst,
1314+
(UniformUnaryFragOrOp<node>.ret
1315+
(P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp,
1316+
i32:$omod))))]>;
1317+
}
1318+
13061319
//===----------------------------------------------------------------------===//
13071320
// VOP3 DPP
13081321
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)