Skip to content

Commit 923285b

Browse files
authored
[AMDGPU] Add gfx1150 SALU Float instructions (#66884)
1 parent 270547f commit 923285b

File tree

9 files changed

+5519
-11
lines changed

9 files changed

+5519
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,12 @@ def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1",
773773
"Has SC0 and SC1 on stores"
774774
>;
775775

776+
def FeatureSALUFloatInsts : SubtargetFeature<"salu-float",
777+
"HasSALUFloatInsts",
778+
"true",
779+
"Has SALU floating point instructions"
780+
>;
781+
776782
//===------------------------------------------------------------===//
777783
// Subtarget Features (options and debugging)
778784
//===------------------------------------------------------------===//
@@ -1364,11 +1370,12 @@ def FeatureISAVersion11_0_3 : FeatureSet<
13641370

13651371
def FeatureISAVersion11_5_0 : FeatureSet<
13661372
!listconcat(FeatureISAVersion11_Common.Features,
1367-
[])>;
1373+
[FeatureSALUFloatInsts])>;
13681374

13691375
def FeatureISAVersion11_5_1 : FeatureSet<
13701376
!listconcat(FeatureISAVersion11_Common.Features,
1371-
[FeatureGFX11FullVGPRs])>;
1377+
[FeatureSALUFloatInsts,
1378+
FeatureGFX11FullVGPRs])>;
13721379

13731380
//===----------------------------------------------------------------------===//
13741381

@@ -1869,6 +1876,9 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
18691876

18701877
def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
18711878

1879+
def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
1880+
AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
1881+
18721882
def HasGDS : Predicate<"Subtarget->hasGDS()">;
18731883

18741884
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
238238

239239
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
240240
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
241+
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
241242
DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
242243
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
243244
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
@@ -259,6 +260,7 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
259260
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
260261
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
261262
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
263+
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
262264

263265
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
264266
uint64_t Addr,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
192192
bool UnalignedDSAccess = false;
193193
bool HasPackedTID = false;
194194
bool ScalarizeGlobal = false;
195+
bool HasSALUFloatInsts = false;
195196

196197
bool HasVcmpxPermlaneHazard = false;
197198
bool HasVMEMtoScalarWriteHazard = false;
@@ -1136,6 +1137,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
11361137
// hasGFX90AInsts is also true.
11371138
bool hasGFX940Insts() const { return GFX940Insts; }
11381139

1140+
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1141+
11391142
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
11401143
/// SGPRs
11411144
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,12 +1153,21 @@ class RegOrF16_Lo128_Deferred <string RegisterClass,
11531153
// SSrc_* Operands with an SGPR or a 32-bit immediate
11541154
//===----------------------------------------------------------------------===//
11551155

1156+
def SSrc_b16 : RegOrB16 <"SReg_32", "OPERAND_REG_IMM">;
1157+
def SSrc_f16 : RegOrF16 <"SReg_32", "OPERAND_REG_IMM">;
11561158
def SSrc_b32 : RegOrB32 <"SReg_32", "OPERAND_REG_IMM">;
11571159
def SSrc_f32 : RegOrF32 <"SReg_32", "OPERAND_REG_IMM">;
11581160
def SSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_IMM">;
11591161

11601162
def SSrcOrLds_b32 : RegOrB32 <"SRegOrLds_32", "OPERAND_REG_IMM">;
11611163

1164+
//===----------------------------------------------------------------------===//
1165+
// SSrc_*_Deferred Operands with an SGPR or a 32-bit immediate for use with
1166+
// FMAMK/FMAAK
1167+
//===----------------------------------------------------------------------===//
1168+
1169+
def SSrc_f32_Deferred : RegOrF32_Deferred<"SReg_32", "OPERAND_REG_IMM">;
1170+
11621171
//===----------------------------------------------------------------------===//
11631172
// SCSrc_* Operands with an SGPR or an inline constant
11641173
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/SISchedule.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ def Write16PassMAI : SchedWrite;
6565
def Write4PassDGEMM : SchedWrite;
6666
def Write8PassDGEMM : SchedWrite;
6767

68+
// Scalar float instructions
69+
def WriteSFPU : SchedWrite;
70+
6871
// FIXME: Should there be a class for instructions which are VALU
6972
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
7073
// instructions)
@@ -128,6 +131,10 @@ class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
128131
class HWVALUWriteRes<SchedWrite write, int latency> :
129132
HWWriteRes<write, [HWVALU], latency>;
130133

134+
class UnsupportedWriteRes<SchedWrite write> : WriteRes<write, []> {
135+
let Unsupported = 1;
136+
}
137+
131138
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;
132139

133140
def MIReadVGPR : SchedReadVariant<[
@@ -165,6 +172,8 @@ multiclass SICommonWriteRes {
165172
def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
166173
let ReleaseAtCycles = [16] in
167174
def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
175+
176+
def : UnsupportedWriteRes<WriteSFPU>;
168177
} // End RetireOOO = 1
169178

170179
def : ReadAdvance<MIVGPRRead, -2>;
@@ -307,6 +316,8 @@ def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
307316
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
308317
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
309318
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
319+
320+
def : UnsupportedWriteRes<WriteSFPU>;
310321
} // End RetireOOO = 1
311322

312323
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -334,6 +345,7 @@ def : HWWriteRes<WriteBranch, [HWBranch], 32>;
334345
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
335346
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
336347
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
348+
def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
337349
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
338350
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
339351
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;

0 commit comments

Comments
 (0)