Skip to content

Commit 9a43ae5

Browse files
authored
[AMDGPU] Introduce a "new" target feature xf32-insts (#115214)
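The feature itself is not new; this change only exposes it as a target feature so that it can guard the corresponding instructions (v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32). As with the parent PR, no test is needed.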
1 parent c13258a commit 9a43ae5

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1110,6 +1110,13 @@ def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
11101110
"Target Requires Code Object V6"
11111111
>;
11121112

1113+
def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
1114+
"HasXF32Insts",
1115+
"true",
1116+
"Has instructions that support xf32 format, such as "
1117+
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
1118+
>;
1119+
11131120
// Dummy feature used to disable assembler instructions.
11141121
def FeatureDisable : SubtargetFeature<"",
11151122
"FeatureDisable","true",
@@ -1448,6 +1455,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14481455
FeatureFP8ConversionInsts,
14491456
FeatureCvtFP8VOP1Bug,
14501457
FeaturePkFmacF16Inst,
1458+
FeatureXF32Insts,
14511459
FeatureAtomicFaddRtnInsts,
14521460
FeatureAtomicFaddNoRtnInsts,
14531461
FeatureAtomicBufferGlobalPkAddF16Insts,
@@ -2289,6 +2297,9 @@ def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
22892297

22902298
def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
22912299

2300+
def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">,
2301+
AssemblerPredicate<(all_of FeatureXF32Insts)>;
2302+
22922303
// Include AMDGPU TD files
22932304
include "SISchedule.td"
22942305
include "GCNProcessors.td"

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
179179
bool HasDefaultComponentZero = false;
180180
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
181181
bool HasDefaultComponentBroadcast = false;
182+
bool HasXF32Insts = false;
182183
/// The maximum number of instructions that may be placed within an S_CLAUSE,
183184
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
184185
/// indicates a lack of S_CLAUSE support.
@@ -1302,6 +1303,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13021303
return getGeneration() == GFX12;
13031304
}
13041305

1306+
/// \returns true if the target has instructions with xf32 format support.
1307+
bool hasXF32Insts() const { return HasXF32Insts; }
1308+
13051309
/// \returns The maximum number of instructions that can be enclosed in an
13061310
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
13071311
/// instruction.

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -757,10 +757,12 @@ let Predicates = [isGFX90APlus] in {
757757
let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
758758
defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
759759
defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
760+
} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
761+
762+
let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in {
760763
defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
761764
defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
762-
763-
} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
765+
} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1
764766

765767
let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
766768
defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
@@ -1764,8 +1766,10 @@ defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx90a <0x6f>;
17641766

17651767
defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
17661768
defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
1769+
let SubtargetPredicate = HasXF32Insts in {
17671770
defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
17681771
defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
1772+
} // End SubtargetPredicate = HasXF32Insts
17691773
let SubtargetPredicate = HasFP8Insts in {
17701774
defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
17711775
defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;

0 commit comments

Comments
 (0)