Skip to content

Commit e6da5c2

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Add subtarget features for minimum3/maximum3 instructions (llvm#116308)
gfx12 and gfx950 managed to produce 3 different permutations of this feature. gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16. Change-Id: I18fa032af449c832fa9a6b099a5ef5039c8e57fb
1 parent 8aa77ec commit e6da5c2

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
137137
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
138138
>;
139139

140+
def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
141+
"HasMinimum3Maximum3F32",
142+
"true",
143+
"Has v_minimum3_f32 and v_maximum3_f32 instructions"
144+
>;
145+
146+
def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
147+
"HasMinimum3Maximum3F16",
148+
"true",
149+
"Has v_minimum3_f16 and v_maximum3_f16 instructions"
150+
>;
151+
140152
def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
141153
"SupportsXNACK",
142154
"true",
@@ -1253,6 +1265,7 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
12531265
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
12541266
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
12551267
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
1268+
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
12561269
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
12571270
]
12581271
>;
@@ -1990,6 +2003,15 @@ def isGFX12Plus :
19902003
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
19912004
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
19922005

2006+
def HasMinimum3Maximum3F32 :
2007+
Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
2008+
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;
2009+
2010+
def HasMinimum3Maximum3F16 :
2011+
Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
2012+
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
2013+
2014+
19932015
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
19942016
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
19952017

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
241241
bool HasVOPDInsts = false;
242242
bool HasVALUTransUseHazard = false;
243243
bool HasForceStoreSC0SC1 = false;
244-
244+
bool HasRequiredExportPriority = false;
245+
bool HasVmemWriteVgprInOrder = false;
246+
bool HasMinimum3Maximum3F32 = false;
247+
bool HasMinimum3Maximum3F16 = false;
245248
bool RequiresCOV6 = false;
246249

247250
// Dummy feature to use for assembler in tablegen.
@@ -1302,6 +1305,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13021305
/// \returns true if the target has instructions with xf32 format support.
13031306
bool hasXF32Insts() const { return HasXF32Insts; }
13041307

1308+
bool hasMinimum3Maximum3F32() const {
1309+
return HasMinimum3Maximum3F32;
1310+
}
1311+
1312+
bool hasMinimum3Maximum3F16() const {
1313+
return HasMinimum3Maximum3F16;
1314+
}
1315+
13051316
/// \returns The maximum number of instructions that can be enclosed in an
13061317
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
13071318
/// instruction.

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ let mayRaiseFPException = 0 in {
226226
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
227227
} // End mayRaiseFPException = 0
228228

229-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
229+
let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
230230
defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
231231
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
232232
} // End SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0
@@ -635,7 +635,7 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3
635635
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
636636
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
637637

638-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
638+
let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
639639
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
640640
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
641641
} // End SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0

0 commit comments

Comments
 (0)