Skip to content

Commit d3fcf31

Browse files
authored
AMDGPU: Use HasFP8ConversionInsts appropriately, NFC (#82433)
The corresponding FP8 conversion instructions are available for a subtarget if and only if that subtarget has the HasFP8ConversionInsts feature. We should not assume that all future subtargets (gfx12+) have the FP8 conversion instructions. In this patch, we use OtherPredicates to carry the HasFP8ConversionInsts feature. This is because SubtargetPredicate is not copied from pseudos to reals for DPP16 and DPP8. To avoid overriding OtherPredicates in a few places, we use the newly introduced True16Predicate to hold UseRealTrue16Insts instead. This work replaces the inadvertently closed pull request: #82024
1 parent 3ff8055 commit d3fcf31

File tree

3 files changed

+9
-6
lines changed

3 files changed

+9
-6
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
23
//
34
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -565,7 +566,7 @@ class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
565566
def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
566567
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;
567568

568-
let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0,
569+
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
569570
SchedRW = [WriteFloatCvt] in {
570571
defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
571572
defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
@@ -653,7 +654,7 @@ class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
653654
(inst_e32 $src))
654655
>;
655656

656-
let SubtargetPredicate = isGFX12Plus in {
657+
let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
657658
foreach Index = [0, 1, 2, 3] in {
658659
def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
659660
V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
@@ -670,7 +671,7 @@ class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
670671
(inst_e32 $src))
671672
>;
672673

673-
let SubtargetPredicate = isGFX12Plus in {
674+
let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
674675
foreach Index = [0, -1] in {
675676
def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
676677
V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
640640
let SubtargetPredicate = isGFX940Plus in
641641
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
642642

643-
let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0,
643+
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
644644
SchedRW = [WriteFloatCvt] in {
645645
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
646646
defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>;
@@ -667,6 +667,7 @@ class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst>
667667
!if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0)
668668
>;
669669

670+
let OtherPredicates = [HasFP8ConversionInsts] in {
670671
foreach Index = [0, -1] in {
671672
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
672673
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
@@ -676,6 +677,7 @@ foreach Index = [0, 1, 2, 3] in {
676677
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
677678
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
678679
}
680+
}
679681

680682
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
681683
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
210210
class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> :
211211
VOP3_Real <ps, Gen.Subtarget, asm_name> {
212212
let AssemblerPredicate = Gen.AssemblerPredicate;
213-
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
213+
let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
214214
let DecoderNamespace = Gen.DecoderNamespace#
215215
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
216216
}
@@ -1349,7 +1349,7 @@ class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
13491349
string opName = ps.OpName> :
13501350
VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
13511351
let AssemblerPredicate = Gen.AssemblerPredicate;
1352-
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
1352+
let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
13531353
let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
13541354
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
13551355
}

0 commit comments

Comments
 (0)