-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Simplify OtherPredicates handling in MadFmaMixPats. NFC. #127044
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This removes some of the complexity added by ad6cd7e by setting OtherPredicates outside MadFmaMixPats rather than inside it.
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: This removes some of the complexity added by ad6cd7e by setting OtherPredicates outside MadFmaMixPats rather than inside it. Full diff: https://github.com/llvm/llvm-project/pull/127044.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5e825e7259a95..21898da1912f5 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -154,12 +154,10 @@ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
- Instruction mixhi_inst,
- bit HasFP32Denormals> {
+ Instruction mixhi_inst> {
// At least one of the operands needs to be an fpextend of an f16
// for this to be worthwhile, so we need three patterns here.
// TODO: Could we use a predicate to inspect src1/2/3 instead?
- let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]) in {
def : GCNPat <
(f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
@@ -228,13 +226,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.NONE,
(i32 (IMPLICIT_DEF)))
>;
- } // End OtherPredicates
// FIXME: Special case handling for maxhi (especially for clamp)
// because dealing with the write to high half of the register is
// difficult.
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
- let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = p in {
+ let True16Predicate = p in {
def : GCNPat <
(build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
@@ -260,9 +257,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
VGPR_32:$elt0))
>;
- } // end OtherPredicates
+ } // end True16Predicate
- let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = UseRealTrue16Insts in {
+ let True16Predicate = UseRealTrue16Insts in {
def : GCNPat <
(build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
@@ -297,7 +294,7 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.ENABLE,
(REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
>;
- } // end OtherPredicates
+ } // end True16Predicate
}
class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
@@ -330,9 +327,9 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
}
} // End FPDPRounding = 1
}
-} // OtherPredicates = [NoFP32Denormals]
-defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16, 0 /*HasFP32Denormals*/>;
+defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+} // OtherPredicates = [NoFP32Denormals]
} // End SubtargetPredicate = HasMadMixInsts
@@ -353,7 +350,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16, 1 /*HasPF32Denormals*/>;
+defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
// Defines patterns that extract signed 4bit from each Idx[0].
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. The previous style was a relic from before we had True16Predicate.
[AMDGPU] Simplify OtherPredicates handling in MadFmaMixPats. NFC. (llvm#127044) This removes some of the complexity added by ad6cd7e by setting OtherPredicates outside MadFmaMixPats rather than inside it.
[AMDGPU] Simplify OtherPredicates handling in MadFmaMixPats. NFC. (llvm#127044) This removes some of the complexity added by ad6cd7e by setting OtherPredicates outside MadFmaMixPats rather than inside it.
This removes some of the complexity added by ad6cd7e by setting
OtherPredicates outside MadFmaMixPats rather than inside it.