Skip to content

[AMDGPU] Simplify OtherPredicates handling in MadFmaMixPats. NFC. #127044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 13, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 8 additions & 11 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,10 @@ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
Instruction mixhi_inst,
bit HasFP32Denormals> {
Instruction mixhi_inst> {
// At least one of the operands needs to be an fpextend of an f16
// for this to be worthwhile, so we need three patterns here.
// TODO: Could we use a predicate to inspect src0/1/2 instead?
let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]) in {
def : GCNPat <
(f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
Expand Down Expand Up @@ -228,13 +226,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.NONE,
(i32 (IMPLICIT_DEF)))
>;
} // End OtherPredicates

// FIXME: Special case handling for mixhi (especially for clamp)
// because dealing with the write to high half of the register is
// difficult.
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = p in {
let True16Predicate = p in {

def : GCNPat <
(build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
Expand All @@ -260,9 +257,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
VGPR_32:$elt0))
>;

} // end OtherPredicates
} // end True16Predicate

let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = UseRealTrue16Insts in {
let True16Predicate = UseRealTrue16Insts in {
def : GCNPat <
(build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
Expand Down Expand Up @@ -297,7 +294,7 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.ENABLE,
(REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
>;
} // end OtherPredicates
} // end True16Predicate
}

class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
Expand Down Expand Up @@ -330,9 +327,9 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
}
} // End FPDPRounding = 1
}
} // OtherPredicates = [NoFP32Denormals]

defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16, 0 /*HasFP32Denormals*/>;
defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // OtherPredicates = [NoFP32Denormals]
} // End SubtargetPredicate = HasMadMixInsts


Expand All @@ -353,7 +350,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16, 1 /*HasPF32Denormals*/>;
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}

// Defines patterns that extract signed 4bit from each Idx[0].
Expand Down