Skip to content

Commit 22d271b

Browse files
authored
[AMDGPU] Add UniformBinFrag to SALU fminimum/fmaximum patterns. NFCI. (#142169)
SALU patterns should have UniformBinFrag because they can only handle uniform inputs. VALU patterns do not need DivergentBinFrag because they work for both uniform and divergent inputs; instead we can use AddedComplexity to ensure that SALU patterns are preferred.
1 parent af6e3c0 commit 22d271b

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -922,11 +922,11 @@ let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
922922

923923
// On GFX12 MIN/MAX instructions do not read MODE register.
924924
let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 1, isCommutable = 1,
925-
isReMaterializable = 1, SchedRW = [WriteSFPU], AddedComplexity = 17 in {
926-
def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", fminimum>;
927-
def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", fmaximum>;
928-
def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", fminimum>;
929-
def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", fmaximum>;
925+
isReMaterializable = 1, SchedRW = [WriteSFPU], AddedComplexity = 25 in {
926+
def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", UniformBinFrag<fminimum>>;
927+
def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", UniformBinFrag<fmaximum>>;
928+
def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", UniformBinFrag<fminimum>>;
929+
def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", UniformBinFrag<fmaximum>>;
930930
}
931931

932932
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -168,10 +168,10 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
168168
} // End SchedRW = [WriteIntMul]
169169

170170
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
171-
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
172-
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
173-
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
174-
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
171+
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
172+
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
173+
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
174+
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, fmaximum>;
175175

176176
let SchedRW = [WriteDoubleAdd] in {
177177
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;

0 commit comments

Comments
 (0)