Skip to content

Commit e6d4fd0

Browse files
arsenm authored and tstellar committed
AMDGPU: Widen f16 minimum/maximum to v2f16 on gfx950 (llvm#128121)
Unfortunately, we only have the vector (v2f16) versions of minimum3 and maximum3. Widen f16 to v2f16 so we can lower as minimum3(x, y, y). (cherry picked from commit e729dc7)
1 parent e0c4a33 commit e6d4fd0

File tree

6 files changed

+966
-585
lines changed

6 files changed

+966
-585
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -869,8 +869,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
869869
if (Subtarget->hasMinimum3Maximum3F32())
870870
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
871871

872-
if (Subtarget->hasMinimum3Maximum3PKF16())
872+
if (Subtarget->hasMinimum3Maximum3PKF16()) {
873873
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::v2f16, Legal);
874+
875+
// If only the vector form is available, we need to widen to a vector.
876+
if (!Subtarget->hasMinimum3Maximum3F16())
877+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
878+
}
874879
}
875880

876881
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
@@ -5963,6 +5968,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
59635968
case ISD::FMINNUM:
59645969
case ISD::FMAXNUM:
59655970
return lowerFMINNUM_FMAXNUM(Op, DAG);
5971+
case ISD::FMINIMUM:
5972+
case ISD::FMAXIMUM:
5973+
return lowerFMINIMUM_FMAXIMUM(Op, DAG);
59665974
case ISD::FLDEXP:
59675975
case ISD::STRICT_FLDEXP:
59685976
return lowerFLDEXP(Op, DAG);
@@ -5984,8 +5992,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
59845992
case ISD::FMUL:
59855993
case ISD::FMINNUM_IEEE:
59865994
case ISD::FMAXNUM_IEEE:
5987-
case ISD::FMINIMUM:
5988-
case ISD::FMAXIMUM:
59895995
case ISD::FMINIMUMNUM:
59905996
case ISD::FMAXIMUMNUM:
59915997
case ISD::UADDSAT:
@@ -6840,6 +6846,34 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
68406846
return Op;
68416847
}
68426848

6849+
/// Custom lowering for ISD::FMINIMUM / ISD::FMAXIMUM.
///
/// Vector-typed operations are handed to splitBinaryVectorOp. A scalar f16
/// operation is rebuilt as the same opcode on v2f16 and the result is read
/// back from lane 0.
///
/// \param Op  The FMINIMUM/FMAXIMUM node to lower; operands 0 and 1 are used.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns The replacement value: the result of splitBinaryVectorOp for
///          vector types, otherwise an f16 EXTRACT_VECTOR_ELT of the widened
///          v2f16 operation.
SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
6850+
SelectionDAG &DAG) const {
6851+
EVT VT = Op.getValueType();
6852+
// Vector types take the generic binary-op splitting path.
if (VT.isVector())
6853+
return splitBinaryVectorOp(Op, DAG);
6854+
6855+
// From here on only scalar f16 is expected, and only on subtargets that have
// the packed (v2f16) minimum3/maximum3 but neither the scalar f16 form nor
// IEEE min/max.
assert(!Subtarget->hasIEEEMinMax() && !Subtarget->hasMinimum3Maximum3F16() &&
6856+
Subtarget->hasMinimum3Maximum3PKF16() && VT == MVT::f16 &&
6857+
"should not need to widen f16 minimum/maximum to v2f16");
6858+
6859+
// Widen f16 operation to v2f16
6860+
6861+
// fminimum f16:x, f16:y ->
6862+
// extract_vector_elt (fminimum (v2f16 (scalar_to_vector x))
6863+
// (v2f16 (scalar_to_vector y))), 0
6864+
SDLoc SL(Op);
6865+
// Place each scalar operand into a v2f16 via SCALAR_TO_VECTOR.
// NOTE(review): the second lane is not set here (presumably undefined per
// SCALAR_TO_VECTOR semantics); only lane 0 of the result is read below.
SDValue WideSrc0 =
6866+
DAG.getNode(ISD::SCALAR_TO_VECTOR, SL, MVT::v2f16, Op.getOperand(0));
6867+
SDValue WideSrc1 =
6868+
DAG.getNode(ISD::SCALAR_TO_VECTOR, SL, MVT::v2f16, Op.getOperand(1));
6869+
6870+
// Re-emit the original opcode (FMINIMUM or FMAXIMUM) on the v2f16 operands.
SDValue Widened =
6871+
DAG.getNode(Op.getOpcode(), SL, MVT::v2f16, WideSrc0, WideSrc1);
6872+
6873+
// Pull the f16 result back out of lane 0 (i32 index constant).
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::f16, Widened,
6874+
DAG.getConstant(0, SL, MVT::i32));
6875+
}
6876+
68436877
SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
68446878
bool IsStrict = Op.getOpcode() == ISD::STRICT_FLDEXP;
68456879
EVT VT = Op.getValueType();

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
146146
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
147147
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
148148
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
149+
SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
149150
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
150151
SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
151152
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)