Skip to content

[X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic #106370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,7 @@ namespace {
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitFPOW(SDNode *N);
SDValue visitFCANONICALIZE(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
Expand Down Expand Up @@ -1980,6 +1981,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FREEZE: return visitFREEZE(N);
case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
Expand Down Expand Up @@ -2090,6 +2092,19 @@ static SDValue getInputChainForNode(SDNode *N) {
return SDValue();
}

SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
SDValue Operand = N->getOperand(0);
EVT VT = Operand.getValueType();
SDLoc dl(N);

// Canonicalize undef to quiet NaN.
if (Operand.isUndef()) {
APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
return DAG.getConstantFP(CanonicalQNaN, dl, VT);
}
return SDValue();
}

SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// If N has two operands, where one has an input chain equal to the other,
// the 'other' chain is redundant.
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
}
}
if (Subtarget.hasAVX10_2()) {
Expand All @@ -353,6 +355,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
Expand Down Expand Up @@ -716,6 +721,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);

Expand Down Expand Up @@ -932,6 +938,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
}

setOperationAction(ISD::SETCC, MVT::f128, Custom);
Expand Down Expand Up @@ -1065,9 +1072,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);

setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);

setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
Expand Down Expand Up @@ -1128,6 +1137,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);

setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);

Expand Down Expand Up @@ -1460,6 +1470,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}

setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
Expand Down Expand Up @@ -1725,6 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);

// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
Expand Down Expand Up @@ -1804,6 +1818,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
Expand Down Expand Up @@ -32664,6 +32679,24 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}

static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
SDValue Operand = N->getOperand(0);
EVT VT = Operand.getValueType();
SDLoc dl(N);

SDValue One = DAG.getConstantFP(1.0, dl, VT);

// TODO: Fix Crash for bf16 when generating strict_fmul as it
// leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
// promote this operator's result!
SDValue Chain = DAG.getEntryNode();
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should move to generic code as the default expansion

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pawan-nirpal-031 Are you happy to work on a follow up patch?

Copy link
Contributor Author

@pawan-nirpal-031 pawan-nirpal-031 Sep 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I will address this in a follow up patch Simon.

@arsenm won't it interfere with how the other targets want to handle it? Which is why I was reluctant to place it in any common infra, in the first place. If it is feasible I will move it over there.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it will only provide a reasonable default for other targets. Targets are still free to make it legal or custom lower as they choose

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay sure then. I will move it in the following PR, should I also handle constants in the next one or we keep it for later PR?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PRs are ideally always as minimal as possible, so keep it separate

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright then, thanks for the suggestion. I will start creating the next PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @arsenm , should I do it in visitFCANONICALIZE in the generic combiner? I am not aware if there is any generic lowering!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. Usual place would be in LegalizeDAG, or a helper in TargetLowering used by LegalizeDAG

{Chain, Operand, One});
return StrictFmul;
}

static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
Expand Down Expand Up @@ -32803,6 +32836,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
Expand Down
Loading