-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Adding lowerings for vector ISD::LRINT and ISD::LLRINT #90065
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1092,6 +1092,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
setOperationAction(ISD::FABS, MVT::v2f64, Custom); | ||
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); | ||
|
||
setOperationAction(ISD::LRINT, MVT::v4f32, Custom); | ||
|
||
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { | ||
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); | ||
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); | ||
|
@@ -1431,6 +1433,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
setOperationAction(ISD::FMINIMUM, VT, Custom); | ||
} | ||
|
||
setOperationAction(ISD::LRINT, MVT::v8f32, Custom); | ||
setOperationAction(ISD::LRINT, MVT::v4f64, Custom); | ||
|
||
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted | ||
// even though v8i16 is a legal type. | ||
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); | ||
|
@@ -1731,6 +1736,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) | ||
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not qualified to review this line, as I don't know what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's mapped to |
||
if (Subtarget.hasDQI() && Subtarget.hasVLX()) { | ||
RKSimon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { | ||
setOperationAction(ISD::LRINT, VT, Legal); | ||
setOperationAction(ISD::LLRINT, VT, Legal); | ||
} | ||
} | ||
|
||
// This block controls legalization for 512-bit operations with 8/16/32/64 bit | ||
// elements. 512-bits can be disabled based on prefer-vector-width and | ||
|
@@ -1765,6 +1776,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
setOperationAction(ISD::STRICT_FMA, VT, Legal); | ||
setOperationAction(ISD::FCOPYSIGN, VT, Custom); | ||
} | ||
setOperationAction(ISD::LRINT, MVT::v16f32, | ||
Subtarget.hasDQI() ? Legal : Custom); | ||
setOperationAction(ISD::LRINT, MVT::v8f64, | ||
Subtarget.hasDQI() ? Legal : Custom); | ||
if (Subtarget.hasDQI()) | ||
setOperationAction(ISD::LLRINT, MVT::v8f64, Legal); | ||
|
||
for (MVT VT : { MVT::v16i1, MVT::v16i8 }) { | ||
setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32); | ||
|
@@ -2488,6 +2505,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
ISD::FMAXNUM, | ||
ISD::SUB, | ||
ISD::LOAD, | ||
ISD::LRINT, | ||
ISD::LLRINT, | ||
ISD::MLOAD, | ||
ISD::STORE, | ||
ISD::MSTORE, | ||
|
@@ -21161,8 +21180,12 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { | |
SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op, | ||
SelectionDAG &DAG) const { | ||
SDValue Src = Op.getOperand(0); | ||
EVT DstVT = Op.getSimpleValueType(); | ||
MVT SrcVT = Src.getSimpleValueType(); | ||
|
||
if (SrcVT.isVector()) | ||
return DstVT.getScalarType() == MVT::i32 ? Op : SDValue(); | ||
|
||
if (SrcVT == MVT::f16) | ||
return SDValue(); | ||
|
||
|
@@ -51556,6 +51579,22 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, | |
return SDValue(); | ||
} | ||
|
||
/// DAG combine for LRINT/LLRINT nodes: rewrites a v2f32 -> v2i64 rounding
/// conversion into an X86ISD::CVTP2SI node that operates on a v4f32 source.
///
/// \param N          Node being combined; its first result type and first
///                   operand are inspected.
/// \param DAG        SelectionDAG used to build the replacement nodes.
/// \param Subtarget  Queried for DQI and VLX support.
/// \returns The new X86ISD::CVTP2SI node, or an empty SDValue when the
///          subtarget features or the types do not match.
static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, | ||
const X86Subtarget &Subtarget) { | ||
EVT VT = N->getValueType(0); | ||
SDValue Src = N->getOperand(0); | ||
EVT SrcVT = Src.getValueType(); | ||
SDLoc DL(N); | ||
|
||
// Only the v2f32 -> v2i64 case on a subtarget with both DQI and VLX is
// handled here; every other combination is left untouched.
if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 || | ||
SrcVT != MVT::v2f32) | ||
return SDValue(); | ||
|
||
// Widen the v2f32 source to v4f32 by concatenating it with an undef v2f32,
// then emit the packed conversion with the original v2i64 result type.
return DAG.getNode(X86ISD::CVTP2SI, DL, VT, | ||
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, Src, | ||
DAG.getUNDEF(SrcVT))); | ||
} | ||
|
||
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify | ||
/// the codegen. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be handled in ReplaceNodeResults? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, the result type is legal. |
||
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) | ||
|
@@ -51902,6 +51941,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, | |
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc); | ||
} | ||
|
||
// Try to combine (trunc (vNi64 (lrint x))) to (vNi32 (lrint x)). | ||
if (Src.getOpcode() == ISD::LRINT && VT.getScalarType() == MVT::i32 && | ||
Src.hasOneUse()) | ||
return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0)); | ||
|
||
return SDValue(); | ||
} | ||
|
||
|
@@ -56848,6 +56892,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, | |
case ISD::UINT_TO_FP: | ||
case ISD::STRICT_UINT_TO_FP: | ||
return combineUIntToFP(N, DAG, Subtarget); | ||
case ISD::LRINT: | ||
case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget); | ||
case ISD::FADD: | ||
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); | ||
case X86ISD::VFCMULC: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8811,7 +8811,18 @@ let Predicates = [HasVLX] in { | |
def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), | ||
v4i32x_info.ImmAllZerosV, VK2WM:$mask), | ||
(VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; | ||
|
||
def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>; | ||
def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>; | ||
def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>; | ||
def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>; | ||
def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>; | ||
def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>; | ||
} | ||
def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do these need to be wrapped in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we don't bother to check it since VR512 is not available without |
||
def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>; | ||
def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>; | ||
def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>; | ||
|
||
let Predicates = [HasDQI, HasVLX] in { | ||
def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), | ||
|
@@ -8857,6 +8868,30 @@ let Predicates = [HasDQI, HasVLX] in { | |
(X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), | ||
v2i64x_info.ImmAllZerosV)), | ||
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; | ||
|
||
def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>; | ||
def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>; | ||
def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>; | ||
def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>; | ||
def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>; | ||
def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>; | ||
def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>; | ||
def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>; | ||
def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>; | ||
def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>; | ||
def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>; | ||
def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>; | ||
} | ||
|
||
// Selection patterns for vector lrint/llrint producing v8i64 results;
// guarded by the HasDQI predicate only.
let Predicates = [HasDQI] in { | ||
// v8f32 source (VR256X register or loadv8f32) -> v8i64 via VCVTPS2QQZ.
// lrint and llrint select the same instruction.
def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>; | ||
def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>; | ||
def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>; | ||
def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>; | ||
// v8f64 source (VR512 register or loadv8f64) -> v8i64 via VCVTPD2QQZ.
def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>; | ||
def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>; | ||
def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>; | ||
def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>; | ||
} | ||
|
||
let Predicates = [HasVLX] in { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this correct? The MVT of ISD::LRINT is usually set to its output type, not its input type. Shouldn't this be MVT::v4i32?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's correct. ISD::LRINT uses the input type; see https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp#L1002-L1006