@@ -1092,6 +1092,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1092
1092
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1093
1093
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1094
1094
1095
+ setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
1096
+
1095
1097
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1096
1098
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
1097
1099
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
@@ -1431,6 +1433,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1431
1433
setOperationAction(ISD::FMINIMUM, VT, Custom);
1432
1434
}
1433
1435
1436
+ setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
1437
+ setOperationAction(ISD::LRINT, MVT::v4f64, Custom);
1438
+
1434
1439
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1435
1440
// even though v8i16 is a legal type.
1436
1441
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
@@ -1731,6 +1736,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1731
1736
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1732
1737
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1733
1738
}
1739
+ if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
1740
+ for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1741
+ setOperationAction(ISD::LRINT, VT, Legal);
1742
+ setOperationAction(ISD::LLRINT, VT, Legal);
1743
+ }
1744
+ }
1734
1745
1735
1746
// This block controls legalization for 512-bit operations with 8/16/32/64 bit
1736
1747
// elements. 512-bits can be disabled based on prefer-vector-width and
@@ -1765,6 +1776,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1765
1776
setOperationAction(ISD::STRICT_FMA, VT, Legal);
1766
1777
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1767
1778
}
1779
+ setOperationAction(ISD::LRINT, MVT::v16f32,
1780
+ Subtarget.hasDQI() ? Legal : Custom);
1781
+ setOperationAction(ISD::LRINT, MVT::v8f64,
1782
+ Subtarget.hasDQI() ? Legal : Custom);
1783
+ if (Subtarget.hasDQI())
1784
+ setOperationAction(ISD::LLRINT, MVT::v8f64, Legal);
1768
1785
1769
1786
for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
1770
1787
setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
@@ -2488,6 +2505,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
2488
2505
ISD::FMAXNUM,
2489
2506
ISD::SUB,
2490
2507
ISD::LOAD,
2508
+ ISD::LRINT,
2509
+ ISD::LLRINT,
2491
2510
ISD::MLOAD,
2492
2511
ISD::STORE,
2493
2512
ISD::MSTORE,
@@ -21161,8 +21180,12 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
21161
21180
SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
21162
21181
SelectionDAG &DAG) const {
21163
21182
SDValue Src = Op.getOperand(0);
21183
+ EVT DstVT = Op.getSimpleValueType();
21164
21184
MVT SrcVT = Src.getSimpleValueType();
21165
21185
21186
+ if (SrcVT.isVector())
21187
+ return DstVT.getScalarType() == MVT::i32 ? Op : SDValue();
21188
+
21166
21189
if (SrcVT == MVT::f16)
21167
21190
return SDValue();
21168
21191
@@ -51542,6 +51565,22 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
51542
51565
return SDValue();
51543
51566
}
51544
51567
51568
+ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
51569
+ const X86Subtarget &Subtarget) {
51570
+ EVT VT = N->getValueType(0);
51571
+ SDValue Src = N->getOperand(0);
51572
+ EVT SrcVT = Src.getValueType();
51573
+ SDLoc DL(N);
51574
+
51575
+ if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 ||
51576
+ SrcVT != MVT::v2f32)
51577
+ return SDValue();
51578
+
51579
+ return DAG.getNode(X86ISD::CVTP2SI, DL, VT,
51580
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, Src,
51581
+ DAG.getUNDEF(SrcVT)));
51582
+ }
51583
+
51545
51584
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
51546
51585
/// the codegen.
51547
51586
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
@@ -51888,6 +51927,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
51888
51927
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
51889
51928
}
51890
51929
51930
+ // Try to combine (trunc (vNi64 (lrint x))) to (vNi32 (lrint x)).
51931
+ if (Src.getOpcode() == ISD::LRINT && VT.getScalarType() == MVT::i32 &&
51932
+ Src.hasOneUse())
51933
+ return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0));
51934
+
51891
51935
return SDValue();
51892
51936
}
51893
51937
@@ -56834,6 +56878,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
56834
56878
case ISD::UINT_TO_FP:
56835
56879
case ISD::STRICT_UINT_TO_FP:
56836
56880
return combineUIntToFP(N, DAG, Subtarget);
56881
+ case ISD::LRINT:
56882
+ case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget);
56837
56883
case ISD::FADD:
56838
56884
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
56839
56885
case X86ISD::VFCMULC:
0 commit comments