@@ -284,6 +284,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
284
284
}
285
285
}
286
286
287
+ if (Subtarget.hasSSE2()) {
288
+ // Custom lowering for saturating float to int conversions.
289
+ // We handle promotion to larger result types manually.
290
+ for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
291
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
292
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
293
+ }
294
+ if (Subtarget.is64Bit()) {
295
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
296
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
297
+ }
298
+ }
299
+
287
300
// Handle address space casts between mixed sized pointers.
288
301
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
289
302
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
@@ -21428,6 +21441,155 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
21428
21441
return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
21429
21442
}
21430
21443
21444
+ SDValue
21445
+ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
21446
+ // This is based on the TargetLowering::expandFP_TO_INT_SAT implementation,
21447
+ // but making use of X86 specifics to produce better instruction sequences.
21448
+ SDNode *Node = Op.getNode();
21449
+ bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
21450
+ unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
21451
+ SDLoc dl(SDValue(Node, 0));
21452
+ SDValue Src = Node->getOperand(0);
21453
+
21454
+ // There are three types involved here: SrcVT is the source floating point
21455
+ // type, DstVT is the type of the result, and TmpVT is the result of the
21456
+ // intermediate FP_TO_*INT operation we'll use (which may be a promotion of
21457
+ // DstVT).
21458
+ EVT SrcVT = Src.getValueType();
21459
+ EVT DstVT = Node->getValueType(0);
21460
+ EVT TmpVT = DstVT;
21461
+
21462
+ // This code is only for floats and doubles. Fall back to generic code for
21463
+ // anything else.
21464
+ if (!isScalarFPTypeInSSEReg(SrcVT))
21465
+ return SDValue();
21466
+
21467
+ unsigned SatWidth = Node->getConstantOperandVal(1);
21468
+ unsigned DstWidth = DstVT.getScalarSizeInBits();
21469
+ unsigned TmpWidth = TmpVT.getScalarSizeInBits();
21470
+ assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
21471
+ "Expected saturation width smaller than result width");
21472
+
21473
+ // Promote result of FP_TO_*INT to at least 32 bits.
21474
+ if (TmpWidth < 32) {
21475
+ TmpVT = MVT::i32;
21476
+ TmpWidth = 32;
21477
+ }
21478
+
21479
+ // Promote conversions to unsigned 32-bit to 64-bit, because it will allow
21480
+ // us to use a native signed conversion instead.
21481
+ if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
21482
+ TmpVT = MVT::i64;
21483
+ TmpWidth = 64;
21484
+ }
21485
+
21486
+ // If the saturation width is smaller than the size of the temporary result,
21487
+ // we can always use signed conversion, which is native.
21488
+ if (SatWidth < TmpWidth)
21489
+ FpToIntOpcode = ISD::FP_TO_SINT;
21490
+
21491
+ // Determine minimum and maximum integer values and their corresponding
21492
+ // floating-point values.
21493
+ APInt MinInt, MaxInt;
21494
+ if (IsSigned) {
21495
+ MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
21496
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
21497
+ } else {
21498
+ MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
21499
+ MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
21500
+ }
21501
+
21502
+ APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
21503
+ APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
21504
+
21505
+ APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
21506
+ MinInt, IsSigned, APFloat::rmTowardZero);
21507
+ APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
21508
+ MaxInt, IsSigned, APFloat::rmTowardZero);
21509
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
21510
+ && !(MaxStatus & APFloat::opStatus::opInexact);
21511
+
21512
+ SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
21513
+ SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
21514
+
21515
+ // If the integer bounds are exactly representable as floats, emit a
21516
+ // min+max+fptoi sequence. Otherwise use comparisons and selects.
21517
+ if (AreExactFloatBounds) {
21518
+ if (DstVT != TmpVT) {
21519
+ // Clamp by MinFloat from below. If Src is NaN, propagate NaN.
21520
+ SDValue MinClamped = DAG.getNode(
21521
+ X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
21522
+ // Clamp by MaxFloat from above. If Src is NaN, propagate NaN.
21523
+ SDValue BothClamped = DAG.getNode(
21524
+ X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
21525
+ // Convert clamped value to integer.
21526
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
21527
+
21528
+ // NaN will become INDVAL, with the top bit set and the rest zero.
21529
+ // Truncation will discard the top bit, resulting in zero.
21530
+ return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
21531
+ }
21532
+
21533
+ // Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
21534
+ SDValue MinClamped = DAG.getNode(
21535
+ X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21536
+ // Clamp by MaxFloat from above. NaN cannot occur.
21537
+ SDValue BothClamped = DAG.getNode(
21538
+ X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
21539
+ // Convert clamped value to integer.
21540
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
21541
+
21542
+ if (!IsSigned) {
21543
+ // In the unsigned case we're done, because we mapped NaN to MinFloat,
21544
+ // which is zero.
21545
+ return FpToInt;
21546
+ }
21547
+
21548
+ // Otherwise, select zero if Src is NaN.
21549
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
21550
+ return DAG.getSelectCC(
21551
+ dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
21552
+ }
21553
+
21554
+ SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
21555
+ SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
21556
+
21557
+ // Result of direct conversion, which may be selected away.
21558
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);
21559
+
21560
+ if (DstVT != TmpVT) {
21561
+ // NaN will become INDVAL, with the top bit set and the rest zero.
21562
+ // Truncation will discard the top bit, resulting in zero.
21563
+ FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
21564
+ }
21565
+
21566
+ SDValue Select = FpToInt;
21567
+ // For signed conversions where we saturate to the same size as the
21568
+ // result type of the fptoi instructions, INDVAL coincides with integer
21569
+ // minimum, so we don't need to explicitly check it.
21570
+ if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
21571
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
21572
+ // MinInt if Src is NaN.
21573
+ Select = DAG.getSelectCC(
21574
+ dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
21575
+ }
21576
+
21577
+ // If Src OGT MaxFloat, select MaxInt.
21578
+ Select = DAG.getSelectCC(
21579
+ dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);
21580
+
21581
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
21582
+ // is already zero. The promoted case was already handled above.
21583
+ if (!IsSigned || DstVT != TmpVT) {
21584
+ return Select;
21585
+ }
21586
+
21587
+ // Otherwise, select 0 if Src is NaN.
21588
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
21589
+ return DAG.getSelectCC(
21590
+ dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
21591
+ }
21592
+
21431
21593
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
21432
21594
bool IsStrict = Op->isStrictFPOpcode();
21433
21595
@@ -29807,6 +29969,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
29807
29969
case ISD::STRICT_FP_TO_SINT:
29808
29970
case ISD::FP_TO_UINT:
29809
29971
case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
29972
+ case ISD::FP_TO_SINT_SAT:
29973
+ case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG);
29810
29974
case ISD::FP_EXTEND:
29811
29975
case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
29812
29976
case ISD::FP_ROUND:
0 commit comments