Skip to content

Commit 07605ea

Browse files
bevin-hansson authored and bjope committed
[X86] Improved lowering for saturating float to int.
Adapted from D54696 by @nikic.

This patch improves lowering of saturating float to int conversions, FP_TO_[SU]INT_SAT, for X86.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D86079
1 parent 0bd9a13 commit 07605ea

File tree

4 files changed

+457
-644
lines changed

4 files changed

+457
-644
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
284284
}
285285
}
286286

287+
if (Subtarget.hasSSE2()) {
288+
// Custom lowering for saturating float to int conversions.
289+
// We handle promotion to larger result types manually.
290+
for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
291+
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
292+
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
293+
}
294+
if (Subtarget.is64Bit()) {
295+
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
296+
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
297+
}
298+
}
299+
287300
// Handle address space casts between mixed sized pointers.
288301
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
289302
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
@@ -21428,6 +21441,155 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
2142821441
return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
2142921442
}
2143021443

21444+
/// Custom lowering for ISD::FP_TO_SINT_SAT and ISD::FP_TO_UINT_SAT.
///
/// \param Op  The saturating conversion node. Operand 0 is the floating-point
///            source; operand 1 is read as a constant giving the saturation
///            width in bits.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns A value of the node's result type, or an empty SDValue when the
///          source type does not satisfy isScalarFPTypeInSSEReg (described
///          below as falling back to generic code).
///
/// Two strategies are used: when both integer bounds convert exactly to the
/// source FP type, the input is clamped in floating point and then converted;
/// otherwise the input is converted directly and the out-of-range / NaN cases
/// are patched up with compare+select nodes.
SDValue
21445+
X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
21446+
// This is based on the TargetLowering::expandFP_TO_INT_SAT implementation,
21447+
// but making use of X86 specifics to produce better instruction sequences.
21448+
SDNode *Node = Op.getNode();
21449+
// Any opcode other than FP_TO_SINT_SAT is treated as the unsigned variant.
bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
21450+
// Conversion opcode matching the signedness; may be overridden to the signed
// form below once the temporary type is known.
unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
21451+
SDLoc dl(SDValue(Node, 0));
21452+
SDValue Src = Node->getOperand(0);
21453+
21454+
// There are three types involved here: SrcVT is the source floating point
21455+
// type, DstVT is the type of the result, and TmpVT is the result of the
21456+
// intermediate FP_TO_*INT operation we'll use (which may be a promotion of
21457+
// DstVT).
21458+
EVT SrcVT = Src.getValueType();
21459+
EVT DstVT = Node->getValueType(0);
21460+
EVT TmpVT = DstVT;
21461+
21462+
// This code is only for floats and doubles. Fall back to generic code for
21463+
// anything else.
21464+
if (!isScalarFPTypeInSSEReg(SrcVT))
21465+
return SDValue();
21466+
21467+
// SatWidth is the number of bits to saturate to; it may be narrower than the
// result type (the assert below only requires SatWidth <= DstWidth).
unsigned SatWidth = Node->getConstantOperandVal(1);
21468+
unsigned DstWidth = DstVT.getScalarSizeInBits();
21469+
unsigned TmpWidth = TmpVT.getScalarSizeInBits();
21470+
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
21471+
"Expected saturation width smaller than result width");
21472+
21473+
// Promote result of FP_TO_*INT to at least 32 bits.
21474+
if (TmpWidth < 32) {
21475+
TmpVT = MVT::i32;
21476+
TmpWidth = 32;
21477+
}
21478+
21479+
// Promote conversions to unsigned 32-bit to 64-bit, because it will allow
21480+
// us to use a native signed conversion instead.
21481+
if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
21482+
TmpVT = MVT::i64;
21483+
TmpWidth = 64;
21484+
}
21485+
21486+
// If the saturation width is smaller than the size of the temporary result,
21487+
// we can always use signed conversion, which is native.
21488+
if (SatWidth < TmpWidth)
21489+
FpToIntOpcode = ISD::FP_TO_SINT;
21490+
21491+
// Determine minimum and maximum integer values and their corresponding
21492+
// floating-point values.
21493+
// The bounds are formed at SatWidth bits and then widened to DstWidth:
// sign-extended in the signed case, zero-extended in the unsigned case.
APInt MinInt, MaxInt;
21494+
if (IsSigned) {
21495+
MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
21496+
MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
21497+
} else {
21498+
MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
21499+
MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
21500+
}
21501+
21502+
APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
21503+
APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
21504+
21505+
// Convert both bounds to SrcVT's semantics, rounding toward zero. The returned
// statuses are inspected for opInexact to learn whether each bound is exactly
// representable in the source FP type.
APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
21506+
MinInt, IsSigned, APFloat::rmTowardZero);
21507+
APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
21508+
MaxInt, IsSigned, APFloat::rmTowardZero);
21509+
bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
21510+
&& !(MaxStatus & APFloat::opStatus::opInexact);
21511+
21512+
SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
21513+
SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
21514+
21515+
// If the integer bounds are exactly representable as floats, emit a
21516+
// min+max+fptoi sequence. Otherwise use comparisons and selects.
21517+
if (AreExactFloatBounds) {
21518+
// Promoted result: convert in TmpVT and truncate to DstVT afterwards.
if (DstVT != TmpVT) {
21519+
// Operand order matters here: the constant is passed first and Src second,
// the reverse of the non-promoted path further down, and the accompanying
// comments describe different NaN results for the two orders.
// Clamp by MinFloat from below. If Src is NaN, propagate NaN.
21520+
SDValue MinClamped = DAG.getNode(
21521+
X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
21522+
// Clamp by MaxFloat from above. If Src is NaN, propagate NaN.
21523+
SDValue BothClamped = DAG.getNode(
21524+
X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
21525+
// Convert clamped value to integer.
21526+
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
21527+
21528+
// NaN will become INDVAL, with the top bit set and the rest zero.
21529+
// Truncation will discard the top bit, resulting in zero.
21530+
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
21531+
}
21532+
21533+
// Non-promoted result (DstVT == TmpVT): Src is passed as the first operand.
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
21534+
SDValue MinClamped = DAG.getNode(
21535+
X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21536+
// NOTE(review): FMINC is used here instead of FMIN, presumably because the
// NaN-related operand ordering no longer matters at this point -- confirm.
// Clamp by MaxFloat from above. NaN cannot occur.
21537+
SDValue BothClamped = DAG.getNode(
21538+
X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
21539+
// Convert clamped value to integer.
21540+
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
21541+
21542+
if (!IsSigned) {
21543+
// In the unsigned case we're done, because we mapped NaN to MinFloat,
21544+
// which is zero.
21545+
return FpToInt;
21546+
}
21547+
21548+
// Otherwise, select zero if Src is NaN.
21549+
// Comparing Src with itself under SETUO (unordered) is the NaN test.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
21550+
return DAG.getSelectCC(
21551+
dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
21552+
}
21553+
21554+
// Inexact-bounds path: convert Src directly, then override the result with
// the integer bounds using compare+select nodes built in DstVT.
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
21555+
SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
21556+
21557+
// Result of direct conversion, which may be selected away.
21558+
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);
21559+
21560+
if (DstVT != TmpVT) {
21561+
// NaN will become INDVAL, with the top bit set and the rest zero.
21562+
// Truncation will discard the top bit, resulting in zero.
21563+
FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
21564+
}
21565+
21566+
// Each getSelectCC below receives the previous Select as one of its value
// operands, so the checks are layered on top of the raw conversion result.
SDValue Select = FpToInt;
21567+
// For signed conversions where we saturate to the same size as the
21568+
// result type of the fptoi instructions, INDVAL coincides with integer
21569+
// minimum, so we don't need to explicitly check it.
21570+
// TmpWidth is updated alongside every TmpVT change above, so the width queried
// here is the same value as TmpWidth.
if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
21571+
// If Src ULT MinFloat, select MinInt. In particular, this also selects
21572+
// MinInt if Src is NaN.
21573+
Select = DAG.getSelectCC(
21574+
dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
21575+
}
21576+
21577+
// If Src OGT MaxFloat, select MaxInt.
21578+
Select = DAG.getSelectCC(
21579+
dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);
21580+
21581+
// In the unsigned case we are done, because we mapped NaN to MinInt, which
21582+
// is already zero. The promoted case was already handled above.
21583+
if (!IsSigned || DstVT != TmpVT) {
21584+
return Select;
21585+
}
21586+
21587+
// Otherwise, select 0 if Src is NaN.
21588+
// Same self-comparison NaN test (SETUO) as in the exact-bounds path.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
21589+
return DAG.getSelectCC(
21590+
dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
21591+
}
21592+
2143121593
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
2143221594
bool IsStrict = Op->isStrictFPOpcode();
2143321595

@@ -29807,6 +29969,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2980729969
case ISD::STRICT_FP_TO_SINT:
2980829970
case ISD::FP_TO_UINT:
2980929971
case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
29972+
case ISD::FP_TO_SINT_SAT:
29973+
case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG);
2981029974
case ISD::FP_EXTEND:
2981129975
case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
2981229976
case ISD::FP_ROUND:

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,7 @@ namespace llvm {
14991499
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
15001500
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
15011501
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1502+
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
15021503
SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
15031504
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
15041505
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)