Skip to content

Commit 0d074ba

Browse files
authored
[DAG] Support saturated truncate (#99418)
A truncate is considered saturated if no additional conversion is required between the target and return values. If the target is saturated when attempting to truncate from a vector, there is an opportunity to optimize it. Previously, each architecture had its own attempt at optimization, leading to redundant code. This patch implements common logic by introducing three new ISDs: `ISD::TRUNCATE_SSAT_S`: When the operand is a signed value and the range of values matches the range of signed values of the destination type. `ISD::TRUNCATE_SSAT_U`: When the operand is a signed value and the range of values matches the range of unsigned values of the destination type. `ISD::TRUNCATE_USAT_U`: When the operand is an unsigned value and the range of values matches the range of unsigned values of the destination type. These ISDs indicate a saturated truncate. Fixes #85903
1 parent 5ab99bf commit 0d074ba

15 files changed

+584
-270
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,26 @@ enum NodeType {
814814

815815
/// TRUNCATE - Completely drop the high bits.
816816
TRUNCATE,
817+
/// TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand
818+
/// The [SU] in the middle (the prefix of `SAT`) indicates whether the
819+
/// existing truncate target was a signed operation. For example,
820+
/// If `truncate(smin(smax(x, C), C))` was saturated then become `S`.
821+
/// If `truncate(umin(x, C))` was saturated then become `U`.
822+
/// [SU] located in last indicates whether range of truncated values is
823+
/// sign-saturated. For example, if `truncate(smin(smax(x, C), C))` is a
824+
/// truncation to `i8`, then if value of C ranges from `-128 to 127`, it will
825+
/// be saturated against signed values, resulting in `S`, which will combine
826+
/// to `TRUNCATE_SSAT_S`. If the value of C ranges from `0 to 255`, it will
827+
/// be saturated against unsigned values, resulting in `U`, which will
828+
/// combine to `TRUNCATE_SSAT_U`. Similarly, in `truncate(umin(x, C))`, if
829+
/// value of C ranges from `0 to 255`, it becomes `U` because it is saturated
830+
/// for unsigned values. As a result, it combines to `TRUNCATE_USAT_U`.
831+
TRUNCATE_SSAT_S, // saturate signed input to signed result -
832+
// truncate(smin(smax(x, C), C))
833+
TRUNCATE_SSAT_U, // saturate signed input to unsigned result -
834+
// truncate(smin(smax(x, 0), C))
835+
TRUNCATE_USAT_U, // saturate unsigned input to unsigned result -
836+
// truncate(umin(x, C))
817837

818838
/// [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
819839
/// depends on the first letter) to floating point.

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
477477
def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
478478
def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
479479
def trunc : SDNode<"ISD::TRUNCATE" , SDTIntTruncOp>;
480+
def truncssat_s : SDNode<"ISD::TRUNCATE_SSAT_S", SDTIntTruncOp>;
481+
def truncssat_u : SDNode<"ISD::TRUNCATE_SSAT_U", SDTIntTruncOp>;
482+
def truncusat_u : SDNode<"ISD::TRUNCATE_USAT_U", SDTIntTruncOp>;
480483
def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>;
481484
def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;
482485
def freeze : SDNode<"ISD::FREEZE" , SDTFreeze>;

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ namespace {
486486
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
487487
SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
488488
SDValue visitTRUNCATE(SDNode *N);
489+
SDValue visitTRUNCATE_USAT_U(SDNode *N);
489490
SDValue visitBITCAST(SDNode *N);
490491
SDValue visitFREEZE(SDNode *N);
491492
SDValue visitBUILD_PAIR(SDNode *N);
@@ -1910,6 +1911,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
19101911
case ISD::ZERO_EXTEND_VECTOR_INREG:
19111912
case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
19121913
case ISD::TRUNCATE: return visitTRUNCATE(N);
1914+
case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
19131915
case ISD::BITCAST: return visitBITCAST(N);
19141916
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
19151917
case ISD::FADD: return visitFADD(N);
@@ -13198,7 +13200,9 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
1319813200
unsigned CastOpcode = Cast->getOpcode();
1319913201
assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
1320013202
CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13201-
CastOpcode == ISD::FP_ROUND) &&
13203+
CastOpcode == ISD::TRUNCATE_SSAT_S ||
13204+
CastOpcode == ISD::TRUNCATE_SSAT_U ||
13205+
CastOpcode == ISD::TRUNCATE_USAT_U || CastOpcode == ISD::FP_ROUND) &&
1320213206
"Unexpected opcode for vector select narrowing/widening");
1320313207

1320413208
// We only do this transform before legal ops because the pattern may be
@@ -14910,6 +14914,132 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
1491014914
return SDValue();
1491114915
}
1491214916

14917+
/// Combine a TRUNCATE_USAT_U node whose operand is an FP_TO_UINT into a single
/// FP_TO_UINT_SAT node:
///
///   (truncate_usat_u (fp_to_uint x)) -> (fp_to_uint_sat x, scalar-type-of-VT)
///
/// The fold is only performed when the target's shouldConvertFpToSat hook
/// accepts it for the floating-point source type and the truncated result
/// type. Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);

  // Only an unsigned FP-to-integer conversion feeding the truncate is handled.
  if (N0.getOpcode() != ISD::FP_TO_UINT)
    return SDValue();

  SDValue FPSrc = N0.getOperand(0);
  EVT FPVT = FPSrc.getValueType();
  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
                                                        FPVT, VT))
    return SDValue();

  // The saturation width is the scalar type of the truncated result.
  return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPSrc,
                     DAG.getValueType(VT.getScalarType()));
}
14939+
14940+
/// Detect patterns of truncation with unsigned saturation:
14941+
///
14942+
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
14943+
/// Return the source value x to be truncated or SDValue() if the pattern was
14944+
/// not matched.
14945+
///
14946+
static SDValue detectUSatUPattern(SDValue In, EVT VT) {
14947+
unsigned NumDstBits = VT.getScalarSizeInBits();
14948+
unsigned NumSrcBits = In.getScalarValueSizeInBits();
14949+
// Saturation with truncation. We truncate from InVT to VT.
14950+
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
14951+
14952+
SDValue Min;
14953+
APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
14954+
if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
14955+
return Min;
14956+
14957+
return SDValue();
14958+
}
14959+
14960+
/// Detect patterns of truncation with signed saturation:
14961+
/// (truncate (smin (smax (x, signed_min_of_dest_type),
14962+
/// signed_max_of_dest_type)) to dest_type)
14963+
/// or:
14964+
/// (truncate (smax (smin (x, signed_max_of_dest_type),
14965+
/// signed_min_of_dest_type)) to dest_type).
14966+
///
14967+
/// Return the source value to be truncated or SDValue() if the pattern was not
14968+
/// matched.
14969+
static SDValue detectSSatSPattern(SDValue In, EVT VT) {
14970+
unsigned NumDstBits = VT.getScalarSizeInBits();
14971+
unsigned NumSrcBits = In.getScalarValueSizeInBits();
14972+
// Saturation with truncation. We truncate from InVT to VT.
14973+
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
14974+
14975+
SDValue Val;
14976+
APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
14977+
APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
14978+
14979+
if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
14980+
m_SpecificInt(SignedMax))))
14981+
return Val;
14982+
14983+
if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
14984+
m_SpecificInt(SignedMin))))
14985+
return Val;
14986+
14987+
return SDValue();
14988+
}
14989+
14990+
/// Detect patterns of truncation with unsigned saturation:
14991+
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
14992+
const SDLoc &DL) {
14993+
unsigned NumDstBits = VT.getScalarSizeInBits();
14994+
unsigned NumSrcBits = In.getScalarValueSizeInBits();
14995+
// Saturation with truncation. We truncate from InVT to VT.
14996+
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
14997+
14998+
SDValue Val;
14999+
APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15000+
// Min == 0, Max is unsigned max of destination type.
15001+
if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15002+
m_Zero())))
15003+
return Val;
15004+
15005+
if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15006+
m_SpecificInt(UnsignedMax))))
15007+
return Val;
15008+
15009+
if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15010+
m_SpecificInt(UnsignedMax))))
15011+
return Val;
15012+
15013+
return SDValue();
15014+
}
15015+
15016+
/// Try to replace a truncate of a clamped value with one of the saturating
/// truncate nodes (TRUNCATE_SSAT_S, TRUNCATE_SSAT_U or TRUNCATE_USAT_U).
///
/// \p Src is the value being truncated, \p SrcVT its type and \p VT the
/// truncated type. A saturating opcode is only used when it is legal or custom
/// for \p SrcVT and the target reports \p VT as desirable for it. \p N is not
/// inspected here. Returns the new node, or an empty SDValue when nothing
/// matched.
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
                               SDLoc &DL, const TargetLowering &TLI,
                               SelectionDAG &DAG) {
  // Target gate for forming a given saturating-truncate opcode.
  auto TargetAccepts = [&](unsigned Opc) -> bool {
    return TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
           TLI.isTypeDesirableForOp(Opc, VT);
  };

  switch (Src.getOpcode()) {
  case ISD::SMIN:
  case ISD::SMAX:
    // Signed clamp: prefer signed->signed, then signed->unsigned.
    if (TargetAccepts(ISD::TRUNCATE_SSAT_S))
      if (SDValue X = detectSSatSPattern(Src, VT))
        return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, X);
    if (TargetAccepts(ISD::TRUNCATE_SSAT_U))
      if (SDValue X = detectSSatUPattern(Src, VT, DAG, DL))
        return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, X);
    break;
  case ISD::UMIN:
    // A umin root may still be a signed->unsigned clamp (umin over smax), so
    // that form is tried before the plain unsigned one.
    if (TargetAccepts(ISD::TRUNCATE_SSAT_U))
      if (SDValue X = detectSSatUPattern(Src, VT, DAG, DL))
        return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, X);
    if (TargetAccepts(ISD::TRUNCATE_USAT_U))
      if (SDValue X = detectUSatUPattern(Src, VT))
        return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, X);
    break;
  default:
    break;
  }

  return SDValue();
}
15042+
1491315043
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1491415044
SDValue N0 = N->getOperand(0);
1491515045
EVT VT = N->getValueType(0);
@@ -14925,6 +15055,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1492515055
if (N0.getOpcode() == ISD::TRUNCATE)
1492615056
return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
1492715057

15058+
// fold saturated truncate
15059+
if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
15060+
return SaturatedTR;
15061+
1492815062
// fold (truncate c1) -> c1
1492915063
if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
1493015064
return C;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
380380
case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
381381
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
382382
case ISD::TRUNCATE: return "truncate";
383+
case ISD::TRUNCATE_SSAT_S: return "truncate_ssat_s";
384+
case ISD::TRUNCATE_SSAT_U: return "truncate_ssat_u";
385+
case ISD::TRUNCATE_USAT_U: return "truncate_usat_u";
383386
case ISD::FP_ROUND: return "fp_round";
384387
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
385388
case ISD::FP_EXTEND: return "fp_extend";

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,11 @@ void TargetLoweringBase::initActions() {
753753
// Absolute difference
754754
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
755755

756+
// Saturated trunc
757+
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
758+
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
759+
setOperationAction(ISD::TRUNCATE_USAT_U, VT, Expand);
760+
756761
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
757762
setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
758763
Expand);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14101410
}
14111411
}
14121412

1413+
for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1414+
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Legal);
1415+
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Legal);
1416+
setOperationAction(ISD::TRUNCATE_USAT_U, VT, Legal);
1417+
}
1418+
14131419
if (Subtarget->hasSME()) {
14141420
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
14151421
}
@@ -29228,6 +29234,18 @@ bool AArch64TargetLowering::hasInlineStackProbe(
2922829234
MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
2922929235
}
2923029236

29237+
bool AArch64TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
29238+
switch (Opc) {
29239+
case ISD::TRUNCATE_SSAT_S:
29240+
case ISD::TRUNCATE_SSAT_U:
29241+
case ISD::TRUNCATE_USAT_U:
29242+
if (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32)
29243+
return true;
29244+
}
29245+
29246+
return TargetLowering::isTypeDesirableForOp(Opc, VT);
29247+
}
29248+
2923129249
#ifndef NDEBUG
2923229250
void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
2923329251
switch (N->getOpcode()) {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,11 @@ class AArch64TargetLowering : public TargetLowering {
745745
bool generateFMAsInMachineCombiner(EVT VT,
746746
CodeGenOptLevel OptLevel) const override;
747747

748+
/// Return true if the target has native support for
749+
/// the specified value type and it is 'desirable' to use the type for the
750+
/// given node type.
751+
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
752+
748753
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
749754
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
750755

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5420,64 +5420,75 @@ def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
54205420
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
54215421

54225422
// trunc(umin(X, 255)) -> UQXTRN v8i8
5423-
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
5423+
def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
54245424
(UQXTNv8i8 V128:$Vn)>;
54255425
// trunc(umin(X, 65535)) -> UQXTRN v4i16
5426-
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
5426+
def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
54275427
(UQXTNv4i16 V128:$Vn)>;
5428+
// trunc(umin(X, 4294967295)) -> UQXTRN v2i32
5429+
def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
5430+
(UQXTNv2i32 V128:$Vn)>;
54285431
// trunc(smin(smax(X, -128), 127)) -> SQXTRN
5429-
// with reversed min/max
5430-
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5431-
(v8i16 VImm7F)))),
5432-
(SQXTNv8i8 V128:$Vn)>;
5433-
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5434-
(v8i16 VImm80)))),
5432+
def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
54355433
(SQXTNv8i8 V128:$Vn)>;
54365434
// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
5437-
// with reversed min/max
5438-
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5439-
(v4i32 VImm7FFF)))),
5440-
(SQXTNv4i16 V128:$Vn)>;
5441-
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5442-
(v4i32 VImm8000)))),
5435+
def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
54435436
(SQXTNv4i16 V128:$Vn)>;
5444-
5445-
// concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
5437+
// trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
5438+
def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
5439+
(SQXTNv2i32 V128:$Vn)>;
5440+
// trunc(umin(smax(X, 0), 255)) -> SQXTUN
5441+
def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
5442+
(SQXTUNv8i8 V128:$Vn)>;
5443+
// trunc(umin(smax(X, 0), 65535)) -> SQXTUN
5444+
def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
5445+
(SQXTUNv4i16 V128:$Vn)>;
5446+
// trunc(umin(smax(X, 0), 4294967295)) -> SQXTUN
5447+
def : Pat<(v2i32 (truncssat_u (v2i64 V128:$Vn))),
5448+
(SQXTUNv2i32 V128:$Vn)>;
5449+
5450+
// truncusat_u
5451+
// concat_vectors(Vd, truncusat_u(Vn)) ~> UQXTRN(Vd, Vn)
54465452
def : Pat<(v16i8 (concat_vectors
54475453
(v8i8 V64:$Vd),
5448-
(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
5454+
(v8i8 (truncusat_u (v8i16 V128:$Vn))))),
54495455
(UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5450-
// concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
54515456
def : Pat<(v8i16 (concat_vectors
54525457
(v4i16 V64:$Vd),
5453-
(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
5458+
(v4i16 (truncusat_u (v4i32 V128:$Vn))))),
54545459
(UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5460+
def : Pat<(v4i32 (concat_vectors
5461+
(v2i32 V64:$Vd),
5462+
(v2i32 (truncusat_u (v2i64 V128:$Vn))))),
5463+
(UQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54555464

5456-
// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
5457-
// with reversed min/max
5465+
// concat_vectors(Vd, truncssat_s(Vn)) ~> SQXTN2(Vd, Vn)
54585466
def : Pat<(v16i8 (concat_vectors
54595467
(v8i8 V64:$Vd),
5460-
(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5461-
(v8i16 VImm7F)))))),
5468+
(v8i8 (truncssat_s (v8i16 V128:$Vn))))),
54625469
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5463-
def : Pat<(v16i8 (concat_vectors
5464-
(v8i8 V64:$Vd),
5465-
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5466-
(v8i16 VImm80)))))),
5467-
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5468-
5469-
// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
5470-
// with reversed min/max
54715470
def : Pat<(v8i16 (concat_vectors
54725471
(v4i16 V64:$Vd),
5473-
(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5474-
(v4i32 VImm7FFF)))))),
5472+
(v4i16 (truncssat_s (v4i32 V128:$Vn))))),
54755473
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5474+
def : Pat<(v4i32 (concat_vectors
5475+
(v2i32 V64:$Vd),
5476+
(v2i32 (truncssat_s (v2i64 V128:$Vn))))),
5477+
(SQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5478+
5479+
// concat_vectors(Vd, truncssat_u(Vn)) ~> SQXTUN2(Vd, Vn)
5480+
def : Pat<(v16i8 (concat_vectors
5481+
(v8i8 V64:$Vd),
5482+
(v8i8 (truncssat_u (v8i16 V128:$Vn))))),
5483+
(SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54765484
def : Pat<(v8i16 (concat_vectors
54775485
(v4i16 V64:$Vd),
5478-
(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5479-
(v4i32 VImm8000)))))),
5480-
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5486+
(v4i16 (truncssat_u (v4i32 V128:$Vn))))),
5487+
(SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5488+
def : Pat<(v4i32 (concat_vectors
5489+
(v2i32 V64:$Vd),
5490+
(v2i32 (truncssat_u (v2i64 V128:$Vn))))),
5491+
(SQXTUNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54815492

54825493
// Select BSWAP vector instructions into REV instructions
54835494
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),

0 commit comments

Comments
 (0)