@@ -20822,7 +20822,8 @@ static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
20822
20822
static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
20823
20823
SDValue In, const SDLoc &DL,
20824
20824
SelectionDAG &DAG,
20825
- const X86Subtarget &Subtarget) {
20825
+ const X86Subtarget &Subtarget,
20826
+ const SDNodeFlags Flags = SDNodeFlags()) {
20826
20827
// Requires SSE2.
20827
20828
if (!Subtarget.hasSSE2())
20828
20829
return SDValue();
@@ -20868,7 +20869,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
20868
20869
// e.g. Masks, zext_in_reg, etc.
20869
20870
// Pre-SSE41 we can only use PACKUSWB.
20870
20871
KnownBits Known = DAG.computeKnownBits(In);
20871
- if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
20872
+ if ((Flags.hasNoUnsignedWrap() && NumDstEltBits <= NumPackedZeroBits) ||
20873
+ (NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
20872
20874
PackOpcode = X86ISD::PACKUS;
20873
20875
return In;
20874
20876
}
@@ -20887,7 +20889,7 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
20887
20889
return SDValue();
20888
20890
20889
20891
unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
20890
- if (MinSignBits < NumSignBits) {
20892
+ if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) {
20891
20893
PackOpcode = X86ISD::PACKSS;
20892
20894
return In;
20893
20895
}
@@ -20909,10 +20911,9 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
20909
20911
/// This function lowers a vector truncation of 'extended sign-bits' or
20910
20912
/// 'extended zero-bits' values.
20911
20913
/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
20912
- static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
20913
- const SDLoc &DL,
20914
- const X86Subtarget &Subtarget,
20915
- SelectionDAG &DAG) {
20914
+ static SDValue LowerTruncateVecPackWithSignBits(
20915
+ MVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget,
20916
+ SelectionDAG &DAG, const SDNodeFlags Flags = SDNodeFlags()) {
20916
20917
MVT SrcVT = In.getSimpleValueType();
20917
20918
MVT DstSVT = DstVT.getVectorElementType();
20918
20919
MVT SrcSVT = SrcVT.getVectorElementType();
@@ -20934,8 +20935,8 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
20934
20935
}
20935
20936
20936
20937
unsigned PackOpcode;
20937
- if (SDValue Src =
20938
- matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG, Subtarget))
20938
+ if (SDValue Src = matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG,
20939
+ Subtarget, Flags ))
20939
20940
return truncateVectorWithPACK(PackOpcode, DstVT, Src, DL, DAG, Subtarget);
20940
20941
20941
20942
return SDValue();
@@ -21105,8 +21106,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
21105
21106
// Pre-AVX512 (or prefer-256bit) see if we can make use of PACKSS/PACKUS.
21106
21107
if (!Subtarget.hasAVX512() ||
21107
21108
(InVT.is512BitVector() && VT.is256BitVector()))
21108
- if (SDValue SignPack =
21109
- LowerTruncateVecPackWithSignBits( VT, In, DL, Subtarget, DAG))
21109
+ if (SDValue SignPack = LowerTruncateVecPackWithSignBits(
21110
+ VT, In, DL, Subtarget, DAG, Op->getFlags() ))
21110
21111
return SignPack;
21111
21112
21112
21113
// Pre-AVX512 see if we can make use of PACKSS/PACKUS.
@@ -21123,8 +21124,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
21123
21124
// Attempt to truncate with PACKUS/PACKSS even on AVX512 if we'd have to
21124
21125
// concat from subvectors to use VPTRUNC etc.
21125
21126
if (!Subtarget.hasAVX512() || isFreeToSplitVector(In.getNode(), DAG))
21126
- if (SDValue SignPack =
21127
- LowerTruncateVecPackWithSignBits( VT, In, DL, Subtarget, DAG))
21127
+ if (SDValue SignPack = LowerTruncateVecPackWithSignBits(
21128
+ VT, In, DL, Subtarget, DAG, Op->getFlags() ))
21128
21129
return SignPack;
21129
21130
21130
21131
// vpmovqb/w/d, vpmovdb/w, vpmovwb
@@ -33594,10 +33595,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
33594
33595
33595
33596
// See if there are sufficient leading bits to perform a PACKUS/PACKSS.
33596
33597
unsigned PackOpcode;
33597
- if (SDValue Src =
33598
- matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG, Subtarget)) {
33599
- if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src,
33600
- dl, DAG, Subtarget)) {
33598
+ if (SDValue Src = matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG,
33599
+ Subtarget, N->getFlags() )) {
33600
+ if (SDValue Res =
33601
+ truncateVectorWithPACK(PackOpcode, VT, Src, dl, DAG, Subtarget)) {
33601
33602
Res = widenSubVector(WidenVT, Res, false, Subtarget, DAG, dl);
33602
33603
Results.push_back(Res);
33603
33604
return;
0 commit comments