Skip to content

Commit 1b209ff

Browse files
committed
[DAG] Move vselect(icmp_ult, 0, sub(x,y)) -> usubsat(x,y) to DAGCombine (PR40111)
Move the X86 VSELECT->USUBSAT fold to DAGCombiner - there's nothing target-specific about these folds.
1 parent 7a4f1d5 commit 1b209ff

File tree

3 files changed

+66
-80
lines changed

3 files changed

+66
-80
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9743,6 +9743,68 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
97439743
}
97449744
}
97459745
}
9746+
9747+
// Match VSELECTs into sub with unsigned saturation.
9748+
if (hasOperation(ISD::USUBSAT, VT)) {
9749+
// Check if one of the arms of the VSELECT is a zero vector. If it's on
9750+
// the left side invert the predicate to simplify logic below.
9751+
SDValue Other;
9752+
ISD::CondCode SatCC = CC;
9753+
if (ISD::isBuildVectorAllZeros(N1.getNode())) {
9754+
Other = N2;
9755+
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9756+
} else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
9757+
Other = N1;
9758+
}
9759+
9760+
if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) {
9761+
SDValue CondLHS = LHS, CondRHS = RHS;
9762+
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9763+
9764+
// Look for a general sub with unsigned saturation first.
9765+
// x >= y ? x-y : 0 --> usubsat x, y
9766+
// x > y ? x-y : 0 --> usubsat x, y
9767+
if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
9768+
Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
9769+
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9770+
9771+
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
9772+
if (isa<BuildVectorSDNode>(CondRHS)) {
9773+
// If the RHS is a constant we have to reverse the const
9774+
// canonicalization.
9775+
// x > C-1 ? x+-C : 0 --> usubsat x, C
9776+
auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
9777+
return (!Op && !Cond) ||
9778+
(Op && Cond &&
9779+
Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
9780+
};
9781+
if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
9782+
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
9783+
/*AllowUndefs*/ true)) {
9784+
OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
9785+
OpRHS);
9786+
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9787+
}
9788+
9789+
// Another special case: If C was a sign bit, the sub has been
9790+
// canonicalized into a xor.
9791+
// FIXME: Would it be better to use computeKnownBits to determine
9792+
// whether it's safe to decanonicalize the xor?
9793+
// x s< 0 ? x^C : 0 --> usubsat x, C
9794+
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
9795+
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
9796+
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
9797+
OpRHSConst->getAPIntValue().isSignMask()) {
9798+
// Note that we have to rebuild the RHS constant here to ensure
9799+
// we don't rely on particular values of undef lanes.
9800+
OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
9801+
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9802+
}
9803+
}
9804+
}
9805+
}
9806+
}
9807+
}
97469808
}
97479809

97489810
if (SimplifySelectOps(N, N1, N2))

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -40912,75 +40912,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4091240912
}
4091340913
}
4091440914

40915-
// Match VSELECTs into subs with unsigned saturation.
40916-
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
40917-
// psubus is available in SSE2 for i8 and i16 vectors.
40918-
Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
40919-
isPowerOf2_32(VT.getVectorNumElements()) &&
40920-
(VT.getVectorElementType() == MVT::i8 ||
40921-
VT.getVectorElementType() == MVT::i16)) {
40922-
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
40923-
40924-
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
40925-
// left side invert the predicate to simplify logic below.
40926-
SDValue Other;
40927-
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
40928-
Other = RHS;
40929-
CC = ISD::getSetCCInverse(CC, VT.getVectorElementType());
40930-
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
40931-
Other = LHS;
40932-
}
40933-
40934-
if (Other.getNode() && Other->getNumOperands() == 2 &&
40935-
Other->getOperand(0) == Cond.getOperand(0)) {
40936-
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
40937-
SDValue CondRHS = Cond->getOperand(1);
40938-
40939-
// Look for a general sub with unsigned saturation first.
40940-
// x >= y ? x-y : 0 --> subus x, y
40941-
// x > y ? x-y : 0 --> subus x, y
40942-
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
40943-
Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
40944-
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
40945-
40946-
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
40947-
if (isa<BuildVectorSDNode>(CondRHS)) {
40948-
// If the RHS is a constant we have to reverse the const
40949-
// canonicalization.
40950-
// x > C-1 ? x+-C : 0 --> subus x, C
40951-
auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
40952-
return (!Op && !Cond) ||
40953-
(Op && Cond &&
40954-
Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
40955-
};
40956-
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
40957-
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
40958-
/*AllowUndefs*/ true)) {
40959-
OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
40960-
OpRHS);
40961-
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
40962-
}
40963-
40964-
// Another special case: If C was a sign bit, the sub has been
40965-
// canonicalized into a xor.
40966-
// FIXME: Would it be better to use computeKnownBits to determine
40967-
// whether it's safe to decanonicalize the xor?
40968-
// x s< 0 ? x^C : 0 --> subus x, C
40969-
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
40970-
if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
40971-
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
40972-
OpRHSConst->getAPIntValue().isSignMask()) {
40973-
// Note that we have to rebuild the RHS constant here to ensure we
40974-
// don't rely on particular values of undef lanes.
40975-
OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
40976-
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
40977-
}
40978-
}
40979-
}
40980-
}
40981-
}
40982-
}
40983-
4098440915
// Check if the first operand is all zeros and Cond type is vXi1.
4098540916
// If this is an avx512 target we can improve the use of zero masking by
4098640917
// swapping the operands and inverting the condition.

llvm/test/CodeGen/X86/usub_sat_vec.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,17 +1161,10 @@ define void @PR48223(<32 x i16>* %p0) {
11611161
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
11621162
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
11631163
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
1164-
; AVX512F-NEXT: vpmaxuw %ymm2, %ymm1, %ymm3
1165-
; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
1166-
; AVX512F-NEXT: vpmaxuw %ymm2, %ymm0, %ymm2
1167-
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
1168-
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
1169-
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [65472,65472,65472,65472,65472,65472,65472,65472,65472,65472,65472,65472,65472,65472,65472,65472]
1170-
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
1171-
; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0
1172-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1173-
; AVX512F-NEXT: vpandq %zmm0, %zmm2, %zmm0
1174-
; AVX512F-NEXT: vmovdqa64 %zmm0, (%rdi)
1164+
; AVX512F-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
1165+
; AVX512F-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
1166+
; AVX512F-NEXT: vmovdqa %ymm0, (%rdi)
1167+
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdi)
11751168
; AVX512F-NEXT: vzeroupper
11761169
; AVX512F-NEXT: retq
11771170
;

0 commit comments

Comments
 (0)