Skip to content

Commit 9f96db8

Browse files
committed
[X86] Fold (icmp ult (add x,-C),2) -> (or (icmp eq X,C), (icmp eq X,C+1)) for Vectors
This is undoing a middle-end transform which does the opposite. Since X86 doesn't have unsigned vector comparison instructions pre-AVX512, the simplified form gets worse codegen. Fixes #66479 Proofs: https://alive2.llvm.org/ce/z/UCz3wt Closes #84104 Closes #66479
1 parent 3e73a08 commit 9f96db8

File tree

2 files changed

+214
-223
lines changed

2 files changed

+214
-223
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53441,6 +53441,69 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
5344153441
truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
5344253442
return R;
5344353443

53444+
// In the middle end transforms:
53445+
// `(or (icmp eq X, C), (icmp eq X, C+1))`
53446+
// -> `(icmp ult (add x, -C), 2)`
53447+
// Likewise inverted cases with `ugt`.
53448+
//
53449+
// Since x86, pre avx512, doesn't have unsigned vector compares, this results
53450+
// in worse codegen. So, undo the middle-end transform and go back to `(or
53451+
// (icmp eq), (icmp eq))` form.
53452+
// Also skip AVX1 with ymm vectors, as the umin approach combines better than
53453+
// the xmm approach.
53454+
//
53455+
// NB: We don't handle the similiar simplication of `(and (icmp ne), (icmp
53456+
// ne))` as it doesn't end up instruction positive.
53457+
// TODO: We might want to do this for avx512 as well if we `sext` the result.
53458+
if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() &&
53459+
ISD::isUnsignedIntSetCC(CC) && LHS.getOpcode() == ISD::ADD &&
53460+
!Subtarget.hasAVX512() &&
53461+
(OpVT.getSizeInBits() <= 128 || !Subtarget.hasAVX() ||
53462+
Subtarget.hasAVX2()) &&
53463+
LHS.hasOneUse()) {
53464+
53465+
APInt CmpC;
53466+
SDValue AddC = LHS.getOperand(1);
53467+
if (ISD::isConstantSplatVector(RHS.getNode(), CmpC) &&
53468+
DAG.isConstantIntBuildVectorOrConstantInt(AddC)) {
53469+
// See which form we have depending on the constant/condition.
53470+
SDValue C0 = SDValue();
53471+
SDValue C1 = SDValue();
53472+
53473+
// If we had `(add x, -1)` and can lower with `umin`, don't transform as
53474+
// we will end up generating an additional constant. Keeping in the
53475+
// current form has a slight latency cost, but it probably worth saving a
53476+
// constant.
53477+
if (ISD::isConstantSplatVectorAllOnes(AddC.getNode()) &&
53478+
DAG.getTargetLoweringInfo().isOperationLegal(ISD::UMIN, OpVT)) {
53479+
// Pass
53480+
}
53481+
// Normal Cases
53482+
else if ((CC == ISD::SETULT && CmpC == 2) ||
53483+
(CC == ISD::SETULE && CmpC == 1)) {
53484+
// These will constant fold.
53485+
C0 = DAG.getNegative(AddC, DL, OpVT);
53486+
C1 = DAG.getNode(ISD::SUB, DL, OpVT, C0,
53487+
DAG.getAllOnesConstant(DL, OpVT));
53488+
}
53489+
// Inverted Cases
53490+
else if ((CC == ISD::SETUGT && (-CmpC) == 3) ||
53491+
(CC == ISD::SETUGE && (-CmpC) == 2)) {
53492+
// These will constant fold.
53493+
C0 = DAG.getNOT(DL, AddC, OpVT);
53494+
C1 = DAG.getNode(ISD::ADD, DL, OpVT, C0,
53495+
DAG.getAllOnesConstant(DL, OpVT));
53496+
}
53497+
if (C0 && C1) {
53498+
SDValue NewLHS =
53499+
DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ);
53500+
SDValue NewRHS =
53501+
DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ);
53502+
return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS);
53503+
}
53504+
}
53505+
}
53506+
5344453507
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
5344553508
// to avoid scalarization via legalization because v4i32 is not a legal type.
5344653509
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&

0 commit comments

Comments
 (0)