@@ -53441,6 +53441,69 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
          truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
    return R;

+  // The middle end transforms:
+  //   `(or (icmp eq X, C), (icmp eq X, C+1))`
+  //     -> `(icmp ult (add x, -C), 2)`
+  // and likewise for the inverted cases with `ugt`.
+  //
+  // Since x86, pre-AVX512, doesn't have unsigned vector compares, this results
+  // in worse codegen. So undo the middle-end transform and go back to the
+  // `(or (icmp eq), (icmp eq))` form.
+  // Also skip AVX1 with ymm vectors, as the umin approach combines better than
+  // the xmm approach.
+  //
+  // NB: We don't handle the similar simplification of `(and (icmp ne), (icmp
+  // ne))`, as it doesn't end up instruction-positive.
+  // TODO: We might want to do this for AVX512 as well if we `sext` the result.
+  if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() &&
+      ISD::isUnsignedIntSetCC(CC) && LHS.getOpcode() == ISD::ADD &&
+      !Subtarget.hasAVX512() &&
+      (OpVT.getSizeInBits() <= 128 || !Subtarget.hasAVX() ||
+       Subtarget.hasAVX2()) &&
+      LHS.hasOneUse()) {
+
+    APInt CmpC;
+    SDValue AddC = LHS.getOperand(1);
+    if (ISD::isConstantSplatVector(RHS.getNode(), CmpC) &&
+        DAG.isConstantIntBuildVectorOrConstantInt(AddC)) {
+      // See which form we have depending on the constant/condition.
+      SDValue C0 = SDValue();
+      SDValue C1 = SDValue();
+
+      // If we had `(add x, -1)` and can lower with `umin`, don't transform as
+      // we will end up generating an additional constant. Keeping the current
+      // form has a slight latency cost, but it is probably worth saving a
+      // constant.
+      if (ISD::isConstantSplatVectorAllOnes(AddC.getNode()) &&
+          DAG.getTargetLoweringInfo().isOperationLegal(ISD::UMIN, OpVT)) {
+        // Pass
+      }
+      // Normal Cases
+      else if ((CC == ISD::SETULT && CmpC == 2) ||
+               (CC == ISD::SETULE && CmpC == 1)) {
+        // These will constant fold.
+        C0 = DAG.getNegative(AddC, DL, OpVT);
+        C1 = DAG.getNode(ISD::SUB, DL, OpVT, C0,
+                         DAG.getAllOnesConstant(DL, OpVT));
+      }
+      // Inverted Cases
+      else if ((CC == ISD::SETUGT && (-CmpC) == 3) ||
+               (CC == ISD::SETUGE && (-CmpC) == 2)) {
+        // These will constant fold.
+        C0 = DAG.getNOT(DL, AddC, OpVT);
+        C1 = DAG.getNode(ISD::ADD, DL, OpVT, C0,
+                         DAG.getAllOnesConstant(DL, OpVT));
+      }
+      if (C0 && C1) {
+        SDValue NewLHS =
+            DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ);
+        SDValue NewRHS =
+            DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ);
+        return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS);
+      }
+    }
+  }
+
  // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
  // to avoid scalarization via legalization because v4i32 is not a legal type.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
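
For reference, the equivalence behind the `ult`/`ule` branch above can be checked in scalar form. The following is a minimal, hypothetical standalone C++ sketch (not part of the patch): exhaustively over 8-bit values it verifies that `(x == C || x == C+1)` matches the middle-end form `(icmp ult (add x, -C), 2)`, and that the constants the combine rebuilds are `C0 = -AddC` and `C1 = C0 - (-1) = C0 + 1`.

// Hypothetical scalar check of the identity this combine undoes (ult/ule case).
// AddC plays the role of the splatted add constant, i.e. AddC == -C.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned c = 0; c < 256; ++c) {
    uint8_t C = (uint8_t)c, AddC = (uint8_t)-C;
    for (unsigned v = 0; v < 256; ++v) {
      uint8_t x = (uint8_t)v;
      bool OrForm = (x == C) || (x == (uint8_t)(C + 1));
      bool UltForm = (uint8_t)(x + AddC) < 2; // `(icmp ult (add x, -C), 2)`
      assert(OrForm == UltForm);
      // Constants the combine rebuilds: C0 = -AddC, C1 = C0 - (-1) = C0 + 1.
      uint8_t C0 = (uint8_t)-AddC, C1 = (uint8_t)(C0 + 1);
      assert(OrForm == ((x == C0) || (x == C1)));
    }
  }
  return 0;
}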
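The inverted `ugt`/`uge` branch can be sanity-checked the same way: `(icmp ugt (add x, AddC), -3)` holds exactly for `x == ~AddC` and `x == ~AddC - 1`, which is what the `getNOT` plus `add (-1)` constants encode. Again a hypothetical 8-bit sketch, not part of the patch:

// Hypothetical scalar check of the inverted case: `(icmp ugt (add x, AddC), -3)`.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    uint8_t AddC = (uint8_t)a;
    for (unsigned v = 0; v < 256; ++v) {
      uint8_t x = (uint8_t)v;
      bool UgtForm = (uint8_t)(x + AddC) > (uint8_t)-3; // ugt -3
      // Constants the combine rebuilds: C0 = ~AddC, C1 = C0 + (-1).
      uint8_t C0 = (uint8_t)~AddC, C1 = (uint8_t)(C0 - 1);
      assert(UgtForm == ((x == C0) || (x == C1)));
    }
  }
  return 0;
}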