@@ -41341,6 +41341,154 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
41341
41341
return SDValue();
41342
41342
}
41343
41343
41344
+ // Simplify a decomposed (sext (setcc)). Assumes prior check that
41345
+ // bitwidth(sext)==bitwidth(setcc operands).
41346
+ static SDValue simplifySExtOfDecomposedSetCCImpl(
41347
+ SelectionDAG &DAG, const SDLoc &DL, ISD::CondCode CC, SDValue Op0,
41348
+ SDValue Op1, const APInt &OriginalDemandedBits,
41349
+ const APInt &OriginalDemandedElts, bool AllowNOT, unsigned Depth) {
41350
+ // Possible TODO: We could handle any power of two demanded bit + unsigned
41351
+ // comparison. There are no x86 specific comparisons that are unsigned so it's
41352
+ // unneeded.
41353
+ if (!OriginalDemandedBits.isSignMask())
41354
+ return SDValue();
41355
+
41356
+ EVT OpVT = Op0.getValueType();
41357
+ // We need nofpclass(nan inf nzero) to handle floats.
41358
+ auto hasOkayFPFlags = [](SDValue Op) {
41359
+ return Op->getFlags().hasNoNaNs() && Op->getFlags().hasNoInfs() &&
41360
+ Op->getFlags().hasNoSignedZeros();
41361
+ };
41362
+
41363
+ if (OpVT.isFloatingPoint() && !hasOkayFPFlags(Op0))
41364
+ return SDValue();
41365
+
41366
+ auto ValsEq = [OpVT](const APInt &V0, APInt V1) -> bool {
41367
+ if (OpVT.isFloatingPoint()) {
41368
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(OpVT);
41369
+ return V0.eq(APFloat(Sem, V1).bitcastToAPInt());
41370
+ }
41371
+ return V0.eq(V1);
41372
+ };
41373
+
41374
+ // Assume we canonicalized constants to Op1. That isn't always true but we
41375
+ // call this function twice with swapped CC/Operands so it's fine either way.
41376
+ APInt Op1C;
41377
+ unsigned ValWidth = OriginalDemandedBits.getBitWidth();
41378
+ if (ISD::isConstantSplatVectorAllZeros(Op1.getNode())) {
41379
+ Op1C = APInt::getZero(ValWidth);
41380
+ } else if (ISD::isConstantSplatVectorAllOnes(Op1.getNode())) {
41381
+ Op1C = APInt::getAllOnes(ValWidth);
41382
+ } else if (auto *C = dyn_cast<ConstantFPSDNode>(Op1)) {
41383
+ Op1C = C->getValueAPF().bitcastToAPInt();
41384
+ } else if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
41385
+ Op1C = C->getAPIntValue();
41386
+ } else if (ISD::isConstantSplatVector(Op1.getNode(), Op1C)) {
41387
+ // isConstantSplatVector sets `Op1C`.
41388
+ } else {
41389
+ return SDValue();
41390
+ }
41391
+
41392
+ bool Not = false;
41393
+ bool Okay = false;
41394
+ assert(OriginalDemandedBits.getBitWidth() == Op1C.getBitWidth() &&
41395
+ "Invalid constant operand");
41396
+
41397
+ switch (CC) {
41398
+ case ISD::SETGE:
41399
+ case ISD::SETOGE:
41400
+ Not = true;
41401
+ [[fallthrough]];
41402
+ case ISD::SETLT:
41403
+ case ISD::SETOLT:
41404
+ // signbit(sext(x s< 0)) == signbit(x)
41405
+ // signbit(sext(x s>= 0)) == signbit(~x)
41406
+ Okay = ValsEq(Op1C, APInt::getZero(ValWidth));
41407
+ // For float ops we need to ensure Op0 is not de-norm. Otherwise DAZ can break
41408
+ // this fold.
41409
+ // NB: We only need de-norm check here, for the rest of the constants any
41410
+ // relationship with a de-norm value and zero will be identical.
41411
+ if (Okay && OpVT.isFloatingPoint()) {
41412
+ // Values from integers are always normal.
41413
+ if (Op0.getOpcode() == ISD::SINT_TO_FP ||
41414
+ Op0.getOpcode() == ISD::UINT_TO_FP)
41415
+ break;
41416
+
41417
+ // See if we can prove normal with known bits.
41418
+ KnownBits Op0Known =
41419
+ DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth);
41420
+ // Negative/positive doesn't matter.
41421
+ Op0Known.One.clearSignBit();
41422
+ Op0Known.Zero.clearSignBit();
41423
+
41424
+ // Get min normal value.
41425
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(OpVT);
41426
+ KnownBits MinNormal = KnownBits::makeConstant(
41427
+ APFloat::getSmallestNormalized(Sem).bitcastToAPInt());
41428
+ // Are we above de-norm range?
41429
+ std::optional<bool> Op0Normal = KnownBits::uge(Op0Known, MinNormal);
41430
+ Okay = Op0Normal.value_or(false);
41431
+ }
41432
+ break;
41433
+ case ISD::SETGT:
41434
+ case ISD::SETOGT:
41435
+ Not = true;
41436
+ [[fallthrough]];
41437
+ case ISD::SETLE:
41438
+ case ISD::SETOLE:
41439
+ // signbit(sext(x s<= -1)) == signbit(x)
41440
+ // signbit(sext(x s> -1)) == signbit(~x)
41441
+ Okay = ValsEq(Op1C, APInt::getAllOnes(ValWidth));
41442
+ break;
41443
+ case ISD::SETULT:
41444
+ Not = true;
41445
+ [[fallthrough]];
41446
+ case ISD::SETUGE:
41447
+ // signbit(sext(x u>= SIGNED_MIN)) == signbit(x)
41448
+ // signbit(sext(x u< SIGNED_MIN)) == signbit(~x)
41449
+ Okay = !OpVT.isFloatingPoint() && ValsEq(Op1C, OriginalDemandedBits);
41450
+ break;
41451
+ case ISD::SETULE:
41452
+ Not = true;
41453
+ [[fallthrough]];
41454
+ case ISD::SETUGT:
41455
+ // signbit(sext(x u> SIGNED_MAX)) == signbit(x)
41456
+ // signbit(sext(x u<= SIGNED_MAX)) == signbit(~x)
41457
+ Okay = !OpVT.isFloatingPoint() && ValsEq(Op1C, OriginalDemandedBits - 1);
41458
+ break;
41459
+ default:
41460
+ break;
41461
+ }
41462
+
41463
+ Okay &= Not ? AllowNOT : true;
41464
+ if (!Okay)
41465
+ return SDValue();
41466
+
41467
+ if (!Not)
41468
+ return Op0;
41469
+
41470
+ if (!OpVT.isFloatingPoint())
41471
+ return DAG.getNOT(DL, Op0, OpVT);
41472
+
41473
+ // Possible TODO: We could use `fneg` to implement the NOT.
41474
+ return SDValue();
41475
+ }
41476
+
41477
+ static SDValue simplifySExtOfDecomposedSetCC(SelectionDAG &DAG, SDLoc &DL,
41478
+ ISD::CondCode CC, SDValue Op0,
41479
+ SDValue Op1,
41480
+ const APInt &OriginalDemandedBits,
41481
+ const APInt &OriginalDemandedElts,
41482
+ bool AllowNOT, unsigned Depth) {
41483
+ if (SDValue R = simplifySExtOfDecomposedSetCCImpl(
41484
+ DAG, DL, CC, Op0, Op1, OriginalDemandedBits, OriginalDemandedElts,
41485
+ AllowNOT, Depth))
41486
+ return R;
41487
+ return simplifySExtOfDecomposedSetCCImpl(
41488
+ DAG, DL, ISD::getSetCCSwappedOperands(CC), Op1, Op0, OriginalDemandedBits,
41489
+ OriginalDemandedElts, AllowNOT, Depth);
41490
+ }
41491
+
41344
41492
// Simplify variable target shuffle masks based on the demanded elements.
41345
41493
// TODO: Handle DemandedBits in mask indices as well?
41346
41494
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
@@ -42520,13 +42668,26 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
42520
42668
}
42521
42669
break;
42522
42670
}
42523
- case X86ISD::PCMPGT:
42524
- // icmp sgt(0, R) == ashr(R, BitWidth-1).
42525
- // iff we only need the sign bit then we can use R directly.
42526
- if (OriginalDemandedBits.isSignMask() &&
42527
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
42528
- return TLO.CombineTo(Op, Op.getOperand(1));
42671
+ case X86ISD::PCMPGT: {
42672
+ SDLoc DL(Op);
42673
+ if (SDValue R = simplifySExtOfDecomposedSetCC(
42674
+ TLO.DAG, DL, ISD::SETGT, Op.getOperand(0), Op.getOperand(1),
42675
+ OriginalDemandedBits, OriginalDemandedElts,
42676
+ /*AllowNOT*/ true, Depth))
42677
+ return TLO.CombineTo(Op, R);
42678
+ break;
42679
+ }
42680
+ case X86ISD::CMPP: {
42681
+ SDLoc DL(Op);
42682
+ ISD::CondCode CC = X86::getCondForCMPPImm(
42683
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
42684
+ if (SDValue R = simplifySExtOfDecomposedSetCC(
42685
+ TLO.DAG, DL, CC, Op.getOperand(0), Op.getOperand(1),
42686
+ OriginalDemandedBits, OriginalDemandedElts,
42687
+ !(TLO.LegalOperations() && TLO.LegalTypes()), Depth))
42688
+ return TLO.CombineTo(Op, R);
42529
42689
break;
42690
+ }
42530
42691
case X86ISD::MOVMSK: {
42531
42692
SDValue Src = Op.getOperand(0);
42532
42693
MVT SrcVT = Src.getSimpleValueType();
@@ -42710,13 +42871,25 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
42710
42871
if (DemandedBits.isSignMask())
42711
42872
return Op.getOperand(0);
42712
42873
break;
42713
- case X86ISD::PCMPGT:
42714
- // icmp sgt(0, R) == ashr(R, BitWidth-1).
42715
- // iff we only need the sign bit then we can use R directly.
42716
- if (DemandedBits.isSignMask() &&
42717
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
42718
- return Op.getOperand(1);
42874
+ case X86ISD::PCMPGT: {
42875
+ SDLoc DL(Op);
42876
+ if (SDValue R = simplifySExtOfDecomposedSetCC(
42877
+ DAG, DL, ISD::SETGT, Op.getOperand(0), Op.getOperand(1),
42878
+ DemandedBits, DemandedElts, /*AllowNOT*/ false, Depth))
42879
+ return R;
42880
+ break;
42881
+ }
42882
+ case X86ISD::CMPP: {
42883
+ SDLoc DL(Op);
42884
+ ISD::CondCode CC = X86::getCondForCMPPImm(
42885
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
42886
+ if (SDValue R = simplifySExtOfDecomposedSetCC(DAG, DL, CC, Op.getOperand(0),
42887
+ Op.getOperand(1),
42888
+ DemandedBits, DemandedElts,
42889
+ /*AllowNOT*/ false, Depth))
42890
+ return R;
42719
42891
break;
42892
+ }
42720
42893
case X86ISD::BLENDV: {
42721
42894
// BLENDV: Cond (MSB) ? LHS : RHS
42722
42895
SDValue Cond = Op.getOperand(0);
@@ -48392,7 +48565,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
48392
48565
48393
48566
// We do not split for SSE at all, but we need to split vectors for AVX1 and
48394
48567
// AVX2.
48395
- if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
48568
+ if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
48396
48569
TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {
48397
48570
SDValue LoX, HiX;
48398
48571
std::tie(LoX, HiX) = splitVector(X, DAG, DL);
0 commit comments