@@ -41340,6 +41340,154 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
41340
41340
return SDValue();
41341
41341
}
41342
41342
41343
+ // Simplify a decomposed (sext (setcc)). Assumes prior check that
41344
+ // bitwidth(sext)==bitwidth(setcc operands).
41345
+ static SDValue simplifySExtOfDecomposedSetCCImpl(
41346
+ SelectionDAG &DAG, const SDLoc &DL, ISD::CondCode CC, SDValue Op0,
41347
+ SDValue Op1, const APInt &OriginalDemandedBits,
41348
+ const APInt &OriginalDemandedElts, bool AllowNOT, unsigned Depth) {
41349
+ // Possible TODO: We could handle any power of two demanded bit + unsigned
41350
+ // comparison. There are no x86 specific comparisons that are unsigned so its
41351
+ // unneeded.
41352
+ if (!OriginalDemandedBits.isSignMask())
41353
+ return SDValue();
41354
+
41355
+ EVT OpVT = Op0.getValueType();
41356
+ // We need need nofpclass(nan inf nzero) to handle floats.
41357
+ auto hasOkayFPFlags = [](SDValue Op) {
41358
+ return Op->getFlags().hasNoNaNs() && Op->getFlags().hasNoInfs() &&
41359
+ Op->getFlags().hasNoSignedZeros();
41360
+ };
41361
+
41362
+ if (OpVT.isFloatingPoint() && !hasOkayFPFlags(Op0))
41363
+ return SDValue();
41364
+
41365
+ auto ValsEq = [OpVT](const APInt &V0, APInt V1) -> bool {
41366
+ if (OpVT.isFloatingPoint()) {
41367
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(OpVT);
41368
+ return V0.eq(APFloat(Sem, V1).bitcastToAPInt());
41369
+ }
41370
+ return V0.eq(V1);
41371
+ };
41372
+
41373
+ // Assume we canonicalized constants to Op1. That isn't always true but we
41374
+ // call this function twice with inverted CC/Operands so its fine either way.
41375
+ APInt Op1C;
41376
+ unsigned ValWidth = OriginalDemandedBits.getBitWidth();
41377
+ if (ISD::isConstantSplatVectorAllZeros(Op1.getNode())) {
41378
+ Op1C = APInt::getZero(ValWidth);
41379
+ } else if (ISD::isConstantSplatVectorAllOnes(Op1.getNode())) {
41380
+ Op1C = APInt::getAllOnes(ValWidth);
41381
+ } else if (auto *C = dyn_cast<ConstantFPSDNode>(Op1)) {
41382
+ Op1C = C->getValueAPF().bitcastToAPInt();
41383
+ } else if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
41384
+ Op1C = C->getAPIntValue();
41385
+ } else if (ISD::isConstantSplatVector(Op1.getNode(), Op1C)) {
41386
+ // isConstantSplatVector sets `Op1C`.
41387
+ } else {
41388
+ return SDValue();
41389
+ }
41390
+
41391
+ bool Not = false;
41392
+ bool Okay = false;
41393
+ assert(OriginalDemandedBits.getBitWidth() == Op1C.getBitWidth() &&
41394
+ "Invalid constant operand");
41395
+
41396
+ switch (CC) {
41397
+ case ISD::SETGE:
41398
+ case ISD::SETOGE:
41399
+ Not = true;
41400
+ [[fallthrough]];
41401
+ case ISD::SETLT:
41402
+ case ISD::SETOLT:
41403
+ // signbit(sext(x s< 0)) == signbit(x)
41404
+ // signbit(sext(x s>= 0)) == signbit(~x)
41405
+ Okay = ValsEq(Op1C, APInt::getZero(ValWidth));
41406
+ // For float ops we need to ensure Op0 is de-norm. Otherwise DAZ can break
41407
+ // this fold.
41408
+ // NB: We only need de-norm check here, for the rest of the constants any
41409
+ // relationship with a de-norm value and zero will be identical.
41410
+ if (Okay && OpVT.isFloatingPoint()) {
41411
+ // Values from integers are always normal.
41412
+ if (Op0.getOpcode() == ISD::SINT_TO_FP ||
41413
+ Op0.getOpcode() == ISD::UINT_TO_FP)
41414
+ break;
41415
+
41416
+ // See if we can prove normal with known bits.
41417
+ KnownBits Op0Known =
41418
+ DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth);
41419
+ // Negative/positive doesn't matter.
41420
+ Op0Known.One.clearSignBit();
41421
+ Op0Known.Zero.clearSignBit();
41422
+
41423
+ // Get min normal value.
41424
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(OpVT);
41425
+ KnownBits MinNormal = KnownBits::makeConstant(
41426
+ APFloat::getSmallestNormalized(Sem).bitcastToAPInt());
41427
+ // Are we above de-norm range?
41428
+ std::optional<bool> Op0Normal = KnownBits::uge(Op0Known, MinNormal);
41429
+ Okay = Op0Normal.value_or(false);
41430
+ }
41431
+ break;
41432
+ case ISD::SETGT:
41433
+ case ISD::SETOGT:
41434
+ Not = true;
41435
+ [[fallthrough]];
41436
+ case ISD::SETLE:
41437
+ case ISD::SETOLE:
41438
+ // signbit(sext(x s<= -1)) == signbit(x)
41439
+ // signbit(sext(x s> -1)) == signbit(~x)
41440
+ Okay = ValsEq(Op1C, APInt::getAllOnes(ValWidth));
41441
+ break;
41442
+ case ISD::SETULT:
41443
+ Not = true;
41444
+ [[fallthrough]];
41445
+ case ISD::SETUGE:
41446
+ // signbit(sext(x u>= SIGNED_MIN)) == signbit(x)
41447
+ // signbit(sext(x u< SIGNED_MIN)) == signbit(~x)
41448
+ Okay = !OpVT.isFloatingPoint() && ValsEq(Op1C, OriginalDemandedBits);
41449
+ break;
41450
+ case ISD::SETULE:
41451
+ Not = true;
41452
+ [[fallthrough]];
41453
+ case ISD::SETUGT:
41454
+ // signbit(sext(x u> SIGNED_MAX)) == signbit(x)
41455
+ // signbit(sext(x u<= SIGNED_MAX)) == signbit(~x)
41456
+ Okay = !OpVT.isFloatingPoint() && ValsEq(Op1C, OriginalDemandedBits - 1);
41457
+ break;
41458
+ default:
41459
+ break;
41460
+ }
41461
+
41462
+ Okay &= Not ? AllowNOT : true;
41463
+ if (!Okay)
41464
+ return SDValue();
41465
+
41466
+ if (!Not)
41467
+ return Op0;
41468
+
41469
+ if (!OpVT.isFloatingPoint())
41470
+ return DAG.getNOT(DL, Op0, OpVT);
41471
+
41472
+ // Possible TODO: We could use `fneg` to do not.
41473
+ return SDValue();
41474
+ }
41475
+
41476
+ static SDValue simplifySExtOfDecomposedSetCC(SelectionDAG &DAG, SDLoc &DL,
41477
+ ISD::CondCode CC, SDValue Op0,
41478
+ SDValue Op1,
41479
+ const APInt &OriginalDemandedBits,
41480
+ const APInt &OriginalDemandedElts,
41481
+ bool AllowNOT, unsigned Depth) {
41482
+ if (SDValue R = simplifySExtOfDecomposedSetCCImpl(
41483
+ DAG, DL, CC, Op0, Op1, OriginalDemandedBits, OriginalDemandedElts,
41484
+ AllowNOT, Depth))
41485
+ return R;
41486
+ return simplifySExtOfDecomposedSetCCImpl(
41487
+ DAG, DL, ISD::getSetCCSwappedOperands(CC), Op1, Op0, OriginalDemandedBits,
41488
+ OriginalDemandedElts, AllowNOT, Depth);
41489
+ }
41490
+
41343
41491
// Simplify variable target shuffle masks based on the demanded elements.
41344
41492
// TODO: Handle DemandedBits in mask indices as well?
41345
41493
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
@@ -42519,13 +42667,26 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
42519
42667
}
42520
42668
break;
42521
42669
}
42522
- case X86ISD::PCMPGT:
42523
- // icmp sgt(0, R) == ashr(R, BitWidth-1).
42524
- // iff we only need the sign bit then we can use R directly.
42525
- if (OriginalDemandedBits.isSignMask() &&
42526
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
42527
- return TLO.CombineTo(Op, Op.getOperand(1));
42670
+ case X86ISD::PCMPGT: {
42671
+ SDLoc DL(Op);
42672
+ if (SDValue R = simplifySExtOfDecomposedSetCC(
42673
+ TLO.DAG, DL, ISD::SETGT, Op.getOperand(0), Op.getOperand(1),
42674
+ OriginalDemandedBits, OriginalDemandedElts,
42675
+ /*AllowNOT*/ true, Depth))
42676
+ return TLO.CombineTo(Op, R);
42677
+ break;
42678
+ }
42679
+ case X86ISD::CMPP: {
42680
+ SDLoc DL(Op);
42681
+ ISD::CondCode CC = X86::getCondForCMPPImm(
42682
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
42683
+ if (SDValue R = simplifySExtOfDecomposedSetCC(
42684
+ TLO.DAG, DL, CC, Op.getOperand(0), Op.getOperand(1),
42685
+ OriginalDemandedBits, OriginalDemandedElts,
42686
+ !(TLO.LegalOperations() && TLO.LegalTypes()), Depth))
42687
+ return TLO.CombineTo(Op, R);
42528
42688
break;
42689
+ }
42529
42690
case X86ISD::MOVMSK: {
42530
42691
SDValue Src = Op.getOperand(0);
42531
42692
MVT SrcVT = Src.getSimpleValueType();
@@ -42709,13 +42870,25 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
42709
42870
if (DemandedBits.isSignMask())
42710
42871
return Op.getOperand(0);
42711
42872
break;
42712
- case X86ISD::PCMPGT:
42713
- // icmp sgt(0, R) == ashr(R, BitWidth-1).
42714
- // iff we only need the sign bit then we can use R directly.
42715
- if (DemandedBits.isSignMask() &&
42716
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
42717
- return Op.getOperand(1);
42873
+ case X86ISD::PCMPGT: {
42874
+ SDLoc DL(Op);
42875
+ if (SDValue R = simplifySExtOfDecomposedSetCC(
42876
+ DAG, DL, ISD::SETGT, Op.getOperand(0), Op.getOperand(1),
42877
+ DemandedBits, DemandedElts, /*AllowNOT*/ false, Depth))
42878
+ return R;
42879
+ break;
42880
+ }
42881
+ case X86ISD::CMPP: {
42882
+ SDLoc DL(Op);
42883
+ ISD::CondCode CC = X86::getCondForCMPPImm(
42884
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
42885
+ if (SDValue R = simplifySExtOfDecomposedSetCC(DAG, DL, CC, Op.getOperand(0),
42886
+ Op.getOperand(1),
42887
+ DemandedBits, DemandedElts,
42888
+ /*AllowNOT*/ false, Depth))
42889
+ return R;
42718
42890
break;
42891
+ }
42719
42892
case X86ISD::BLENDV: {
42720
42893
// BLENDV: Cond (MSB) ? LHS : RHS
42721
42894
SDValue Cond = Op.getOperand(0);
@@ -48391,7 +48564,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
48391
48564
48392
48565
// We do not split for SSE at all, but we need to split vectors for AVX1 and
48393
48566
// AVX2.
48394
- if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
48567
+ if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
48395
48568
TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {
48396
48569
SDValue LoX, HiX;
48397
48570
std::tie(LoX, HiX) = splitVector(X, DAG, DL);
0 commit comments