@@ -3412,6 +3412,9 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
3412
3412
const Value *Rhs) const {
3413
3413
using namespace llvm::PatternMatch;
3414
3414
int BaseCost = BrMergingBaseCostThresh.getValue();
3415
+ // With CCMP, branches can be merged in a more efficient way.
3416
+ if (BaseCost >= 0 && Subtarget.hasCCMP())
3417
+ BaseCost += 6;
3415
3418
// a == b && a == c is a fast pattern on x86.
3416
3419
ICmpInst::Predicate Pred;
3417
3420
if (BaseCost >= 0 && Opc == Instruction::And &&
@@ -33970,6 +33973,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
33970
33973
NODE_NAME_CASE(TESTUI)
33971
33974
NODE_NAME_CASE(FP80_ADD)
33972
33975
NODE_NAME_CASE(STRICT_FP80_ADD)
33976
+ NODE_NAME_CASE(CCMP)
33977
+ NODE_NAME_CASE(CTEST)
33973
33978
}
33974
33979
return nullptr;
33975
33980
#undef NODE_NAME_CASE
@@ -54605,7 +54610,187 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
54605
54610
return true;
54606
54611
}
54607
54612
54613
+ static SDValue combineX86SubCmpToCcmpCtestHelper(
54614
+ SDNode *N, SDValue Flag, SDValue SetCC0, SDValue SetCC1, SelectionDAG &DAG,
54615
+ TargetLowering::DAGCombinerInfo &DCI, unsigned NewOpc) {
54616
+ SDValue LHS = N->getOperand(0);
54617
+ SDValue Sub = SetCC1.getOperand(1);
54618
+
54619
+ SDNode *BrCond = *Flag->uses().begin();
54620
+ if (BrCond->getOpcode() != X86ISD::BRCOND)
54621
+ return SDValue();
54622
+ unsigned CondNo = 2;
54623
+ if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
54624
+ X86::COND_NE)
54625
+ return SDValue();
54626
+
54627
+ X86::CondCode CC0 =
54628
+ static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
54629
+ // CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
54630
+ if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
54631
+ return SDValue();
54632
+
54633
+ bool IsOR = LHS.getOpcode() == ISD::OR;
54634
+
54635
+ SDValue SCC =
54636
+ IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
54637
+ SDLoc(SetCC0.getOperand(0)), MVT::i8)
54638
+ : SetCC0.getOperand(0);
54639
+
54640
+ SDValue CC1N = SetCC1.getOperand(0);
54641
+ X86::CondCode CC1 =
54642
+ static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
54643
+ X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
54644
+ X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
54645
+ SDValue CFlags = DAG.getTargetConstant(
54646
+ X86::getCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
54647
+ SDValue CCMP = (NewOpc == X86ISD::CCMP)
54648
+ ? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
54649
+ {Sub.getOperand(0), Sub.getOperand(1),
54650
+ CFlags, SCC, SetCC0.getOperand(1)})
54651
+ : DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
54652
+ {Sub.getOperand(0), Sub.getOperand(0),
54653
+ CFlags, SCC, SetCC0.getOperand(1)});
54654
+ DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
54655
+
54656
+ SmallVector<SDValue> Ops(BrCond->op_values());
54657
+ if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
54658
+ Ops[CondNo] = CC1N;
54659
+ else if (isOneConstant(N->getOperand(1)))
54660
+ Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
54661
+
54662
+ SDValue NewBrCond =
54663
+ DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
54664
+ if (BrCond != NewBrCond.getNode()) {
54665
+ DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
54666
+ DCI.recursivelyDeleteUnusedNodes(BrCond);
54667
+ }
54668
+ return CCMP;
54669
+ }
54670
+
54671
+ static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
54672
+ SelectionDAG &DAG,
54673
+ TargetLowering::DAGCombinerInfo &DCI,
54674
+ const X86Subtarget &ST) {
54675
+ // cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
54676
+ // brcond ne
54677
+ //
54678
+ // ->
54679
+ //
54680
+ // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
54681
+ // brcond cc1
54682
+ //
54683
+ //
54684
+ // sub(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
54685
+ // brcond ne
54686
+ //
54687
+ // ->
54688
+ //
54689
+ // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
54690
+ // brcond ~cc1
54691
+ //
54692
+ // if only flag has users, where cflags is determined by cc1.
54693
+
54694
+ SDValue LHS = N->getOperand(0);
54695
+
54696
+ if (!ST.hasCCMP() ||
54697
+ (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
54698
+ !Flag.hasOneUse())
54699
+ return SDValue();
54700
+
54701
+ SDValue SetCC0 = LHS.getOperand(0);
54702
+ SDValue SetCC1 = LHS.getOperand(1);
54703
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
54704
+ SetCC1.getOpcode() != X86ISD::SETCC)
54705
+ return SDValue();
54706
+
54707
+ // and/or is commutable. Try to commute the operands and then test again.
54708
+ if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB) {
54709
+ std::swap(SetCC0, SetCC1);
54710
+ if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB)
54711
+ return SDValue();
54712
+ }
54713
+
54714
+ return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
54715
+ X86ISD::CCMP);
54716
+ }
54717
+
54718
+ static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
54719
+ SelectionDAG &DAG,
54720
+ TargetLowering::DAGCombinerInfo &DCI,
54721
+ const X86Subtarget &ST) {
54722
+ // cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
54723
+ // brcond ne
54724
+ //
54725
+ // ->
54726
+ //
54727
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
54728
+ // brcond cc1
54729
+ //
54730
+ //
54731
+ // sub(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 1)
54732
+ // brcond ne
54733
+ //
54734
+ // ->
54735
+ //
54736
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
54737
+ // brcond ~cc1
54738
+ //
54739
+ // if only flag has users, where cflags is determined by cc1.
54740
+
54741
+ SDValue LHS = N->getOperand(0);
54742
+
54743
+ if (!ST.hasCCMP() ||
54744
+ (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
54745
+ !Flag.hasOneUse())
54746
+ return SDValue();
54747
+
54748
+ SDValue SetCC0 = LHS.getOperand(0);
54749
+ SDValue SetCC1 = LHS.getOperand(1);
54750
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
54751
+ SetCC1.getOpcode() != X86ISD::SETCC)
54752
+ return SDValue();
54753
+
54754
+ auto IsOp1CmpZero = [&](SDValue V) {
54755
+ SDValue Op = V.getOperand(1);
54756
+ return Op.getOpcode() == X86ISD::CMP && isNullConstant(Op.getOperand(1));
54757
+ };
54758
+ // and/or is commutable. Try to commute the operands and then test again.
54759
+ if (!IsOp1CmpZero(SetCC1)) {
54760
+ std::swap(SetCC0, SetCC1);
54761
+ if (!IsOp1CmpZero(SetCC1))
54762
+ return SDValue();
54763
+ }
54764
+
54765
+ return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
54766
+ X86ISD::CTEST);
54767
+ }
54768
+
54769
+ static bool isOnlyFlagUsedX86SubOne(SDNode *N) {
54770
+ return N->getOpcode() == X86ISD::SUB && isOneConstant(N->getOperand(1)) &&
54771
+ !N->hasAnyUseOfValue(0);
54772
+ }
54773
+
54774
+ static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
54775
+ TargetLowering::DAGCombinerInfo &DCI,
54776
+ const X86Subtarget &ST) {
54777
+ if (!isOnlyFlagUsedX86SubOne(N))
54778
+ return SDValue();
54779
+
54780
+ return combineX86SubCmpToCcmp(N, SDValue(N, 1), DAG, DCI, ST);
54781
+ }
54782
+
54783
+ static SDValue combineX86SubToCtest(SDNode *N, SelectionDAG &DAG,
54784
+ TargetLowering::DAGCombinerInfo &DCI,
54785
+ const X86Subtarget &ST) {
54786
+ if (!isOnlyFlagUsedX86SubOne(N))
54787
+ return SDValue();
54788
+
54789
+ return combineX86SubCmpToCtest(N, SDValue(N, 1), DAG, DCI, ST);
54790
+ }
54791
+
54608
54792
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54793
+ TargetLowering::DAGCombinerInfo &DCI,
54609
54794
const X86Subtarget &Subtarget) {
54610
54795
// Only handle test patterns.
54611
54796
if (!isNullConstant(N->getOperand(1)))
@@ -54620,6 +54805,14 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54620
54805
EVT VT = Op.getValueType();
54621
54806
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
54622
54807
54808
+ if (SDValue CCMP =
54809
+ combineX86SubCmpToCcmp(N, SDValue(N, 0), DAG, DCI, Subtarget))
54810
+ return CCMP;
54811
+
54812
+ if (SDValue CTEST =
54813
+ combineX86SubCmpToCtest(N, SDValue(N, 0), DAG, DCI, Subtarget))
54814
+ return CTEST;
54815
+
54623
54816
// If we have a constant logical shift that's only used in a comparison
54624
54817
// against zero turn it into an equivalent AND. This allows turning it into
54625
54818
// a TEST instruction later.
@@ -54748,7 +54941,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54748
54941
}
54749
54942
54750
54943
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54751
- TargetLowering::DAGCombinerInfo &DCI) {
54944
+ TargetLowering::DAGCombinerInfo &DCI,
54945
+ const X86Subtarget &ST) {
54752
54946
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
54753
54947
"Expected X86ISD::ADD or X86ISD::SUB");
54754
54948
@@ -54759,6 +54953,12 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54759
54953
bool IsSub = X86ISD::SUB == N->getOpcode();
54760
54954
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
54761
54955
54956
+ if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
54957
+ return CCMP;
54958
+
54959
+ if (SDValue CTEST = combineX86SubToCtest(N, DAG, DCI, ST))
54960
+ return CTEST;
54961
+
54762
54962
// If we don't use the flag result, simplify back to a generic ADD/SUB.
54763
54963
if (!N->hasAnyUseOfValue(1)) {
54764
54964
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -57058,11 +57258,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
57058
57258
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
57059
57259
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
57060
57260
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
57061
- case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
57261
+ case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
57062
57262
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
57063
57263
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
57064
57264
case X86ISD::ADD:
57065
- case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
57265
+ case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget );
57066
57266
case X86ISD::SBB: return combineSBB(N, DAG);
57067
57267
case X86ISD::ADC: return combineADC(N, DAG, DCI);
57068
57268
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
0 commit comments