@@ -88,6 +88,12 @@ static cl::opt<int> BrMergingBaseCostThresh(
88
88
"to never merge branches."),
89
89
cl::Hidden);
90
90
91
// Extra budget added on top of 'x86-br-merging-base-cost' when the subtarget
// has APX conditional-compare (CCMP/CTEST) support, since merged branches can
// then be lowered more efficiently (see Subtarget.hasCCMP() use in
// getJumpConditionMergingParams).
static cl::opt<int> BrMergingCcmpBias(
    "x86-br-merging-ccmp-bias", cl::init(6),
    cl::desc("Increases 'x86-br-merging-base-cost' in cases that the target "
             "supports conditional compare instructions."),
    cl::Hidden);
96
+
91
97
static cl::opt<int> BrMergingLikelyBias(
92
98
"x86-br-merging-likely-bias", cl::init(0),
93
99
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
@@ -3403,6 +3409,9 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
3403
3409
const Value *Rhs) const {
3404
3410
using namespace llvm::PatternMatch;
3405
3411
int BaseCost = BrMergingBaseCostThresh.getValue();
3412
+ // With CCMP, branches can be merged in a more efficient way.
3413
+ if (BaseCost >= 0 && Subtarget.hasCCMP())
3414
+ BaseCost += BrMergingCcmpBias;
3406
3415
// a == b && a == c is a fast pattern on x86.
3407
3416
ICmpInst::Predicate Pred;
3408
3417
if (BaseCost >= 0 && Opc == Instruction::And &&
@@ -33937,6 +33946,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
33937
33946
NODE_NAME_CASE(TESTUI)
33938
33947
NODE_NAME_CASE(FP80_ADD)
33939
33948
NODE_NAME_CASE(STRICT_FP80_ADD)
33949
+ NODE_NAME_CASE(CCMP)
33950
+ NODE_NAME_CASE(CTEST)
33940
33951
}
33941
33952
return nullptr;
33942
33953
#undef NODE_NAME_CASE
@@ -49208,6 +49219,147 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
49208
49219
return SDValue();
49209
49220
}
49210
49221
49222
/// Fold a CMP/SUB-against-constant of a SETCC result into the SETCC's own
/// flag producer when the only consumer of \p Flag is `brcond ne`:
///
///   cmp(setcc(cc, X), 0)
///   brcond ne
///   ->
///      X
///      brcond cc
///
///   sub(setcc(cc, X), 1)
///   brcond ne
///   ->
///      X
///      brcond ~cc
///
/// if only flag has users
///
/// \p N    is the CMP node or the SUB node (caller guarantees operand 1 is
///         the constant 0 or 1 respectively).
/// \p Flag is the flag result of \p N that feeds the branch.
/// Returns the replacement flag value (X) on success, SDValue() otherwise.
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                        SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &ST) {
  SDValue SetCC = N->getOperand(0);

  // TODO: Remove the check hasCCMP() and update the non-APX tests.
  if (!ST.hasCCMP() || SetCC.getOpcode() != X86ISD::SETCC || !Flag.hasOneUse())
    return SDValue();

  // Check the only user of flag is `brcond ne`.
  SDNode *BrCond = *Flag->uses().begin();
  if (BrCond->getOpcode() != X86ISD::BRCOND)
    return SDValue();
  // Operand index of the condition code on X86ISD::BRCOND.
  unsigned CondNo = 2;
  if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
      X86::COND_NE)
    return SDValue();

  // X is the flag operand the SETCC was testing; it becomes the new flag
  // producer for the branch.
  SDValue X = SetCC.getOperand(1);
  // Replace API is called manually here b/c the number of results may change.
  DAG.ReplaceAllUsesOfValueWith(Flag, X);

  SDValue CCN = SetCC.getOperand(0);
  X86::CondCode CC =
      static_cast<X86::CondCode>(CCN->getAsAPIntVal().getSExtValue());
  X86::CondCode OppositeCC = X86::GetOppositeBranchCondition(CC);
  // Update CC for the consumer of the flag.
  // The old CC is `ne`. Hence, when comparing the result with 0, we are
  // checking if the second condition evaluates to true. When comparing the
  // result with 1, we are checking if the second condition evaluates to false.
  SmallVector<SDValue> Ops(BrCond->op_values());
  if (isNullConstant(N->getOperand(1)))
    Ops[CondNo] = CCN;
  else if (isOneConstant(N->getOperand(1)))
    Ops[CondNo] = DAG.getTargetConstant(OppositeCC, SDLoc(BrCond), MVT::i8);
  else
    llvm_unreachable("expect constant 0 or 1");

  SDValue NewBrCond =
      DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
  // Avoid self-assign error b/c CC1 can be `e/ne`.
  if (BrCond != NewBrCond.getNode())
    DCI.CombineTo(BrCond, NewBrCond);
  return X;
}
49282
+
49283
/// Combine a logical AND/OR of two SETCC nodes into an APX conditional
/// compare/test chain:
///
///   and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y)))
///   ->
///     setcc(cc1, ccmp(X, Y, ~cflags/cflags, cc0/~cc0, flag0))
///
///   and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0)))
///   ->
///     setcc(cc1, ctest(X, X, ~cflags/cflags, cc0/~cc0, flag0))
///
/// where cflags is determined by cc1.
///
/// Requires an APX subtarget (hasCCMP()). Returns the new SETCC on success,
/// SDValue() otherwise.
static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &ST) {
  if (!ST.hasCCMP())
    return SDValue();

  SDValue SetCC0 = N->getOperand(0);
  SDValue SetCC1 = N->getOperand(1);
  if (SetCC0.getOpcode() != X86ISD::SETCC ||
      SetCC1.getOpcode() != X86ISD::SETCC)
    return SDValue();

  // Decide which conditional opcode the right-hand SETCC's flag producer maps
  // to: SUB -> CCMP, CMP-with-zero -> CTEST, anything else -> no combine.
  auto GetCombineToOpc = [&](SDValue V) -> unsigned {
    SDValue Op = V.getOperand(1);
    unsigned Opc = Op.getOpcode();
    if (Opc == X86ISD::SUB)
      return X86ISD::CCMP;
    if (Opc == X86ISD::CMP && isNullConstant(Op.getOperand(1)))
      return X86ISD::CTEST;
    return 0U;
  };

  unsigned NewOpc = 0;

  // AND/OR is commutable. Canonicalize the operands to make SETCC with SUB/CMP
  // appear on the right.
  if (!(NewOpc = GetCombineToOpc(SetCC1))) {
    std::swap(SetCC0, SetCC1);
    if (!(NewOpc = GetCombineToOpc(SetCC1)))
      return SDValue();
  }

  X86::CondCode CC0 =
      static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
  // CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
  if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
    return SDValue();

  bool IsOR = N->getOpcode() == ISD::OR;

  // CMP/TEST is executed and updates the EFLAGS normally only when SrcCC
  // evaluates to true. So we need to inverse CC0 as SrcCC when the logic
  // operator is OR. Similar for CC1.
  SDValue SrcCC =
      IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
                                   SDLoc(SetCC0.getOperand(0)), MVT::i8)
           : SetCC0.getOperand(0);
  SDValue CC1N = SetCC1.getOperand(0);
  X86::CondCode CC1 =
      static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
  X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
  // cflags is the default-flags operand consumed when the compare is NOT
  // executed; pick CC1 (for OR) or ~CC1 (for AND) so the final SETCC(cc1)
  // still yields the short-circuited result.
  X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
  SDLoc DL(N);
  SDValue CFlags = DAG.getTargetConstant(
      X86::getCCMPCondFlagsFromCondCode(CFlagsCC), DL, MVT::i8);
  SDValue Sub = SetCC1.getOperand(1);

  // Replace any uses of the old flag produced by SUB/CMP with the new one
  // produced by CCMP/CTEST.
  // For CTEST the single CMP source is used as both operands (test X, X).
  SDValue CCMP = (NewOpc == X86ISD::CCMP)
                     ? DAG.getNode(X86ISD::CCMP, DL, MVT::i32,
                                   {Sub.getOperand(0), Sub.getOperand(1),
                                    CFlags, SrcCC, SetCC0.getOperand(1)})
                     : DAG.getNode(X86ISD::CTEST, DL, MVT::i32,
                                   {Sub.getOperand(0), Sub.getOperand(0),
                                    CFlags, SrcCC, SetCC0.getOperand(1)});

  return DAG.getNode(X86ISD::SETCC, DL, MVT::i8, {CC1N, CCMP});
}
49362
+
49211
49363
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
49212
49364
TargetLowering::DAGCombinerInfo &DCI,
49213
49365
const X86Subtarget &Subtarget) {
@@ -49291,6 +49443,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
49291
49443
}
49292
49444
}
49293
49445
49446
+ if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
49447
+ return SetCC;
49448
+
49294
49449
if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
49295
49450
return V;
49296
49451
@@ -50076,6 +50231,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
50076
50231
}
50077
50232
}
50078
50233
50234
+ if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
50235
+ return SetCC;
50236
+
50079
50237
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
50080
50238
return R;
50081
50239
@@ -54597,6 +54755,7 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
54597
54755
}
54598
54756
54599
54757
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54758
+ TargetLowering::DAGCombinerInfo &DCI,
54600
54759
const X86Subtarget &Subtarget) {
54601
54760
// Only handle test patterns.
54602
54761
if (!isNullConstant(N->getOperand(1)))
@@ -54611,6 +54770,10 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54611
54770
EVT VT = Op.getValueType();
54612
54771
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
54613
54772
54773
+ if (SDValue CMP =
54774
+ combineX86SubCmpForFlags(N, SDValue(N, 0), DAG, DCI, Subtarget))
54775
+ return CMP;
54776
+
54614
54777
// If we have a constant logical shift that's only used in a comparison
54615
54778
// against zero turn it into an equivalent AND. This allows turning it into
54616
54779
// a TEST instruction later.
@@ -54739,7 +54902,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54739
54902
}
54740
54903
54741
54904
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54742
- TargetLowering::DAGCombinerInfo &DCI) {
54905
+ TargetLowering::DAGCombinerInfo &DCI,
54906
+ const X86Subtarget &ST) {
54743
54907
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
54744
54908
"Expected X86ISD::ADD or X86ISD::SUB");
54745
54909
@@ -54750,6 +54914,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54750
54914
bool IsSub = X86ISD::SUB == N->getOpcode();
54751
54915
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
54752
54916
54917
+ if (IsSub && isOneConstant(N->getOperand(1)) && !N->hasAnyUseOfValue(0))
54918
+ if (SDValue CMP = combineX86SubCmpForFlags(N, SDValue(N, 1), DAG, DCI, ST))
54919
+ return CMP;
54920
+
54753
54921
// If we don't use the flag result, simplify back to a generic ADD/SUB.
54754
54922
if (!N->hasAnyUseOfValue(1)) {
54755
54923
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -57049,11 +57217,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
57049
57217
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
57050
57218
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
57051
57219
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
57052
- case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
57220
+ case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
57053
57221
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
57054
57222
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
57055
57223
case X86ISD::ADD:
57056
- case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
57224
+ case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget );
57057
57225
case X86ISD::SBB: return combineSBB(N, DAG);
57058
57226
case X86ISD::ADC: return combineADC(N, DAG, DCI);
57059
57227
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
0 commit comments