Skip to content

Commit 331eb8a

Browse files
authored
[X86][CodeGen] Support lowering for CCMP/CTEST (#91747)
DAG combine for `CCMP` and `CTESTrr`:
```
and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y)))
->
setcc(cc1, ccmp(X, Y, ~cflags/cflags, cc0/~cc0, flag0))

and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0)))
->
setcc(cc1, ctest(X, X, ~cflags/cflags, cc0/~cc0, flag0))
```
where `cflags` is determined by `cc1`.

Generic DAG combine:
```
cmp(setcc(cc, X), 0)
brcond ne
->
X
brcond cc

sub(setcc(cc, X), 1)
brcond ne
->
X
brcond ~cc
```

Post DAG transform: `ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr`

Pattern match for `CTESTri`:
```
X= and A, B
ctest(X, X, cflags, cc0/, flag0)
->
ctest(A, B, cflags, cc0/, flag0)
```
`CTESTmi` is already handled by the memory folding mechanism in MIR.
1 parent d0bb917 commit 331eb8a

File tree

10 files changed

+2335
-17
lines changed

10 files changed

+2335
-17
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,11 +1801,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
18011801

18021802
if (N->getNumValues() == RV->getNumValues())
18031803
DAG.ReplaceAllUsesWith(N, RV.getNode());
1804-
else {
1805-
assert(N->getValueType(0) == RV.getValueType() &&
1806-
N->getNumValues() == 1 && "Type mismatch");
1804+
else
18071805
DAG.ReplaceAllUsesWith(N, &RV);
1808-
}
18091806

18101807
// Push the new node and any users onto the worklist. Omit this if the
18111808
// new node is the EntryToken (e.g. if a store managed to get optimized

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,11 +1553,16 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
15531553
switch (Opc) {
15541554
default:
15551555
continue;
1556-
// TESTrr+ANDrr/rm -> TESTrr/TESTmr
1556+
// ANDrr/rm + TESTrr -> TESTrr/TESTmr
15571557
case X86::TEST8rr:
15581558
case X86::TEST16rr:
15591559
case X86::TEST32rr:
1560-
case X86::TEST64rr: {
1560+
case X86::TEST64rr:
1561+
// ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr
1562+
case X86::CTEST8rr:
1563+
case X86::CTEST16rr:
1564+
case X86::CTEST32rr:
1565+
case X86::CTEST64rr: {
15611566
auto &Op0 = N->getOperand(0);
15621567
if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
15631568
!Op0.isMachineOpcode())
@@ -1575,8 +1580,11 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
15751580
CASE_ND(AND64rr) {
15761581
if (And->hasAnyUseOfValue(1))
15771582
continue;
1578-
MachineSDNode *Test = CurDAG->getMachineNode(
1579-
Opc, SDLoc(N), MVT::i32, And.getOperand(0), And.getOperand(1));
1583+
SmallVector<SDValue> Ops(N->op_values());
1584+
Ops[0] = And.getOperand(0);
1585+
Ops[1] = And.getOperand(1);
1586+
MachineSDNode *Test =
1587+
CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
15801588
ReplaceUses(N, Test);
15811589
MadeChange = true;
15821590
continue;
@@ -1588,8 +1596,9 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
15881596
if (And->hasAnyUseOfValue(1))
15891597
continue;
15901598
unsigned NewOpc;
1599+
bool IsCTESTCC = X86::isCTESTCC(Opc);
15911600
#define FROM_TO(A, B) \
1592-
CASE_ND(A) NewOpc = X86::B; \
1601+
CASE_ND(A) NewOpc = IsCTESTCC ? X86::C##B : X86::B; \
15931602
break;
15941603
switch (And.getMachineOpcode()) {
15951604
FROM_TO(AND8rm, TEST8mr);
@@ -1600,10 +1609,20 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
16001609
#undef FROM_TO
16011610
#undef CASE_ND
16021611
// Need to swap the memory and register operand.
1603-
SDValue Ops[] = {And.getOperand(1), And.getOperand(2),
1604-
And.getOperand(3), And.getOperand(4),
1605-
And.getOperand(5), And.getOperand(0),
1606-
And.getOperand(6) /* Chain */};
1612+
SmallVector<SDValue> Ops = {And.getOperand(1), And.getOperand(2),
1613+
And.getOperand(3), And.getOperand(4),
1614+
And.getOperand(5), And.getOperand(0)};
1615+
// CC, Cflags.
1616+
if (IsCTESTCC) {
1617+
Ops.push_back(N->getOperand(2));
1618+
Ops.push_back(N->getOperand(3));
1619+
}
1620+
// Chain of memory load
1621+
Ops.push_back(And.getOperand(6));
1622+
// Glue
1623+
if (IsCTESTCC)
1624+
Ops.push_back(N->getOperand(4));
1625+
16071626
MachineSDNode *Test = CurDAG->getMachineNode(
16081627
NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
16091628
CurDAG->setNodeMemRefs(

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 171 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ static cl::opt<int> BrMergingBaseCostThresh(
8888
"to never merge branches."),
8989
cl::Hidden);
9090

91+
// Hidden command-line knob "x86-br-merging-ccmp-bias" (default 6). Its value
// is added to the branch-merging base cost when that cost is non-negative and
// the subtarget reports CCMP support.
static cl::opt<int> BrMergingCcmpBias(
92+
"x86-br-merging-ccmp-bias", cl::init(6),
93+
cl::desc("Increases 'x86-br-merging-base-cost' in cases that the target "
94+
"supports conditional compare instructions."),
95+
cl::Hidden);
96+
9197
static cl::opt<int> BrMergingLikelyBias(
9298
"x86-br-merging-likely-bias", cl::init(0),
9399
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
@@ -3403,6 +3409,9 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
34033409
const Value *Rhs) const {
34043410
using namespace llvm::PatternMatch;
34053411
int BaseCost = BrMergingBaseCostThresh.getValue();
3412+
// With CCMP, branches can be merged in a more efficient way.
3413+
if (BaseCost >= 0 && Subtarget.hasCCMP())
3414+
BaseCost += BrMergingCcmpBias;
34063415
// a == b && a == c is a fast pattern on x86.
34073416
ICmpInst::Predicate Pred;
34083417
if (BaseCost >= 0 && Opc == Instruction::And &&
@@ -33937,6 +33946,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3393733946
NODE_NAME_CASE(TESTUI)
3393833947
NODE_NAME_CASE(FP80_ADD)
3393933948
NODE_NAME_CASE(STRICT_FP80_ADD)
33949+
NODE_NAME_CASE(CCMP)
33950+
NODE_NAME_CASE(CTEST)
3394033951
}
3394133952
return nullptr;
3394233953
#undef NODE_NAME_CASE
@@ -49208,6 +49219,147 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
4920849219
return SDValue();
4920949220
}
4921049221

49222+
/// Fold a compare of an X86ISD::SETCC result against constant 0 or 1 into the
/// X86ISD::BRCOND that consumes the compare's flags.
///
/// \param N    Node whose operand 0 is checked for an X86ISD::SETCC and whose
///             operand 1 must be the constant 0 or 1. Any other value reaches
///             llvm_unreachable, and only after the DAG has already been
///             mutated, so callers must check the constant first.
/// \param Flag The flag result of \p N. It must have exactly one use, and
///             that use must be a BRCOND whose condition is COND_NE.
/// \param DAG  DAG used to replace uses and build the new BRCOND.
/// \param DCI  Combiner info used to replace the old BRCOND with the new one.
/// \param ST   Subtarget; the fold is currently gated on ST.hasCCMP().
/// \returns Operand 1 of the SETCC (X in the patterns below) when the fold
///          fires, otherwise an empty SDValue.
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
49223+
SelectionDAG &DAG,
49224+
TargetLowering::DAGCombinerInfo &DCI,
49225+
const X86Subtarget &ST) {
49226+
// cmp(setcc(cc, X), 0)
49227+
// brcond ne
49228+
// ->
49229+
// X
49230+
// brcond cc
49231+
49232+
// sub(setcc(cc, X), 1)
49233+
// brcond ne
49234+
// ->
49235+
// X
49236+
// brcond ~cc
49237+
//
49238+
// if only flag has users
49239+
49240+
SDValue SetCC = N->getOperand(0);
49241+
49242+
// TODO: Remove the check hasCCMP() and update the non-APX tests.
49243+
if (!ST.hasCCMP() || SetCC.getOpcode() != X86ISD::SETCC || !Flag.hasOneUse())
49244+
return SDValue();
49245+
49246+
// Check the only user of flag is `brcond ne`.
49247+
SDNode *BrCond = *Flag->uses().begin();
49248+
if (BrCond->getOpcode() != X86ISD::BRCOND)
49249+
return SDValue();
// Operand index of the condition code in the BRCOND; it is read here and
// overwritten in the rebuilt operand list below.
49250+
unsigned CondNo = 2;
49251+
if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
49252+
X86::COND_NE)
49253+
return SDValue();
49254+
49255+
SDValue X = SetCC.getOperand(1);
49256+
// Replace API is called manually here b/c the number of results may change.
// From this point on the DAG has been mutated, so the function no longer
// bails out with an empty SDValue.
49257+
DAG.ReplaceAllUsesOfValueWith(Flag, X);
49258+
49259+
SDValue CCN = SetCC.getOperand(0);
49260+
X86::CondCode CC =
49261+
static_cast<X86::CondCode>(CCN->getAsAPIntVal().getSExtValue());
49262+
X86::CondCode OppositeCC = X86::GetOppositeBranchCondition(CC);
49263+
// Update CC for the consumer of the flag.
49264+
// The old CC is `ne`. Hence, when comparing the result with 0, we are
49265+
// checking if the second condition evaluates to true. When comparing the
49266+
// result with 1, we are checking if the second condition evaluates to false.
49267+
SmallVector<SDValue> Ops(BrCond->op_values());
49268+
if (isNullConstant(N->getOperand(1)))
49269+
Ops[CondNo] = CCN;
49270+
else if (isOneConstant(N->getOperand(1)))
49271+
Ops[CondNo] = DAG.getTargetConstant(OppositeCC, SDLoc(BrCond), MVT::i8);
49272+
else
49273+
llvm_unreachable("expect constant 0 or 1");
49274+
49275+
SDValue NewBrCond =
49276+
DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
49277+
// Avoid self-assign error b/c CC1 can be `e/ne`.
49278+
if (BrCond != NewBrCond.getNode())
49279+
DCI.CombineTo(BrCond, NewBrCond);
49280+
return X;
49281+
}
49282+
49283+
/// Fold an AND/OR of two X86ISD::SETCC nodes into a single SETCC that reads
/// the flags of a conditional compare (X86ISD::CCMP) or a conditional test
/// (X86ISD::CTEST).
///
/// \param N   The logic node; its operands 0 and 1 are the SETCC candidates.
///            Any opcode other than ISD::OR is handled as AND.
/// \param DAG DAG used to create the new nodes.
/// \param DCI Combiner info; not referenced in this body.
/// \param ST  Subtarget; the fold requires ST.hasCCMP().
/// \returns The new SETCC, or an empty SDValue when the subtarget lacks CCMP,
///          either operand is not an X86ISD::SETCC, neither SETCC takes its
///          flags from an X86ISD::SUB or an X86ISD::CMP against zero, or the
///          source condition is COND_P/COND_NP.
static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
49284+
TargetLowering::DAGCombinerInfo &DCI,
49285+
const X86Subtarget &ST) {
49286+
// and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y)))
49287+
// ->
49288+
// setcc(cc1, ccmp(X, Y, ~cflags/cflags, cc0/~cc0, flag0))
49289+
49290+
// and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0)))
49291+
// ->
49292+
// setcc(cc1, ctest(X, X, ~cflags/cflags, cc0/~cc0, flag0))
49293+
//
49294+
// where cflags is determined by cc1.
49295+
49296+
if (!ST.hasCCMP())
49297+
return SDValue();
49298+
49299+
SDValue SetCC0 = N->getOperand(0);
49300+
SDValue SetCC1 = N->getOperand(1);
49301+
if (SetCC0.getOpcode() != X86ISD::SETCC ||
49302+
SetCC1.getOpcode() != X86ISD::SETCC)
49303+
return SDValue();
49304+
49305+
// Map the flag producer of a SETCC to the conditional opcode replacing it:
// SUB -> CCMP, CMP against zero -> CTEST, anything else -> 0 (no fold).
// NOTE(review): only the producer's opcode (and the zero constant for CMP) is
// inspected here; other users of the SUB are not checked -- confirm intended.
auto GetCombineToOpc = [&](SDValue V) -> unsigned {
49306+
SDValue Op = V.getOperand(1);
49307+
unsigned Opc = Op.getOpcode();
49308+
if (Opc == X86ISD::SUB)
49309+
return X86ISD::CCMP;
49310+
if (Opc == X86ISD::CMP && isNullConstant(Op.getOperand(1)))
49311+
return X86ISD::CTEST;
49312+
return 0U;
49313+
};
49314+
49315+
unsigned NewOpc = 0;
49316+
49317+
// AND/OR is commutable. Canonicalize the operands to make SETCC with SUB/CMP
49318+
// appear on the right.
49319+
if (!(NewOpc = GetCombineToOpc(SetCC1))) {
49320+
std::swap(SetCC0, SetCC1);
49321+
if (!(NewOpc = GetCombineToOpc(SetCC1)))
49322+
return SDValue();
49323+
}
49324+
49325+
X86::CondCode CC0 =
49326+
static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
49327+
// CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
49328+
if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
49329+
return SDValue();
49330+
49331+
bool IsOR = N->getOpcode() == ISD::OR;
49332+
49333+
// CMP/TEST is executed and updates the EFLAGS normally only when SrcCC
49334+
// evaluates to true. So we need to inverse CC0 as SrcCC when the logic
49335+
// operator is OR. Similar for CC1.
49336+
SDValue SrcCC =
49337+
IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
49338+
SDLoc(SetCC0.getOperand(0)), MVT::i8)
49339+
: SetCC0.getOperand(0);
49340+
SDValue CC1N = SetCC1.getOperand(0);
49341+
X86::CondCode CC1 =
49342+
static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
49343+
X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
// Condition used to derive the cflags operand: CC1 itself for OR, its
// opposite for AND.
49344+
X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
49345+
SDLoc DL(N);
49346+
SDValue CFlags = DAG.getTargetConstant(
49347+
X86::getCCMPCondFlagsFromCondCode(CFlagsCC), DL, MVT::i8);
// Despite the name, Sub is the flag producer of SetCC1 and may be either the
// X86ISD::SUB or the X86ISD::CMP matched above.
49348+
SDValue Sub = SetCC1.getOperand(1);
49349+
49350+
// Replace any uses of the old flag produced by SUB/CMP with the new one
49351+
// produced by CCMP/CTEST.
// Operand order of the new node: LHS, RHS, cflags, source condition, incoming
// flags (flag0). CTEST passes the tested value as both LHS and RHS.
49352+
SDValue CCMP = (NewOpc == X86ISD::CCMP)
49353+
? DAG.getNode(X86ISD::CCMP, DL, MVT::i32,
49354+
{Sub.getOperand(0), Sub.getOperand(1),
49355+
CFlags, SrcCC, SetCC0.getOperand(1)})
49356+
: DAG.getNode(X86ISD::CTEST, DL, MVT::i32,
49357+
{Sub.getOperand(0), Sub.getOperand(0),
49358+
CFlags, SrcCC, SetCC0.getOperand(1)});
49359+
49360+
return DAG.getNode(X86ISD::SETCC, DL, MVT::i8, {CC1N, CCMP});
49361+
}
49362+
4921149363
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
4921249364
TargetLowering::DAGCombinerInfo &DCI,
4921349365
const X86Subtarget &Subtarget) {
@@ -49291,6 +49443,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
4929149443
}
4929249444
}
4929349445

49446+
if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
49447+
return SetCC;
49448+
4929449449
if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
4929549450
return V;
4929649451

@@ -50076,6 +50231,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
5007650231
}
5007750232
}
5007850233

50234+
if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget))
50235+
return SetCC;
50236+
5007950237
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
5008050238
return R;
5008150239

@@ -54597,6 +54755,7 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
5459754755
}
5459854756

5459954757
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54758+
TargetLowering::DAGCombinerInfo &DCI,
5460054759
const X86Subtarget &Subtarget) {
5460154760
// Only handle test patterns.
5460254761
if (!isNullConstant(N->getOperand(1)))
@@ -54611,6 +54770,10 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5461154770
EVT VT = Op.getValueType();
5461254771
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5461354772

54773+
if (SDValue CMP =
54774+
combineX86SubCmpForFlags(N, SDValue(N, 0), DAG, DCI, Subtarget))
54775+
return CMP;
54776+
5461454777
// If we have a constant logical shift that's only used in a comparison
5461554778
// against zero turn it into an equivalent AND. This allows turning it into
5461654779
// a TEST instruction later.
@@ -54739,7 +54902,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5473954902
}
5474054903

5474154904
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54742-
TargetLowering::DAGCombinerInfo &DCI) {
54905+
TargetLowering::DAGCombinerInfo &DCI,
54906+
const X86Subtarget &ST) {
5474354907
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
5474454908
"Expected X86ISD::ADD or X86ISD::SUB");
5474554909

@@ -54750,6 +54914,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5475054914
bool IsSub = X86ISD::SUB == N->getOpcode();
5475154915
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
5475254916

54917+
if (IsSub && isOneConstant(N->getOperand(1)) && !N->hasAnyUseOfValue(0))
54918+
if (SDValue CMP = combineX86SubCmpForFlags(N, SDValue(N, 1), DAG, DCI, ST))
54919+
return CMP;
54920+
5475354921
// If we don't use the flag result, simplify back to a generic ADD/SUB.
5475454922
if (!N->hasAnyUseOfValue(1)) {
5475554923
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -57049,11 +57217,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5704957217
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
5705057218
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
5705157219
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
57052-
case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
57220+
case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
5705357221
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
5705457222
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
5705557223
case X86ISD::ADD:
57056-
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
57224+
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
5705757225
case X86ISD::SBB: return combineSBB(N, DAG);
5705857226
case X86ISD::ADC: return combineADC(N, DAG, DCI);
5705957227
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,10 @@ namespace llvm {
735735
// Perform an FP80 add after changing precision control in FPCW.
736736
FP80_ADD,
737737

738+
// Conditional compare instructions
739+
CCMP,
740+
CTEST,
741+
738742
/// X86 strict FP compare instructions.
739743
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
740744
STRICT_FCMPS,

0 commit comments

Comments
 (0)