Skip to content

Commit 5759bb6

Browse files
committed
[X86][CodeGen] Support lowering for CCMP/CTEST + transform CTESTrr+ANDrr/rm -> CTESTrr/CTESTmr
1 parent e90126e commit 5759bb6

File tree

10 files changed

+1981
-15
lines changed

10 files changed

+1981
-15
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1801,11 +1801,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
18011801

18021802
if (N->getNumValues() == RV->getNumValues())
18031803
DAG.ReplaceAllUsesWith(N, RV.getNode());
1804-
else {
1805-
assert(N->getValueType(0) == RV.getValueType() &&
1806-
N->getNumValues() == 1 && "Type mismatch");
1804+
else
18071805
DAG.ReplaceAllUsesWith(N, &RV);
1808-
}
18091806

18101807
// Push the new node and any users onto the worklist. Omit this if the
18111808
// new node is the EntryToken (e.g. if a store managed to get optimized

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1557,7 +1557,12 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
15571557
case X86::TEST8rr:
15581558
case X86::TEST16rr:
15591559
case X86::TEST32rr:
1560-
case X86::TEST64rr: {
1560+
case X86::TEST64rr:
1561+
// CTESTrr+ANDrr/rm -> CTESTrr/CTESTmr
1562+
case X86::CTEST8rr:
1563+
case X86::CTEST16rr:
1564+
case X86::CTEST32rr:
1565+
case X86::CTEST64rr: {
15611566
auto &Op0 = N->getOperand(0);
15621567
if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
15631568
!Op0.isMachineOpcode())
@@ -1575,8 +1580,11 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
15751580
CASE_ND(AND64rr) {
15761581
if (And->hasAnyUseOfValue(1))
15771582
continue;
1578-
MachineSDNode *Test = CurDAG->getMachineNode(
1579-
Opc, SDLoc(N), MVT::i32, And.getOperand(0), And.getOperand(1));
1583+
SmallVector<SDValue> Ops(N->op_values());
1584+
Ops[0] = And.getOperand(0);
1585+
Ops[1] = And.getOperand(1);
1586+
MachineSDNode *Test =
1587+
CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
15801588
ReplaceUses(N, Test);
15811589
MadeChange = true;
15821590
continue;
@@ -1588,8 +1596,9 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
15881596
if (And->hasAnyUseOfValue(1))
15891597
continue;
15901598
unsigned NewOpc;
1599+
unsigned NumOps = N->getNumOperands();
15911600
#define FROM_TO(A, B) \
1592-
CASE_ND(A) NewOpc = X86::B; \
1601+
CASE_ND(A) NewOpc = NumOps > 2 ? X86::C##B : X86::B; \
15931602
break;
15941603
switch (And.getMachineOpcode()) {
15951604
FROM_TO(AND8rm, TEST8mr);
@@ -1600,10 +1609,21 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
16001609
#undef FROM_TO
16011610
#undef CASE_ND
16021611
// Need to swap the memory and register operand.
1603-
SDValue Ops[] = {And.getOperand(1), And.getOperand(2),
1604-
And.getOperand(3), And.getOperand(4),
1605-
And.getOperand(5), And.getOperand(0),
1606-
And.getOperand(6) /* Chain */};
1612+
SmallVector<SDValue> Ops = {And.getOperand(1), And.getOperand(2),
1613+
And.getOperand(3), And.getOperand(4),
1614+
And.getOperand(5), And.getOperand(0)};
1615+
bool IsCTESTCC = X86::isCTESTCC(Opc);
1616+
// CC, Cflags.
1617+
if (IsCTESTCC) {
1618+
Ops.push_back(N->getOperand(2));
1619+
Ops.push_back(N->getOperand(3));
1620+
}
1621+
// Chain
1622+
Ops.push_back(And.getOperand(6));
1623+
// Glue
1624+
if (IsCTESTCC)
1625+
Ops.push_back(N->getOperand(4));
1626+
16071627
MachineSDNode *Test = CurDAG->getMachineNode(
16081628
NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
16091629
CurDAG->setNodeMemRefs(

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 203 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3412,6 +3412,9 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
34123412
const Value *Rhs) const {
34133413
using namespace llvm::PatternMatch;
34143414
int BaseCost = BrMergingBaseCostThresh.getValue();
3415+
// With CCMP, branches can be merged in a more efficient way.
3416+
if (BaseCost >= 0 && Subtarget.hasCCMP())
3417+
BaseCost += 6;
34153418
// a == b && a == c is a fast pattern on x86.
34163419
ICmpInst::Predicate Pred;
34173420
if (BaseCost >= 0 && Opc == Instruction::And &&
@@ -33970,6 +33973,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3397033973
NODE_NAME_CASE(TESTUI)
3397133974
NODE_NAME_CASE(FP80_ADD)
3397233975
NODE_NAME_CASE(STRICT_FP80_ADD)
33976+
NODE_NAME_CASE(CCMP)
33977+
NODE_NAME_CASE(CTEST)
3397333978
}
3397433979
return nullptr;
3397533980
#undef NODE_NAME_CASE
@@ -54605,7 +54610,187 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
5460554610
return true;
5460654611
}
5460754612

54613+
/// Shared rewrite step behind the CCMP and CTEST combines.
///
/// \p N is the compare-like node whose operand 0 is the AND/OR of two SETCCs
/// and \p Flag is the flag result of \p N. \p SetCC0 is the SETCC whose
/// condition and flags feed the new node; \p SetCC1 is the SETCC whose operand
/// 1 supplies the values to compare. \p NewOpc selects X86ISD::CCMP; any other
/// value builds X86ISD::CTEST.
///
/// On success the users of \p Flag are redirected to the new node, the BRCOND
/// that consumed \p Flag is rebuilt with an adjusted condition, and the new
/// node is returned. Returns an empty SDValue when the first user of \p Flag
/// is not a BRCOND on COND_NE, or when the condition of \p SetCC0 is
/// COND_P/COND_NP.
///
/// Only the first user of \p Flag is examined; both callers visible here bail
/// out beforehand unless Flag.hasOneUse().
static SDValue combineX86SubCmpToCcmpCtestHelper(
54614+
SDNode *N, SDValue Flag, SDValue SetCC0, SDValue SetCC1, SelectionDAG &DAG,
54615+
TargetLowering::DAGCombinerInfo &DCI, unsigned NewOpc) {
54616+
SDValue LHS = N->getOperand(0);
54617+
// Named "Sub" for the CCMP path; on the CTEST path the caller has matched
// this operand as an X86ISD::CMP against zero instead.
SDValue Sub = SetCC1.getOperand(1);
54618+
54619+
// The only consumer accepted is a BRCOND whose condition operand is COND_NE.
SDNode *BrCond = *Flag->uses().begin();
54620+
if (BrCond->getOpcode() != X86ISD::BRCOND)
54621+
return SDValue();
54622+
// Index of the condition-code operand within the BRCOND node.
unsigned CondNo = 2;
54623+
if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
54624+
X86::COND_NE)
54625+
return SDValue();
54626+
54627+
X86::CondCode CC0 =
54628+
static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
54629+
// CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
54630+
if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
54631+
return SDValue();
54632+
54633+
bool IsOR = LHS.getOpcode() == ISD::OR;
54634+
54635+
// Source condition for the new node: cc0 for AND, the opposite of cc0 for OR.
SDValue SCC =
54636+
IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
54637+
SDLoc(SetCC0.getOperand(0)), MVT::i8)
54638+
: SetCC0.getOperand(0);
54639+
54640+
SDValue CC1N = SetCC1.getOperand(0);
54641+
X86::CondCode CC1 =
54642+
static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
54643+
X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
54644+
// The cflags immediate is derived from cc1 for OR and from the opposite of
// cc1 for AND.
// NOTE(review): presumably these are the flag values produced when the source
// condition does not hold — confirm against the APX CCMP/CTEST definition.
X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
54645+
SDValue CFlags = DAG.getTargetConstant(
54646+
X86::getCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
54647+
// Operand order of the new node: (lhs, rhs, cflags, source cc, incoming
// flags). CTEST passes the same value as both lhs and rhs.
SDValue CCMP = (NewOpc == X86ISD::CCMP)
54648+
? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
54649+
{Sub.getOperand(0), Sub.getOperand(1),
54650+
CFlags, SCC, SetCC0.getOperand(1)})
54651+
: DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
54652+
{Sub.getOperand(0), Sub.getOperand(0),
54653+
CFlags, SCC, SetCC0.getOperand(1)});
54654+
DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
54655+
54656+
// Rebuild the branch so that it tests cc1 directly (N compared against 0) or
// the opposite of cc1 (N subtracts 1); otherwise the condition is left as is.
SmallVector<SDValue> Ops(BrCond->op_values());
54657+
if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
54658+
Ops[CondNo] = CC1N;
54659+
else if (isOneConstant(N->getOperand(1)))
54660+
Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
54661+
54662+
SDValue NewBrCond =
54663+
DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
54664+
// getNode may hand back the existing BRCOND; only replace and delete the old
// node when a different one was produced.
if (BrCond != NewBrCond.getNode()) {
54665+
DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
54666+
DCI.recursivelyDeleteUnusedNodes(BrCond);
54667+
}
54668+
return CCMP;
54669+
}
54670+
54671+
/// Try to fold a compare of and/or(setcc, setcc) feeding a branch into an
/// X86ISD::CCMP node.
///
/// \p N is the node being combined and \p Flag is its flag result. Returns the
/// new CCMP node, or an empty SDValue when the subtarget lacks CCMP, operand 0
/// of \p N is not an AND/OR of two X86ISD::SETCC nodes, \p Flag has more than
/// one use, or neither SETCC reads the flags of an X86ISD::SUB.
static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
54672+
SelectionDAG &DAG,
54673+
TargetLowering::DAGCombinerInfo &DCI,
54674+
const X86Subtarget &ST) {
54675+
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
54676+
// brcond ne
54677+
//
54678+
// ->
54679+
//
54680+
// ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
54681+
// brcond cc1
54682+
//
54683+
//
54684+
// sub(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
54685+
// brcond ne
54686+
//
54687+
// ->
54688+
//
54689+
// ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
54690+
// brcond ~cc1
54691+
//
54692+
// if only flag has users, where cflags is determined by cc1.
54693+
54694+
SDValue LHS = N->getOperand(0);
54695+
54696+
// Requires CCMP support, an AND/OR on the left-hand side, and a single user
// of the flag result.
if (!ST.hasCCMP() ||
54697+
(LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
54698+
!Flag.hasOneUse())
54699+
return SDValue();
54700+
54701+
SDValue SetCC0 = LHS.getOperand(0);
54702+
SDValue SetCC1 = LHS.getOperand(1);
54703+
if (SetCC0.getOpcode() != X86ISD::SETCC ||
54704+
SetCC1.getOpcode() != X86ISD::SETCC)
54705+
return SDValue();
54706+
54707+
// and/or is commutative. Try to commute the operands and then test again, so
// that SetCC1 ends up being the SETCC fed by the SUB.
54708+
if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB) {
54709+
std::swap(SetCC0, SetCC1);
54710+
if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB)
54711+
return SDValue();
54712+
}
54713+
54714+
return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
54715+
X86ISD::CCMP);
54716+
}
54717+
54718+
/// Try to fold a compare of and/or(setcc, setcc) feeding a branch into an
/// X86ISD::CTEST node.
///
/// \p N is the node being combined and \p Flag is its flag result. Returns the
/// new CTEST node, or an empty SDValue when the subtarget lacks CCMP, operand
/// 0 of \p N is not an AND/OR of two X86ISD::SETCC nodes, \p Flag has more
/// than one use, or neither SETCC reads the flags of an X86ISD::CMP against
/// zero.
static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
54719+
SelectionDAG &DAG,
54720+
TargetLowering::DAGCombinerInfo &DCI,
54721+
const X86Subtarget &ST) {
54722+
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
54723+
// brcond ne
54724+
//
54725+
// ->
54726+
//
54727+
// ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
54728+
// brcond cc1
54729+
//
54730+
//
54731+
// sub(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 1)
54732+
// brcond ne
54733+
//
54734+
// ->
54735+
//
54736+
// ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
54737+
// brcond ~cc1
54738+
//
54739+
// if only flag has users, where cflags is determined by cc1.
54740+
54741+
SDValue LHS = N->getOperand(0);
54742+
54743+
// The gate is hasCCMP(); no separate CTEST feature query is made here.
if (!ST.hasCCMP() ||
54744+
(LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
54745+
!Flag.hasOneUse())
54746+
return SDValue();
54747+
54748+
SDValue SetCC0 = LHS.getOperand(0);
54749+
SDValue SetCC1 = LHS.getOperand(1);
54750+
if (SetCC0.getOpcode() != X86ISD::SETCC ||
54751+
SetCC1.getOpcode() != X86ISD::SETCC)
54752+
return SDValue();
54753+
54754+
// True when the flag operand (operand 1) of V is an X86ISD::CMP whose
// right-hand side is the constant zero.
auto IsOp1CmpZero = [&](SDValue V) {
54755+
SDValue Op = V.getOperand(1);
54756+
return Op.getOpcode() == X86ISD::CMP && isNullConstant(Op.getOperand(1));
54757+
};
54758+
// and/or is commutative. Try to commute the operands and then test again, so
// that SetCC1 ends up being the SETCC fed by the compare against zero.
54759+
if (!IsOp1CmpZero(SetCC1)) {
54760+
std::swap(SetCC0, SetCC1);
54761+
if (!IsOp1CmpZero(SetCC1))
54762+
return SDValue();
54763+
}
54764+
54765+
return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
54766+
X86ISD::CTEST);
54767+
}
54768+
54769+
/// Return true when \p N is an X86ISD::SUB whose second operand is the
/// constant one and whose result value 0 has no users.
static bool isOnlyFlagUsedX86SubOne(SDNode *N) {
54770+
return N->getOpcode() == X86ISD::SUB && isOneConstant(N->getOperand(1)) &&
54771+
!N->hasAnyUseOfValue(0);
54772+
}
54773+
54774+
/// Entry point for the CCMP combine when \p N is an X86ISD::SUB by one whose
/// result value 0 is unused. Forwards result value 1 of \p N as the flag to
/// combineX86SubCmpToCcmp; returns an empty SDValue when \p N does not have
/// that shape.
static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
54775+
TargetLowering::DAGCombinerInfo &DCI,
54776+
const X86Subtarget &ST) {
54777+
if (!isOnlyFlagUsedX86SubOne(N))
54778+
return SDValue();
54779+
54780+
// Result 1 of the SUB node is passed as the flag value to rewrite.
return combineX86SubCmpToCcmp(N, SDValue(N, 1), DAG, DCI, ST);
54781+
}
54782+
54783+
/// Entry point for the CTEST combine when \p N is an X86ISD::SUB by one whose
/// result value 0 is unused. Forwards result value 1 of \p N as the flag to
/// combineX86SubCmpToCtest; returns an empty SDValue when \p N does not have
/// that shape.
static SDValue combineX86SubToCtest(SDNode *N, SelectionDAG &DAG,
54784+
TargetLowering::DAGCombinerInfo &DCI,
54785+
const X86Subtarget &ST) {
54786+
if (!isOnlyFlagUsedX86SubOne(N))
54787+
return SDValue();
54788+
54789+
// Result 1 of the SUB node is passed as the flag value to rewrite.
return combineX86SubCmpToCtest(N, SDValue(N, 1), DAG, DCI, ST);
54790+
}
54791+
5460854792
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54793+
TargetLowering::DAGCombinerInfo &DCI,
5460954794
const X86Subtarget &Subtarget) {
5461054795
// Only handle test patterns.
5461154796
if (!isNullConstant(N->getOperand(1)))
@@ -54620,6 +54805,14 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5462054805
EVT VT = Op.getValueType();
5462154806
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5462254807

54808+
if (SDValue CCMP =
54809+
combineX86SubCmpToCcmp(N, SDValue(N, 0), DAG, DCI, Subtarget))
54810+
return CCMP;
54811+
54812+
if (SDValue CTEST =
54813+
combineX86SubCmpToCtest(N, SDValue(N, 0), DAG, DCI, Subtarget))
54814+
return CTEST;
54815+
5462354816
// If we have a constant logical shift that's only used in a comparison
5462454817
// against zero turn it into an equivalent AND. This allows turning it into
5462554818
// a TEST instruction later.
@@ -54748,7 +54941,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5474854941
}
5474954942

5475054943
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54751-
TargetLowering::DAGCombinerInfo &DCI) {
54944+
TargetLowering::DAGCombinerInfo &DCI,
54945+
const X86Subtarget &ST) {
5475254946
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
5475354947
"Expected X86ISD::ADD or X86ISD::SUB");
5475454948

@@ -54759,6 +54953,12 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5475954953
bool IsSub = X86ISD::SUB == N->getOpcode();
5476054954
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
5476154955

54956+
if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
54957+
return CCMP;
54958+
54959+
if (SDValue CTEST = combineX86SubToCtest(N, DAG, DCI, ST))
54960+
return CTEST;
54961+
5476254962
// If we don't use the flag result, simplify back to a generic ADD/SUB.
5476354963
if (!N->hasAnyUseOfValue(1)) {
5476454964
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -57058,11 +57258,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5705857258
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
5705957259
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
5706057260
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
57061-
case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
57261+
case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
5706257262
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
5706357263
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
5706457264
case X86ISD::ADD:
57065-
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
57265+
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
5706657266
case X86ISD::SBB: return combineSBB(N, DAG);
5706757267
case X86ISD::ADC: return combineADC(N, DAG, DCI);
5706857268
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -789,6 +789,10 @@ namespace llvm {
789789
// Perform an FP80 add after changing precision control in FPCW.
790790
STRICT_FP80_ADD,
791791

792+
// Conditional compare instructions
793+
CCMP,
794+
CTEST,
795+
792796
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
793797
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
794798

0 commit comments

Comments
 (0)