Skip to content

Commit 6637ab5

Browse files
committed
[X86][CodeGen] Support lowering for CCMP/CTEST
1 parent e586556 commit 6637ab5

File tree

9 files changed

+1956
-7
lines changed

9 files changed

+1956
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,11 +1801,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
18011801

18021802
if (N->getNumValues() == RV->getNumValues())
18031803
DAG.ReplaceAllUsesWith(N, RV.getNode());
1804-
else {
1805-
assert(N->getValueType(0) == RV.getValueType() &&
1806-
N->getNumValues() == 1 && "Type mismatch");
1804+
else
18071805
DAG.ReplaceAllUsesWith(N, &RV);
1808-
}
18091806

18101807
// Push the new node and any users onto the worklist. Omit this if the
18111808
// new node is the EntryToken (e.g. if a store managed to get optimized

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 203 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3405,6 +3405,9 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
34053405
const Value *Rhs) const {
34063406
using namespace llvm::PatternMatch;
34073407
int BaseCost = BrMergingBaseCostThresh.getValue();
3408+
// With CCMP, branches can be merged in a more efficient way.
3409+
if (BaseCost >= 0 && Subtarget.hasCCMP())
3410+
BaseCost += 6;
34083411
// a == b && a == c is a fast pattern on x86.
34093412
ICmpInst::Predicate Pred;
34103413
if (BaseCost >= 0 && Opc == Instruction::And &&
@@ -33931,6 +33934,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3393133934
NODE_NAME_CASE(TESTUI)
3393233935
NODE_NAME_CASE(FP80_ADD)
3393333936
NODE_NAME_CASE(STRICT_FP80_ADD)
33937+
NODE_NAME_CASE(CCMP)
33938+
NODE_NAME_CASE(CTEST)
3393433939
}
3393533940
return nullptr;
3393633941
#undef NODE_NAME_CASE
@@ -54566,7 +54571,187 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
5456654571
return true;
5456754572
}
5456854573

54574+
// Shared rewrite step for the CCMP and CTEST combines.
//
// N is the flag-producing compare/sub node, Flag is its flag value, and
// SetCC0/SetCC1 are the two X86ISD::SETCC operands of the and/or feeding N,
// ordered by the caller so that operand 1 of SetCC1 is the node being folded.
// NewOpc selects X86ISD::CCMP; any other value builds X86ISD::CTEST.
//
// Returns the new conditional-compare node. Returns an empty SDValue, before
// any node has been created or replaced, when the first user of Flag is not
// an X86ISD::BRCOND on COND_NE or when cc0 is COND_P/COND_NP.
static SDValue combineX86SubCmpToCcmpCtestHelper(
54575+
SDNode *N, SDValue Flag, SDValue SetCC0, SDValue SetCC1, SelectionDAG &DAG,
54576+
TargetLowering::DAGCombinerInfo &DCI, unsigned NewOpc) {
54577+
SDValue LHS = N->getOperand(0);
54578+
SDValue Sub = SetCC1.getOperand(1);
54579+
54580+
// Only the first user is inspected; both callers in this file bail out
// earlier unless Flag has exactly one use.
SDNode *BrCond = *Flag->uses().begin();
54581+
if (BrCond->getOpcode() != X86ISD::BRCOND)
54582+
return SDValue();
54583+
// Operand index of the condition code on the BRCOND node.
unsigned CondNo = 2;
54584+
if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
54585+
X86::COND_NE)
54586+
return SDValue();
54587+
54588+
X86::CondCode CC0 =
54589+
static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
54590+
// CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
54591+
if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
54592+
return SDValue();
54593+
54594+
bool IsOR = LHS.getOpcode() == ISD::OR;
54595+
54596+
// Source condition of the new node: the inverse of cc0 for OR, and the
// SETCC's own condition-code operand (cc0) for AND.
SDValue SCC =
54597+
IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
54598+
SDLoc(SetCC0.getOperand(0)), MVT::i8)
54599+
: SetCC0.getOperand(0);
54600+
54601+
SDValue CC1N = SetCC1.getOperand(0);
54602+
X86::CondCode CC1 =
54603+
static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
54604+
X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
54605+
// Flags operand of the new node: derived from cc1 for OR and from the
// inverse of cc1 for AND.
X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
54606+
SDValue CFlags = DAG.getTargetConstant(
54607+
X86::getCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
54608+
// CCMP takes both operands of Sub; CTEST is built with Sub's operand 0 as
// both inputs. The remaining operands are identical: flags, source
// condition, and the flag input of SetCC0.
SDValue CCMP = (NewOpc == X86ISD::CCMP)
54609+
? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
54610+
{Sub.getOperand(0), Sub.getOperand(1),
54611+
CFlags, SCC, SetCC0.getOperand(1)})
54612+
: DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
54613+
{Sub.getOperand(0), Sub.getOperand(0),
54614+
CFlags, SCC, SetCC0.getOperand(1)});
54615+
// Point the existing flag user at the new node before rebuilding the branch.
DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
54616+
54617+
// Pick the branch condition: cc1 when N compares against 0, the inverse of
// cc1 when N subtracts 1. Otherwise the original condition is kept.
SmallVector<SDValue> Ops(BrCond->op_values());
54618+
if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
54619+
Ops[CondNo] = CC1N;
54620+
else if (isOneConstant(N->getOperand(1)))
54621+
Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
54622+
54623+
SDValue NewBrCond =
54624+
DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
54625+
// Replace and clean up only when getNode returned a different node.
if (BrCond != NewBrCond.getNode()) {
54626+
DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
54627+
DCI.recursivelyDeleteUnusedNodes(BrCond);
54628+
}
54629+
return CCMP;
54630+
}
54631+
54632+
// Tries to fold a compare/sub of and/or(setcc, setcc) into X86ISD::CCMP.
//
// N is the node being combined and Flag is its flag value. Returns an empty
// SDValue when the subtarget lacks CCMP, operand 0 of N is not an AND/OR of
// two X86ISD::SETCC nodes, Flag does not have exactly one use, or neither
// SETCC reads its flags from an X86ISD::SUB. Otherwise returns the result of
// combineX86SubCmpToCcmpCtestHelper.
static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
54633+
SelectionDAG &DAG,
54634+
TargetLowering::DAGCombinerInfo &DCI,
54635+
const X86Subtarget &ST) {
54636+
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
54637+
// brcond ne
54638+
//
54639+
// ->
54640+
//
54641+
// ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
54642+
// brcond cc1
54643+
//
54644+
//
54645+
// sub(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
54646+
// brcond ne
54647+
//
54648+
// ->
54649+
//
54650+
// ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
54651+
// brcond ~cc1
54652+
//
54653+
// if only flag has users, where cflags is determined by cc1.
54654+
54655+
SDValue LHS = N->getOperand(0);
54656+
54657+
if (!ST.hasCCMP() ||
54658+
(LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
54659+
!Flag.hasOneUse())
54660+
return SDValue();
54661+
54662+
SDValue SetCC0 = LHS.getOperand(0);
54663+
SDValue SetCC1 = LHS.getOperand(1);
54664+
if (SetCC0.getOpcode() != X86ISD::SETCC ||
54665+
SetCC1.getOpcode() != X86ISD::SETCC)
54666+
return SDValue();
54667+
54668+
// and/or is commutable. Try to commute the operands and then test again.
54669+
// After this block SetCC1 is the SETCC whose flag input is an X86ISD::SUB.
if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB) {
54670+
std::swap(SetCC0, SetCC1);
54671+
if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB)
54672+
return SDValue();
54673+
}
54674+
54675+
return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
54676+
X86ISD::CCMP);
54677+
}
54678+
54679+
// Tries to fold a compare/sub of and/or(setcc, setcc) into X86ISD::CTEST.
//
// Same guards as the CCMP variant (subtarget has CCMP, operand 0 of N is an
// AND/OR of two X86ISD::SETCC nodes, Flag has exactly one use), but here one
// SETCC must read its flags from an X86ISD::CMP whose second operand is the
// constant 0. Returns an empty SDValue when any guard fails.
static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
54680+
SelectionDAG &DAG,
54681+
TargetLowering::DAGCombinerInfo &DCI,
54682+
const X86Subtarget &ST) {
54683+
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
54684+
// brcond ne
54685+
//
54686+
// ->
54687+
//
54688+
// ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
54689+
// brcond cc1
54690+
//
54691+
//
54692+
// sub(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 1)
54693+
// brcond ne
54694+
//
54695+
// ->
54696+
//
54697+
// ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
54698+
// brcond ~cc1
54699+
//
54700+
// if only flag has users, where cflags is determined by cc1.
54701+
54702+
SDValue LHS = N->getOperand(0);
54703+
54704+
if (!ST.hasCCMP() ||
54705+
(LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
54706+
!Flag.hasOneUse())
54707+
return SDValue();
54708+
54709+
SDValue SetCC0 = LHS.getOperand(0);
54710+
SDValue SetCC1 = LHS.getOperand(1);
54711+
if (SetCC0.getOpcode() != X86ISD::SETCC ||
54712+
SetCC1.getOpcode() != X86ISD::SETCC)
54713+
return SDValue();
54714+
54715+
// True when V's flag input (operand 1) is an X86ISD::CMP against constant 0.
auto IsOp1CmpZero = [&](SDValue V) {
54716+
SDValue Op = V.getOperand(1);
54717+
return Op.getOpcode() == X86ISD::CMP && isNullConstant(Op.getOperand(1));
54718+
};
54719+
// and/or is commutable. Try to commute the operands and then test again.
54720+
if (!IsOp1CmpZero(SetCC1)) {
54721+
std::swap(SetCC0, SetCC1);
54722+
if (!IsOp1CmpZero(SetCC1))
54723+
return SDValue();
54724+
}
54725+
54726+
return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
54727+
X86ISD::CTEST);
54728+
}
54729+
54730+
// Returns true when N is an X86ISD::SUB whose second operand is the constant 1
// and whose result value 0 has no users.
static bool isOnlyFlagUsedX86SubOne(SDNode *N) {
54731+
return N->getOpcode() == X86ISD::SUB && isOneConstant(N->getOperand(1)) &&
54732+
!N->hasAnyUseOfValue(0);
54733+
}
54734+
54735+
// Attempts the CCMP fold on N, passing result 1 of N as the flag value.
// Returns an empty SDValue unless isOnlyFlagUsedX86SubOne(N) holds.
static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
54736+
TargetLowering::DAGCombinerInfo &DCI,
54737+
const X86Subtarget &ST) {
54738+
if (!isOnlyFlagUsedX86SubOne(N))
54739+
return SDValue();
54740+
54741+
return combineX86SubCmpToCcmp(N, SDValue(N, 1), DAG, DCI, ST);
54742+
}
54743+
54744+
// Attempts the CTEST fold on N, passing result 1 of N as the flag value.
// Returns an empty SDValue unless isOnlyFlagUsedX86SubOne(N) holds.
static SDValue combineX86SubToCtest(SDNode *N, SelectionDAG &DAG,
54745+
TargetLowering::DAGCombinerInfo &DCI,
54746+
const X86Subtarget &ST) {
54747+
if (!isOnlyFlagUsedX86SubOne(N))
54748+
return SDValue();
54749+
54750+
return combineX86SubCmpToCtest(N, SDValue(N, 1), DAG, DCI, ST);
54751+
}
54752+
5456954753
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54754+
TargetLowering::DAGCombinerInfo &DCI,
5457054755
const X86Subtarget &Subtarget) {
5457154756
// Only handle test patterns.
5457254757
if (!isNullConstant(N->getOperand(1)))
@@ -54581,6 +54766,14 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5458154766
EVT VT = Op.getValueType();
5458254767
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5458354768

54769+
if (SDValue CCMP =
54770+
combineX86SubCmpToCcmp(N, SDValue(N, 0), DAG, DCI, Subtarget))
54771+
return CCMP;
54772+
54773+
if (SDValue CTEST =
54774+
combineX86SubCmpToCtest(N, SDValue(N, 0), DAG, DCI, Subtarget))
54775+
return CTEST;
54776+
5458454777
// If we have a constant logical shift that's only used in a comparison
5458554778
// against zero turn it into an equivalent AND. This allows turning it into
5458654779
// a TEST instruction later.
@@ -54709,7 +54902,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5470954902
}
5471054903

5471154904
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54712-
TargetLowering::DAGCombinerInfo &DCI) {
54905+
TargetLowering::DAGCombinerInfo &DCI,
54906+
const X86Subtarget &ST) {
5471354907
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
5471454908
"Expected X86ISD::ADD or X86ISD::SUB");
5471554909

@@ -54720,6 +54914,12 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5472054914
bool IsSub = X86ISD::SUB == N->getOpcode();
5472154915
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
5472254916

54917+
if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
54918+
return CCMP;
54919+
54920+
if (SDValue CTEST = combineX86SubToCtest(N, DAG, DCI, ST))
54921+
return CTEST;
54922+
5472354923
// If we don't use the flag result, simplify back to a generic ADD/SUB.
5472454924
if (!N->hasAnyUseOfValue(1)) {
5472554925
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -57018,11 +57218,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5701857218
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
5701957219
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
5702057220
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
57021-
case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
57221+
case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
5702257222
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
5702357223
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
5702457224
case X86ISD::ADD:
57025-
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
57225+
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
5702657226
case X86ISD::SBB: return combineSBB(N, DAG);
5702757227
case X86ISD::ADC: return combineADC(N, DAG, DCI);
5702857228
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,10 @@ namespace llvm {
789789
// Perform an FP80 add after changing precision control in FPCW.
790790
STRICT_FP80_ADD,
791791

792+
// Conditional compare instructions
793+
CCMP,
794+
CTEST,
795+
792796
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
793797
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
794798

llvm/lib/Target/X86/X86InstrConditionalCompare.td

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,34 @@ let mayLoad = 1 in {
7878
}
7979
}
8080

81+
// Select X86ccmp with two register inputs to the register-register CCMP
// instruction of the matching width (8/16/32/64 bits).
def : Pat<(X86ccmp GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
82+
(CCMP8rr GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
83+
def : Pat<(X86ccmp GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
84+
(CCMP16rr GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
85+
def : Pat<(X86ccmp GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
86+
(CCMP32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
87+
def : Pat<(X86ccmp GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
88+
(CCMP64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
89+
90+
// Select X86ccmp with a register and an immediate to the register-immediate
// CCMP instruction of the matching width.
def : Pat<(X86ccmp GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
91+
(CCMP8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
92+
def : Pat<(X86ccmp GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
93+
(CCMP16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
94+
def : Pat<(X86ccmp GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
95+
(CCMP32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
// CCMP64ri32 takes a 32-bit immediate, so only match 64-bit constants that
// are representable as a sign-extended 32-bit value.
def : Pat<(X86ccmp GR64:$src1, i64immSExt32:$src2, timm:$dcf, timm:$cond, EFLAGS),
97+
(CCMP64ri32 GR64:$src1, i64immSExt32:$src2, timm:$dcf, timm:$cond)>;
98+
99+
// Select X86ccmp whose second input is a load to the register-memory CCMP
// instruction of the matching width, folding the load into the instruction.
def : Pat<(X86ccmp GR8:$src1, (loadi8 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
100+
(CCMP8rm GR8:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
101+
def : Pat<(X86ccmp GR16:$src1, (loadi16 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
102+
(CCMP16rm GR16:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
103+
def : Pat<(X86ccmp GR32:$src1, (loadi32 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
104+
(CCMP32rm GR32:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
105+
def : Pat<(X86ccmp GR64:$src1, (loadi64 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
106+
(CCMP64rm GR64:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
107+
108+
81109
//===----------------------------------------------------------------------===//
82110
// CTEST Instructions
83111
//
@@ -108,3 +136,30 @@ let mayLoad = 1 in {
108136
def CTEST64mr: Ctest<0x85, MRMDestMem, Xi64, i64mem, GR64>;
109137
}
110138
}
139+
140+
// Select X86ctest with two register inputs to the register-register CTEST
// instruction of the matching width (8/16/32/64 bits).
def : Pat<(X86ctest GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
141+
(CTEST8rr GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
142+
def : Pat<(X86ctest GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
143+
(CTEST16rr GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
144+
def : Pat<(X86ctest GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
145+
(CTEST32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
146+
def : Pat<(X86ctest GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
147+
(CTEST64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
148+
149+
// Select X86ctest with a register and an immediate to the register-immediate
// CTEST instruction of the matching width.
def : Pat<(X86ctest GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
150+
(CTEST8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
151+
def : Pat<(X86ctest GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
152+
(CTEST16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
153+
def : Pat<(X86ctest GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
154+
(CTEST32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
// CTEST64ri32 takes a 32-bit immediate, so only match 64-bit constants that
// are representable as a sign-extended 32-bit value.
def : Pat<(X86ctest GR64:$src1, i64immSExt32:$src2, timm:$dcf, timm:$cond, EFLAGS),
156+
(CTEST64ri32 GR64:$src1, i64immSExt32:$src2, timm:$dcf, timm:$cond)>;
157+
158+
// Select X86ctest whose first input is a load to the memory-register CTEST
// instruction of the matching width, folding the load into the instruction.
def : Pat<(X86ctest (loadi8 addr:$src1), GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
159+
(CTEST8mr addr:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
160+
def : Pat<(X86ctest (loadi16 addr:$src1), GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
161+
(CTEST16mr addr:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
162+
def : Pat<(X86ctest (loadi32 addr:$src1), GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
163+
(CTEST32mr addr:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
164+
def : Pat<(X86ctest (loadi64 addr:$src1), GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
165+
(CTEST64mr addr:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;

llvm/lib/Target/X86/X86InstrFragments.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
1212
def SDTX86FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
1313
SDTCisSameAs<1, 2>]>;
1414

15+
// Type profile shared by the CCMP and CTEST nodes: one result, five operands.
// Values 3 and 4 are i8 and value 5 is i32; going by the selection patterns
// for these nodes, those are the $dcf and $cond immediates and EFLAGS.
// NOTE(review): the result type and the two compared operands are left
// unconstrained here, unlike SDTX86CmpTest - confirm that is intended.
def SDTX86Ccmp : SDTypeProfile<1, 5,
16+
[SDTCisVT<3, i8>, SDTCisVT<4, i8>, SDTCisVT<5, i32>]>;
17+
1518
def SDTX86Cmov : SDTypeProfile<1, 4,
1619
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
1720
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
@@ -138,6 +141,9 @@ def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
138141
def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
139142
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
140143

144+
// Pattern-level names for the X86ISD::CCMP and X86ISD::CTEST nodes; both use
// the SDTX86Ccmp type profile and carry no extra node properties.
def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>;
145+
def X86ctest : SDNode<"X86ISD::CTEST", SDTX86Ccmp>;
146+
141147
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
142148
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
143149
[SDNPHasChain]>;

0 commit comments

Comments
 (0)