Skip to content

Commit 6fa2c9c

Browse files
committed
[X86][CodeGen] Support lowering for CCMP/CTEST
1 parent 05f4448 commit 6fa2c9c

File tree

6 files changed

+1374
-7
lines changed

6 files changed

+1374
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1764,11 +1764,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
17641764

17651765
if (N->getNumValues() == RV->getNumValues())
17661766
DAG.ReplaceAllUsesWith(N, RV.getNode());
1767-
else {
1768-
assert(N->getValueType(0) == RV.getValueType() &&
1769-
N->getNumValues() == 1 && "Type mismatch");
1767+
else
17701768
DAG.ReplaceAllUsesWith(N, &RV);
1771-
}
17721769

17731770
// Push the new node and any users onto the worklist. Omit this if the
17741771
// new node is the EntryToken (e.g. if a store managed to get optimized

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 159 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33881,6 +33881,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3388133881
NODE_NAME_CASE(TESTUI)
3388233882
NODE_NAME_CASE(FP80_ADD)
3388333883
NODE_NAME_CASE(STRICT_FP80_ADD)
33884+
NODE_NAME_CASE(CCMP)
33885+
NODE_NAME_CASE(CTEST)
3388433886
}
3388533887
return nullptr;
3388633888
#undef NODE_NAME_CASE
@@ -54508,7 +54510,154 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
5450854510
return true;
5450954511
}
5451054512

54513+
/// Return the destination conditional flags (DCF) immediate for CCMP/CTEST
/// under which the condition code \p CC evaluates to true.
///
/// CCMP/CTEST has two conditional operands:
///  - SCC: source conditional code (same as CMOV)
///  - DCF: destination conditional flags, which has 4 valid bits
///
///   +----+----+----+----+
///   | OF | SF | ZF | CF |
///   +----+----+----+----+
///
/// If SCC (source conditional code) evaluates to false, CCMP/CTEST updates
/// the conditional flags as follows:
///
///   OF = DCF.OF
///   SF = DCF.SF
///   ZF = DCF.ZF
///   CF = DCF.CF
///   PF = DCF.CF
///   AF = 0 (Auxiliary Carry Flag)
///
/// Otherwise, the CMP or TEST is executed and it updates the CSPAZO flags
/// normally.
///
/// NOTE:
///  If SCC = P, then SCC evaluates to true regardless of the CSPAZO value.
///  If SCC = NP, then SCC evaluates to false regardless of the CSPAZO value.
///
/// \param CC the condition code that must hold for the returned flag values.
/// \returns a bitmask built from the CF/ZF/SF/OF bits below; 0 means all four
///          DCF bits are clear.
static int getCondFlagsFromCondCode(X86::CondCode CC) {
  // Bit positions follow the DCF layout above (CF is the lowest bit). PF has
  // no bit of its own: the hardware copies it from DCF.CF, hence the alias.
  enum { CF = 1, ZF = 2, SF = 4, OF = 8, PF = CF };

  switch (CC) {
  default:
    llvm_unreachable("Illegal condition code!");
  // Conditions that already hold when every flag is clear.
  case X86::COND_NO:
  case X86::COND_NE:
  case X86::COND_GE:
  case X86::COND_G:
  case X86::COND_AE:
  case X86::COND_A:
  case X86::COND_NS:
  case X86::COND_NP:
    return 0;
  case X86::COND_O:
    return OF;
  case X86::COND_B:
  case X86::COND_BE:
    return CF;
  case X86::COND_E:
  case X86::COND_LE:
    return ZF;
  case X86::COND_S:
  case X86::COND_L:
    return SF;
  case X86::COND_P:
    return PF;
  }
}
54569+
54570+
/// Try to fold a flag-producing node over an AND of two SETCCs, whose flags
/// feed a conditional branch, into a single X86ISD::CCMP.
///
///   cmp(and(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
///   brcond ne
///
///  OR
///
///   sub(and(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
///   brcond ne
///
///  ->
///
///   ccmp(X, Y, cflags, cc0, flag0)
///   brcond cc1
///
/// if only flag has users, where cflags is determined by cc1.
///
/// NOTE(review): for the sub(..., 1) form, `ne` reads as "AND result != 1";
/// confirm that rewriting the branch to cc1 keeps the intended branch sense.
///
/// \param N    the flag-producing node (operand 0 is expected to be the AND).
/// \param Flag the flags value of \p N whose single user is the branch.
/// \param DAG  the DAG being combined; it is mutated when the fold applies.
/// \param DCI  combiner state, used to delete the replaced branch node.
/// \param ST   subtarget; the fold is only attempted when hasCCMP() is true.
/// \returns the new CCMP value, or an empty SDValue if the pattern does not
///          match.
static SDValue
combineX86SubCmpToCcmpHelper(SDNode *N, SDValue Flag, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &ST) {
  SDValue LHS = N->getOperand(0);

  if (!ST.hasCCMP() || LHS.getOpcode() != ISD::AND || !Flag.hasOneUse())
    return SDValue();

  SDValue SetCC0 = LHS.getOperand(0);
  SDValue SetCC1 = LHS.getOperand(1);
  if (SetCC0.getOpcode() != X86ISD::SETCC ||
      SetCC1.getOpcode() != X86ISD::SETCC)
    return SDValue();

  // and is commutable. Try to commute the operands and then test again, so
  // that SetCC1 is always the SETCC fed by the X86ISD::SUB.
  if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB) {
    std::swap(SetCC0, SetCC1);
    if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB)
      return SDValue();
  }
  SDValue Sub = SetCC1.getOperand(1);

  // Flag has exactly one use (checked above); it must be the branch.
  SDNode *BrCond = *Flag->uses().begin();
  if (BrCond->getOpcode() != X86ISD::BRCOND)
    return SDValue();

  // P/NP as source condition are not usable here (see the SCC = P/NP note on
  // getCondFlagsFromCondCode).
  X86::CondCode CC0 =
      static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
  if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
    return SDValue();

  // When cc0 fails, the flags are loaded from CFlags; pick the value that
  // satisfies the opposite of cc1.
  SDValue CFlags = DAG.getTargetConstant(
      getCondFlagsFromCondCode(X86::GetOppositeBranchCondition(
          static_cast<X86::CondCode>(SetCC1.getConstantOperandVal(0)))),
      SDLoc(BrCond), MVT::i8);
  SDValue CCMP = DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
                             {Sub.getOperand(0), Sub.getOperand(1), CFlags,
                              SetCC0.getOperand(0), SetCC0.getOperand(1)});
  DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);

  SmallVector<SDValue> Ops(BrCond->op_values());
  unsigned CondNo = 2;
  // OldCC is only read by the assert; mark it so NDEBUG builds do not emit an
  // unused-variable warning.
  [[maybe_unused]] X86::CondCode OldCC =
      static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo));
  assert(OldCC == X86::COND_NE && "Unexpected CC");
  // Retarget the branch to test cc1 on the CCMP flags, unless it already does.
  if (Ops[CondNo] != SetCC1.getOperand(0)) {
    Ops[CondNo] = SetCC1.getOperand(0);
    SDValue NewBrCond = DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond),
                                    BrCond->getValueType(0), Ops);
    DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
    DCI.recursivelyDeleteUnusedNodes(BrCond);
  }
  return CCMP;
}
54640+
54641+
/// Attempt the CCMP fold on \p N, treating result 0 of \p N as the flags
/// value to be replaced. Returns whatever combineX86SubCmpToCcmpHelper
/// returns (an empty SDValue when the fold does not apply).
static SDValue combineX86CmpToCcmp(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &ST) {
  // Result 0 of N is forwarded as the flags value.
  const SDValue FlagResult(N, 0);
  return combineX86SubCmpToCcmpHelper(N, FlagResult, DAG, DCI, ST);
}
54647+
54648+
/// Attempt the CCMP fold on an X86ISD::SUB whose right operand is the
/// constant 1 and whose result 0 has no users; result 1 of \p N is passed to
/// the helper as the flags value. Returns an empty SDValue when any of these
/// preconditions fails or the helper does not match.
static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &ST) {
  // Only X86ISD::SUB nodes are candidates.
  if (N->getOpcode() != X86ISD::SUB)
    return SDValue();

  // The subtrahend must be the constant 1.
  if (!isOneConstant(N->getOperand(1)))
    return SDValue();

  // Result 0 must be dead; only result 1 may have users.
  if (N->hasAnyUseOfValue(0))
    return SDValue();

  const SDValue FlagResult(N, 1);
  return combineX86SubCmpToCcmpHelper(N, FlagResult, DAG, DCI, ST);
}
54658+
5451154659
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
54660+
TargetLowering::DAGCombinerInfo &DCI,
5451254661
const X86Subtarget &Subtarget) {
5451354662
// Only handle test patterns.
5451454663
if (!isNullConstant(N->getOperand(1)))
@@ -54523,6 +54672,9 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5452354672
EVT VT = Op.getValueType();
5452454673
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5452554674

54675+
if (SDValue CCMP = combineX86CmpToCcmp(N, DAG, DCI, Subtarget))
54676+
return CCMP;
54677+
5452654678
// If we have a constant logical shift that's only used in a comparison
5452754679
// against zero turn it into an equivalent AND. This allows turning it into
5452854680
// a TEST instruction later.
@@ -54651,7 +54803,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5465154803
}
5465254804

5465354805
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
54654-
TargetLowering::DAGCombinerInfo &DCI) {
54806+
TargetLowering::DAGCombinerInfo &DCI,
54807+
const X86Subtarget &ST) {
5465554808
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
5465654809
"Expected X86ISD::ADD or X86ISD::SUB");
5465754810

@@ -54662,6 +54815,9 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5466254815
bool IsSub = X86ISD::SUB == N->getOpcode();
5466354816
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
5466454817

54818+
if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
54819+
return CCMP;
54820+
5466554821
// If we don't use the flag result, simplify back to a generic ADD/SUB.
5466654822
if (!N->hasAnyUseOfValue(1)) {
5466754823
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -56960,11 +57116,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5696057116
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
5696157117
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
5696257118
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
56963-
case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
57119+
case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
5696457120
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
5696557121
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
5696657122
case X86ISD::ADD:
56967-
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
57123+
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
5696857124
case X86ISD::SBB: return combineSBB(N, DAG);
5696957125
case X86ISD::ADC: return combineADC(N, DAG, DCI);
5697057126
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,10 @@ namespace llvm {
789789
// Perform an FP80 add after changing precision control in FPCW.
790790
STRICT_FP80_ADD,
791791

792+
// Conditional compare instructions
793+
CCMP,
794+
CTEST,
795+
792796
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
793797
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
794798

llvm/lib/Target/X86/X86InstrConditionalCompare.td

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,34 @@ let mayLoad = 1 in {
7878
}
7979
}
8080

81+
// Selection patterns mapping the X86ccmp DAG node onto CCMP instructions.
// Every pattern forwards the $dcf and $cond target immediates unchanged and
// takes EFLAGS as its last input.
//
// Register/register forms: select CCMP{8,16,32,64}rr.
def : Pat<(X86ccmp GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
82+
(CCMP8rr GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
83+
def : Pat<(X86ccmp GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
84+
(CCMP16rr GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
85+
def : Pat<(X86ccmp GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
86+
(CCMP32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
87+
def : Pat<(X86ccmp GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
88+
(CCMP64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
89+
90+
// Register/immediate forms: select CCMP{8,16,32}ri; the 64-bit case selects
// CCMP64ri32.
def : Pat<(X86ccmp GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
91+
(CCMP8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
92+
def : Pat<(X86ccmp GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
93+
(CCMP16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
94+
def : Pat<(X86ccmp GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
95+
(CCMP32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
96+
def : Pat<(X86ccmp GR64:$src1, (i64 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
97+
(CCMP64ri32 GR64:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
98+
99+
// Register/memory forms: the second operand is a load; select
// CCMP{8,16,32,64}rm.
def : Pat<(X86ccmp GR8:$src1, (loadi8 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
100+
(CCMP8rm GR8:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
101+
def : Pat<(X86ccmp GR16:$src1, (loadi16 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
102+
(CCMP16rm GR16:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
103+
def : Pat<(X86ccmp GR32:$src1, (loadi32 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
104+
(CCMP32rm GR32:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
105+
def : Pat<(X86ccmp GR64:$src1, (loadi64 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
106+
(CCMP64rm GR64:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
107+
108+
81109
//===----------------------------------------------------------------------===//
82110
// CTEST Instructions
83111
//
@@ -108,3 +136,6 @@ let mayLoad = 1 in {
108136
def CTEST64mr: Ctest<0x85, MRMDestMem, Xi64, i64mem, GR64>;
109137
}
110138
}
139+
140+
// Select X86ctest on two 32-bit registers to CTEST32rr, forwarding the $dcf
// and $cond target immediates unchanged.
// NOTE(review): only the 32-bit register/register form is matched here;
// confirm whether other widths and operand forms are intentionally left out.
def : Pat<(X86ctest GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
141+
(CTEST32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;

llvm/lib/Target/X86/X86InstrFragments.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
1212
def SDTX86FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
1313
SDTCisSameAs<1, 2>]>;
1414

15+
// Type profile for the conditional compare/test nodes: 1 result and 5
// operands. Indices 3 and 4 are constrained to i8 and index 5 to i32.
// Presumably, with the result at index 0, these are the $dcf, $cond and
// EFLAGS operands used by the X86ccmp/X86ctest patterns; confirm against the
// SDTypeProfile indexing rules.
def SDTX86Ccmp : SDTypeProfile<1, 5,
16+
[SDTCisVT<3, i8>, SDTCisVT<4, i8>, SDTCisVT<5, i32>]>;
17+
1518
def SDTX86Cmov : SDTypeProfile<1, 4,
1619
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
1720
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
@@ -138,6 +141,9 @@ def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
138141
def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
139142
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
140143

144+
// DAG nodes for X86ISD::CCMP and X86ISD::CTEST; both use the SDTX86Ccmp type
// profile and declare no extra node properties.
def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>;
145+
def X86ctest : SDNode<"X86ISD::CTEST", SDTX86Ccmp>;
146+
141147
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
142148
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
143149
[SDNPHasChain]>;

0 commit comments

Comments
 (0)