Skip to content

Commit 3ab33c8

Browse files
committed
[SelectionDAG] Expand [US]CMP using arithmetic on boolean values
The previous expansion of [US]CMP was done using two selects and two compares. It produced decent code, but on many platforms it is better to implement [US]CMP nodes by performing the following operation: [us]cmp(x, y) = (x [us]> y) - (x [us]< y). This patch adds this new expansion, as well as a hook in TargetLowering that allows some targets to keep using the select-based approach.
1 parent 66cd2e0 commit 3ab33c8

File tree

5 files changed

+2374
-2853
lines changed

5 files changed

+2374
-2853
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3390,6 +3390,10 @@ class TargetLoweringBase {
33903390
return isOperationLegalOrCustom(Op, VT);
33913391
}
33923392

3393+
/// Should we expand [US]CMP nodes using two selects and two compares, or by
3394+
/// doing arithmetic on boolean types?
3395+
virtual bool shouldExpandCmpUsingSelects() const { return false; }
3396+
33933397
/// Does this target support complex deinterleaving
33943398
virtual bool isComplexDeinterleavingSupported() const { return false; }
33953399

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10381,14 +10381,24 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
1038110381

1038210382
auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
1038310383
auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10384-
1038510384
SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
1038610385
SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10387-
SDValue SelectZeroOrOne =
10388-
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10389-
DAG.getConstant(0, dl, ResVT));
10390-
return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
10391-
SelectZeroOrOne);
10386+
10387+
// We can't perform arithmetic on i1 values. Extending them would
10388+
// probably result in worse codegen, so let's just use two selects instead.
10389+
// Some targets are also just better off using selects rather than subtraction
10390+
// because one of the conditions can be merged with one of the selects.
10391+
EVT BoolElVT = BoolVT.isVector() ? BoolVT.getVectorElementType() : BoolVT;
10392+
if (shouldExpandCmpUsingSelects() || !BoolElVT.knownBitsGT(MVT::i1)) {
10393+
SDValue SelectZeroOrOne =
10394+
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10395+
DAG.getConstant(0, dl, ResVT));
10396+
return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
10397+
SelectZeroOrOne);
10398+
}
10399+
10400+
return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10401+
ResVT);
1039210402
}
1039310403

1039410404
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,8 @@ class AArch64TargetLowering : public TargetLowering {
907907

908908
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
909909

910+
bool shouldExpandCmpUsingSelects() const override { return true; }
911+
910912
bool isComplexDeinterleavingSupported() const override;
911913
bool isComplexDeinterleavingOperationSupported(
912914
ComplexDeinterleavingOperation Operation, Type *Ty) const override;

0 commit comments

Comments
 (0)