Skip to content

Commit 6500268

Browse files
authored
[RISCV][SDAG] Prefer ShortForwardBranch to lower sdiv by pow2 (#67364)
This patch lowers `sdiv x, +/-2**k` to `add + select + shift` when the short forward branch optimization is enabled. This instruction sequence is faster than the one generated by the target-independent DAGCombiner. The algorithm is described in ***Hacker's Delight***. This patch also removes the duplicated logic in the X86 and AArch64 backends. We cannot do the same for the PowerPC backend, since it generates a special instruction, `addze`.
1 parent 6b4ac76 commit 6500268

File tree

7 files changed

+452
-56
lines changed

7 files changed

+452
-56
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4919,6 +4919,10 @@ class TargetLowering : public TargetLoweringBase {
49194919
SmallVectorImpl<SDNode *> &Created) const;
49204920
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
49214921
SmallVectorImpl<SDNode *> &Created) const;
4922+
// Build sdiv by power-of-2 with conditional move instructions.
// Shared helper for targets' BuildSDIVPow2 overrides: emits a
// compare + add + select + arithmetic shift right sequence (followed by a
// negation when Divisor is negative). The intermediate nodes that are built
// are appended to Created.
4923+
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
4924+
SelectionDAG &DAG,
4925+
SmallVectorImpl<SDNode *> &Created) const;
49224926

49234927
/// Targets may override this function to provide custom SDIV lowering for
49244928
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6081,6 +6081,49 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
60816081
return SDValue();
60826082
}
60836083

6084+
/// Build sdiv by power-of-2 with conditional move instructions.
6085+
/// Ref: "Hacker's Delight" by Henry Warren, section 10-1.
6086+
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6087+
/// bgez x, label
6088+
/// add x, x, 2**k-1
6089+
/// label:
6090+
/// sra res, x, k
6091+
/// neg res, res (when the divisor is negative)
///
/// \param N       The division node; operand 0 is used as the dividend and
///                result type 0 as the type of every node built here.
/// \param Divisor The constant divisor. Its trailing-zero count is used as k;
///                presumably callers only pass +/-2**k (this is not checked
///                here).
/// \param DAG     The SelectionDAG in which the new nodes are created.
/// \param Created Receives the intermediate nodes built by this function.
///                The node that is returned is not appended.
/// \returns The SRA node for a non-negative divisor, otherwise (0 - SRA).
6092+
SDValue TargetLowering::buildSDIVPow2WithCMov(
6093+
SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6094+
SmallVectorImpl<SDNode *> &Created) const {
6095+
// k: the number of trailing zero bits of the divisor.
unsigned Lg2 = Divisor.countr_zero();
6096+
EVT VT = N->getValueType(0);
6097+
6098+
SDLoc DL(N);
6099+
SDValue N0 = N->getOperand(0);
6100+
SDValue Zero = DAG.getConstant(0, DL, VT);
6101+
// The low k bits set, i.e. the bias 2**k - 1.
APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6102+
SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6103+
6104+
// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
// An arithmetic shift right rounds toward negative infinity, whereas sdiv
// rounds toward zero; the bias corrects the result for negative dividends.
6105+
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6106+
SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6107+
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6108+
// Select the biased value when N0 < 0, otherwise N0 unchanged.
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6109+
6110+
// Report the newly built intermediate nodes to the caller.
Created.push_back(Cmp.getNode());
6111+
Created.push_back(Add.getNode());
6112+
Created.push_back(CMov.getNode());
6113+
6114+
// Divide by pow2.
// NOTE(review): the shift amount constant is built with type VT rather than
// a target-specific shift-amount type -- confirm this is intended.
6115+
SDValue SRA =
6116+
DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6117+
6118+
// If we're dividing by a positive value, we're done. Otherwise, we must
6119+
// negate the result.
6120+
if (Divisor.isNonNegative())
6121+
return SRA;
6122+
6123+
// SRA is now an intermediate node, so it is reported as well; the negation
// is expressed as (0 - SRA).
Created.push_back(SRA.getNode());
6124+
return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6125+
}
6126+
60846127
/// Given an ISD::SDIV node expressing a divide by constant,
60856128
/// return a DAG expression to select that will generate the same value by
60866129
/// multiplying by a magic number.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16356,33 +16356,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
1635616356
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
1635716357
return SDValue();
1635816358

16359-
SDLoc DL(N);
16360-
SDValue N0 = N->getOperand(0);
16361-
unsigned Lg2 = Divisor.countr_zero();
16362-
SDValue Zero = DAG.getConstant(0, DL, VT);
16363-
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
16364-
16365-
// Add (N0 < 0) ? Pow2 - 1 : 0;
16366-
SDValue CCVal;
16367-
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
16368-
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
16369-
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
16370-
16371-
Created.push_back(Cmp.getNode());
16372-
Created.push_back(Add.getNode());
16373-
Created.push_back(CSel.getNode());
16374-
16375-
// Divide by pow2.
16376-
SDValue SRA =
16377-
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
16378-
16379-
// If we're dividing by a positive value, we're done. Otherwise, we must
16380-
// negate the result.
16381-
if (Divisor.isNonNegative())
16382-
return SRA;
16383-
16384-
Created.push_back(SRA.getNode());
16385-
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
16359+
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
1638616360
}
1638716361

1638816362
SDValue

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19654,6 +19654,26 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
1965419654
return false;
1965519655
}
1965619656

19657+
// RISC-V override of the custom lowering hook for sdiv by a power-of-2
// constant. Returns the original node when integer division is reported as
// cheap, an empty SDValue when this lowering is declined, and otherwise the
// sequence built by TargetLowering::buildSDIVPow2WithCMov.
SDValue
19658+
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
19659+
SelectionDAG &DAG,
19660+
SmallVectorImpl<SDNode *> &Created) const {
19661+
// isIntDivCheap is queried with the attributes of the current function.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
19662+
if (isIntDivCheap(N->getValueType(0), Attr))
19663+
return SDValue(N, 0); // Lower SDIV as SDIV
19664+
19665+
// Only perform this transform if short forward branch opt is supported.
19666+
if (!Subtarget.hasShortForwardBranchOpt())
19667+
return SDValue();
19668+
EVT VT = N->getValueType(0);
19669+
// Only i32 is handled, plus i64 when the subtarget is 64-bit.
if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
19670+
return SDValue();
19671+
19672+
// Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
// Divisors of exactly +/-2048 are still accepted (their bias is 2047).
19673+
if (Divisor.sgt(2048) || Divisor.slt(-2048))
19674+
return SDValue();
19675+
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
19676+
}
1965719677
namespace llvm::RISCVVIntrinsicsTable {
1965819678

1965919679
#define GET_RISCVVIntrinsicsTable_IMPL

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,9 @@ class RISCVTargetLowering : public TargetLowering {
956956
/// For available scheduling models FDIV + two independent FMULs are much
957957
/// faster than two FDIVs.
958958
unsigned combineRepeatedFPDivisors() const override;
959+
960+
/// Custom lowering for sdiv by a power-of-2 constant divisor. Nodes built
/// during the lowering are appended to Created.
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
961+
SmallVectorImpl<SDNode *> &Created) const override;
959962
};
960963

961964
namespace RISCV {

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22648,38 +22648,12 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
2264822648
!(Subtarget.is64Bit() && VT == MVT::i64))
2264922649
return SDValue();
2265022650

22651-
unsigned Lg2 = Divisor.countr_zero();
22652-
2265322651
// If the divisor is 2 or -2, the default expansion is better.
22654-
if (Lg2 == 1)
22652+
if (Divisor == 2 ||
22653+
Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
2265522654
return SDValue();
2265622655

22657-
SDLoc DL(N);
22658-
SDValue N0 = N->getOperand(0);
22659-
SDValue Zero = DAG.getConstant(0, DL, VT);
22660-
APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
22661-
SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
22662-
22663-
// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
22664-
SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
22665-
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
22666-
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
22667-
22668-
Created.push_back(Cmp.getNode());
22669-
Created.push_back(Add.getNode());
22670-
Created.push_back(CMov.getNode());
22671-
22672-
// Divide by pow2.
22673-
SDValue SRA =
22674-
DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
22675-
22676-
// If we're dividing by a positive value, we're done. Otherwise, we must
22677-
// negate the result.
22678-
if (Divisor.isNonNegative())
22679-
return SRA;
22680-
22681-
Created.push_back(SRA.getNode());
22682-
return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
22656+
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
2268322657
}
2268422658

2268522659
/// Result of 'and' is compared against zero. Change to a BT node if possible.

0 commit comments

Comments
 (0)