Skip to content

[RISCV][SDAG] Prefer ShortForwardBranch to lower sdiv by pow2 #67364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -4937,6 +4937,10 @@ class TargetLowering : public TargetLoweringBase {
SmallVectorImpl<SDNode *> &Created) const;
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const;
/// Build sdiv by power-of-2 (or negated power-of-2) with conditional move
/// instructions: the dividend is selected against itself plus 2**k-1
/// depending on whether it is negative, shifted right arithmetically by k,
/// and the result is negated when \p Divisor is negative. Intermediate
/// nodes built along the way are appended to \p Created.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const;

/// Targets may override this function to provide custom SDIV lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6081,6 +6081,49 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}

/// Expand a signed division by a power-of-2 constant (or its negation) into
/// a compare / select / arithmetic-shift sequence.
/// Ref: "Hacker's Delight" by Henry Warren 10-1
///
/// When a conditional move/branch is preferred, sdiv x, +/-2**k becomes:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
///
/// Every intermediate node is appended to \p Created; the node that is
/// returned is not.
SDValue TargetLowering::buildSDIVPow2WithCMov(
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Dividend = N->getOperand(0);

  // k: the number of trailing zero bits of the divisor.
  unsigned ShiftAmt = Divisor.countr_zero();

  SDValue Zero = DAG.getConstant(0, DL, VT);
  // 2**k - 1, i.e. the low k bits set.
  SDValue Bias = DAG.getConstant(
      APInt::getLowBitsSet(VT.getSizeInBits(), ShiftAmt), DL, VT);

  // A negative dividend has (2**k - 1) added to it before the right shift.
  EVT CondVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue IsNegative = DAG.getSetCC(DL, CondVT, Dividend, Zero, ISD::SETLT);
  Created.push_back(IsNegative.getNode());

  SDValue Biased = DAG.getNode(ISD::ADD, DL, VT, Dividend, Bias);
  Created.push_back(Biased.getNode());

  SDValue Selected =
      DAG.getNode(ISD::SELECT, DL, VT, IsNegative, Biased, Dividend);
  Created.push_back(Selected.getNode());

  // The division itself: arithmetic shift right by k.
  SDValue ShiftBy = DAG.getConstant(ShiftAmt, DL, VT);
  SDValue Quotient = DAG.getNode(ISD::SRA, DL, VT, Selected, ShiftBy);

  // A negative divisor additionally requires negating the shifted value;
  // for a non-negative divisor the shift is already the final result.
  if (!Divisor.isNonNegative()) {
    Created.push_back(Quotient.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, Zero, Quotient);
  }
  return Quotient;
}

/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
Expand Down
28 changes: 1 addition & 27 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16330,33 +16330,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();

SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countr_zero();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);

// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);

Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CSel.getNode());

// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));

// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;

Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}

SDValue
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19487,6 +19487,26 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
return false;
}

/// RISC-V hook for lowering a signed division by a power-of-2 constant.
///
/// Returns \p N itself when integer division is reported as cheap for its
/// type. Returns an empty SDValue when short forward branch optimization is
/// unavailable, when the type is neither i32 nor (on 64-bit subtargets) i64,
/// or when \p Divisor lies outside [-2048, 2048]. Otherwise forwards to
/// TargetLowering::buildSDIVPow2WithCMov.
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  EVT VT = N->getValueType(0);

  // Lower SDIV as SDIV when division is cheap.
  AttributeList FnAttrs =
      DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(VT, FnAttrs))
    return SDValue(N, 0);

  // The transform is only performed with short forward branch opt support,
  // and only for i32, or i64 on a 64-bit subtarget.
  bool IsSupportedType =
      VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit());
  if (!Subtarget.hasShortForwardBranchOpt() || !IsSupportedType)
    return SDValue();

  // Keep 2**k-1 below 2048 so that a single addi/addiw can add it.
  bool DivisorOutOfRange = Divisor.sgt(2048) || Divisor.slt(-2048);
  if (DivisorOutOfRange)
    return SDValue();

  return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,9 @@ class RISCVTargetLowering : public TargetLowering {
/// For available scheduling models FDIV + two independent FMULs are much
/// faster than two FDIVs.
unsigned combineRepeatedFPDivisors() const override;

/// Custom lowering for sdiv by a power-of-2 constant. Keeps the SDIV when
/// integer division is cheap; otherwise, if short forward branch opt is
/// supported, the type is i32 (or i64 on a 64-bit subtarget) and the
/// divisor is within [-2048, 2048], expands via buildSDIVPow2WithCMov.
/// Returns an empty SDValue in the remaining cases.
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
};

namespace RISCV {
Expand Down
32 changes: 3 additions & 29 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22715,38 +22715,12 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Subtarget.is64Bit() && VT == MVT::i64))
return SDValue();

unsigned Lg2 = Divisor.countr_zero();

// If the divisor is 2 or -2, the default expansion is better.
if (Lg2 == 1)
if (Divisor == 2 ||
Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
return SDValue();

SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, VT);
APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CMov.getNode());

// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));

// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;

Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}

/// Result of 'and' is compared against zero. Change to a BT node if possible.
Expand Down
Loading