Skip to content

Commit ab4fc87

Browse files
shubhamnarlawar authored and davemgreen committed
[DAG] Emit table lookup from TargetLowering::expandCTTZ()
This patch emits a table lookup in expandCTTZ. Context: https://reviews.llvm.org/D113291 transforms a set of IR instructions into the cttz intrinsic, but some targets do not support CTTZ or CTLZ. Hence, I generate a table lookup in TargetLowering::expandCTTZ(). Differential Revision: https://reviews.llvm.org/D128911
1 parent b4e9977 commit ab4fc87

File tree

7 files changed

+734
-869
lines changed

7 files changed

+734
-869
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4757,6 +4757,12 @@ class TargetLowering : public TargetLoweringBase {
47574757
/// \returns The expansion result or SDValue() if it fails.
47584758
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
47594759

4760+
/// Expand CTTZ via Table Lookup.
/// The table index is computed from the operand with a De Bruijn multiply
/// and shift, and the table itself is emitted into the constant pool.
4761+
/// \param N Node to expand
/// \param DAG SelectionDAG in which the replacement nodes are created
/// \param DL Location attached to the newly created nodes
/// \param VT Value type of the operand and of the result
/// \param Op Operand whose trailing zero bits are counted
/// \param NumBitsPerElt Bit width of the operation; only 32 and 64 are handled
4762+
/// \returns The expansion result or SDValue() if it fails.
4763+
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4764+
SDValue Op, unsigned NumBitsPerElt) const;
4765+
47604766
/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
47614767
/// vector nodes can only succeed if all operations are legal/custom.
47624768
/// \param N Node to expand

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7983,6 +7983,50 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
79837983
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
79847984
}
79857985

7986+
// Expand a CTTZ / CTTZ_ZERO_UNDEF node into a load from a constant-pool table.
//
// The lowest set bit of Op is isolated with (Op & -Op), multiplied by a
// De Bruijn constant and shifted right so that the top log2(BitWidth) bits
// form a unique index for each bit position. A BitWidth-entry byte table maps
// that index back to the bit position.
//
// \param Node     Node being expanded; its opcode decides whether a zero
//                 input needs an explicit result.
// \param DAG      SelectionDAG in which the replacement nodes are created.
// \param DL       Location attached to the newly created nodes.
// \param VT       Value type of Op and of the result.
// \param Op       Operand whose trailing zero bits are counted.
// \param BitWidth Width of the operation in bits; only 32 and 64 are handled.
// \returns The expansion result, or SDValue() for any other BitWidth.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();

  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);

  // Index = ((Op & -Op) * DeBruijn) >> ShiftAmt.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse mapping at compile time: for every bit position i, the
  // index produced by an input of (1 << i) is (DeBruijn << i) >> ShiftAmt.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);

  // CTTZ_ZERO_UNDEF leaves the result for a zero input undefined, so the raw
  // table load is sufficient. (This used to test CTLZ_ZERO_UNDEF, which a
  // CTTZ node never carries, so the select below was always emitted.)
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // For plain CTTZ a zero input must yield BitWidth. The table path would
  // compute index 0 for it and load the entry for bit 0, so select explicitly.
  EVT SetCCVT = getSetCCResultType(TD, *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero, DAG.getConstant(BitWidth, DL, VT),
                       ExtLoad);
}
8029+
79868030
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
79878031
SDLoc dl(Node);
79888032
EVT VT = Node->getValueType(0);
@@ -8016,6 +8060,12 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
80168060
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
80178061
return SDValue();
80188062

8063+
// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8064+
if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
8065+
!isOperationLegal(ISD::CTLZ, VT))
8066+
if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
8067+
return V;
8068+
80198069
// for now, we use: { return popcount(~x & (x - 1)); }
80208070
// unless the target has ctlz but not ctpop, in which case we use:
80218071
// { return 32 - nlz(~x & (x-1)); }

0 commit comments

Comments
 (0)