@@ -7983,6 +7983,50 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
7983
7983
return DAG.getNode (ISD::CTPOP, dl, VT, Op);
7984
7984
}
7985
7985
7986
+ SDValue TargetLowering::CTTZTableLookup (SDNode *Node, SelectionDAG &DAG,
7987
+ const SDLoc &DL, EVT VT, SDValue Op,
7988
+ unsigned BitWidth) const {
7989
+ if (BitWidth != 32 && BitWidth != 64 )
7990
+ return SDValue ();
7991
+ APInt DeBruijn = BitWidth == 32 ? APInt (32 , 0x077CB531U )
7992
+ : APInt (64 , 0x0218A392CD3D5DBFULL );
7993
+ const DataLayout &TD = DAG.getDataLayout ();
7994
+ MachinePointerInfo PtrInfo =
7995
+ MachinePointerInfo::getConstantPool (DAG.getMachineFunction ());
7996
+ unsigned ShiftAmt = BitWidth - Log2_32 (BitWidth);
7997
+ SDValue Neg = DAG.getNode (ISD::SUB, DL, VT, DAG.getConstant (0 , DL, VT), Op);
7998
+ SDValue Lookup = DAG.getNode (
7999
+ ISD::SRL, DL, VT,
8000
+ DAG.getNode (ISD::MUL, DL, VT, DAG.getNode (ISD::AND, DL, VT, Op, Neg),
8001
+ DAG.getConstant (DeBruijn, DL, VT)),
8002
+ DAG.getConstant (ShiftAmt, DL, VT));
8003
+ Lookup = DAG.getSExtOrTrunc (Lookup, DL, getPointerTy (TD));
8004
+
8005
+ SmallVector<uint8_t > Table (BitWidth, 0 );
8006
+ for (unsigned i = 0 ; i < BitWidth; i++) {
8007
+ APInt Shl = DeBruijn.shl (i);
8008
+ APInt Lshr = Shl.lshr (ShiftAmt);
8009
+ Table[Lshr.getZExtValue ()] = i;
8010
+ }
8011
+
8012
+ // Create a ConstantArray in Constant Pool
8013
+ auto *CA = ConstantDataArray::get (*DAG.getContext (), Table);
8014
+ SDValue CPIdx = DAG.getConstantPool (CA, getPointerTy (TD),
8015
+ TD.getPrefTypeAlign (CA->getType ()));
8016
+ SDValue ExtLoad = DAG.getExtLoad (ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode (),
8017
+ DAG.getMemBasePlusOffset (CPIdx, Lookup, DL),
8018
+ PtrInfo, MVT::i8 );
8019
+ if (Node->getOpcode () != ISD::CTLZ_ZERO_UNDEF) {
8020
+ EVT SetCCVT =
8021
+ getSetCCResultType (DAG.getDataLayout (), *DAG.getContext (), VT);
8022
+ SDValue Zero = DAG.getConstant (0 , DL, VT);
8023
+ SDValue SrcIsZero = DAG.getSetCC (DL, SetCCVT, Op, Zero, ISD::SETEQ);
8024
+ ExtLoad = DAG.getSelect (DL, VT, SrcIsZero,
8025
+ DAG.getConstant (BitWidth, DL, VT), ExtLoad);
8026
+ }
8027
+ return ExtLoad;
8028
+ }
8029
+
7986
8030
SDValue TargetLowering::expandCTTZ (SDNode *Node, SelectionDAG &DAG) const {
7987
8031
SDLoc dl (Node);
7988
8032
EVT VT = Node->getValueType (0 );
@@ -8016,6 +8060,12 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8016
8060
!isOperationLegalOrCustomOrPromote (ISD::XOR, VT)))
8017
8061
return SDValue ();
8018
8062
8063
+ // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8064
+ if (!VT.isVector () && isOperationExpand (ISD::CTPOP, VT) &&
8065
+ !isOperationLegal (ISD::CTLZ, VT))
8066
+ if (SDValue V = CTTZTableLookup (Node, DAG, dl, VT, Op, NumBitsPerElt))
8067
+ return V;
8068
+
8019
8069
// for now, we use: { return popcount(~x & (x - 1)); }
8020
8070
// unless the target has ctlz but not ctpop, in which case we use:
8021
8071
// { return 32 - nlz(~x & (x-1)); }
0 commit comments