|
66 | 66 | #include <iterator>
|
67 | 67 | #include <optional>
|
68 | 68 | #include <string>
|
| 69 | +#include <tuple> |
69 | 70 | #include <utility>
|
70 | 71 | #include <vector>
|
71 | 72 |
|
@@ -668,8 +669,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
|
668 | 669 | {MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64},
|
669 | 670 | Expand);
|
670 | 671 |
|
671 |
| - if (STI.hasHWROT32()) |
| 672 | + if (STI.hasHWROT32()) { |
672 | 673 | setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Legal);
|
| 674 | + setOperationAction({ISD::ROTL, ISD::ROTR, ISD::FSHL, ISD::FSHR}, MVT::i64, |
| 675 | + Custom); |
| 676 | + } |
673 | 677 |
|
674 | 678 | setOperationAction(ISD::BSWAP, MVT::i16, Expand);
|
675 | 679 |
|
@@ -1056,6 +1060,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
1056 | 1060 | MAKE_CASE(NVPTXISD::StoreRetvalV2)
|
1057 | 1061 | MAKE_CASE(NVPTXISD::StoreRetvalV4)
|
1058 | 1062 | MAKE_CASE(NVPTXISD::PseudoUseParam)
|
| 1063 | + MAKE_CASE(NVPTXISD::UNPACK_VECTOR) |
| 1064 | + MAKE_CASE(NVPTXISD::BUILD_VECTOR) |
1059 | 1065 | MAKE_CASE(NVPTXISD::RETURN)
|
1060 | 1066 | MAKE_CASE(NVPTXISD::CallSeqBegin)
|
1061 | 1067 | MAKE_CASE(NVPTXISD::CallSeqEnd)
|
@@ -2758,6 +2764,61 @@ static SDValue lowerCTLZCTPOP(SDValue Op, SelectionDAG &DAG) {
|
2758 | 2764 | return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, CT, SDNodeFlags::NonNeg);
|
2759 | 2765 | }
|
2760 | 2766 |
|
| 2767 | +static SDValue expandFSH64(SDValue A, SDValue B, SDValue ShiftAmount, SDLoc DL, |
| 2768 | + unsigned Opcode, SelectionDAG &DAG) { |
| 2769 | + assert(A.getValueType() == MVT::i64 && B.getValueType() == MVT::i64); |
| 2770 | + |
| 2771 | + const auto *AmtConst = dyn_cast<ConstantSDNode>(ShiftAmount); |
| 2772 | + if (!AmtConst) |
| 2773 | + return SDValue(); |
| 2774 | + const auto Amt = AmtConst->getZExtValue() & 63; |
| 2775 | + |
| 2776 | + SDValue UnpackA = |
| 2777 | + DAG.getNode(NVPTXISD::UNPACK_VECTOR, DL, {MVT::i32, MVT::i32}, A); |
| 2778 | + SDValue UnpackB = |
| 2779 | + DAG.getNode(NVPTXISD::UNPACK_VECTOR, DL, {MVT::i32, MVT::i32}, B); |
| 2780 | + |
| 2781 | + // Arch is Little endiain: 0 = low bits, 1 = high bits |
| 2782 | + SDValue ALo = UnpackA.getValue(0); |
| 2783 | + SDValue AHi = UnpackA.getValue(1); |
| 2784 | + SDValue BLo = UnpackB.getValue(0); |
| 2785 | + SDValue BHi = UnpackB.getValue(1); |
| 2786 | + |
| 2787 | + // The bitfeild consists of { AHi : ALo : BHi : BLo } |
| 2788 | + // |
| 2789 | + // * FSHL, Amt < 32 - The window will contain { AHi : ALo : BHi } |
| 2790 | + // * FSHL, Amt >= 32 - The window will contain { ALo : BHi : BLo } |
| 2791 | + // * FSHR, Amt < 32 - The window will contain { ALo : BHi : BLo } |
| 2792 | + // * FSHR, Amt >= 32 - The window will contain { AHi : ALo : BHi } |
| 2793 | + // |
| 2794 | + // Note that Amt = 0 and Amt = 32 are special cases where 32-bit funnel shifts |
| 2795 | + // are not needed at all. Amt = 0 is a no-op producing either A or B depending |
| 2796 | + // on the direction. Amt = 32 can be implemented by a packing and unpacking |
| 2797 | + // move to select and arrange the 32bit values. For simplicity, these cases |
| 2798 | + // are not handled here explicitly and instead we rely on DAGCombiner to |
| 2799 | + // remove the no-op funnel shifts we insert. |
| 2800 | + auto [High, Mid, Low] = ((Opcode == ISD::FSHL) == (Amt < 32)) |
| 2801 | + ? std::make_tuple(AHi, ALo, BHi) |
| 2802 | + : std::make_tuple(ALo, BHi, BLo); |
| 2803 | + |
| 2804 | + SDValue NewAmt = DAG.getConstant(Amt & 31, DL, MVT::i32); |
| 2805 | + SDValue RHi = DAG.getNode(Opcode, DL, MVT::i32, {High, Mid, NewAmt}); |
| 2806 | + SDValue RLo = DAG.getNode(Opcode, DL, MVT::i32, {Mid, Low, NewAmt}); |
| 2807 | + |
| 2808 | + return DAG.getNode(NVPTXISD::BUILD_VECTOR, DL, MVT::i64, {RLo, RHi}); |
| 2809 | +} |
| 2810 | + |
| 2811 | +static SDValue lowerFSH(SDValue Op, SelectionDAG &DAG) { |
| 2812 | + return expandFSH64(Op->getOperand(0), Op->getOperand(1), Op->getOperand(2), |
| 2813 | + SDLoc(Op), Op->getOpcode(), DAG); |
| 2814 | +} |
| 2815 | + |
| 2816 | +static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) { |
| 2817 | + unsigned Opcode = Op->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR; |
| 2818 | + return expandFSH64(Op->getOperand(0), Op->getOperand(0), Op->getOperand(1), |
| 2819 | + SDLoc(Op), Opcode, DAG); |
| 2820 | +} |
| 2821 | + |
2761 | 2822 | SDValue
|
2762 | 2823 | NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
2763 | 2824 | switch (Op.getOpcode()) {
|
@@ -2818,6 +2879,12 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
2818 | 2879 | return LowerVAARG(Op, DAG);
|
2819 | 2880 | case ISD::VASTART:
|
2820 | 2881 | return LowerVASTART(Op, DAG);
|
| 2882 | + case ISD::FSHL: |
| 2883 | + case ISD::FSHR: |
| 2884 | + return lowerFSH(Op, DAG); |
| 2885 | + case ISD::ROTL: |
| 2886 | + case ISD::ROTR: |
| 2887 | + return lowerROT(Op, DAG); |
2821 | 2888 | case ISD::ABS:
|
2822 | 2889 | case ISD::SMIN:
|
2823 | 2890 | case ISD::SMAX:
|
|
0 commit comments