@@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
643
643
// We want to custom lower some of our intrinsics.
644
644
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
645
645
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
646
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
646
647
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
647
648
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
648
649
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
@@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10757
10758
return true;
10758
10759
}
10759
10760
10761
+ static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst,
10762
+ SDValue Src, unsigned SH, unsigned MB,
10763
+ unsigned ME) {
10764
+ assert(SH < 32 && MB < 32 && ME < 32 &&
10765
+ "Invalid argument for rotate insert!");
10766
+ return SDValue(
10767
+ DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32,
10768
+ {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32),
10769
+ DAG.getTargetConstant(MB, Loc, MVT::i32),
10770
+ DAG.getTargetConstant(ME, Loc, MVT::i32)}),
10771
+ 0);
10772
+ }
10773
+
10774
+ static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst,
10775
+ SDValue Src, unsigned SH, unsigned MB,
10776
+ unsigned ME, bool IsPPC64) {
10777
+ assert(SH < 64 && MB < 64 && ME < 64 &&
10778
+ "Invalid argument for rotate insert!");
10779
+ if (IsPPC64) {
10780
+ // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10781
+ if (ME < 63 - SH) {
10782
+ Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
10783
+ DAG.getConstant(ME + SH + 1, Loc, MVT::i32));
10784
+ } else if (ME > 63 - SH) {
10785
+ Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
10786
+ DAG.getConstant(ME + SH - 63, Loc, MVT::i32));
10787
+ }
10788
+ return SDValue(DAG.getMachineNode(
10789
+ PPC::RLDIMI, Loc, MVT::i64,
10790
+ {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32),
10791
+ DAG.getTargetConstant(MB, Loc, MVT::i32)}),
10792
+ 0);
10793
+ }
10794
+
10795
+ // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH
10796
+ // is adjusted to simplify cases. Invalid ranges will be skipped.
10797
+ // - SrcHi inserted into DstHi with [0, 32-SH)
10798
+ // - SrcLo inserted into DstHi with [32-SH, 32)
10799
+ // - SrcHi inserted into DstLo with [32, 64-SH)
10800
+ // - SrcLo inserted into DstLo with [64-SH, 64)
10801
+ auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32);
10802
+ auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32);
10803
+ if (SH >= 32) {
10804
+ SH -= 32;
10805
+ std::swap(SrcLo, SrcHi);
10806
+ }
10807
+ auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right,
10808
+ SDValue Src, SDValue Dst, unsigned MB,
10809
+ unsigned ME) {
10810
+ if (Left > Right)
10811
+ return Dst;
10812
+
10813
+ if (MB <= ME) {
10814
+ if (MB <= Right && ME >= Left)
10815
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH,
10816
+ std::max(MB, Left) % 32,
10817
+ std::min(ME, Right) % 32);
10818
+ } else {
10819
+ if (MB < Left || ME > Right)
10820
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32);
10821
+
10822
+ if (MB <= Right && ME < Left)
10823
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32);
10824
+
10825
+ if (MB <= Right && ME <= Right)
10826
+ return getRotateInsert32(
10827
+ DAG, Loc,
10828
+ getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32), Src,
10829
+ SH, MB % 32, Right % 32);
10830
+
10831
+ if (MB > Right && ME >= Left && ME <= Right)
10832
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32);
10833
+ }
10834
+ return Dst;
10835
+ };
10836
+ DstHi = GetSubInsert(0, 31 - SH, SrcHi, DstHi, MB, ME);
10837
+ DstHi = GetSubInsert(32 - SH, 31, SrcLo, DstHi, MB, ME);
10838
+ DstLo = GetSubInsert(32, 63 - SH, SrcLo, DstLo, MB, ME);
10839
+ DstLo = GetSubInsert(64 - SH, 63, SrcHi, DstLo, MB, ME);
10840
+ return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi);
10841
+ }
10842
+
10760
10843
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10761
10844
/// lower, do it, otherwise return null.
10762
10845
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10773,7 +10856,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10773
10856
return DAG.getRegister(PPC::R2, MVT::i32);
10774
10857
10775
10858
case Intrinsic::ppc_rldimi: {
10776
- assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10777
10859
SDValue Src = Op.getOperand(1);
10778
10860
APInt Mask = Op.getConstantOperandAPInt(4);
10779
10861
if (Mask.isZero())
@@ -10784,20 +10866,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10784
10866
unsigned MB = 0, ME = 0;
10785
10867
if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10786
10868
report_fatal_error("invalid rldimi mask!");
10787
- // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10788
- if (ME < 63 - SH) {
10789
- Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10790
- DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10791
- } else if (ME > 63 - SH) {
10792
- Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10793
- DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10794
- }
10795
- return SDValue(
10796
- DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10797
- {Op.getOperand(2), Src,
10798
- DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10799
- DAG.getTargetConstant(MB, dl, MVT::i32)}),
10800
- 0);
10869
+ return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH,
10870
+ MB, ME, Subtarget.isPPC64());
10801
10871
}
10802
10872
10803
10873
case Intrinsic::ppc_rlwimi: {
@@ -10810,12 +10880,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10810
10880
unsigned MB = 0, ME = 0;
10811
10881
if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10812
10882
report_fatal_error("invalid rlwimi mask!");
10813
- return SDValue(DAG.getMachineNode(
10814
- PPC::RLWIMI, dl, MVT::i32,
10815
- {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10816
- DAG.getTargetConstant(MB, dl, MVT::i32),
10817
- DAG.getTargetConstant(ME, dl, MVT::i32)}),
10818
- 0);
10883
+ return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1),
10884
+ Op.getConstantOperandVal(3), MB, ME);
10819
10885
}
10820
10886
10821
10887
case Intrinsic::ppc_rlwnm: {
@@ -11833,6 +11899,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11833
11899
case Intrinsic::ppc_maxfe:
11834
11900
case Intrinsic::ppc_minfe:
11835
11901
case Intrinsic::ppc_fnmsub:
11902
+ case Intrinsic::ppc_rldimi:
11836
11903
case Intrinsic::ppc_convert_f128_to_ppcf128:
11837
11904
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11838
11905
break;
0 commit comments