@@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
643
643
// We want to custom lower some of our intrinsics.
644
644
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
645
645
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
646
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
646
647
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
647
648
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
648
649
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
@@ -10748,6 +10749,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10748
10749
return true;
10749
10750
}
10750
10751
10752
+ static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst,
10753
+ SDValue Src, unsigned SH, unsigned MB,
10754
+ unsigned ME) {
10755
+ assert(SH < 32 && MB < 32 && ME < 32 &&
10756
+ "Invalid argument for rotate insert!");
10757
+ return SDValue(
10758
+ DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32,
10759
+ {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32),
10760
+ DAG.getTargetConstant(MB, Loc, MVT::i32),
10761
+ DAG.getTargetConstant(ME, Loc, MVT::i32)}),
10762
+ 0);
10763
+ }
10764
+
10765
+ static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst,
10766
+ SDValue Src, unsigned SH, unsigned MB,
10767
+ unsigned ME, bool IsPPC64) {
10768
+ assert(SH < 64 && MB < 64 && ME < 64 &&
10769
+ "Invalid argument for rotate insert!");
10770
+ if (IsPPC64) {
10771
+ // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10772
+ if (ME < 63 - SH) {
10773
+ Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
10774
+ DAG.getConstant(ME + SH + 1, Loc, MVT::i32));
10775
+ } else if (ME > 63 - SH) {
10776
+ Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
10777
+ DAG.getConstant(ME + SH - 63, Loc, MVT::i32));
10778
+ }
10779
+ return SDValue(DAG.getMachineNode(
10780
+ PPC::RLDIMI, Loc, MVT::i64,
10781
+ {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32),
10782
+ DAG.getTargetConstant(MB, Loc, MVT::i32)}),
10783
+ 0);
10784
+ }
10785
+
10786
+ // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH
10787
+ // is adjusted to simplify cases. Invalid ranges will be skipped.
10788
+ // - SrcHi inserted into DstHi with [0, 32-SH)
10789
+ // - SrcLo inserted into DstHi with [32-SH, 32)
10790
+ // - SrcHi inserted into DstLo with [32, 64-SH)
10791
+ // - SrcLo inserted into DstLo with [64-SH, 64)
10792
+ auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32);
10793
+ auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32);
10794
+ if (SH >= 32) {
10795
+ SH -= 32;
10796
+ std::swap(SrcLo, SrcHi);
10797
+ }
10798
+ auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right,
10799
+ SDValue Src, SDValue Dst, unsigned MB,
10800
+ unsigned ME) {
10801
+ if (Left > Right)
10802
+ return Dst;
10803
+
10804
+ if (MB <= ME) {
10805
+ if (MB <= Right && ME >= Left)
10806
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH,
10807
+ std::max(MB, Left) % 32,
10808
+ std::min(ME, Right) % 32);
10809
+ } else {
10810
+ if (MB < Left || ME > Right)
10811
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32);
10812
+
10813
+ if (MB <= Right && ME < Left)
10814
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32);
10815
+
10816
+ if (MB <= Right && ME <= Right)
10817
+ return getRotateInsert32(
10818
+ DAG, Loc,
10819
+ getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32), Src,
10820
+ SH, MB % 32, Right % 32);
10821
+
10822
+ if (MB > Right && ME >= Left && ME <= Right)
10823
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32);
10824
+ }
10825
+ return Dst;
10826
+ };
10827
+ DstHi = GetSubInsert(0, 31 - SH, SrcHi, DstHi, MB, ME);
10828
+ DstHi = GetSubInsert(32 - SH, 31, SrcLo, DstHi, MB, ME);
10829
+ DstLo = GetSubInsert(32, 63 - SH, SrcLo, DstLo, MB, ME);
10830
+ DstLo = GetSubInsert(64 - SH, 63, SrcHi, DstLo, MB, ME);
10831
+ return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi);
10832
+ }
10833
+
10751
10834
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10752
10835
/// lower, do it, otherwise return null.
10753
10836
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10764,7 +10847,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10764
10847
return DAG.getRegister(PPC::R2, MVT::i32);
10765
10848
10766
10849
case Intrinsic::ppc_rldimi: {
10767
- assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10768
10850
SDValue Src = Op.getOperand(1);
10769
10851
APInt Mask = Op.getConstantOperandAPInt(4);
10770
10852
if (Mask.isZero())
@@ -10775,20 +10857,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10775
10857
unsigned MB = 0, ME = 0;
10776
10858
if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10777
10859
report_fatal_error("invalid rldimi mask!");
10778
- // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10779
- if (ME < 63 - SH) {
10780
- Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10781
- DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10782
- } else if (ME > 63 - SH) {
10783
- Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10784
- DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10785
- }
10786
- return SDValue(
10787
- DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10788
- {Op.getOperand(2), Src,
10789
- DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10790
- DAG.getTargetConstant(MB, dl, MVT::i32)}),
10791
- 0);
10860
+ return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH,
10861
+ MB, ME, Subtarget.isPPC64());
10792
10862
}
10793
10863
10794
10864
case Intrinsic::ppc_rlwimi: {
@@ -10801,12 +10871,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10801
10871
unsigned MB = 0, ME = 0;
10802
10872
if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10803
10873
report_fatal_error("invalid rlwimi mask!");
10804
- return SDValue(DAG.getMachineNode(
10805
- PPC::RLWIMI, dl, MVT::i32,
10806
- {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10807
- DAG.getTargetConstant(MB, dl, MVT::i32),
10808
- DAG.getTargetConstant(ME, dl, MVT::i32)}),
10809
- 0);
10874
+ return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1),
10875
+ Op.getConstantOperandVal(3), MB, ME);
10810
10876
}
10811
10877
10812
10878
case Intrinsic::ppc_rlwnm: {
@@ -11824,6 +11890,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11824
11890
case Intrinsic::ppc_maxfe:
11825
11891
case Intrinsic::ppc_minfe:
11826
11892
case Intrinsic::ppc_fnmsub:
11893
+ case Intrinsic::ppc_rldimi:
11827
11894
case Intrinsic::ppc_convert_f128_to_ppcf128:
11828
11895
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11829
11896
break;
0 commit comments