Skip to content

Commit 3362a81

Browse files
committed
[PowerPC] Implement 32-bit expansion for rldimi
rldimi is a 64-bit instruction; for backward compatibility, it needs to be expanded into a series of rlwimi instructions in a 32-bit environment. In the future, we may improve the bit permutation selector and remove such direct codegen.
1 parent defc485 commit 3362a81

File tree

3 files changed

+454
-22
lines changed

3 files changed

+454
-22
lines changed

clang/lib/Sema/SemaChecking.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
52365236
case PPC::BI__builtin_ppc_fetch_and_andlp:
52375237
case PPC::BI__builtin_ppc_fetch_and_orlp:
52385238
case PPC::BI__builtin_ppc_fetch_and_swaplp:
5239-
case PPC::BI__builtin_ppc_rldimi:
52405239
return true;
52415240
}
52425241
return false;

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 88 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
643643
// We want to custom lower some of our intrinsics.
644644
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
645645
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
646+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
646647
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
647648
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
648649
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
@@ -10748,6 +10749,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
1074810749
return true;
1074910750
}
1075010751

10752+
/// Create a PPC::RLWIMI machine node that inserts \p Src, rotated left by
/// \p SH, into \p Dst under the mask delimited by \p MB and \p ME.
///
/// \param DAG  DAG in which the node and its constant operands are created.
/// \param Loc  Location attached to the new node; taken by const reference so
///             that no SDLoc copy is made per call.
/// \param Dst  Value receiving the inserted bits (first RLWIMI operand).
/// \param Src  Value being rotated and inserted (second RLWIMI operand).
/// \param SH   Rotate amount, must be less than 32.
/// \param MB   Mask begin, must be less than 32.
/// \param ME   Mask end, must be less than 32.
/// \returns Result 0 of the newly created i32 RLWIMI node.
static SDValue getRotateInsert32(SelectionDAG &DAG, const SDLoc &Loc,
                                 SDValue Dst, SDValue Src, unsigned SH,
                                 unsigned MB, unsigned ME) {
  assert(SH < 32 && MB < 32 && ME < 32 &&
         "Invalid argument for rotate insert!");
  // The immediates are emitted as i32 target constants, in operand order.
  SDValue Ops[] = {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32),
                   DAG.getTargetConstant(MB, Loc, MVT::i32),
                   DAG.getTargetConstant(ME, Loc, MVT::i32)};
  return SDValue(DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, Ops), 0);
}
10764+
10765+
/// Build the DAG nodes implementing a 64-bit rotate-and-insert: \p Src is
/// rotated left by \p SH and inserted into \p Dst under the mask delimited by
/// \p MB and \p ME.
///
/// When \p IsPPC64 is true a single PPC::RLDIMI node is emitted (preceded by
/// an ISD::ROTL of \p Src when needed). Otherwise both values are split into
/// 32-bit halves, up to four groups of PPC::RLWIMI inserts are emitted, and the
/// halves are recombined with ISD::BUILD_PAIR.
///
/// \param DAG      DAG in which nodes are created.
/// \param Loc      Location attached to every created node; taken by const
///                 reference so that no SDLoc copy is made per call.
/// \param Dst      i64 value receiving the inserted bits.
/// \param Src      i64 value being rotated and inserted.
/// \param SH       Rotate amount, must be less than 64.
/// \param MB       Mask begin, must be less than 64. MB > ME denotes a mask
///                 that wraps around through bit 63 back to bit 0.
/// \param ME       Mask end, must be less than 64.
/// \param IsPPC64  Selects the single-instruction 64-bit form.
/// \returns The i64 result value.
static SDValue getRotateInsert64(SelectionDAG &DAG, const SDLoc &Loc,
                                 SDValue Dst, SDValue Src, unsigned SH,
                                 unsigned MB, unsigned ME, bool IsPPC64) {
  assert(SH < 64 && MB < 64 && ME < 64 &&
         "Invalid argument for rotate insert!");

  if (IsPPC64) {
    // rldimi requires ME=63-SH, so the node below is emitted with a shift of
    // 63-ME. Src is pre-rotated by the remainder so the total rotation is SH:
    // (SH + ME + 1) mod 64 equals ME+SH+1 when ME < 63-SH, ME+SH-63 when
    // ME > 63-SH, and zero (no rotation needed) when ME == 63-SH.
    const unsigned PreRotate = (SH + ME + 1) % 64;
    if (PreRotate != 0)
      Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
                        DAG.getConstant(PreRotate, Loc, MVT::i32));

    SDValue Ops[] = {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32),
                     DAG.getTargetConstant(MB, Loc, MVT::i32)};
    return SDValue(DAG.getMachineNode(PPC::RLDIMI, Loc, MVT::i64, Ops), 0);
  }

  // To implement rldimi(Dst, Src) on a 32-bit target, four parts are needed.
  // SH is reduced below 32 to simplify the cases. Empty ranges are skipped.
  // - SrcHi inserted into DstHi with [0, 32-SH)
  // - SrcLo inserted into DstHi with [32-SH, 32)
  // - SrcHi inserted into DstLo with [32, 64-SH)
  // - SrcLo inserted into DstLo with [64-SH, 64)
  auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32);
  auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32);
  if (SH >= 32) {
    // Rotating by 32+s is the same as swapping the halves and rotating by s.
    SH -= 32;
    std::swap(SrcLo, SrcHi);
  }

  // Insert the bits of Part (rotated by SH) that fall both inside the 64-bit
  // position range [Left, Right] and inside the mask into Into. Positions
  // 0..31 address the high word and 32..63 the low word, hence the "% 32"
  // when converting to 32-bit mask bounds.
  auto InsertPart = [&DAG, &Loc, SH, MB, ME](unsigned Left, unsigned Right,
                                             SDValue Part, SDValue Into) {
    // Empty range: occurs for the cross-word parts when SH is zero.
    if (Left > Right)
      return Into;

    auto Emit = [&](SDValue Base, unsigned First, unsigned Last) {
      return getRotateInsert32(DAG, Loc, Base, Part, SH, First % 32,
                               Last % 32);
    };

    if (MB <= ME) {
      // Contiguous mask [MB, ME]: insert its intersection with the range.
      if (MB > Right || ME < Left)
        return Into;
      return Emit(Into, std::max(MB, Left), std::min(ME, Right));
    }

    // Wrapped mask: [MB, 63] together with [0, ME].
    // If either piece alone covers the range, insert the whole range.
    if (MB < Left || ME > Right)
      return Emit(Into, Left, Right);

    // From here on Left <= MB and ME <= Right, so the mask can overlap the
    // range at its head ([Left, ME]), at its tail ([MB, Right]), at both, or
    // not at all.
    if (ME >= Left)
      Into = Emit(Into, Left, ME);
    if (MB <= Right)
      Into = Emit(Into, MB, Right);
    return Into;
  };

  DstHi = InsertPart(0, 31 - SH, SrcHi, DstHi);
  DstHi = InsertPart(32 - SH, 31, SrcLo, DstHi);
  DstLo = InsertPart(32, 63 - SH, SrcLo, DstLo);
  DstLo = InsertPart(64 - SH, 63, SrcHi, DstLo);
  return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi);
}
10833+
1075110834
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
1075210835
/// lower, do it, otherwise return null.
1075310836
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10764,7 +10847,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1076410847
return DAG.getRegister(PPC::R2, MVT::i32);
1076510848

1076610849
case Intrinsic::ppc_rldimi: {
10767-
assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
1076810850
SDValue Src = Op.getOperand(1);
1076910851
APInt Mask = Op.getConstantOperandAPInt(4);
1077010852
if (Mask.isZero())
@@ -10775,20 +10857,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1077510857
unsigned MB = 0, ME = 0;
1077610858
if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
1077710859
report_fatal_error("invalid rldimi mask!");
10778-
// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10779-
if (ME < 63 - SH) {
10780-
Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10781-
DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10782-
} else if (ME > 63 - SH) {
10783-
Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10784-
DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10785-
}
10786-
return SDValue(
10787-
DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10788-
{Op.getOperand(2), Src,
10789-
DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10790-
DAG.getTargetConstant(MB, dl, MVT::i32)}),
10791-
0);
10860+
return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH,
10861+
MB, ME, Subtarget.isPPC64());
1079210862
}
1079310863

1079410864
case Intrinsic::ppc_rlwimi: {
@@ -10801,12 +10871,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1080110871
unsigned MB = 0, ME = 0;
1080210872
if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
1080310873
report_fatal_error("invalid rlwimi mask!");
10804-
return SDValue(DAG.getMachineNode(
10805-
PPC::RLWIMI, dl, MVT::i32,
10806-
{Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10807-
DAG.getTargetConstant(MB, dl, MVT::i32),
10808-
DAG.getTargetConstant(ME, dl, MVT::i32)}),
10809-
0);
10874+
return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1),
10875+
Op.getConstantOperandVal(3), MB, ME);
1081010876
}
1081110877

1081210878
case Intrinsic::ppc_rlwnm: {
@@ -11824,6 +11890,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
1182411890
case Intrinsic::ppc_maxfe:
1182511891
case Intrinsic::ppc_minfe:
1182611892
case Intrinsic::ppc_fnmsub:
11893+
case Intrinsic::ppc_rldimi:
1182711894
case Intrinsic::ppc_convert_f128_to_ppcf128:
1182811895
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
1182911896
break;

0 commit comments

Comments
 (0)