Skip to content

Commit b886dcf

Browse files
committed
[PowerPC] Implement 32-bit expansion for rldimi
rldimi is a 64-bit instruction; for backward compatibility, it needs to be expanded into a series of rlwimi instructions in a 32-bit environment. In the future, we may improve the bit permutation selector and remove such direct codegen.
1 parent b890c17 commit b886dcf

File tree

3 files changed

+454
-22
lines changed

3 files changed

+454
-22
lines changed

clang/lib/Sema/SemaChecking.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
52365236
case PPC::BI__builtin_ppc_fetch_and_andlp:
52375237
case PPC::BI__builtin_ppc_fetch_and_orlp:
52385238
case PPC::BI__builtin_ppc_fetch_and_swaplp:
5239-
case PPC::BI__builtin_ppc_rldimi:
52405239
return true;
52415240
}
52425241
return false;

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 88 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
643643
// We want to custom lower some of our intrinsics.
644644
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
645645
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
646+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
646647
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
647648
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
648649
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
@@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
1075710758
return true;
1075810759
}
1075910760

10761+
/// Build a PPC::RLWIMI machine node with an i32 result.
///
/// The node's operands are \p Dst, \p Src and the three immediates \p SH,
/// \p MB and \p ME, each emitted as an i32 target constant. All three
/// immediates must be below 32 (checked by the assert).
///
/// \returns result 0 of the newly created machine node.
static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst,
10762+
SDValue Src, unsigned SH, unsigned MB,
10763+
unsigned ME) {
10764+
// The immediates describe positions within a 32-bit word.
assert(SH < 32 && MB < 32 && ME < 32 &&
10765+
"Invalid argument for rotate insert!");
10766+
return SDValue(
10767+
// Operand order: destination, source, shift, mask begin, mask end.
DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32,
10768+
{Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32),
10769+
DAG.getTargetConstant(MB, Loc, MVT::i32),
10770+
DAG.getTargetConstant(ME, Loc, MVT::i32)}),
10771+
0);
10772+
}
10773+
10774+
/// Build the DAG for a 64-bit rotate-and-insert of \p Src into \p Dst with
/// rotate amount \p SH and mask bounds \p MB / \p ME (all below 64, checked by
/// the assert).
///
/// When \p IsPPC64 is true a single PPC::RLDIMI machine node is emitted,
/// preceded by an ISD::ROTL of \p Src when ME != 63 - SH. Otherwise both
/// values are split into i32 halves, each half of \p Dst is updated through
/// getRotateInsert32 (RLWIMI) as needed, and the halves are recombined with
/// ISD::BUILD_PAIR.
///
/// \returns an i64 value holding the result of the insert.
static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst,
10775+
SDValue Src, unsigned SH, unsigned MB,
10776+
unsigned ME, bool IsPPC64) {
10777+
assert(SH < 64 && MB < 64 && ME < 64 &&
10778+
"Invalid argument for rotate insert!");
10779+
if (IsPPC64) {
10780+
// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10781+
// In both branches below, the pre-rotation amount plus the RLDIMI shift of
// 63 - ME adds up to SH modulo 64.
if (ME < 63 - SH) {
10782+
Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
10783+
DAG.getConstant(ME + SH + 1, Loc, MVT::i32));
10784+
} else if (ME > 63 - SH) {
10785+
Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
10786+
DAG.getConstant(ME + SH - 63, Loc, MVT::i32));
10787+
}
10788+
// RLDIMI operands: destination, (possibly pre-rotated) source, shift
// immediate 63 - ME, and mask begin MB.
return SDValue(DAG.getMachineNode(
10789+
PPC::RLDIMI, Loc, MVT::i64,
10790+
{Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32),
10791+
DAG.getTargetConstant(MB, Loc, MVT::i32)}),
10792+
0);
10793+
}
10794+
10795+
// To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH
10796+
// is adjusted to simplify cases. Invalid ranges will be skipped.
10797+
// - SrcHi inserted into DstHi with [0, 32-SH)
10798+
// - SrcLo inserted into DstHi with [32-SH, 32)
10799+
// - SrcHi inserted into DstLo with [32, 64-SH)
10800+
// - SrcLo inserted into DstLo with [64-SH, 64)
10801+
auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32);
10802+
auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32);
10803+
// A rotate by SH >= 32 is handled as a swap of the two source halves
// followed by a rotate by SH - 32, so SH is below 32 from here on.
if (SH >= 32) {
10804+
SH -= 32;
10805+
std::swap(SrcLo, SrcHi);
10806+
}
10807+
// Helper: insert the part of Src that lands in the inclusive 64-bit bit range
// [Left, Right] into Dst, restricted to the mask described by MB/ME. SH is
// captured by value after the adjustment above. Bit positions are reduced
// modulo 32 before being handed to getRotateInsert32. Returns Dst unchanged
// when the range is empty or no branch below applies.
auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right,
10808+
SDValue Src, SDValue Dst, unsigned MB,
10809+
unsigned ME) {
10810+
// Empty range (e.g. when SH is 0): nothing to insert.
if (Left > Right)
10811+
return Dst;
10812+
10813+
if (MB <= ME) {
10814+
// Mask [MB, ME] overlaps [Left, Right]: insert only the overlapping bits.
if (MB <= Right && ME >= Left)
10815+
return getRotateInsert32(DAG, Loc, Dst, Src, SH,
10816+
std::max(MB, Left) % 32,
10817+
std::min(ME, Right) % 32);
10818+
} else {
10819+
// MB > ME: presumably a wrap-around mask covering [MB, 63] and [0, ME]
// (NOTE(review): confirm against isRunOfOnes64).
// The whole range [Left, Right] is inserted.
if (MB < Left || ME > Right)
10820+
return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32);
10821+
10822+
// Only the tail [MB, Right] of the range is inserted.
if (MB <= Right && ME < Left)
10823+
return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32);
10824+
10825+
// Both MB and ME fall inside the range: two inserts, [Left, ME] first and
// then [MB, Right] on top of that result.
if (MB <= Right && ME <= Right)
10826+
return getRotateInsert32(
10827+
DAG, Loc,
10828+
getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32), Src,
10829+
SH, MB % 32, Right % 32);
10830+
10831+
// Only the head [Left, ME] of the range is inserted.
if (MB > Right && ME >= Left && ME <= Right)
10832+
return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32);
10833+
}
10834+
return Dst;
10835+
};
10836+
// Ranges [0, 31] update DstHi and ranges [32, 63] update DstLo. The second
// call of each pair takes the result of the first as its destination.
DstHi = GetSubInsert(0, 31 - SH, SrcHi, DstHi, MB, ME);
10837+
DstHi = GetSubInsert(32 - SH, 31, SrcLo, DstHi, MB, ME);
10838+
DstLo = GetSubInsert(32, 63 - SH, SrcLo, DstLo, MB, ME);
10839+
DstLo = GetSubInsert(64 - SH, 63, SrcHi, DstLo, MB, ME);
10840+
// Recombine the two updated halves into the i64 result.
return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi);
10841+
}
10842+
1076010843
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
1076110844
/// lower, do it, otherwise return null.
1076210845
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10773,7 +10856,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1077310856
return DAG.getRegister(PPC::R2, MVT::i32);
1077410857

1077510858
case Intrinsic::ppc_rldimi: {
10776-
assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
1077710859
SDValue Src = Op.getOperand(1);
1077810860
APInt Mask = Op.getConstantOperandAPInt(4);
1077910861
if (Mask.isZero())
@@ -10784,20 +10866,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1078410866
unsigned MB = 0, ME = 0;
1078510867
if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
1078610868
report_fatal_error("invalid rldimi mask!");
10787-
// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10788-
if (ME < 63 - SH) {
10789-
Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10790-
DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10791-
} else if (ME > 63 - SH) {
10792-
Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10793-
DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10794-
}
10795-
return SDValue(
10796-
DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10797-
{Op.getOperand(2), Src,
10798-
DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10799-
DAG.getTargetConstant(MB, dl, MVT::i32)}),
10800-
0);
10869+
return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH,
10870+
MB, ME, Subtarget.isPPC64());
1080110871
}
1080210872

1080310873
case Intrinsic::ppc_rlwimi: {
@@ -10810,12 +10880,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1081010880
unsigned MB = 0, ME = 0;
1081110881
if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
1081210882
report_fatal_error("invalid rlwimi mask!");
10813-
return SDValue(DAG.getMachineNode(
10814-
PPC::RLWIMI, dl, MVT::i32,
10815-
{Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10816-
DAG.getTargetConstant(MB, dl, MVT::i32),
10817-
DAG.getTargetConstant(ME, dl, MVT::i32)}),
10818-
0);
10883+
return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1),
10884+
Op.getConstantOperandVal(3), MB, ME);
1081910885
}
1082010886

1082110887
case Intrinsic::ppc_rlwnm: {
@@ -11833,6 +11899,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
1183311899
case Intrinsic::ppc_maxfe:
1183411900
case Intrinsic::ppc_minfe:
1183511901
case Intrinsic::ppc_fnmsub:
11902+
case Intrinsic::ppc_rldimi:
1183611903
case Intrinsic::ppc_convert_f128_to_ppcf128:
1183711904
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
1183811905
break;

0 commit comments

Comments
 (0)