-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[PowerPC] Implement 32-bit expansion for rldimi #86783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-powerpc Author: Qiu Chaofan (ecnelises) Changes: rldimi is a 64-bit instruction; for backward compatibility, it needs to be expanded into a series of rlwimi instructions in a 32-bit environment. In the future, we may improve the bit permutation selector and remove such direct codegen. Patch is 20.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86783.diff 3 Files Affected:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 08449581330934..5e8228ed998978 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
case PPC::BI__builtin_ppc_fetch_and_andlp:
case PPC::BI__builtin_ppc_fetch_and_orlp:
case PPC::BI__builtin_ppc_fetch_and_swaplp:
- case PPC::BI__builtin_ppc_rldimi:
return true;
}
return false;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cce0efad39c75b..7e42773f3aa1cd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
@@ -10748,6 +10749,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return true;
}
+static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, // Builds a single RLWIMI machine node (result type i32) and returns its value 0.
+ SDValue Src, unsigned SH, unsigned MB,
+ unsigned ME) {
+ assert(SH < 32 && MB < 32 && ME < 32 && // SH, MB and ME must each be below 32.
+ "Invalid argument for rotate insert!");
+ return SDValue(
+ DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32,
+ {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), // Operands: Dst, Src, then SH, MB and ME as i32 target constants.
+ DAG.getTargetConstant(MB, Loc, MVT::i32),
+ DAG.getTargetConstant(ME, Loc, MVT::i32)}),
+ 0);
+}
+
+static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, // Builds the node(s) for a 64-bit rotate-insert of Src into Dst.
+ SDValue Src, unsigned SH, unsigned MB, // IsPPC64: a single RLDIMI, preceded by a ROTL of Src when ME != 63 - SH.
+ unsigned ME, bool IsPPC64) { // Otherwise: RLWIMI nodes on the i32 halves, recombined with BUILD_PAIR.
+ assert(SH < 64 && MB < 64 && ME < 64 && // SH, MB and ME must each be below 64.
+ "Invalid argument for rotate insert!");
+ if (IsPPC64) {
+ // RLDIMI is emitted below with shift amount 63-ME, so when ME != 63-SH the source is pre-rotated to make up the difference.
+ if (ME < 63 - SH) {
+ Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
+ DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); // ME+SH+1 plus the later 63-ME shift totals SH+64, i.e. SH modulo 64.
+ } else if (ME > 63 - SH) {
+ Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src,
+ DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); // ME+SH-63 plus the later 63-ME shift totals SH.
+ }
+ return SDValue(DAG.getMachineNode(
+ PPC::RLDIMI, Loc, MVT::i64,
+ {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), // Operands: Dst, Src, shift 63-ME, mask begin MB.
+ DAG.getTargetConstant(MB, Loc, MVT::i32)}),
+ 0);
+ }
+
+ // To implement rldimi(Dst, Src) on a 32-bit target, up to four 32-bit inserts are needed. SH
+ // is first reduced below 32 (swapping the source halves). Empty ranges are skipped.
+ // - SrcHi inserted into DstHi with [0, 32-SH)
+ // - SrcLo inserted into DstHi with [32-SH, 32)
+ // - SrcLo inserted into DstLo with [32, 64-SH)
+ // - SrcHi inserted into DstLo with [64-SH, 64)
+ auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32);
+ auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32);
+ if (SH >= 32) { // A rotate by 32 or more is handled as a swap of the source halves plus a rotate by SH-32.
+ SH -= 32;
+ std::swap(SrcLo, SrcHi);
+ }
+ auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, // Left/Right: inclusive range of 64-bit bit positions handled by this call.
+ SDValue Src, SDValue Dst, unsigned MB, // Src/Dst: the i32 halves involved; MB/ME: the full 64-bit mask bounds.
+ unsigned ME) { // Returns Dst with Src inserted over the part of the mask inside [Left, Right].
+ if (Left > Right) // Empty range (e.g. 32-SH > 31 when SH is 0): nothing to insert.
+ return Dst;
+
+ if (MB <= ME) { // Contiguous mask [MB, ME].
+ if (MB <= Right && ME >= Left) // Mask overlaps the range: insert over the intersection.
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH,
+ std::max(MB, Left) % 32, // Positions are reduced modulo 32 to index within the i32 half.
+ std::min(ME, Right) % 32);
+ } else { // MB > ME: the mask is treated as wrapping, covering [MB, 63] and [0, ME].
+ if (MB < Left || ME > Right) // The whole range lies inside one of the two runs.
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32);
+
+ if (MB <= Right && ME < Left) // Only the run starting at MB overlaps: insert [MB, Right].
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32);
+
+ if (MB <= Right && ME <= Right) // Both runs overlap: insert [Left, ME] first, then [MB, Right] on top.
+ return getRotateInsert32(
+ DAG, Loc,
+ getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32), Src,
+ SH, MB % 32, Right % 32);
+
+ if (MB > Right && ME >= Left && ME <= Right) // Only the run ending at ME overlaps: insert [Left, ME].
+ return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32);
+ }
+ return Dst; // No overlap between the mask and this range: Dst is returned unchanged.
+ };
+ DstHi = GetSubInsert(0, 31 - SH, SrcHi, DstHi, MB, ME); // High word, bits [0, 31-SH].
+ DstHi = GetSubInsert(32 - SH, 31, SrcLo, DstHi, MB, ME); // High word, bits [32-SH, 31].
+ DstLo = GetSubInsert(32, 63 - SH, SrcLo, DstLo, MB, ME); // Low word, bits [32, 63-SH].
+ DstLo = GetSubInsert(64 - SH, 63, SrcHi, DstLo, MB, ME); // Low word, bits [64-SH, 63].
+ return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi); // Reassemble the i64 result from the updated halves.
+}
+
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -10764,7 +10847,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
case Intrinsic::ppc_rldimi: {
- assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
SDValue Src = Op.getOperand(1);
APInt Mask = Op.getConstantOperandAPInt(4);
if (Mask.isZero())
@@ -10775,20 +10857,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned MB = 0, ME = 0;
if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
report_fatal_error("invalid rldimi mask!");
- // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
- if (ME < 63 - SH) {
- Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
- DAG.getConstant(ME + SH + 1, dl, MVT::i32));
- } else if (ME > 63 - SH) {
- Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
- DAG.getConstant(ME + SH - 63, dl, MVT::i32));
- }
- return SDValue(
- DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
- {Op.getOperand(2), Src,
- DAG.getTargetConstant(63 - ME, dl, MVT::i32),
- DAG.getTargetConstant(MB, dl, MVT::i32)}),
- 0);
+ return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH,
+ MB, ME, Subtarget.isPPC64());
}
case Intrinsic::ppc_rlwimi: {
@@ -10801,12 +10871,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned MB = 0, ME = 0;
if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
report_fatal_error("invalid rlwimi mask!");
- return SDValue(DAG.getMachineNode(
- PPC::RLWIMI, dl, MVT::i32,
- {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
- DAG.getTargetConstant(MB, dl, MVT::i32),
- DAG.getTargetConstant(ME, dl, MVT::i32)}),
- 0);
+ return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1),
+ Op.getConstantOperandVal(3), MB, ME);
}
case Intrinsic::ppc_rlwnm: {
@@ -11824,6 +11890,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case Intrinsic::ppc_maxfe:
case Intrinsic::ppc_minfe:
case Intrinsic::ppc_fnmsub:
+ case Intrinsic::ppc_rldimi:
case Intrinsic::ppc_convert_f128_to_ppcf128:
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
break;
diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 78ea9aa862f2c2..7495c5d66dbbb3 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -1,12 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix -mcpu=pwr8 | FileCheck %s --check-prefix=32BIT
define i64 @rldimi1(i64 %a) {
; CHECK-LABEL: rldimi1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: rldimi 3, 3, 8, 0
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi1:
+; 32BIT: # %bb.0: # %entry
+; 32BIT-NEXT: rotlwi 5, 4, 8
+; 32BIT-NEXT: rlwimi 4, 4, 8, 0, 23
+; 32BIT-NEXT: rlwimi 5, 3, 8, 0, 23
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
entry:
%x0 = shl i64 %a, 8
%x1 = and i64 %a, 255
@@ -23,6 +32,18 @@ define i64 @rldimi2(i64 %a) {
; CHECK-NEXT: rldimi 4, 3, 24, 0
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi2:
+; 32BIT: # %bb.0: # %entry
+; 32BIT-NEXT: mr 6, 4
+; 32BIT-NEXT: rotlwi 5, 4, 24
+; 32BIT-NEXT: rlwimi 6, 4, 8, 16, 23
+; 32BIT-NEXT: rlwimi 5, 3, 24, 0, 7
+; 32BIT-NEXT: rlwimi 6, 4, 16, 8, 15
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: rlwimi 6, 4, 24, 0, 7
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
entry:
%x0 = shl i64 %a, 8
%x1 = and i64 %a, 255
@@ -46,6 +67,15 @@ define i64 @rldimi3(i64 %a) {
; CHECK-NEXT: rldimi 4, 3, 56, 0
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi3:
+; 32BIT: # %bb.0: # %entry
+; 32BIT-NEXT: mr 3, 4
+; 32BIT-NEXT: rlwimi 3, 4, 8, 16, 23
+; 32BIT-NEXT: rlwimi 3, 4, 16, 8, 15
+; 32BIT-NEXT: rlwimi 3, 4, 24, 0, 7
+; 32BIT-NEXT: mr 4, 3
+; 32BIT-NEXT: blr
entry:
%0 = shl i64 %a, 8
%1 = and i64 %a, 255
@@ -66,6 +96,17 @@ define i64 @rldimi4(i64 %a) {
; CHECK-NEXT: rldimi 3, 3, 16, 0
; CHECK-NEXT: rldimi 3, 3, 32, 0
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi4:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 3, 3, 8, 0, 23
+; 32BIT-NEXT: rlwimi 3, 4, 8, 24, 31
+; 32BIT-NEXT: rlwimi 4, 4, 8, 0, 23
+; 32BIT-NEXT: rlwimi 3, 3, 16, 0, 15
+; 32BIT-NEXT: rlwimi 3, 4, 16, 16, 31
+; 32BIT-NEXT: rlwimi 4, 4, 16, 0, 15
+; 32BIT-NEXT: rlwimi 3, 4, 0, 0, 31
+; 32BIT-NEXT: blr
%r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256)
%r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536)
%r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296)
@@ -78,6 +119,13 @@ define i64 @rldimi5(i64 %a, i64 %b) {
; CHECK-NEXT: rldimi 4, 3, 8, 40
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi5:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 6, 4, 8, 8, 23
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 16776960) ; 0xffff << 8
ret i64 %r
}
@@ -89,6 +137,14 @@ define i64 @rldimi6(i64 %a, i64 %b) {
; CHECK-NEXT: rldimi 4, 3, 7, 41
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi6:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 6, 4, 8, 9, 23
+; 32BIT-NEXT: rlwimi 6, 3, 8, 24, 24
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 8388480) ; 0xffff << 7
ret i64 %r
}
@@ -100,6 +156,13 @@ define i64 @rldimi7(i64 %a, i64 %b) {
; CHECK-NEXT: rldimi 4, 3, 9, 39
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi7:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 6, 4, 8, 7, 22
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 33553920) ; 0xffff << 9
ret i64 %r
}
@@ -109,6 +172,12 @@ define i64 @rldimi8(i64 %a, i64 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi8:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 0)
ret i64 %r
}
@@ -118,6 +187,12 @@ define i64 @rldimi9(i64 %a, i64 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi9:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 63, i64 0)
ret i64 %r
}
@@ -126,6 +201,10 @@ define i64 @rldimi10(i64 %a, i64 %b) {
; CHECK-LABEL: rldimi10:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi10:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 -1)
ret i64 %r
}
@@ -135,8 +214,295 @@ define i64 @rldimi11(i64 %a, i64 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: rotldi 3, 3, 8
; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi11:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rotlwi 5, 4, 8
+; 32BIT-NEXT: rotlwi 6, 3, 8
+; 32BIT-NEXT: rlwimi 5, 3, 8, 0, 23
+; 32BIT-NEXT: rlwimi 6, 4, 8, 0, 23
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 -1)
ret i64 %r
}
+define i64 @rldimi12(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 20
+; CHECK-NEXT: rldimi 4, 3, 44, 31
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi12:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 6, 4, 0, 0, 31
+; 32BIT-NEXT: rlwimi 5, 3, 0, 0, 19
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: rlwimi 3, 5, 0, 0, 30
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 18446726490113441791)
+ ret i64 %r
+}
+
+define i64 @rldimi13(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi13:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 62
+; CHECK-NEXT: rldimi 4, 3, 32, 2
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi13:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: rlwimi 3, 4, 30, 2, 31
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 30, i64 4611686014132420608)
+ ret i64 %r
+}
+
+define i64 @rldimi14(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 23
+; CHECK-NEXT: rldimi 4, 3, 53, 0
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi14:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874454810624) ; mb=0, me=10
+ ret i64 %r
+}
+
+define i64 @rldimi15(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 36
+; CHECK-NEXT: rldimi 4, 3, 40, 10
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi15:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 10, 19
+; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 23
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18013298997854208) ; mb=10, me=23
+ ret i64 %r
+}
+
+define i64 @rldimi16(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 57
+; CHECK-NEXT: rldimi 4, 3, 19, 10
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi16:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 10, 19
+; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 12
+; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18014398508957696) ; mb=10, me=44
+ ret i64 %r
+}
+
+define i64 @rldimi17(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi17:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 43
+; CHECK-NEXT: rldimi 4, 3, 33, 25
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi17:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: rlwimi 3, 4, 12, 25, 30
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 541165879296) ; mb=25, me=30
+ ret i64 %r
+}
+
+define i64 @rldimi18(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi18:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 57
+; CHECK-NEXT: rldimi 4, 3, 19, 25
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi18:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 12
+; 32BIT-NEXT: rlwimi 3, 4, 12, 25, 31
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 549755289600) ; mb=25, me=44
+ ret i64 %r
+}
+
+define i64 @rldimi19(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi19:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 57
+; CHECK-NEXT: rldimi 4, 3, 19, 33
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi19:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 6, 4, 12, 1, 12
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 2146959360) ; mb=33, me=44
+ ret i64 %r
+}
+
+define i64 @rldimi20(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi20:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 23
+; CHECK-NEXT: rldimi 4, 3, 53, 15
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi20:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10
+; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19
+; 32BIT-NEXT: rlwimi 5, 3, 12, 15, 19
+; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31
+; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18438299824408231935) ; mb=15, me=10
+ ret i64 %r
+}
+
+define i64 @rldimi21(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi21:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 23
+; CHECK-NEXT: rldimi 4, 3, 53, 25
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi21:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10
+; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19
+; 32BIT-NEXT: rlwimi 5, 4, 12, 25, 31
+; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437737424210624511) ; mb=25, me=10
+ ret i64 %r
+}
+
+define i64 @rldimi22(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi22:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 34
+; CHECK-NEXT: rldimi 4, 3, 42, 25
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi22:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19
+; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19
+; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 21
+; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31
+; 32BIT-NEXT: rlwimi 5, 4, 12, 25, 31
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446740225418854399) ; mb=25, me=21
+ ret i64 %r
+}
+
+define i64 @rldimi23(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi23:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 23
+; CHECK-NEXT: rldimi 4, 3, 53, 44
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi23:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19
+; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10
+; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874455859199) ; mb=44, me=10
+ ret i64 %r
+}
+
+define i64 @rldimi24(i64 %a, i64 %b) {
+; CHECK-LABEL: rldimi24:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rotldi 3, 3, 38
+; CHECK-NEXT: rldimi 4, 3, 38, 44
+; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: blr
+;
+; 32BIT-LABEL: rldimi24:
+; 32BIT: # %bb.0:
+; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19
+; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19
+; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 25
+; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31
+; 32BIT-NEXT: mr 3, 5
+; 32BIT-NEXT: mr 4, 6
+; 32BIT-NEXT: blr
+ %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446743798832693247) ;...
[truncated]
|
rldimi is a 64-bit instruction; for backward compatibility, it needs to be expanded into a series of rlwimi instructions in a 32-bit environment. In the future, we may improve the bit permutation selector and remove such direct codegen.
Why must it be a 'series of rlwimi'? Why don't we just expand it following what the ISA describes and let the legalizer generate the code sequence in 32-bit mode?
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one nit
rldimi is a 64-bit instruction; for backward compatibility, it needs to be expanded into a series of rotate and mask operations in a 32-bit environment. In the future, we may improve the bit permutation selector and remove such direct codegen.