Skip to content

Commit a4558a4

Browse files
authored
[PowerPC] Implement 32-bit expansion for rldimi (llvm#86783)
rldimi is a 64-bit instruction. For backward compatibility, it needs to be expanded into a series of rotate and mask operations in 32-bit environments. In the future, we may improve the bit permutation selector and remove this direct codegen.
1 parent 9430a4b commit a4558a4

File tree

5 files changed

+167
-11
lines changed

5 files changed

+167
-11
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17288,6 +17288,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
1728817288
Value *Op1 = EmitScalarExpr(E->getArg(1));
1728917289
Value *Op2 = EmitScalarExpr(E->getArg(2));
1729017290
Value *Op3 = EmitScalarExpr(E->getArg(3));
17291+
// rldimi is a 64-bit instruction; expand the intrinsic before isel to
17292+
// leverage peephole and avoid legalization efforts.
17293+
if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17294+
!getTarget().getTriple().isPPC64()) {
17295+
Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17296+
Op2 = Builder.CreateZExt(Op2, Int64Ty);
17297+
Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17298+
return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17299+
Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17300+
}
1729117301
return Builder.CreateCall(
1729217302
CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
1729317303
? Intrinsic::ppc_rldimi

clang/lib/Sema/SemaChecking.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5232,7 +5232,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
52325232
case PPC::BI__builtin_ppc_fetch_and_andlp:
52335233
case PPC::BI__builtin_ppc_fetch_and_orlp:
52345234
case PPC::BI__builtin_ppc_fetch_and_swaplp:
5235-
case PPC::BI__builtin_ppc_rldimi:
52365235
return true;
52375236
}
52385237
return false;

clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ void test_trap(void) {
2424
__tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
2525
}
2626

27-
#ifdef __PPC64__
2827
void test_builtin_ppc_rldimi() {
2928
unsigned int shift;
3029
unsigned long long mask;
@@ -33,7 +32,6 @@ void test_builtin_ppc_rldimi() {
3332
res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00); // expected-error {{argument 3 value should represent a contiguous bit field}}
3433
res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
3534
}
36-
#endif
3735

3836
void test_builtin_ppc_rlwimi() {
3937
unsigned int shift;
@@ -86,10 +84,6 @@ void testalignx(const void *pointer, unsigned int alignment) {
8684
}
8785

8886
#ifndef __PPC64__
89-
unsigned long long testrldimi32() {
90-
return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}}
91-
}
92-
9387
long long testbpermd(long long bit_selector, long long source) {
9488
return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}}
9589
}

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,6 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
182182
def int_ppc_fctuwz
183183
: ClangBuiltin<"__builtin_ppc_fctuwz">,
184184
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
185-
def int_ppc_rldimi
186-
: ClangBuiltin<"__builtin_ppc_rldimi">,
187-
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
188-
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
189185
def int_ppc_rlwimi
190186
: ClangBuiltin<"__builtin_ppc_rlwimi">,
191187
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
@@ -194,6 +190,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
194190
: ClangBuiltin<"__builtin_ppc_rlwnm">,
195191
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
196192
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
193+
def int_ppc_rldimi
194+
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
195+
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
197196

198197
// XL compatible select functions
199198
// TODO: Add llvm_f128_ty support.

llvm/test/CodeGen/PowerPC/rldimi.ll

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,158 @@ define i64 @rldimi11(i64 %a, i64 %b) {
139139
ret i64 %r
140140
}
141141

142+
define i64 @rldimi12(i64 %a, i64 %b) {
143+
; CHECK-LABEL: rldimi12:
144+
; CHECK: # %bb.0:
145+
; CHECK-NEXT: rotldi 3, 3, 20
146+
; CHECK-NEXT: rldimi 4, 3, 44, 31
147+
; CHECK-NEXT: mr 3, 4
148+
; CHECK-NEXT: blr
149+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 18446726490113441791)
150+
ret i64 %r
151+
}
152+
153+
define i64 @rldimi13(i64 %a, i64 %b) {
154+
; CHECK-LABEL: rldimi13:
155+
; CHECK: # %bb.0:
156+
; CHECK-NEXT: rotldi 3, 3, 62
157+
; CHECK-NEXT: rldimi 4, 3, 32, 2
158+
; CHECK-NEXT: mr 3, 4
159+
; CHECK-NEXT: blr
160+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 30, i64 4611686014132420608)
161+
ret i64 %r
162+
}
163+
164+
define i64 @rldimi14(i64 %a, i64 %b) {
165+
; CHECK-LABEL: rldimi14:
166+
; CHECK: # %bb.0:
167+
; CHECK-NEXT: rotldi 3, 3, 23
168+
; CHECK-NEXT: rldimi 4, 3, 53, 0
169+
; CHECK-NEXT: mr 3, 4
170+
; CHECK-NEXT: blr
171+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874454810624) ; mb=0, me=10
172+
ret i64 %r
173+
}
174+
175+
define i64 @rldimi15(i64 %a, i64 %b) {
176+
; CHECK-LABEL: rldimi15:
177+
; CHECK: # %bb.0:
178+
; CHECK-NEXT: rotldi 3, 3, 36
179+
; CHECK-NEXT: rldimi 4, 3, 40, 10
180+
; CHECK-NEXT: mr 3, 4
181+
; CHECK-NEXT: blr
182+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18013298997854208) ; mb=10, me=23
183+
ret i64 %r
184+
}
185+
186+
define i64 @rldimi16(i64 %a, i64 %b) {
187+
; CHECK-LABEL: rldimi16:
188+
; CHECK: # %bb.0:
189+
; CHECK-NEXT: rotldi 3, 3, 57
190+
; CHECK-NEXT: rldimi 4, 3, 19, 10
191+
; CHECK-NEXT: mr 3, 4
192+
; CHECK-NEXT: blr
193+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18014398508957696) ; mb=10, me=44
194+
ret i64 %r
195+
}
196+
197+
define i64 @rldimi17(i64 %a, i64 %b) {
198+
; CHECK-LABEL: rldimi17:
199+
; CHECK: # %bb.0:
200+
; CHECK-NEXT: rotldi 3, 3, 43
201+
; CHECK-NEXT: rldimi 4, 3, 33, 25
202+
; CHECK-NEXT: mr 3, 4
203+
; CHECK-NEXT: blr
204+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 541165879296) ; mb=25, me=30
205+
ret i64 %r
206+
}
207+
208+
define i64 @rldimi18(i64 %a, i64 %b) {
209+
; CHECK-LABEL: rldimi18:
210+
; CHECK: # %bb.0:
211+
; CHECK-NEXT: rotldi 3, 3, 57
212+
; CHECK-NEXT: rldimi 4, 3, 19, 25
213+
; CHECK-NEXT: mr 3, 4
214+
; CHECK-NEXT: blr
215+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 549755289600) ; mb=25, me=44
216+
ret i64 %r
217+
}
218+
219+
define i64 @rldimi19(i64 %a, i64 %b) {
220+
; CHECK-LABEL: rldimi19:
221+
; CHECK: # %bb.0:
222+
; CHECK-NEXT: rotldi 3, 3, 57
223+
; CHECK-NEXT: rldimi 4, 3, 19, 33
224+
; CHECK-NEXT: mr 3, 4
225+
; CHECK-NEXT: blr
226+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 2146959360) ; mb=33, me=44
227+
ret i64 %r
228+
}
229+
230+
define i64 @rldimi20(i64 %a, i64 %b) {
231+
; CHECK-LABEL: rldimi20:
232+
; CHECK: # %bb.0:
233+
; CHECK-NEXT: rotldi 3, 3, 23
234+
; CHECK-NEXT: rldimi 4, 3, 53, 15
235+
; CHECK-NEXT: mr 3, 4
236+
; CHECK-NEXT: blr
237+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18438299824408231935) ; mb=15, me=10
238+
ret i64 %r
239+
}
240+
241+
define i64 @rldimi21(i64 %a, i64 %b) {
242+
; CHECK-LABEL: rldimi21:
243+
; CHECK: # %bb.0:
244+
; CHECK-NEXT: rotldi 3, 3, 23
245+
; CHECK-NEXT: rldimi 4, 3, 53, 25
246+
; CHECK-NEXT: mr 3, 4
247+
; CHECK-NEXT: blr
248+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437737424210624511) ; mb=25, me=10
249+
ret i64 %r
250+
}
251+
252+
define i64 @rldimi22(i64 %a, i64 %b) {
253+
; CHECK-LABEL: rldimi22:
254+
; CHECK: # %bb.0:
255+
; CHECK-NEXT: rotldi 3, 3, 34
256+
; CHECK-NEXT: rldimi 4, 3, 42, 25
257+
; CHECK-NEXT: mr 3, 4
258+
; CHECK-NEXT: blr
259+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446740225418854399) ; mb=25, me=21
260+
ret i64 %r
261+
}
262+
263+
define i64 @rldimi23(i64 %a, i64 %b) {
264+
; CHECK-LABEL: rldimi23:
265+
; CHECK: # %bb.0:
266+
; CHECK-NEXT: rotldi 3, 3, 23
267+
; CHECK-NEXT: rldimi 4, 3, 53, 44
268+
; CHECK-NEXT: mr 3, 4
269+
; CHECK-NEXT: blr
270+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874455859199) ; mb=44, me=10
271+
ret i64 %r
272+
}
273+
274+
define i64 @rldimi24(i64 %a, i64 %b) {
275+
; CHECK-LABEL: rldimi24:
276+
; CHECK: # %bb.0:
277+
; CHECK-NEXT: rotldi 3, 3, 38
278+
; CHECK-NEXT: rldimi 4, 3, 38, 44
279+
; CHECK-NEXT: mr 3, 4
280+
; CHECK-NEXT: blr
281+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446743798832693247) ; mb=44, me=25
282+
ret i64 %r
283+
}
284+
285+
define i64 @rldimi25(i64 %a, i64 %b) {
286+
; CHECK-LABEL: rldimi25:
287+
; CHECK: # %bb.0:
288+
; CHECK-NEXT: rotldi 3, 3, 48
289+
; CHECK-NEXT: rldimi 4, 3, 28, 44
290+
; CHECK-NEXT: mr 3, 4
291+
; CHECK-NEXT: blr
292+
%r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446744073442164735) ; mb=44, me=35
293+
ret i64 %r
294+
}
295+
142296
declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg)

0 commit comments

Comments
 (0)