Skip to content

Commit 492c1f3

Browse files
[PowerPC] Merge rotate and clear into single instruction.
This patch tries to catch a codegen opportunity where the rotate and mask can be merged into a single RLDCL instruction. Reviewed By: lei, amyk Differential Revision: https://reviews.llvm.org/D158328
1 parent 80225af commit 492c1f3

File tree

2 files changed

+57
-41
lines changed

2 files changed

+57
-41
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,7 @@ namespace {
424424
bool tryFoldSWTestBRCC(SDNode *N);
425425
bool trySelectLoopCountIntrinsic(SDNode *N);
426426
bool tryAsSingleRLDICL(SDNode *N);
427+
bool tryAsSingleRLDCL(SDNode *N);
427428
bool tryAsSingleRLDICR(SDNode *N);
428429
bool tryAsSingleRLWINM(SDNode *N);
429430
bool tryAsSingleRLWINM8(SDNode *N);
@@ -5084,6 +5085,35 @@ bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
50845085
return false;
50855086
}
50865087

5088+
// Try to select an (and (rotl X, Amt), Mask) node as a single RLDCL, folding
// the rotate and the clearing of the high-order bits into one instruction.
// N must be an ISD::AND node. Returns true if N was selected as PPC::RLDCL,
// and false (leaving N untouched) if the pattern does not match.
bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5089+
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5090+
5091+
uint64_t Imm64;
5092+
// The second AND operand must be a 64-bit immediate that passes isMask_64
// (per LLVM's MathExtras: a non-empty run of ones starting at bit 0).
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5093+
return false;
5094+
5095+
SDValue Val = N->getOperand(0);
5096+
5097+
// The value being masked must itself be a rotate-left.
if (Val.getOpcode() != ISD::ROTL)
5098+
return false;
5099+
5100+
// Bail out when the rotate amount is a constant, as in this example:
5101+
// %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5102+
// %and1 = and i64 %2, 9223372036854775807
5103+
// This function only matches RLDCL, whose rotate amount is not a constant.
5104+
// The DAG above is better matched by RLDICL, so return false here and
5105+
// leave that case to the other tryAs* matchers.
5106+
SDValue RotateAmt = Val.getOperand(1);
5107+
if (RotateAmt.getOpcode() == ISD::Constant)
5108+
return false;
5109+
5110+
// Imm64 is a low-bit mask, so 64 minus its count of trailing ones is the
// number of high-order bits the AND clears (0x7FFFFFFFFFFFFFFF gives 1).
unsigned MB = 64 - llvm::countr_one(Imm64);
5111+
SDLoc dl(N);
5112+
// Operands: the value being rotated, the rotate amount, and MB as an i32
// immediate.
SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5113+
CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5114+
return true;
5115+
}
5116+
50875117
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
50885118
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
50895119
uint64_t Imm64;
@@ -5604,8 +5634,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
56045634

56055635
case ISD::AND:
56065636
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5607-
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
5608-
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
5637+
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5638+
tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5639+
tryAsPairOfRLDICL(N))
56095640
return;
56105641

56115642
// Other cases are autogenerated.

llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll

Lines changed: 24 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -75,22 +75,19 @@ define dso_local i64 @rotatemask64(i64 noundef %word) local_unnamed_addr #0 {
7575
; AIX64-LABEL: rotatemask64:
7676
; AIX64: # %bb.0: # %entry
7777
; AIX64-NEXT: cntlzd r4, r3
78-
; AIX64-NEXT: rotld r3, r3, r4
79-
; AIX64-NEXT: clrldi r3, r3, 1
78+
; AIX64-NEXT: rldcl r3, r3, r4, 1
8079
; AIX64-NEXT: blr
8180
;
8281
; LINUX64BE-LABEL: rotatemask64:
8382
; LINUX64BE: # %bb.0: # %entry
8483
; LINUX64BE-NEXT: cntlzd r4, r3
85-
; LINUX64BE-NEXT: rotld r3, r3, r4
86-
; LINUX64BE-NEXT: clrldi r3, r3, 1
84+
; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
8785
; LINUX64BE-NEXT: blr
8886
;
8987
; LINUX64LE-LABEL: rotatemask64:
9088
; LINUX64LE: # %bb.0: # %entry
9189
; LINUX64LE-NEXT: cntlzd r4, r3
92-
; LINUX64LE-NEXT: rotld r3, r3, r4
93-
; LINUX64LE-NEXT: clrldi r3, r3, 1
90+
; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
9491
; LINUX64LE-NEXT: blr
9592
entry:
9693
%0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -126,22 +123,19 @@ define dso_local i64 @rotatemask64_2(i64 noundef %word) local_unnamed_addr #0 {
126123
; AIX64-LABEL: rotatemask64_2:
127124
; AIX64: # %bb.0: # %entry
128125
; AIX64-NEXT: cntlzd r4, r3
129-
; AIX64-NEXT: rotld r3, r3, r4
130-
; AIX64-NEXT: clrldi r3, r3, 1
126+
; AIX64-NEXT: rldcl r3, r3, r4, 1
131127
; AIX64-NEXT: blr
132128
;
133129
; LINUX64BE-LABEL: rotatemask64_2:
134130
; LINUX64BE: # %bb.0: # %entry
135131
; LINUX64BE-NEXT: cntlzd r4, r3
136-
; LINUX64BE-NEXT: rotld r3, r3, r4
137-
; LINUX64BE-NEXT: clrldi r3, r3, 1
132+
; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
138133
; LINUX64BE-NEXT: blr
139134
;
140135
; LINUX64LE-LABEL: rotatemask64_2:
141136
; LINUX64LE: # %bb.0: # %entry
142137
; LINUX64LE-NEXT: cntlzd r4, r3
143-
; LINUX64LE-NEXT: rotld r3, r3, r4
144-
; LINUX64LE-NEXT: clrldi r3, r3, 1
138+
; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
145139
; LINUX64LE-NEXT: blr
146140
entry:
147141
%0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -222,20 +216,17 @@ define dso_local i64 @rotatemask64_nocount(i64 noundef %word, i64 noundef %clz)
222216
;
223217
; AIX64-LABEL: rotatemask64_nocount:
224218
; AIX64: # %bb.0: # %entry
225-
; AIX64-NEXT: rotld r3, r3, r4
226-
; AIX64-NEXT: clrldi r3, r3, 8
219+
; AIX64-NEXT: rldcl r3, r3, r4, 8
227220
; AIX64-NEXT: blr
228221
;
229222
; LINUX64BE-LABEL: rotatemask64_nocount:
230223
; LINUX64BE: # %bb.0: # %entry
231-
; LINUX64BE-NEXT: rotld r3, r3, r4
232-
; LINUX64BE-NEXT: clrldi r3, r3, 8
224+
; LINUX64BE-NEXT: rldcl r3, r3, r4, 8
233225
; LINUX64BE-NEXT: blr
234226
;
235227
; LINUX64LE-LABEL: rotatemask64_nocount:
236228
; LINUX64LE: # %bb.0: # %entry
237-
; LINUX64LE-NEXT: rotld r3, r3, r4
238-
; LINUX64LE-NEXT: clrldi r3, r3, 8
229+
; LINUX64LE-NEXT: rldcl r3, r3, r4, 8
239230
; LINUX64LE-NEXT: blr
240231
entry:
241232
%0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %clz)
@@ -262,20 +253,17 @@ define dso_local i64 @builtincheck(i64 noundef %word, i64 noundef %shift) local_
262253
;
263254
; AIX64-LABEL: builtincheck:
264255
; AIX64: # %bb.0: # %entry
265-
; AIX64-NEXT: rotld r3, r3, r4
266-
; AIX64-NEXT: clrldi r3, r3, 1
256+
; AIX64-NEXT: rldcl r3, r3, r4, 1
267257
; AIX64-NEXT: blr
268258
;
269259
; LINUX64BE-LABEL: builtincheck:
270260
; LINUX64BE: # %bb.0: # %entry
271-
; LINUX64BE-NEXT: rotld r3, r3, r4
272-
; LINUX64BE-NEXT: clrldi r3, r3, 1
261+
; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
273262
; LINUX64BE-NEXT: blr
274263
;
275264
; LINUX64LE-LABEL: builtincheck:
276265
; LINUX64LE: # %bb.0: # %entry
277-
; LINUX64LE-NEXT: rotld r3, r3, r4
278-
; LINUX64LE-NEXT: clrldi r3, r3, 1
266+
; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
279267
; LINUX64LE-NEXT: blr
280268
entry:
281269
%0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %shift)
@@ -352,9 +340,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
352340
; AIX64-NEXT: stdu r1, -112(r1)
353341
; AIX64-NEXT: cntlzd r4, r3
354342
; AIX64-NEXT: std r0, 128(r1)
355-
; AIX64-NEXT: rotld r4, r3, r4
356-
; AIX64-NEXT: clrldi r3, r4, 1
357-
; AIX64-NEXT: clrldi r4, r4, 16
343+
; AIX64-NEXT: rldcl r5, r3, r4, 1
344+
; AIX64-NEXT: rldcl r4, r3, r4, 16
345+
; AIX64-NEXT: mr r3, r5
358346
; AIX64-NEXT: bl .callee[PR]
359347
; AIX64-NEXT: nop
360348
; AIX64-NEXT: addi r1, r1, 112
@@ -368,9 +356,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
368356
; LINUX64BE-NEXT: stdu r1, -112(r1)
369357
; LINUX64BE-NEXT: cntlzd r4, r3
370358
; LINUX64BE-NEXT: std r0, 128(r1)
371-
; LINUX64BE-NEXT: rotld r4, r3, r4
372-
; LINUX64BE-NEXT: clrldi r3, r4, 1
373-
; LINUX64BE-NEXT: clrldi r4, r4, 16
359+
; LINUX64BE-NEXT: rldcl r5, r3, r4, 1
360+
; LINUX64BE-NEXT: rldcl r4, r3, r4, 16
361+
; LINUX64BE-NEXT: mr r3, r5
374362
; LINUX64BE-NEXT: bl callee
375363
; LINUX64BE-NEXT: nop
376364
; LINUX64BE-NEXT: addi r1, r1, 112
@@ -384,9 +372,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
384372
; LINUX64LE-NEXT: stdu r1, -32(r1)
385373
; LINUX64LE-NEXT: cntlzd r4, r3
386374
; LINUX64LE-NEXT: std r0, 48(r1)
387-
; LINUX64LE-NEXT: rotld r4, r3, r4
388-
; LINUX64LE-NEXT: clrldi r3, r4, 1
389-
; LINUX64LE-NEXT: clrldi r4, r4, 16
375+
; LINUX64LE-NEXT: rldcl r5, r3, r4, 1
376+
; LINUX64LE-NEXT: rldcl r4, r3, r4, 16
377+
; LINUX64LE-NEXT: mr r3, r5
390378
; LINUX64LE-NEXT: bl callee
391379
; LINUX64LE-NEXT: nop
392380
; LINUX64LE-NEXT: addi r1, r1, 32
@@ -445,8 +433,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
445433
; AIX64-NEXT: stdu r1, -112(r1)
446434
; AIX64-NEXT: cntlzd r4, r3
447435
; AIX64-NEXT: std r0, 128(r1)
448-
; AIX64-NEXT: rotld r4, r3, r4
449-
; AIX64-NEXT: clrldi r5, r4, 1
436+
; AIX64-NEXT: rldcl r5, r3, r4, 1
450437
; AIX64-NEXT: rldicl r4, r3, 23, 1
451438
; AIX64-NEXT: mr r3, r5
452439
; AIX64-NEXT: bl .callee[PR]
@@ -462,8 +449,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
462449
; LINUX64BE-NEXT: stdu r1, -112(r1)
463450
; LINUX64BE-NEXT: cntlzd r4, r3
464451
; LINUX64BE-NEXT: std r0, 128(r1)
465-
; LINUX64BE-NEXT: rotld r4, r3, r4
466-
; LINUX64BE-NEXT: clrldi r5, r4, 1
452+
; LINUX64BE-NEXT: rldcl r5, r3, r4, 1
467453
; LINUX64BE-NEXT: rldicl r4, r3, 23, 1
468454
; LINUX64BE-NEXT: mr r3, r5
469455
; LINUX64BE-NEXT: bl callee
@@ -479,8 +465,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
479465
; LINUX64LE-NEXT: stdu r1, -32(r1)
480466
; LINUX64LE-NEXT: cntlzd r4, r3
481467
; LINUX64LE-NEXT: std r0, 48(r1)
482-
; LINUX64LE-NEXT: rotld r4, r3, r4
483-
; LINUX64LE-NEXT: clrldi r5, r4, 1
468+
; LINUX64LE-NEXT: rldcl r5, r3, r4, 1
484469
; LINUX64LE-NEXT: rldicl r4, r3, 23, 1
485470
; LINUX64LE-NEXT: mr r3, r5
486471
; LINUX64LE-NEXT: bl callee

0 commit comments

Comments
 (0)