Skip to content

Commit 2de74e1

Browse files
author
esmeyi
committed
[PowerPC][Peephole] Combine rldicl/rldicr and andi/andis after isel.
Summary: An rldicl/rldicr can be eliminated if it is only used to clear the high-order or low-order n bits and every bit it clears will be ANDed with 0 by the following andi/andis anyway. Alternatively, the pair can be folded to `andi 0` if all bits selected by the AND mask are already zero in the input to andi/andis (i.e. in the result of the rldicl/rldicr). Reviewed By: qiucf, shchenz Differential Revision: https://reviews.llvm.org/D159073
1 parent ea0ee55 commit 2de74e1

File tree

3 files changed

+213
-9
lines changed

3 files changed

+213
-9
lines changed

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1197,6 +1197,60 @@ bool PPCMIPeephole::simplifyCode() {
11971197
combineSEXTAndSHL(MI, ToErase);
11981198
break;
11991199
}
1200+
case PPC::ANDI_rec:
1201+
case PPC::ANDI8_rec:
1202+
case PPC::ANDIS_rec:
1203+
case PPC::ANDIS8_rec: {
1204+
Register TrueReg =
1205+
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1206+
if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1207+
break;
1208+
1209+
MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1210+
if (!SrcMI)
1211+
break;
1212+
1213+
unsigned SrcOpCode = SrcMI->getOpcode();
1214+
if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1215+
break;
1216+
1217+
uint64_t AndImm = MI.getOperand(2).getImm();
1218+
if (MI.getOpcode() == PPC::ANDIS_rec ||
1219+
MI.getOpcode() == PPC::ANDIS8_rec)
1220+
AndImm <<= 16;
1221+
uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1222+
uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1223+
uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1224+
1225+
// We can transform `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1226+
// if all bits to AND are already zero in the input.
1227+
bool PatternResultZero =
1228+
(SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1229+
(SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1230+
1231+
// We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1232+
// bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1233+
bool PatternRemoveRotate =
1234+
SrcMI->getOperand(2).getImm() == 0 &&
1235+
((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1236+
(SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1237+
1238+
if (!PatternResultZero && !PatternRemoveRotate)
1239+
break;
1240+
1241+
LLVM_DEBUG(dbgs() << "Combining pair: ");
1242+
LLVM_DEBUG(SrcMI->dump());
1243+
LLVM_DEBUG(MI.dump());
1244+
if (PatternResultZero)
1245+
MI.getOperand(2).setImm(0);
1246+
MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1247+
addRegToUpdate(MI.getOperand(1).getReg());
1248+
LLVM_DEBUG(dbgs() << "To: ");
1249+
LLVM_DEBUG(MI.dump());
1250+
Simplified = true;
1251+
SrcMI->eraseFromParent();
1252+
break;
1253+
}
12001254
case PPC::RLWINM:
12011255
case PPC::RLWINM_rec:
12021256
case PPC::RLWINM8:
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir \
3+
# RUN: -verify-machineinstrs -run-pass ppc-mi-peepholes < %s | FileCheck %s
4+
5+
---
6+
name: fold_RLDICL_ANDI
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0.entry:
10+
liveins: $x3
11+
; CHECK-LABEL: name: fold_RLDICL_ANDI
12+
; CHECK: liveins: $x3
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
15+
; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 1, implicit-def dead $cr0
16+
; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
17+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
18+
%1:g8rc = COPY $x3
19+
%2:g8rc = RLDICL %1:g8rc, 0, 32
20+
%3:g8rc = ANDI8_rec killed %2:g8rc, 1, implicit-def dead $cr0
21+
$x3 = COPY %3:g8rc
22+
BLR8 implicit $lr8, implicit $rm, implicit $x3
23+
...
24+
---
25+
name: fold_RLDICL_ANDI2
26+
tracksRegLiveness: true
27+
body: |
28+
bb.0.entry:
29+
liveins: $x3
30+
; CHECK-LABEL: name: fold_RLDICL_ANDI2
31+
; CHECK: liveins: $x3
32+
; CHECK-NEXT: {{ $}}
33+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
34+
; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 0, implicit-def dead $cr0
35+
; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
36+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
37+
%1:g8rc = COPY $x3
38+
%2:g8rc = RLDICL %1:g8rc, 10, 60
39+
%3:g8rc = ANDI8_rec killed %2:g8rc, 32, implicit-def dead $cr0
40+
$x3 = COPY %3:g8rc
41+
BLR8 implicit $lr8, implicit $rm, implicit $x3
42+
...
43+
---
44+
name: fold_RLDICR_ANDI
45+
tracksRegLiveness: true
46+
body: |
47+
bb.0.entry:
48+
liveins: $x3
49+
; CHECK-LABEL: name: fold_RLDICR_ANDI
50+
; CHECK: liveins: $x3
51+
; CHECK-NEXT: {{ $}}
52+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
53+
; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 16, implicit-def dead $cr0
54+
; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
55+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
56+
%1:g8rc = COPY $x3
57+
%2:g8rc = RLDICR %1:g8rc, 0, 60
58+
%3:g8rc = ANDI8_rec killed %2:g8rc, 16, implicit-def dead $cr0
59+
$x3 = COPY %3:g8rc
60+
BLR8 implicit $lr8, implicit $rm, implicit $x3
61+
...
62+
---
63+
name: fold_RLDICR_ANDI2
64+
tracksRegLiveness: true
65+
body: |
66+
bb.0.entry:
67+
liveins: $x3
68+
; CHECK-LABEL: name: fold_RLDICR_ANDI2
69+
; CHECK: liveins: $x3
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
72+
; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 0, implicit-def dead $cr0
73+
; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
74+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
75+
%1:g8rc = COPY $x3
76+
%2:g8rc = RLDICR %1:g8rc, 10, 60
77+
%3:g8rc = ANDI8_rec killed %2:g8rc, 1, implicit-def dead $cr0
78+
$x3 = COPY %3:g8rc
79+
BLR8 implicit $lr8, implicit $rm, implicit $x3
80+
...
81+
---
82+
name: fold_RLDICL_ANDIS
83+
tracksRegLiveness: true
84+
body: |
85+
bb.0.entry:
86+
liveins: $x3
87+
; CHECK-LABEL: name: fold_RLDICL_ANDIS
88+
; CHECK: liveins: $x3
89+
; CHECK-NEXT: {{ $}}
90+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
91+
; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 1, implicit-def dead $cr0
92+
; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
93+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
94+
%1:g8rc = COPY $x3
95+
%2:g8rc = RLDICL %1:g8rc, 0, 32
96+
%3:g8rc = ANDIS8_rec killed %2:g8rc, 1, implicit-def dead $cr0
97+
$x3 = COPY %3:g8rc
98+
BLR8 implicit $lr8, implicit $rm, implicit $x3
99+
...
100+
---
101+
name: fold_RLDICL_ANDIS2
102+
tracksRegLiveness: true
103+
body: |
104+
bb.0.entry:
105+
liveins: $x3
106+
; CHECK-LABEL: name: fold_RLDICL_ANDIS2
107+
; CHECK: liveins: $x3
108+
; CHECK-NEXT: {{ $}}
109+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
110+
; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 0, implicit-def dead $cr0
111+
; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
112+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
113+
%1:g8rc = COPY $x3
114+
%2:g8rc = RLDICL %1:g8rc, 10, 60
115+
%3:g8rc = ANDIS8_rec killed %2:g8rc, 16, implicit-def dead $cr0
116+
$x3 = COPY %3:g8rc
117+
BLR8 implicit $lr8, implicit $rm, implicit $x3
118+
...
119+
---
120+
name: fold_RLDICR_ANDIS
121+
tracksRegLiveness: true
122+
body: |
123+
bb.0.entry:
124+
liveins: $x3
125+
; CHECK-LABEL: name: fold_RLDICR_ANDIS
126+
; CHECK: liveins: $x3
127+
; CHECK-NEXT: {{ $}}
128+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
129+
; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 16, implicit-def dead $cr0
130+
; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
131+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
132+
%1:g8rc = COPY $x3
133+
%2:g8rc = RLDICR %1:g8rc, 0, 60
134+
%3:g8rc = ANDIS8_rec killed %2:g8rc, 16, implicit-def dead $cr0
135+
$x3 = COPY %3:g8rc
136+
BLR8 implicit $lr8, implicit $rm, implicit $x3
137+
...
138+
---
139+
name: fold_RLDICR_ANDIS2
140+
tracksRegLiveness: true
141+
body: |
142+
bb.0.entry:
143+
liveins: $x3
144+
; CHECK-LABEL: name: fold_RLDICR_ANDIS2
145+
; CHECK: liveins: $x3
146+
; CHECK-NEXT: {{ $}}
147+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
148+
; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 0, implicit-def dead $cr0
149+
; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
150+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
151+
%1:g8rc = COPY $x3
152+
%2:g8rc = RLDICR %1:g8rc, 10, 32
153+
%3:g8rc = ANDIS8_rec killed %2:g8rc, 1, implicit-def dead $cr0
154+
$x3 = COPY %3:g8rc
155+
BLR8 implicit $lr8, implicit $rm, implicit $x3
156+
...

llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2204,8 +2204,7 @@ entry:
22042204
define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
22052205
; CHECK-LABEL: getvelsl:
22062206
; CHECK: # %bb.0: # %entry
2207-
; CHECK-NEXT: clrldi r3, r5, 32
2208-
; CHECK-NEXT: andi. r3, r3, 1
2207+
; CHECK-NEXT: andi. r3, r5, 1
22092208
; CHECK-NEXT: sldi r3, r3, 3
22102209
; CHECK-NEXT: lvsl v3, 0, r3
22112210
; CHECK-NEXT: vperm v2, v2, v2, v3
@@ -2225,7 +2224,6 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
22252224
;
22262225
; CHECK-AIX-LABEL: getvelsl:
22272226
; CHECK-AIX: # %bb.0: # %entry
2228-
; CHECK-AIX-NEXT: clrldi 3, 3, 32
22292227
; CHECK-AIX-NEXT: andi. 3, 3, 1
22302228
; CHECK-AIX-NEXT: sldi 3, 3, 3
22312229
; CHECK-AIX-NEXT: lvsl 3, 0, 3
@@ -2242,8 +2240,7 @@ entry:
22422240
define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
22432241
; CHECK-LABEL: getvelul:
22442242
; CHECK: # %bb.0: # %entry
2245-
; CHECK-NEXT: clrldi r3, r5, 32
2246-
; CHECK-NEXT: andi. r3, r3, 1
2243+
; CHECK-NEXT: andi. r3, r5, 1
22472244
; CHECK-NEXT: sldi r3, r3, 3
22482245
; CHECK-NEXT: lvsl v3, 0, r3
22492246
; CHECK-NEXT: vperm v2, v2, v2, v3
@@ -2263,7 +2260,6 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
22632260
;
22642261
; CHECK-AIX-LABEL: getvelul:
22652262
; CHECK-AIX: # %bb.0: # %entry
2266-
; CHECK-AIX-NEXT: clrldi 3, 3, 32
22672263
; CHECK-AIX-NEXT: andi. 3, 3, 1
22682264
; CHECK-AIX-NEXT: sldi 3, 3, 3
22692265
; CHECK-AIX-NEXT: lvsl 3, 0, 3
@@ -2461,8 +2457,7 @@ entry:
24612457
define double @getveld(<2 x double> %vd, i32 signext %i) {
24622458
; CHECK-LABEL: getveld:
24632459
; CHECK: # %bb.0: # %entry
2464-
; CHECK-NEXT: clrldi r3, r5, 32
2465-
; CHECK-NEXT: andi. r3, r3, 1
2460+
; CHECK-NEXT: andi. r3, r5, 1
24662461
; CHECK-NEXT: sldi r3, r3, 3
24672462
; CHECK-NEXT: lvsl v3, 0, r3
24682463
; CHECK-NEXT: vperm v2, v2, v2, v3
@@ -2484,7 +2479,6 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
24842479
;
24852480
; CHECK-AIX-LABEL: getveld:
24862481
; CHECK-AIX: # %bb.0: # %entry
2487-
; CHECK-AIX-NEXT: clrldi 3, 3, 32
24882482
; CHECK-AIX-NEXT: andi. 3, 3, 1
24892483
; CHECK-AIX-NEXT: sldi 3, 3, 3
24902484
; CHECK-AIX-NEXT: lvsl 3, 0, 3

0 commit comments

Comments
 (0)