Skip to content

Commit e59d06f

Browse files
committed
[PowerPC, DAGCombiner] Fold a << (b % (sizeof(a) * 8)) back to a single instruction
Summary: This is the corresponding LLVM change to D28037, made to ensure that it causes no performance regression. Reviewers: bogner, kbarton, hfinkel, iteratee, echristo. Subscribers: nemanjai, llvm-commits. Differential Revision: https://reviews.llvm.org/D28329 (llvm-svn: 301990)
1 parent 0255227 commit e59d06f

File tree

4 files changed

+49
-39
lines changed

4 files changed

+49
-39
lines changed

llvm/include/llvm/Target/TargetLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2061,6 +2061,14 @@ class TargetLoweringBase {
20612061
return false;
20622062
}
20632063

2064+
// Return true if the instruction that performs a << b actually performs
2065+
// a << (b % (sizeof(a) * 8)).
2066+
virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
2067+
assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
2068+
"Expect a shift instruction");
2069+
return false;
2070+
}
2071+
20642072
//===--------------------------------------------------------------------===//
20652073
// Runtime Library hooks
20662074
//

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5294,6 +5294,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
52945294
}
52955295
}
52965296

5297+
// If the target supports masking y in (shl, y),
5298+
// fold (shl x, (and y, (numbits(x) - 1))) -> (shl x, y)
5299+
if (TLI.isOperationLegal(ISD::SHL, VT) &&
5300+
TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) {
5301+
if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
5302+
if (Mask->getZExtValue() == OpSizeInBits - 1) {
5303+
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
5304+
}
5305+
}
5306+
}
5307+
52975308
ConstantSDNode *N1C = isConstOrConstSplat(N1);
52985309

52995310
// fold (shl c1, c2) -> c1<<c2
@@ -5492,6 +5503,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
54925503
EVT VT = N0.getValueType();
54935504
unsigned OpSizeInBits = VT.getScalarSizeInBits();
54945505

5506+
// If the target supports masking y in (sra, y),
5507+
// fold (sra x, (and y, (numbits(x) - 1))) -> (sra x, y)
5508+
if (TLI.isOperationLegal(ISD::SRA, VT) &&
5509+
TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) {
5510+
if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
5511+
if (Mask->getZExtValue() == OpSizeInBits - 1) {
5512+
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
5513+
}
5514+
}
5515+
}
5516+
54955517
// Arithmetic shifting an all-sign-bit value is a no-op.
54965518
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
54975519
return N0;
@@ -5650,6 +5672,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
56505672
EVT VT = N0.getValueType();
56515673
unsigned OpSizeInBits = VT.getScalarSizeInBits();
56525674

5675+
// If the target supports masking y in (srl, y),
5676+
// fold (srl x, (and y, (numbits(x) - 1))) -> (srl x, y)
5677+
if (TLI.isOperationLegal(ISD::SRL, VT) &&
5678+
TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) {
5679+
if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
5680+
if (Mask->getZExtValue() == OpSizeInBits - 1) {
5681+
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
5682+
}
5683+
}
5684+
}
5685+
56535686
// fold vector ops
56545687
if (VT.isVector())
56555688
if (SDValue FoldedVOp = SimplifyVBinOp(N))

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,6 +1017,14 @@ namespace llvm {
10171017
SDValue
10181018
combineElementTruncationToVectorTruncation(SDNode *N,
10191019
DAGCombinerInfo &DCI) const;
1020+
1021+
bool supportsModuloShift(ISD::NodeType Inst,
1022+
EVT ReturnType) const override {
1023+
assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
1024+
"Expect a shift instruction");
1025+
assert(isOperationLegal(Inst, ReturnType));
1026+
return ReturnType.isVector();
1027+
}
10201028
};
10211029

10221030
namespace PPC {

llvm/test/CodeGen/PowerPC/shift_mask.ll

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ define i64 @test003(i64 %a, i64 %b) {
4949
define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
5050
; CHECK-LABEL: test010:
5151
; CHECK: # BB#0:
52-
; CHECK-NEXT: vspltisb 4, 7
53-
; CHECK-NEXT: xxland 35, 35, 36
5452
; CHECK-NEXT: vslb 2, 2, 3
5553
; CHECK-NEXT: blr
5654
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -61,8 +59,6 @@ define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
6159
define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
6260
; CHECK-LABEL: test011:
6361
; CHECK: # BB#0:
64-
; CHECK-NEXT: vspltish 4, 15
65-
; CHECK-NEXT: xxland 35, 35, 36
6662
; CHECK-NEXT: vslh 2, 2, 3
6763
; CHECK-NEXT: blr
6864
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -73,10 +69,6 @@ define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
7369
define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
7470
; CHECK-LABEL: test012:
7571
; CHECK: # BB#0:
76-
; CHECK-NEXT: vspltisw 4, -16
77-
; CHECK-NEXT: vspltisw 5, 15
78-
; CHECK-NEXT: vsubuwm 4, 5, 4
79-
; CHECK-NEXT: xxland 35, 35, 36
8072
; CHECK-NEXT: vslw 2, 2, 3
8173
; CHECK-NEXT: blr
8274
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -87,11 +79,6 @@ define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
8779
define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) {
8880
; CHECK-LABEL: test013:
8981
; CHECK: # BB#0:
90-
; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha
91-
; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l
92-
; CHECK-NEXT: lxvd2x 0, 0, 3
93-
; CHECK-NEXT: xxswapd 36, 0
94-
; CHECK-NEXT: xxland 35, 35, 36
9582
; CHECK-NEXT: vsld 2, 2, 3
9683
; CHECK-NEXT: blr
9784
%rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -148,8 +135,6 @@ define i64 @test103(i64 %a, i64 %b) {
148135
define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
149136
; CHECK-LABEL: test110:
150137
; CHECK: # BB#0:
151-
; CHECK-NEXT: vspltisb 4, 7
152-
; CHECK-NEXT: xxland 35, 35, 36
153138
; CHECK-NEXT: vsrb 2, 2, 3
154139
; CHECK-NEXT: blr
155140
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -160,8 +145,6 @@ define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
160145
define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
161146
; CHECK-LABEL: test111:
162147
; CHECK: # BB#0:
163-
; CHECK-NEXT: vspltish 4, 15
164-
; CHECK-NEXT: xxland 35, 35, 36
165148
; CHECK-NEXT: vsrh 2, 2, 3
166149
; CHECK-NEXT: blr
167150
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -172,10 +155,6 @@ define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
172155
define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
173156
; CHECK-LABEL: test112:
174157
; CHECK: # BB#0:
175-
; CHECK-NEXT: vspltisw 4, -16
176-
; CHECK-NEXT: vspltisw 5, 15
177-
; CHECK-NEXT: vsubuwm 4, 5, 4
178-
; CHECK-NEXT: xxland 35, 35, 36
179158
; CHECK-NEXT: vsrw 2, 2, 3
180159
; CHECK-NEXT: blr
181160
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -186,11 +165,6 @@ define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
186165
define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) {
187166
; CHECK-LABEL: test113:
188167
; CHECK: # BB#0:
189-
; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
190-
; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
191-
; CHECK-NEXT: lxvd2x 0, 0, 3
192-
; CHECK-NEXT: xxswapd 36, 0
193-
; CHECK-NEXT: xxland 35, 35, 36
194168
; CHECK-NEXT: vsrd 2, 2, 3
195169
; CHECK-NEXT: blr
196170
%rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -247,8 +221,6 @@ define i64 @test203(i64 %a, i64 %b) {
247221
define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
248222
; CHECK-LABEL: test210:
249223
; CHECK: # BB#0:
250-
; CHECK-NEXT: vspltisb 4, 7
251-
; CHECK-NEXT: xxland 35, 35, 36
252224
; CHECK-NEXT: vsrab 2, 2, 3
253225
; CHECK-NEXT: blr
254226
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -259,8 +231,6 @@ define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
259231
define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
260232
; CHECK-LABEL: test211:
261233
; CHECK: # BB#0:
262-
; CHECK-NEXT: vspltish 4, 15
263-
; CHECK-NEXT: xxland 35, 35, 36
264234
; CHECK-NEXT: vsrah 2, 2, 3
265235
; CHECK-NEXT: blr
266236
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -271,10 +241,6 @@ define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
271241
define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
272242
; CHECK-LABEL: test212:
273243
; CHECK: # BB#0:
274-
; CHECK-NEXT: vspltisw 4, -16
275-
; CHECK-NEXT: vspltisw 5, 15
276-
; CHECK-NEXT: vsubuwm 4, 5, 4
277-
; CHECK-NEXT: xxland 35, 35, 36
278244
; CHECK-NEXT: vsraw 2, 2, 3
279245
; CHECK-NEXT: blr
280246
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -285,11 +251,6 @@ define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
285251
define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) {
286252
; CHECK-LABEL: test213:
287253
; CHECK: # BB#0:
288-
; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha
289-
; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l
290-
; CHECK-NEXT: lxvd2x 0, 0, 3
291-
; CHECK-NEXT: xxswapd 36, 0
292-
; CHECK-NEXT: xxland 35, 35, 36
293254
; CHECK-NEXT: vsrad 2, 2, 3
294255
; CHECK-NEXT: blr
295256
%rem = and <2 x i64> %b, <i64 63, i64 63>

0 commit comments

Comments
 (0)