Skip to content

Commit e13e95b

Browse files
authored
[Mips] Optimize (shift x (and y, BitWidth - 1)) to (shift x, y) (#73889)
Optimize x >> (shift & 31/63) into a single srlv instead of andi + srlv, since the MIPS variable shift instructions already implicitly mask the shift amount, as on x86, wasm and AMDGPU. The X86DAGToDAGISel::isUnneededShiftMask() function is copied to MIPS to check whether the two instructions can be combined into one.
1 parent 87779fd commit e13e95b

File tree

11 files changed

+277
-223
lines changed

11 files changed

+277
-223
lines changed

llvm/lib/Target/Mips/Mips64InstrInfo.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,20 +164,20 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>, GPR_64;
164164

165165
/// Shift Instructions
166166
let AdditionalPredicates = [NotInMicroMips] in {
167-
def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl,
167+
def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, mshl_64,
168168
immZExt6>,
169169
SRA_FM<0x38, 0>, ISA_MIPS3;
170-
def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl,
170+
def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, msrl_64,
171171
immZExt6>,
172172
SRA_FM<0x3a, 0>, ISA_MIPS3;
173-
def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra,
173+
def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, msra_64,
174174
immZExt6>,
175175
SRA_FM<0x3b, 0>, ISA_MIPS3;
176-
def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>,
176+
def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, mshl_64>,
177177
SRLV_FM<0x14, 0>, ISA_MIPS3;
178-
def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>,
178+
def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, msra_64>,
179179
SRLV_FM<0x17, 0>, ISA_MIPS3;
180-
def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>,
180+
def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, msrl_64>,
181181
SRLV_FM<0x16, 0>, ISA_MIPS3;
182182
def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>,
183183
SRA_FM<0x3c, 0>, ISA_MIPS3;

llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/CodeGen/MachineFunction.h"
2323
#include "llvm/CodeGen/MachineInstrBuilder.h"
2424
#include "llvm/CodeGen/MachineRegisterInfo.h"
25+
#include "llvm/CodeGen/SelectionDAG.h"
2526
#include "llvm/CodeGen/SelectionDAGNodes.h"
2627
#include "llvm/CodeGen/StackProtector.h"
2728
#include "llvm/IR/CFG.h"
@@ -31,6 +32,7 @@
3132
#include "llvm/IR/Type.h"
3233
#include "llvm/Support/Debug.h"
3334
#include "llvm/Support/ErrorHandling.h"
35+
#include "llvm/Support/KnownBits.h"
3436
#include "llvm/Support/raw_ostream.h"
3537
#include "llvm/Target/TargetMachine.h"
3638
using namespace llvm;
@@ -324,6 +326,24 @@ bool MipsDAGToDAGISel::SelectInlineAsmMemoryOperand(
324326
return true;
325327
}
326328

329+
// Returns true when the AND node \p N leaves the low \p ShAmtBits bits of its
// first operand unchanged, i.e. the mask is redundant for a consumer that only
// reads those low bits (the shift-amount PatFrags pass 5 or 6 here).
//
// \param N          an ISD::AND node; operand 1 must be a ConstantSDNode.
// \param ShAmtBits  number of low bits that must survive the mask.
bool MipsDAGToDAGISel::isUnneededShiftMask(SDNode *N,
330+
unsigned ShAmtBits) const {
331+
assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
332+
333+
// cast<> (not dyn_cast<>) is used, so a non-constant operand 1 is a caller bug.
const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
334+
// Fast path: the mask constant itself has at least ShAmtBits trailing one
// bits, so it keeps every low bit of interest.
if (RHS.countr_one() >= ShAmtBits) {
335+
LLVM_DEBUG(
336+
dbgs()
337+
<< DEBUG_TYPE
338+
<< " Need optimize 'and & shl/srl/sra' and operand value bits is "
339+
<< RHS.countr_one() << "\n");
340+
return true;
341+
}
342+
343+
// Slow path: a low bit cleared by the mask is still harmless if that bit of
// the masked value is already known to be zero. OR-ing the known-zero bits
// into the mask and re-counting trailing ones checks exactly that.
KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
344+
return (Known.Zero | RHS).countr_one() >= ShAmtBits;
345+
}
346+
327347
char MipsDAGToDAGISel::ID = 0;
328348

329349
INITIALIZE_PASS(MipsDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

llvm/lib/Target/Mips/MipsISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ class MipsDAGToDAGISel : public SelectionDAGISel {
143143
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
144144
InlineAsm::ConstraintCode ConstraintID,
145145
std::vector<SDValue> &OutOps) override;
146+
bool isUnneededShiftMask(SDNode *N, unsigned ShAmtBits) const;
146147
};
147148
}
148149

llvm/lib/Target/Mips/MipsInstrCompiler.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===- MipsInstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file describes the various pseudo instructions used by the compiler,
10+
// as well as Pat patterns used during instruction selection.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
15+
// Matches (and $lhs, imm) only when the C++ predicate
// isUnneededShiftMask(N, 5) accepts the AND node, i.e. the mask does not
// alter the low 5 bits of $lhs. Used below as the shift amount of the
// 32-bit shift fragments.
def shiftMask_32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
16+
return isUnneededShiftMask(N, 5);
17+
}]>;
18+
19+
// Matches (and $src0, imm) only when the C++ predicate
// isUnneededShiftMask(N, 6) accepts the AND node, i.e. the mask does not
// alter the low 6 bits of $src0. Used below as the shift amount of the
// 64-bit shift fragments.
def shiftMask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm), [{
20+
return isUnneededShiftMask(N, 6);
21+
}]>;
22+
23+
// For each width, define mshl_<width>, msrl_<width> and msra_<width>.
// Each fragment has two alternatives: the plain shift, and the same shift
// whose amount operand is wrapped in the matching shiftMask_<width>
// fragment. In the second form $src1 binds to the value *under* the AND,
// so a pattern using these fragments selects the shift without the mask.
foreach width = [32, 64] in {
24+
// Resolve shiftMask_32 / shiftMask_64 by name for this iteration.
defvar shiftMask = !cast<SDPatternOperator>("shiftMask_"#width);
25+
def mshl_#width : PatFrags<(ops node:$src0, node:$src1),
26+
[(shl node:$src0, node:$src1), (shl node:$src0, (shiftMask node:$src1))]>;
27+
28+
def msrl_#width : PatFrags<(ops node:$src0, node:$src1),
29+
[(srl node:$src0, node:$src1), (srl node:$src0, (shiftMask node:$src1))]>;
30+
31+
def msra_#width : PatFrags<(ops node:$src0, node:$src1),
32+
[(sra node:$src0, node:$src1), (sra node:$src0, (shiftMask node:$src1))]>;
33+
}

llvm/lib/Target/Mips/MipsInstrInfo.td

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//===----------------------------------------------------------------------===//
1515
// Mips profiles and nodes
1616
//===----------------------------------------------------------------------===//
17+
include "MipsInstrCompiler.td"
1718

1819
def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
1920
def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
@@ -2079,17 +2080,17 @@ let AdditionalPredicates = [NotInMicroMips] in {
20792080

20802081
let AdditionalPredicates = [NotInMicroMips] in {
20812082
/// Shift Instructions
2082-
def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, shl,
2083+
def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, mshl_32,
20832084
immZExt5>, SRA_FM<0, 0>, ISA_MIPS1;
2084-
def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, srl,
2085+
def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, msrl_32,
20852086
immZExt5>, SRA_FM<2, 0>, ISA_MIPS1;
2086-
def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, sra,
2087+
def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, msra_32,
20872088
immZExt5>, SRA_FM<3, 0>, ISA_MIPS1;
2088-
def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, shl>,
2089+
def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, mshl_32>,
20892090
SRLV_FM<4, 0>, ISA_MIPS1;
2090-
def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, srl>,
2091+
def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, msrl_32>,
20912092
SRLV_FM<6, 0>, ISA_MIPS1;
2092-
def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, sra>,
2093+
def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, msra_32>,
20932094
SRLV_FM<7, 0>, ISA_MIPS1;
20942095

20952096
// Rotate Instructions

llvm/test/CodeGen/Mips/funnel-shift-rot.ll

Lines changed: 46 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,8 @@ define i16 @rotl_i16(i16 %x, i16 %z) {
6262
define i32 @rotl_i32(i32 %x, i32 %z) {
6363
; CHECK-LABEL: rotl_i32:
6464
; CHECK: # %bb.0:
65-
; CHECK-NEXT: andi $1, $5, 31
66-
; CHECK-NEXT: sllv $1, $4, $1
65+
; CHECK-NEXT: sllv $1, $4, $5
6766
; CHECK-NEXT: negu $2, $5
68-
; CHECK-NEXT: andi $2, $2, 31
6967
; CHECK-NEXT: srlv $2, $4, $2
7068
; CHECK-NEXT: jr $ra
7169
; CHECK-NEXT: or $2, $1, $2
@@ -80,15 +78,13 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
8078
; CHECK-BE-NEXT: andi $1, $1, 1
8179
; CHECK-BE-NEXT: move $3, $4
8280
; CHECK-BE-NEXT: movn $3, $5, $1
83-
; CHECK-BE-NEXT: andi $6, $7, 31
84-
; CHECK-BE-NEXT: sllv $2, $3, $6
81+
; CHECK-BE-NEXT: sllv $2, $3, $7
8582
; CHECK-BE-NEXT: movn $5, $4, $1
8683
; CHECK-BE-NEXT: srl $1, $5, 1
8784
; CHECK-BE-NEXT: not $4, $7
88-
; CHECK-BE-NEXT: andi $4, $4, 31
8985
; CHECK-BE-NEXT: srlv $1, $1, $4
9086
; CHECK-BE-NEXT: or $2, $2, $1
91-
; CHECK-BE-NEXT: sllv $1, $5, $6
87+
; CHECK-BE-NEXT: sllv $1, $5, $7
9288
; CHECK-BE-NEXT: srl $3, $3, 1
9389
; CHECK-BE-NEXT: srlv $3, $3, $4
9490
; CHECK-BE-NEXT: jr $ra
@@ -100,15 +96,13 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
10096
; CHECK-LE-NEXT: andi $1, $1, 1
10197
; CHECK-LE-NEXT: move $3, $4
10298
; CHECK-LE-NEXT: movn $3, $5, $1
103-
; CHECK-LE-NEXT: andi $7, $6, 31
104-
; CHECK-LE-NEXT: sllv $2, $3, $7
99+
; CHECK-LE-NEXT: sllv $2, $3, $6
105100
; CHECK-LE-NEXT: movn $5, $4, $1
106101
; CHECK-LE-NEXT: srl $1, $5, 1
107102
; CHECK-LE-NEXT: not $4, $6
108-
; CHECK-LE-NEXT: andi $4, $4, 31
109103
; CHECK-LE-NEXT: srlv $1, $1, $4
110104
; CHECK-LE-NEXT: or $2, $2, $1
111-
; CHECK-LE-NEXT: sllv $1, $5, $7
105+
; CHECK-LE-NEXT: sllv $1, $5, $6
112106
; CHECK-LE-NEXT: srl $3, $3, 1
113107
; CHECK-LE-NEXT: srlv $3, $3, $4
114108
; CHECK-LE-NEXT: jr $ra
@@ -122,35 +116,27 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
122116
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
123117
; CHECK-LABEL: rotl_v4i32:
124118
; CHECK: # %bb.0:
125-
; CHECK-NEXT: lw $1, 24($sp)
119+
; CHECK-NEXT: lw $1, 20($sp)
126120
; CHECK-NEXT: negu $2, $1
127-
; CHECK-NEXT: lw $3, 20($sp)
121+
; CHECK-NEXT: lw $3, 24($sp)
128122
; CHECK-NEXT: negu $8, $3
129-
; CHECK-NEXT: andi $8, $8, 31
130-
; CHECK-NEXT: andi $2, $2, 31
131-
; CHECK-NEXT: andi $3, $3, 31
132-
; CHECK-NEXT: andi $1, $1, 31
133-
; CHECK-NEXT: lw $9, 16($sp)
134-
; CHECK-NEXT: sllv $1, $6, $1
135-
; CHECK-NEXT: srlv $6, $6, $2
136-
; CHECK-NEXT: sllv $3, $5, $3
137-
; CHECK-NEXT: srlv $5, $5, $8
138-
; CHECK-NEXT: andi $2, $9, 31
139-
; CHECK-NEXT: sllv $2, $4, $2
140-
; CHECK-NEXT: negu $8, $9
141-
; CHECK-NEXT: andi $8, $8, 31
142-
; CHECK-NEXT: srlv $4, $4, $8
143-
; CHECK-NEXT: lw $8, 28($sp)
144-
; CHECK-NEXT: or $2, $2, $4
145-
; CHECK-NEXT: or $3, $3, $5
146-
; CHECK-NEXT: or $4, $1, $6
147-
; CHECK-NEXT: andi $1, $8, 31
148-
; CHECK-NEXT: sllv $1, $7, $1
149-
; CHECK-NEXT: negu $5, $8
150-
; CHECK-NEXT: andi $5, $5, 31
151-
; CHECK-NEXT: srlv $5, $7, $5
123+
; CHECK-NEXT: sllv $9, $6, $3
124+
; CHECK-NEXT: srlv $6, $6, $8
125+
; CHECK-NEXT: sllv $1, $5, $1
126+
; CHECK-NEXT: srlv $3, $5, $2
127+
; CHECK-NEXT: lw $2, 16($sp)
128+
; CHECK-NEXT: sllv $5, $4, $2
129+
; CHECK-NEXT: negu $2, $2
130+
; CHECK-NEXT: srlv $2, $4, $2
131+
; CHECK-NEXT: or $2, $5, $2
132+
; CHECK-NEXT: or $3, $1, $3
133+
; CHECK-NEXT: or $4, $9, $6
134+
; CHECK-NEXT: lw $1, 28($sp)
135+
; CHECK-NEXT: sllv $5, $7, $1
136+
; CHECK-NEXT: negu $1, $1
137+
; CHECK-NEXT: srlv $1, $7, $1
152138
; CHECK-NEXT: jr $ra
153-
; CHECK-NEXT: or $5, $1, $5
139+
; CHECK-NEXT: or $5, $5, $1
154140
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
155141
ret <4 x i32> %f
156142
}
@@ -224,10 +210,8 @@ define i16 @rotr_i16(i16 %x, i16 %z) {
224210
define i32 @rotr_i32(i32 %x, i32 %z) {
225211
; CHECK-LABEL: rotr_i32:
226212
; CHECK: # %bb.0:
227-
; CHECK-NEXT: andi $1, $5, 31
228-
; CHECK-NEXT: srlv $1, $4, $1
213+
; CHECK-NEXT: srlv $1, $4, $5
229214
; CHECK-NEXT: negu $2, $5
230-
; CHECK-NEXT: andi $2, $2, 31
231215
; CHECK-NEXT: sllv $2, $4, $2
232216
; CHECK-NEXT: jr $ra
233217
; CHECK-NEXT: or $2, $1, $2
@@ -241,15 +225,13 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
241225
; CHECK-BE-NEXT: andi $1, $7, 32
242226
; CHECK-BE-NEXT: move $3, $5
243227
; CHECK-BE-NEXT: movz $3, $4, $1
244-
; CHECK-BE-NEXT: andi $6, $7, 31
245-
; CHECK-BE-NEXT: srlv $2, $3, $6
228+
; CHECK-BE-NEXT: srlv $2, $3, $7
246229
; CHECK-BE-NEXT: movz $4, $5, $1
247230
; CHECK-BE-NEXT: sll $1, $4, 1
248231
; CHECK-BE-NEXT: not $5, $7
249-
; CHECK-BE-NEXT: andi $5, $5, 31
250232
; CHECK-BE-NEXT: sllv $1, $1, $5
251233
; CHECK-BE-NEXT: or $2, $1, $2
252-
; CHECK-BE-NEXT: srlv $1, $4, $6
234+
; CHECK-BE-NEXT: srlv $1, $4, $7
253235
; CHECK-BE-NEXT: sll $3, $3, 1
254236
; CHECK-BE-NEXT: sllv $3, $3, $5
255237
; CHECK-BE-NEXT: jr $ra
@@ -260,15 +242,13 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
260242
; CHECK-LE-NEXT: andi $1, $6, 32
261243
; CHECK-LE-NEXT: move $3, $5
262244
; CHECK-LE-NEXT: movz $3, $4, $1
263-
; CHECK-LE-NEXT: andi $7, $6, 31
264-
; CHECK-LE-NEXT: srlv $2, $3, $7
245+
; CHECK-LE-NEXT: srlv $2, $3, $6
265246
; CHECK-LE-NEXT: movz $4, $5, $1
266247
; CHECK-LE-NEXT: sll $1, $4, 1
267248
; CHECK-LE-NEXT: not $5, $6
268-
; CHECK-LE-NEXT: andi $5, $5, 31
269249
; CHECK-LE-NEXT: sllv $1, $1, $5
270250
; CHECK-LE-NEXT: or $2, $1, $2
271-
; CHECK-LE-NEXT: srlv $1, $4, $7
251+
; CHECK-LE-NEXT: srlv $1, $4, $6
272252
; CHECK-LE-NEXT: sll $3, $3, 1
273253
; CHECK-LE-NEXT: sllv $3, $3, $5
274254
; CHECK-LE-NEXT: jr $ra
@@ -282,35 +262,27 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
282262
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
283263
; CHECK-LABEL: rotr_v4i32:
284264
; CHECK: # %bb.0:
285-
; CHECK-NEXT: lw $1, 24($sp)
265+
; CHECK-NEXT: lw $1, 20($sp)
286266
; CHECK-NEXT: negu $2, $1
287-
; CHECK-NEXT: lw $3, 20($sp)
267+
; CHECK-NEXT: lw $3, 24($sp)
288268
; CHECK-NEXT: negu $8, $3
289-
; CHECK-NEXT: andi $8, $8, 31
290-
; CHECK-NEXT: andi $2, $2, 31
291-
; CHECK-NEXT: andi $3, $3, 31
292-
; CHECK-NEXT: andi $1, $1, 31
293-
; CHECK-NEXT: lw $9, 16($sp)
294-
; CHECK-NEXT: srlv $1, $6, $1
295-
; CHECK-NEXT: sllv $6, $6, $2
296-
; CHECK-NEXT: srlv $3, $5, $3
297-
; CHECK-NEXT: sllv $5, $5, $8
298-
; CHECK-NEXT: andi $2, $9, 31
299-
; CHECK-NEXT: srlv $2, $4, $2
300-
; CHECK-NEXT: negu $8, $9
301-
; CHECK-NEXT: andi $8, $8, 31
302-
; CHECK-NEXT: sllv $4, $4, $8
303-
; CHECK-NEXT: lw $8, 28($sp)
304-
; CHECK-NEXT: or $2, $2, $4
305-
; CHECK-NEXT: or $3, $3, $5
306-
; CHECK-NEXT: or $4, $1, $6
307-
; CHECK-NEXT: andi $1, $8, 31
308-
; CHECK-NEXT: srlv $1, $7, $1
309-
; CHECK-NEXT: negu $5, $8
310-
; CHECK-NEXT: andi $5, $5, 31
311-
; CHECK-NEXT: sllv $5, $7, $5
269+
; CHECK-NEXT: srlv $9, $6, $3
270+
; CHECK-NEXT: sllv $6, $6, $8
271+
; CHECK-NEXT: srlv $1, $5, $1
272+
; CHECK-NEXT: sllv $3, $5, $2
273+
; CHECK-NEXT: lw $2, 16($sp)
274+
; CHECK-NEXT: srlv $5, $4, $2
275+
; CHECK-NEXT: negu $2, $2
276+
; CHECK-NEXT: sllv $2, $4, $2
277+
; CHECK-NEXT: or $2, $5, $2
278+
; CHECK-NEXT: or $3, $1, $3
279+
; CHECK-NEXT: or $4, $9, $6
280+
; CHECK-NEXT: lw $1, 28($sp)
281+
; CHECK-NEXT: srlv $5, $7, $1
282+
; CHECK-NEXT: negu $1, $1
283+
; CHECK-NEXT: sllv $1, $7, $1
312284
; CHECK-NEXT: jr $ra
313-
; CHECK-NEXT: or $5, $1, $5
285+
; CHECK-NEXT: or $5, $5, $1
314286
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
315287
ret <4 x i32> %f
316288
}

0 commit comments

Comments
 (0)