Skip to content

Commit 5f51ab1

Browse files
committed
[GISel][RISCV] Implement selectShiftMask.
Implement selectShiftMask for GlobalISel.
1 parent 52d1397 commit 5f51ab1

File tree

3 files changed

+438
-3
lines changed

3 files changed

+438
-3
lines changed

llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,69 @@ RISCVInstructionSelector::RISCVInstructionSelector(
159159

160160
/// Renders the shift-amount operand rooted at \p Root, looking through
/// operations on the shift amount that the checks below treat as removable.
///
/// Starting from Root's register, the following are tried in this order:
///   - G_ZEXT of a register: continue with the zext source.
///   - G_AND with a constant mask that contains every bit of
///     (ShiftWidth - 1): continue with the unmasked operand.
///   - G_ADD of a register and a non-zero constant that is a multiple of
///     ShiftWidth: use the register.
///   - G_SUB of (constant - register): emit a negate (SUB/SUBW from X0) when
///     the constant is a non-zero multiple of ShiftWidth, or an XORI with -1
///     when the constant is congruent to ShiftWidth - 1 modulo ShiftWidth.
///
/// ShiftWidth is the size in bits of the type of Root's register.
///
/// \returns std::nullopt when \p Root is not a register; otherwise a single
/// renderer that adds the selected shift-amount register to the instruction.
InstructionSelector::ComplexRendererFns
161161
RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const {
162-
// TODO: Also check if we are seeing the result of an AND operation which
163-
// could be bypassed since we only check the lower log2(xlen) bits.
164-
return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
// NOTE(review): the three lines marked '-' above are the pre-change body as
// shown by the diff view; the lines marked '+' below replace it.
162+
if (!Root.isReg())
163+
return std::nullopt;
164+
165+
using namespace llvm::MIPatternMatch;
166+
MachineRegisterInfo &MRI = MF->getRegInfo();
167+
168+
Register RootReg = Root.getReg();
169+
Register ShAmtReg = RootReg;
170+
const LLT ShiftLLT = MRI.getType(RootReg);
171+
unsigned ShiftWidth = ShiftLLT.getSizeInBits();
// (ShiftWidth - 1) is used below as a bit mask, hence the power-of-two check.
172+
assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
173+
// Peek through zext.
174+
Register ZExtSrcReg;
175+
if (mi_match(ShAmtReg, MRI, m_GZExt(m_Reg(ZExtSrcReg)))) {
176+
ShAmtReg = ZExtSrcReg;
177+
}
178+
179+
APInt AndMask;
180+
Register AndSrcReg;
181+
if (mi_match(ShAmtReg, MRI, m_GAnd(m_Reg(AndSrcReg), m_ICst(AndMask)))) {
// ShMask holds the value (ShiftWidth - 1) at the bit width of the matched AND
// constant. If every one of those bits is also set in AndMask, the AND is
// skipped and its register operand is used instead.
182+
APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
183+
if (ShMask.isSubsetOf(AndMask)) {
184+
ShAmtReg = AndSrcReg;
185+
} else {
186+
// TODO:
187+
// SimplifyDemandedBits may have optimized the mask so try restoring any
188+
// bits that are known zero.
189+
}
190+
}
191+
192+
APInt Imm;
193+
Register Reg;
194+
if (mi_match(ShAmtReg, MRI, m_GAdd(m_Reg(Reg), m_ICst(Imm)))) {
195+
if (Imm != 0 && Imm.urem(ShiftWidth) == 0)
196+
// If we are shifting by X+N where N == 0 mod Size, then just shift by X
197+
// to avoid the ADD.
198+
ShAmtReg = Reg;
199+
} else if (mi_match(ShAmtReg, MRI, m_GSub(m_ICst(Imm), m_Reg(Reg)))) {
200+
if (Imm != 0 && Imm.urem(ShiftWidth) == 0) {
201+
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X
202+
// to generate a NEG instead of a SUB of a constant.
// NOTE(review): the new register is created with ShiftLLT (the root's type)
// even when a G_ZEXT was looked through above; confirm Reg's type is
// compatible in that case.
203+
ShAmtReg = MRI.createGenericVirtualRegister(ShiftLLT);
// SUBW is chosen on 64-bit subtargets and SUB otherwise; the operands are
// (X0, Reg) in both cases.
204+
unsigned NegOpc = Subtarget->is64Bit() ? RISCV::SUBW : RISCV::SUB;
// The renderer first builds the negate at the position of the instruction
// under construction (MIB.getInstr()), then adds its result as the operand.
// NOTE(review): presumably the negate lands immediately before that
// instruction - confirm against MachineIRBuilder's insertion behaviour.
205+
return {{[=](MachineInstrBuilder &MIB) {
206+
MachineIRBuilder(*MIB.getInstr())
207+
.buildInstr(NegOpc, {ShAmtReg}, {Register(RISCV::X0), Reg});
208+
MIB.addReg(ShAmtReg);
209+
}}};
210+
}
// urem() already yields a value below ShiftWidth, so the additional
// `& (ShiftWidth - 1)` leaves that value unchanged before the comparison.
211+
if ((Imm.urem(ShiftWidth) & (ShiftWidth - 1)) == ShiftWidth - 1) {
212+
// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
213+
// to generate a NOT instead of a SUB of a constant.
214+
ShAmtReg = MRI.createGenericVirtualRegister(ShiftLLT);
// Same renderer shape as the negate case: build XORI Reg, -1 and then add its
// result register as the shift-amount operand.
215+
return {{[=](MachineInstrBuilder &MIB) {
216+
MachineIRBuilder(*MIB.getInstr())
217+
.buildInstr(RISCV::XORI, {ShAmtReg}, {Reg})
218+
.addImm(-1);
219+
MIB.addReg(ShAmtReg);
220+
}}};
221+
}
222+
}
223+
224+
// No extra instruction is needed: render whichever register the look-through
// steps above settled on (the root register if nothing matched).
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(ShAmtReg); }}};
165225
}
166226

167227
InstructionSelector::ComplexRendererFns
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc -mtriple=riscv32 -run-pass=instruction-select \
3+
# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
4+
5+
# This 32-bit version doesn't have tests for zext, because there is no legal type to zext from.
6+
---
# Baseline: the G_SHL amount is a plain COPY, and the CHECK lines expect SLL to
# use that copied register directly.
7+
name: shl
8+
legalized: true
9+
regBankSelected: true
10+
tracksRegLiveness: true
11+
body: |
12+
bb.0:
13+
liveins: $x10, $x11
14+
15+
; CHECK-LABEL: name: shl
16+
; CHECK: liveins: $x10, $x11
17+
; CHECK-NEXT: {{ $}}
18+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
19+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
20+
; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
21+
; CHECK-NEXT: $x10 = COPY [[SLL]]
22+
; CHECK-NEXT: PseudoRET implicit $x10
23+
%0:gprb(s32) = COPY $x10
24+
%1:gprb(s32) = COPY $x11
25+
%3:gprb(s32) = G_SHL %0, %1
26+
$x10 = COPY %3(s32)
27+
PseudoRET implicit $x10
28+
...
29+
30+
---
# The shift amount is masked with G_AND by the constant 31. The CHECK lines
# expect no AND in the output: SLL uses the unmasked copy of $x11.
31+
name: shl_and
32+
legalized: true
33+
regBankSelected: true
34+
tracksRegLiveness: true
35+
body: |
36+
bb.0:
37+
liveins: $x10, $x11
38+
39+
; CHECK-LABEL: name: shl_and
40+
; CHECK: liveins: $x10, $x11
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
43+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
44+
; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
45+
; CHECK-NEXT: $x10 = COPY [[SLL]]
46+
; CHECK-NEXT: PseudoRET implicit $x10
47+
%0:gprb(s32) = COPY $x10
48+
%1:gprb(s32) = COPY $x11
49+
%2:gprb(s32) = G_CONSTANT i32 31
50+
%3:gprb(s32) = G_AND %1, %2
51+
%4:gprb(s32) = G_SHL %0, %3(s32)
52+
$x10 = COPY %4(s32)
53+
PseudoRET implicit $x10
54+
...
55+
56+
---
# The shift amount is %1 + 32 on an s32 shift. The CHECK lines expect no ADD in
# the output: SLL uses the copy of $x11 directly.
57+
name: shl_add
58+
legalized: true
59+
regBankSelected: true
60+
tracksRegLiveness: true
61+
body: |
62+
bb.0:
63+
liveins: $x10, $x11
64+
65+
; CHECK-LABEL: name: shl_add
66+
; CHECK: liveins: $x10, $x11
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
69+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
70+
; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[COPY1]]
71+
; CHECK-NEXT: $x10 = COPY [[SLL]]
72+
; CHECK-NEXT: PseudoRET implicit $x10
73+
%0:gprb(s32) = COPY $x10
74+
%1:gprb(s32) = COPY $x11
75+
%2:gprb(s32) = G_CONSTANT i32 32
76+
%3:gprb(s32) = G_ADD %1, %2
77+
%4:gprb(s32) = G_SHL %0, %3(s32)
78+
$x10 = COPY %4(s32)
79+
PseudoRET implicit $x10
80+
...
81+
82+
---
# The shift amount is 32 - %1 on an s32 shift. The CHECK lines expect the
# subtraction from a constant to become SUB $x0, <copy of $x11>, whose result
# feeds SLL.
83+
name: shl_sub
84+
legalized: true
85+
regBankSelected: true
86+
tracksRegLiveness: true
87+
body: |
88+
bb.0:
89+
liveins: $x10, $x11
90+
91+
; CHECK-LABEL: name: shl_sub
92+
; CHECK: liveins: $x10, $x11
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
95+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
96+
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[COPY1]]
97+
; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[SUB]]
98+
; CHECK-NEXT: $x10 = COPY [[SLL]]
99+
; CHECK-NEXT: PseudoRET implicit $x10
100+
%0:gprb(s32) = COPY $x10
101+
%1:gprb(s32) = COPY $x11
102+
%2:gprb(s32) = G_CONSTANT i32 32
103+
%3:gprb(s32) = G_SUB %2, %1
104+
%4:gprb(s32) = G_SHL %0, %3(s32)
105+
$x10 = COPY %4(s32)
106+
PseudoRET implicit $x10
107+
...
108+
109+
---
# The shift amount is -1 - %1. The CHECK lines expect the subtraction to become
# XORI <copy of $x11>, -1, whose result feeds SLL.
110+
name: shl_bitwise_not
111+
legalized: true
112+
regBankSelected: true
113+
tracksRegLiveness: true
114+
body: |
115+
bb.0:
116+
liveins: $x10, $x11
117+
118+
; CHECK-LABEL: name: shl_bitwise_not
119+
; CHECK: liveins: $x10, $x11
120+
; CHECK-NEXT: {{ $}}
121+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
122+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
123+
; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[COPY1]], -1
124+
; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[XORI]]
125+
; CHECK-NEXT: $x10 = COPY [[SLL]]
126+
; CHECK-NEXT: PseudoRET implicit $x10
127+
%0:gprb(s32) = COPY $x10
128+
%1:gprb(s32) = COPY $x11
129+
%2:gprb(s32) = G_CONSTANT i32 -1
130+
%3:gprb(s32) = G_SUB %2, %1
131+
%4:gprb(s32) = G_SHL %0, %3(s32)
132+
$x10 = COPY %4(s32)
133+
PseudoRET implicit $x10
134+
...
135+
136+
---
# The shift amount is 31 - %1 on an s32 shift. As with the constant -1 variant,
# the CHECK lines expect XORI <copy of $x11>, -1 feeding SLL.
137+
name: shl_bitwise_not_2
138+
legalized: true
139+
regBankSelected: true
140+
tracksRegLiveness: true
141+
body: |
142+
bb.0:
143+
liveins: $x10, $x11
144+
145+
; CHECK-LABEL: name: shl_bitwise_not_2
146+
; CHECK: liveins: $x10, $x11
147+
; CHECK-NEXT: {{ $}}
148+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
149+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
150+
; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[COPY1]], -1
151+
; CHECK-NEXT: [[SLL:%[0-9]+]]:gpr = SLL [[COPY]], [[XORI]]
152+
; CHECK-NEXT: $x10 = COPY [[SLL]]
153+
; CHECK-NEXT: PseudoRET implicit $x10
154+
%0:gprb(s32) = COPY $x10
155+
%1:gprb(s32) = COPY $x11
156+
%2:gprb(s32) = G_CONSTANT i32 31
157+
%3:gprb(s32) = G_SUB %2, %1
158+
%4:gprb(s32) = G_SHL %0, %3(s32)
159+
$x10 = COPY %4(s32)
160+
PseudoRET implicit $x10
161+
...

0 commit comments

Comments
 (0)