Skip to content

Commit e2692f0

Browse files
committed
[RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbb asm instructions
This patch optimizes bit manipulation operations when the +experimental-b target feature is enabled. It matches single-block instruction patterns to specific bit-manipulation instructions from the base subset (the Zbb subextension) of the experimental RISC-V B extension, and it also adds the corresponding codegen tests. This patch is based on Claire Wolf's proposal for the RISC-V bit manipulation extension: https://github.com/riscv/riscv-bitmanip/blob/master/bitmanip-0.92.pdf Differential Revision: https://reviews.llvm.org/D79870
1 parent 20854d8 commit e2692f0

File tree

6 files changed

+2645
-3
lines changed

6 files changed

+2645
-3
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,196 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
184184
return false;
185185
}
186186

187+
// Check that it is a SLOI (Shift Left Ones Immediate). We first check that
188+
// it is the right node tree:
189+
//
190+
// (OR (SHL RS1, VC2), VC1)
191+
//
192+
// and then we check that VC1, the mask used to fill with ones, is compatible
193+
// with VC2, the shamt:
194+
//
195+
// VC1 == maskTrailingOnes<uint64_t>(VC2)
196+
197+
bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) {
198+
MVT XLenVT = Subtarget->getXLenVT();
199+
if (N.getOpcode() == ISD::OR) {
200+
SDValue Or = N;
201+
if (Or.getOperand(0).getOpcode() == ISD::SHL) {
202+
SDValue Shl = Or.getOperand(0);
203+
if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
204+
isa<ConstantSDNode>(Or.getOperand(1))) {
205+
if (XLenVT == MVT::i64) {
206+
uint64_t VC1 = Or.getConstantOperandVal(1);
207+
uint64_t VC2 = Shl.getConstantOperandVal(1);
208+
if (VC1 == maskTrailingOnes<uint64_t>(VC2)) {
209+
RS1 = Shl.getOperand(0);
210+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
211+
Shl.getOperand(1).getValueType());
212+
return true;
213+
}
214+
}
215+
if (XLenVT == MVT::i32) {
216+
uint32_t VC1 = Or.getConstantOperandVal(1);
217+
uint32_t VC2 = Shl.getConstantOperandVal(1);
218+
if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
219+
RS1 = Shl.getOperand(0);
220+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
221+
Shl.getOperand(1).getValueType());
222+
return true;
223+
}
224+
}
225+
}
226+
}
227+
}
228+
return false;
229+
}
230+
231+
// Check that it is a SROI (Shift Right Ones Immediate). We first check that
232+
// it is the right node tree:
233+
//
234+
// (OR (SRL RS1, VC2), VC1)
235+
//
236+
// and then we check that VC1, the mask used to fill with ones, is compatible
237+
// with VC2, the shamt:
238+
//
239+
// VC1 == maskLeadingOnes<uint64_t>(VC2)
240+
241+
bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
242+
MVT XLenVT = Subtarget->getXLenVT();
243+
if (N.getOpcode() == ISD::OR) {
244+
SDValue Or = N;
245+
if (Or.getOperand(0).getOpcode() == ISD::SRL) {
246+
SDValue Srl = Or.getOperand(0);
247+
if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
248+
isa<ConstantSDNode>(Or.getOperand(1))) {
249+
if (XLenVT == MVT::i64) {
250+
uint64_t VC1 = Or.getConstantOperandVal(1);
251+
uint64_t VC2 = Srl.getConstantOperandVal(1);
252+
if (VC1 == maskLeadingOnes<uint64_t>(VC2)) {
253+
RS1 = Srl.getOperand(0);
254+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
255+
Srl.getOperand(1).getValueType());
256+
return true;
257+
}
258+
}
259+
if (XLenVT == MVT::i32) {
260+
uint32_t VC1 = Or.getConstantOperandVal(1);
261+
uint32_t VC2 = Srl.getConstantOperandVal(1);
262+
if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
263+
RS1 = Srl.getOperand(0);
264+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
265+
Srl.getOperand(1).getValueType());
266+
return true;
267+
}
268+
}
269+
}
270+
}
271+
}
272+
return false;
273+
}
274+
275+
// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
276+
// on RV64).
277+
// SLLIUW is the same as SLLI except for the fact that it clears the bits
278+
// XLEN-1:32 of the input RS1 before shifting.
279+
// We first check that it is the right node tree:
280+
//
281+
// (AND (SHL RS1, VC2), VC1)
282+
//
283+
// We check that VC2, the shamt is less than 32, otherwise the pattern is
284+
// exactly the same as SLLI and we give priority to that.
285+
// Eventually we check that that VC1, the mask used to clear the upper 32 bits
286+
// of RS1, is correct:
287+
//
288+
// VC1 == (0xFFFFFFFF << VC2)
289+
290+
bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) {
291+
if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) {
292+
SDValue And = N;
293+
if (And.getOperand(0).getOpcode() == ISD::SHL) {
294+
SDValue Shl = And.getOperand(0);
295+
if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
296+
isa<ConstantSDNode>(And.getOperand(1))) {
297+
uint64_t VC1 = And.getConstantOperandVal(1);
298+
uint64_t VC2 = Shl.getConstantOperandVal(1);
299+
if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) {
300+
RS1 = Shl.getOperand(0);
301+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
302+
Shl.getOperand(1).getValueType());
303+
return true;
304+
}
305+
}
306+
}
307+
}
308+
return false;
309+
}
310+
311+
// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64).
312+
// We first check that it is the right node tree:
313+
//
314+
// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1))
315+
//
316+
// and then we check that VC1, the mask used to fill with ones, is compatible
317+
// with VC2, the shamt:
318+
//
319+
// VC1 == maskTrailingOnes<uint32_t>(VC2)
320+
321+
bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
322+
if (Subtarget->getXLenVT() == MVT::i64 &&
323+
N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
324+
cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
325+
if (N.getOperand(0).getOpcode() == ISD::OR) {
326+
SDValue Or = N.getOperand(0);
327+
if (Or.getOperand(0).getOpcode() == ISD::SHL) {
328+
SDValue Shl = Or.getOperand(0);
329+
if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
330+
isa<ConstantSDNode>(Or.getOperand(1))) {
331+
uint32_t VC1 = Or.getConstantOperandVal(1);
332+
uint32_t VC2 = Shl.getConstantOperandVal(1);
333+
if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
334+
RS1 = Shl.getOperand(0);
335+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
336+
Shl.getOperand(1).getValueType());
337+
return true;
338+
}
339+
}
340+
}
341+
}
342+
}
343+
return false;
344+
}
345+
346+
// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
347+
// We first check that it is the right node tree:
348+
//
349+
// (OR (SHL RS1, VC2), VC1)
350+
//
351+
// and then we check that VC1, the mask used to fill with ones, is compatible
352+
// with VC2, the shamt:
353+
//
354+
// VC1 == maskLeadingOnes<uint32_t>(VC2)
355+
356+
bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
357+
if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) {
358+
SDValue Or = N;
359+
if (Or.getOperand(0).getOpcode() == ISD::SRL) {
360+
SDValue Srl = Or.getOperand(0);
361+
if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
362+
isa<ConstantSDNode>(Or.getOperand(1))) {
363+
uint32_t VC1 = Or.getConstantOperandVal(1);
364+
uint32_t VC2 = Srl.getConstantOperandVal(1);
365+
if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
366+
RS1 = Srl.getOperand(0);
367+
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
368+
Srl.getOperand(1).getValueType());
369+
return true;
370+
}
371+
}
372+
}
373+
}
374+
return false;
375+
}
376+
187377
// Merge an ADDI into the offset of a load/store instruction where possible.
188378
// (load (addi base, off1), off2) -> (load base, off1+off2)
189379
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
4545

4646
bool SelectAddrFI(SDValue Addr, SDValue &Base);
4747

48+
bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
49+
bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
50+
bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
51+
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
52+
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
53+
4854
// Include the pieces autogenerated from the target description.
4955
#include "RISCVGenDAGISel.inc"
5056

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
152152
setOperationAction(ISD::ROTL, XLenVT, Expand);
153153
setOperationAction(ISD::ROTR, XLenVT, Expand);
154154
setOperationAction(ISD::BSWAP, XLenVT, Expand);
155-
setOperationAction(ISD::CTTZ, XLenVT, Expand);
156-
setOperationAction(ISD::CTLZ, XLenVT, Expand);
157-
setOperationAction(ISD::CTPOP, XLenVT, Expand);
155+
156+
if (!Subtarget.hasStdExtZbb()) {
157+
setOperationAction(ISD::CTTZ, XLenVT, Expand);
158+
setOperationAction(ISD::CTLZ, XLenVT, Expand);
159+
setOperationAction(ISD::CTPOP, XLenVT, Expand);
160+
}
158161

159162
ISD::CondCode FPCCToExtend[] = {
160163
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,

llvm/lib/Target/RISCV/RISCVInstrInfoB.td

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,3 +632,79 @@ let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] i
632632
def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
633633
(C_ZEXTW GPRC:$rs1)>;
634634
} // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]
635+
636+
//===----------------------------------------------------------------------===//
637+
// Codegen patterns
638+
//===----------------------------------------------------------------------===//
639+
// ComplexPattern records for the immediate "shift ones" / SLLIUW forms. Each
// one names (as a string) the RISCVDAGToDAGISel method that performs the
// match, produces two operands (RS1 and the shift amount), and lists the root
// node kind it is tried on. The *W and SLLIUW variants are typed i64 only.
def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
640+
def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
641+
def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
642+
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
643+
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
644+
645+
// Zbb patterns valid for either XLEN.
let Predicates = [HasStdExtZbb] in {
646+
// SLO/SRO with a register shift amount: complement the input, shift, and
// complement the result, i.e. ~(~rs1 << rs2) and ~(~rs1 >> rs2).
def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
647+
(SLO GPR:$rs1, GPR:$rs2)>;
648+
def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
649+
(SRO GPR:$rs1, GPR:$rs2)>;
650+
// Immediate forms are recognised by the SLOIPat/SROIPat ComplexPatterns.
def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
651+
(SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
652+
def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
653+
(SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
654+
// ctlz / cttz / ctpop each map onto a single instruction.
def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
655+
def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
656+
def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>;
657+
} // Predicates = [HasStdExtZbb]
658+
659+
// SEXTB: a shift left followed by an arithmetic shift right by the same
// amount, 24 on RV32 and 56 on RV64 (XLEN - 8).
let Predicates = [HasStdExtZbb, IsRV32] in
660+
def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>;
661+
let Predicates = [HasStdExtZbb, IsRV64] in
662+
def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>;
663+
664+
// SEXTH: the same shape with shift amounts 16 on RV32 and 48 on RV64
// (XLEN - 16).
let Predicates = [HasStdExtZbb, IsRV32] in
665+
def : Pat<(sra (shl GPR:$rs1, (i32 16)), (i32 16)), (SEXTH GPR:$rs1)>;
666+
let Predicates = [HasStdExtZbb, IsRV64] in
667+
def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>;
668+
669+
// MIN/MAX/MINU/MAXU. Each instruction is selected both from the generic
// smin/smax/umin/umax node and from a riscv_selectcc node whose compared
// operands are the same two registers it chooses between. The max forms
// differ from the min forms only in having the compared operands swapped.
// NOTE(review): the literal condition codes 20 and 12 are presumably the
// numeric values of ISD::SETLT and ISD::SETULT -- confirm against the
// ISD::CondCode enum.
let Predicates = [HasStdExtZbb] in {
670+
def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>;
671+
def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2),
672+
(MIN GPR:$rs1, GPR:$rs2)>;
673+
def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>;
674+
def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2),
675+
(MAX GPR:$rs1, GPR:$rs2)>;
676+
def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>;
677+
def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2),
678+
(MINU GPR:$rs1, GPR:$rs2)>;
679+
def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>;
680+
def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
681+
(MAXU GPR:$rs1, GPR:$rs2)>;
682+
} // Predicates = [HasStdExtZbb]
683+
684+
// RV64-only Zbb patterns for the 32-bit ("W"/"UW") instruction forms.
let Predicates = [HasStdExtZbb, IsRV64] in {
685+
// Add/sub whose result is masked to the low 32 bits (*WU forms).
def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)),
686+
(ADDIWU GPR:$rs, simm12:$simm12)>;
687+
// SLLIUW is recognised by the SLLIUWPat ComplexPattern.
def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt),
688+
(SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>;
689+
def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
690+
(ADDWU GPR:$rs1, GPR:$rs2)>;
691+
def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
692+
(SUBWU GPR:$rs1, GPR:$rs2)>;
693+
// Add/sub whose second operand is masked to its low 32 bits (*UW forms).
def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
694+
(ADDUW GPR:$rs1, GPR:$rs2)>;
695+
def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
696+
(SUBUW GPR:$rs1, GPR:$rs2)>;
697+
// SLOW/SROW: the complement / shift / complement shape of SLO/SRO, built on
// the riscv_sllw / riscv_srlw nodes.
def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
698+
(SLOW GPR:$rs1, GPR:$rs2)>;
699+
def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
700+
(SROW GPR:$rs1, GPR:$rs2)>;
701+
// Immediate forms are recognised by the SLOIWPat/SROIWPat ComplexPatterns.
def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
702+
(SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
703+
def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
704+
(SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
705+
// CLZW: a 64-bit ctlz of the zero-extended low word, minus the 32 leading
// zeros contributed by the cleared upper half.
def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
706+
(CLZW GPR:$rs1)>;
707+
// We don't pattern-match CTZW here as it has the same pattern and result as
708+
// RV64 CTZ
709+
// PCNTW: population count of the zero-extended low word.
def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
710+
} // Predicates = [HasStdExtZbb, IsRV64]

0 commit comments

Comments
 (0)