-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Macro-fusion support for veyron-v1 CPU. #70012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
783a2ad
eb79be7
dad5db6
4a2bec5
c6dff10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,101 @@ | |
|
||
using namespace llvm; | ||
|
||
static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) { | ||
if (!SecondMI.getOperand(1).isReg()) | ||
return false; | ||
|
||
if (SecondMI.getOperand(1).getReg() != FirstDest) | ||
return false; | ||
|
||
// If the input is virtual make sure this is the only user. | ||
if (FirstDest.isVirtual()) { | ||
auto &MRI = SecondMI.getMF()->getRegInfo(); | ||
return MRI.hasOneNonDBGUse(FirstDest); | ||
} | ||
|
||
return SecondMI.getOperand(0).getReg() == FirstDest; | ||
} | ||
|
||
// Fuse load with add: | ||
// add rd, rs1, rs2 | ||
// ld rd, 0(rd) | ||
static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { | ||
if (SecondMI.getOpcode() != RISCV::LD) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mgudim Is this fusion really restricted to just 64-bit load? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. |
||
return false; | ||
|
||
if (!SecondMI.getOperand(2).isImm()) | ||
return false; | ||
|
||
if (SecondMI.getOperand(2).getImm() != 0) | ||
return false; | ||
|
||
// Given SecondMI, when FirstMI is unspecified, we must return | ||
// if SecondMI may be part of a fused pair at all. | ||
if (!FirstMI) | ||
return true; | ||
|
||
if (FirstMI->getOpcode() != RISCV::ADD) | ||
return true; | ||
|
||
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); | ||
} | ||
|
||
// Fuse these patterns: | ||
// | ||
// slli rd, rs1, 32 | ||
// srli rd, rd, x | ||
// where 0 <= x <= 32 | ||
// | ||
// and | ||
// | ||
// slli rd, rs1, 48 | ||
// srli rd, rd, x | ||
static bool isShiftedZExt(const MachineInstr *FirstMI, | ||
const MachineInstr &SecondMI) { | ||
if (SecondMI.getOpcode() != RISCV::SRLI) | ||
return false; | ||
|
||
if (!SecondMI.getOperand(2).isImm()) | ||
return false; | ||
|
||
unsigned SRLIImm = SecondMI.getOperand(2).getImm(); | ||
bool IsShiftBy48 = SRLIImm == 48; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems a very oddly restricted fusion. Are you sure that e.g. a shift pair by 37 that clears the top bits isn't fused? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The two shifts by 48 case is the i16 zext pattern for rv64 without Zbb. So it makes some sense. |
||
if (SRLIImm > 32 && !IsShiftBy48) | ||
return false; | ||
|
||
// Given SecondMI, when FirstMI is unspecified, we must return | ||
// if SecondMI may be part of a fused pair at all. | ||
if (!FirstMI) | ||
return true; | ||
|
||
if (FirstMI->getOpcode() != RISCV::SLLI) | ||
return false; | ||
|
||
unsigned SLLIImm = FirstMI->getOperand(2).getImm(); | ||
preames marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32)) | ||
return false; | ||
|
||
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); | ||
} | ||
|
||
// Fuse AUIPC followed by ADDI | ||
// auipc rd, imm20 | ||
// addi rd, rd, imm12 | ||
static bool isAUIPCADDI(const MachineInstr *FirstMI, | ||
const MachineInstr &SecondMI) { | ||
if (SecondMI.getOpcode() != RISCV::ADDI) | ||
return false; | ||
// Assume the 1st instr to be a wildcard if it is unspecified. | ||
if (!FirstMI) | ||
return true; | ||
|
||
if (FirstMI->getOpcode() != RISCV::AUIPC) | ||
return false; | ||
|
||
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); | ||
} | ||
|
||
// Fuse LUI followed by ADDI or ADDIW. | ||
// rd = imm[31:0] which decomposes to | ||
// lui rd, imm[31:12] | ||
|
@@ -27,29 +122,14 @@ static bool isLUIADDI(const MachineInstr *FirstMI, | |
if (SecondMI.getOpcode() != RISCV::ADDI && | ||
SecondMI.getOpcode() != RISCV::ADDIW) | ||
return false; | ||
|
||
// Assume the 1st instr to be a wildcard if it is unspecified. | ||
if (!FirstMI) | ||
return true; | ||
|
||
if (FirstMI->getOpcode() != RISCV::LUI) | ||
return false; | ||
|
||
Register FirstDest = FirstMI->getOperand(0).getReg(); | ||
|
||
// Destination of LUI should be the ADDI(W) source register. | ||
if (SecondMI.getOperand(1).getReg() != FirstDest) | ||
return false; | ||
|
||
// If the input is virtual make sure this is the only user. | ||
if (FirstDest.isVirtual()) { | ||
auto &MRI = SecondMI.getMF()->getRegInfo(); | ||
return MRI.hasOneNonDBGUse(FirstDest); | ||
} | ||
|
||
// If the FirstMI destination is non-virtual, it should match the SecondMI | ||
// destination. | ||
return SecondMI.getOperand(0).getReg() == FirstDest; | ||
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); | ||
} | ||
|
||
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, | ||
|
@@ -61,6 +141,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, | |
if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI)) | ||
return true; | ||
|
||
if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI)) | ||
return true; | ||
|
||
if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI)) | ||
return true; | ||
|
||
if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI)) | ||
return true; | ||
|
||
return false; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# REQUIRES: asserts | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need end-to-end llc tests like |
||
# RUN: llc -mtriple=riscv64-linux-gnu -mcpu=veyron-v1 -x=mir < %s \ | ||
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \ | ||
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+shifted-zext-fusion,+ld-add-fusion \ | ||
# RUN: | FileCheck %s | ||
|
||
# CHECK: lui_addi:%bb.0 | ||
# CHECK: Macro fuse: {{.*}}LUI - ADDI | ||
--- | ||
name: lui_addi | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = LUI 1 | ||
%3:gpr = XORI %1, 2 | ||
%4:gpr = ADDI %2, 3 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: auipc_addi | ||
# CHECK: Macro fuse: {{.*}}AUIPC - ADDI | ||
--- | ||
name: auipc_addi | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = AUIPC 1 | ||
%3:gpr = XORI %1, 2 | ||
%4:gpr = ADDI %2, 3 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: slli_srli | ||
# CHECK: Macro fuse: {{.*}}SLLI - SRLI | ||
--- | ||
name: slli_srli | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = SLLI %1, 32 | ||
%3:gpr = XORI %1, 3 | ||
%4:gpr = SRLI %2, 4 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: slli_srli_48 | ||
# CHECK: Macro fuse: {{.*}}SLLI - SRLI | ||
--- | ||
name: slli_srli_48 | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = SLLI %1, 48 | ||
%3:gpr = XORI %1, 3 | ||
%4:gpr = SRLI %2, 48 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: slli_srli_no_fusion_0 | ||
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI | ||
--- | ||
name: slli_srli_no_fusion_0 | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = SLLI %1, 32 | ||
%3:gpr = XORI %1, 3 | ||
%4:gpr = SRLI %2, 33 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: slli_srli_no_fusion_1 | ||
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI | ||
--- | ||
name: slli_srli_no_fusion_1 | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = SLLI %1, 48 | ||
%3:gpr = XORI %1, 3 | ||
%4:gpr = SRLI %2, 4 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: slli_srli_no_fusion_2 | ||
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI | ||
--- | ||
name: slli_srli_no_fusion_2 | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = SLLI %1, 31 | ||
%3:gpr = XORI %1, 3 | ||
%4:gpr = SRLI %2, 4 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: slli_srli_no_fusion_3 | ||
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI | ||
--- | ||
name: slli_srli_no_fusion_3 | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = SLLI %1, 31 | ||
%3:gpr = XORI %1, 3 | ||
%4:gpr = SRLI %2, 48 | ||
$x10 = COPY %3 | ||
$x11 = COPY %4 | ||
PseudoRET | ||
... | ||
|
||
# CHECK: ld_add | ||
# CHECK: Macro fuse: {{.*}}ADD - LD | ||
--- | ||
name: ld_add | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0.entry: | ||
liveins: $x10, $x11 | ||
%1:gpr = COPY $x10 | ||
%2:gpr = COPY $x11 | ||
%3:gpr = ADD %1, %2 | ||
%4:gpr = XORI %2, 3 | ||
%5:gpr = LD %3, 0 | ||
$x10 = COPY %4 | ||
$x11 = COPY %5 | ||
PseudoRET | ||
... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please separate this out in a separate PR which only adds the proc family and the existing LUI/ADDI fusion. Then this change can extend the list with the new fusions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done: #70414