Skip to content

[RISCV] Add late optimization pass for RISC-V to optimize branch instructions #131728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/BranchFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
DebugLoc dl = CurMBB->findBranchDebugLoc();
if (!dl)
dl = BranchDL;
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->reverseBranchCondition(Cond)) {
Expand Down Expand Up @@ -1107,7 +1107,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {

MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) {
// Failing case: IBB is the target of a cbr, and we cannot reverse the
// branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
Expand Down Expand Up @@ -1564,7 +1564,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Loop: xxx; jcc Out; jmp Loop
// we want:
// Loop: xxx; jncc Loop; jmp Out
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB &&
!CurCond.empty()) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->reverseBranchCondition(NewCond)) {
DebugLoc Dl = MBB->findBranchDebugLoc();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ add_llvm_target(RISCVCodeGen
RISCVConstantPoolValue.cpp
RISCVDeadRegisterDefinitions.cpp
RISCVMakeCompressible.cpp
RISCVLatePeephole.cpp
RISCVExpandAtomicPseudoInsts.cpp
RISCVExpandPseudoInsts.cpp
RISCVFoldMemOffset.cpp
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
FunctionPass *createRISCVMakeCompressibleOptPass();
void initializeRISCVMakeCompressibleOptPass(PassRegistry &);

FunctionPass *createRISCVLatePeepholeOptPass();
void initializeRISCVLatePeepholeOptPass(PassRegistry &);

FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);

Expand Down
110 changes: 110 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,109 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
}
}

// Return true if MO definitely contains the value one.
//
// MO qualifies when it is the immediate 1, or when it is a virtual register
// whose unique definition is the canonical materialization `ADDI X0, 1`.
// Every other case conservatively answers false.
static bool isOne(MachineOperand &MO) {
  if (MO.isImm() && MO.getImm() == 1)
    return true;

  // Only virtual registers are traced back to a defining instruction.
  if (!MO.isReg() || !MO.getReg().isVirtual())
    return false;

  // The register info is reached through the operand's owning instruction, so
  // an operand that is not attached to an inserted instruction cannot be
  // analyzed.
  MachineInstr *UseMI = MO.getParent();
  if (!UseMI || !UseMI->getParent() || !UseMI->getParent()->getParent())
    return false;

  MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
  if (!DefMI || DefMI->getOpcode() != RISCV::ADDI)
    return false;

  // For now, just check the canonical one value. The operand kinds are
  // verified before they are read: getReg()/getImm() are only valid on
  // register/immediate operands, and nothing here guarantees that this ADDI
  // carries a plain register source and a plain immediate.
  const MachineOperand &Src = DefMI->getOperand(1);
  const MachineOperand &Imm = DefMI->getOperand(2);
  return Src.isReg() && Src.getReg() == RISCV::X0 && Imm.isImm() &&
         Imm.getImm() == 1;
}

// Return true if MO definitely contains the value zero, i.e. it is either the
// immediate 0 or the register X0.
static bool isZero(MachineOperand &MO) {
  const bool IsImmZero = MO.isImm() && MO.getImm() == 0;
  return IsImmZero || (MO.isReg() && MO.getReg() == RISCV::X0);
}

// Fold the conditional branch terminating MBB into an unconditional branch (or
// a plain fallthrough) when the comparison can be evaluated statically.
//
// TBB/FBB/Cond describe the branch: Cond must hold three operands (condition
// code, LHS, RHS), TBB is the taken target, and FBB is the not-taken target or
// nullptr when the not-taken path falls through. Only COND_EQ and COND_NE with
// operands recognized by isZero()/isOne() are handled.
//
// On success the branch instructions and the successor list of MBB are
// rewritten, Cond is cleared, and true is returned. Otherwise nothing is
// modified and false is returned.
//
// NOTE: TBB and FBB are passed by value, so the caller's pointers are not
// updated; a caller that needs the new branch shape must analyze MBB again.
bool RISCVInstrInfo::trySimplifyCondBr(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    SmallVectorImpl<MachineOperand> &Cond) const {

  if (!TBB || Cond.size() != 3)
    return false;

  // Does the branch go to TBB when LHS == RHS?
  bool TakenWhenEqual;
  switch (static_cast<RISCVCC::CondCode>(Cond[0].getImm())) {
  default:
    // TODO: Implement for more CCs
    return false;
  case RISCVCC::COND_EQ:
    TakenWhenEqual = true;
    break;
  case RISCVCC::COND_NE:
    TakenWhenEqual = false;
    break;
  }

  // These references are only used before Cond is cleared below.
  MachineOperand &LHS = Cond[1];
  MachineOperand &RHS = Cond[2];

  const bool KnownEqual =
      (isZero(LHS) && isZero(RHS)) || (isOne(LHS) && isOne(RHS));
  const bool KnownUnequal =
      (isZero(LHS) && isOne(RHS)) || (isOne(LHS) && isZero(RHS));
  if (!KnownEqual && !KnownUnequal)
    return false;

  // The block control always reaches. nullptr means "fall through".
  MachineBasicBlock *Folded = (KnownEqual == TakenWhenEqual) ? TBB : FBB;

  // At this point, it's legal to optimize. Grab the debug location first:
  // once the branches are removed there is no branch left to take it from.
  DebugLoc DL = MBB.findBranchDebugLoc();
  removeBranch(MBB);
  Cond.clear();

  // Only need to insert a branch if we're not falling through.
  if (Folded)
    insertBranch(MBB, Folded, nullptr, {}, DL);

  // Update the successors. Remove them all and add back the correct one.
  while (!MBB.succ_empty())
    MBB.removeSuccessor(MBB.succ_end() - 1);

  if (Folded) {
    MBB.addSuccessor(Folded);
  } else {
    // If it's a fallthrough, we need to figure out where MBB is going: the
    // next block in layout order, if there is one.
    MachineFunction::iterator Fallthrough = ++MBB.getIterator();
    if (Fallthrough != MBB.getParent()->end())
      MBB.addSuccessor(&*Fallthrough);
  }

  return true;
}

bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
Expand Down Expand Up @@ -1062,6 +1165,9 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
// Try to fold the conditional branch into the fallthrough.
if (AllowModify)
trySimplifyCondBr(MBB, TBB, FBB, Cond);
return false;
}

Expand All @@ -1070,6 +1176,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
// Try to fold the branch of the conditional branch into an unconditional
// branch.
if (AllowModify)
trySimplifyCondBr(MBB, TBB, FBB, Cond);
return false;
}

Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,26 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {

static bool isLdStSafeToPair(const MachineInstr &LdSt,
const TargetRegisterInfo *TRI);
/// Return true if the branch represented by the conditional branch with
/// components TBB, FBB, and Cond was folded into an unconditional branch.
///
/// If FBB is nullptr, then the input represents a conditional branch with
/// a fallthrough.
///
/// For example:
/// BRCOND EQ 0, 0, BB1
/// BR BB2
///
/// can be simplified to BR BB1 since 0 == 0 statically. On the other hand,
///
///
/// BRCOND EQ 0, 1, BB1
/// BR BB2
///
/// can be simplified to BR BB2 because 0 != 1 statically.
bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
SmallVectorImpl<MachineOperand> &Cond) const;

protected:
const RISCVSubtarget &STI;
Expand Down
85 changes: 85 additions & 0 deletions llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//===-- RISCVLatePeephole.cpp - Late stage peephole optimization ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file provides RISC-V late peephole optimizations
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-late-peephole"
#define RISCV_LATE_PEEPHOLE_NAME "RISC-V Late Stage Peephole"

namespace {

/// Machine function pass that visits every basic block of a function and
/// hands it to optimizeBlock().
class RISCVLatePeepholeOpt : public MachineFunctionPass {
  /// Target instruction info; assigned in runOnMachineFunction before any
  /// block is processed.
  const RISCVInstrInfo *TII = nullptr;

  /// Process a single block; returns true if the block was changed.
  bool optimizeBlock(MachineBasicBlock &MBB);

public:
  static char ID;

  RISCVLatePeepholeOpt() : MachineFunctionPass(ID) {}

  /// Human-readable pass name.
  StringRef getPassName() const override { return RISCV_LATE_PEEPHOLE_NAME; }

  /// No extra analyses are requested; this defers to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;
};
} // namespace

// Definition of the pass's static ID member, which the constructor passes to
// MachineFunctionPass.
char RISCVLatePeepholeOpt::ID = 0;
// Register the pass under the argument name "riscv-late-peephole" with
// RISCV_LATE_PEEPHOLE_NAME as its description.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags -- confirm against INITIALIZE_PASS.
INITIALIZE_PASS(RISCVLatePeepholeOpt, "riscv-late-peephole",
RISCV_LATE_PEEPHOLE_NAME, false, false)

// Try to fold the conditional branch that terminates MBB. Returns true if
// trySimplifyCondBr rewrote the block, false otherwise (including when
// analyzeBranch reports that it could not analyze the terminators).
bool RISCVLatePeepholeOpt::optimizeBlock(MachineBasicBlock &MBB) {

// Use trySimplifyCondBr directly to know whether the optimization occurred.
// analyzeBranch is therefore called with its last argument set to false, so
// the only place the block can be modified is the explicit call below.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

occurred*

MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 4> Cond;
// analyzeBranch returns false on success, filling in TBB, FBB and Cond.
if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, false))
return TII->trySimplifyCondBr(MBB, TBB, FBB, Cond);

return false;
}

// Run the peephole over every basic block of MF. Returns true if any block
// was changed; functions rejected by skipFunction() are left untouched.
bool RISCVLatePeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  if (!skipFunction(MF.getFunction())) {
    TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();

    // Every block is visited, even after an earlier one has been changed.
    for (MachineBasicBlock &Block : MF) {
      if (optimizeBlock(Block))
        Changed = true;
    }
  }

  return Changed;
}

/// Returns a newly allocated instance of the RISC-V Late Stage Peephole pass
/// (RISCVLatePeepholeOpt).
FunctionPass *llvm::createRISCVLatePeepholeOptPass() {
return new RISCVLatePeepholeOpt();
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeKCFIPass(*PR);
initializeRISCVDeadRegisterDefinitionsPass(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVLatePeepholeOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVCodeGenPreparePass(*PR);
initializeRISCVPostRAExpandPseudoPass(*PR);
Expand Down Expand Up @@ -567,6 +568,7 @@ void RISCVPassConfig::addPreEmitPass() {
addPass(createMachineCopyPropagationPass(true));
addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
addPass(createRISCVLatePeepholeOptPass());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we should mess with branches after BranchRelaxationPass

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm checking my notes here and the pattern that generates the branch, i.e.:

bb.2:
  renamable $x10 = XORI killed renamable $x10, -1
  renamable $x10 = BEXTI killed renamable $x10, 13
  BNE killed renamable $x10, $x0, %bb.5

Seems to be formed by MachineBlockPlacementPass, so maybe we can move this change to the beginning of addPreEmitPass and still be able to optimize it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put it immediately before branch relaxation, unless you're also sure that machine copy propagation won't also introduce these kinds of branches.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lenary, MCP is definitely introducing these kinds of branches.

}

void RISCVPassConfig::addPreEmitPass2() {
Expand Down
20 changes: 0 additions & 20 deletions llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_two:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz zero, .LBB6_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltiu a0, zero, 0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
Expand Down Expand Up @@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: beqz zero, .LBB6_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltiu a0, zero, 0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB6_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
Expand All @@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz zero, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: snez a0, zero
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
Expand Down Expand Up @@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
;
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: beqz zero, .LBB7_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: snez a0, zero
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB7_2:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a0, a1, a0
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: RISC-V Late Stage Peephole
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
; CHECK-NEXT: RISC-V Late Stage Peephole
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
Expand Down
Loading