Skip to content

[RISCV] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. #93142

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2457,6 +2457,61 @@ static bool isWorthFoldingAdd(SDValue Add) {
return true;
}

// To prevent SelectAddrRegImm from folding offsets that conflict with the
// fusion of PseudoMovAddr, check that the offset of every use of a given
// address is within the alignment.
//
// Addr is the address node whose users are inspected and Alignment is the
// alignment the offsets are compared against. Returns true only if every user
// of Addr is one of:
//   - an unselected load, store, atomic load or atomic store that does not
//     store Addr as its value (such users carry no offset yet),
//   - an unselected ADD whose second operand is a constant that is zero or
//     smaller than Alignment,
//   - an already selected scalar load or store that does not store Addr as
//     its value and whose offset operand is a constant that is zero or smaller
//     than Alignment.
// Any other user makes the function return false.
static bool areUserOffsetsWithinAlignment(SDValue Addr, Align Alignment) {
  // True if Op is a constant offset that is zero or smaller than Alignment.
  // Zero is tested explicitly before the comparison: comparing an Align
  // against an integer asserts that the integer is positive.
  auto IsOffsetWithinAlignment = [Alignment](SDValue Op) {
    auto *C = dyn_cast<ConstantSDNode>(Op);
    return C && (C->isZero() || Alignment > C->getSExtValue());
  };

  for (auto *Use : Addr->uses()) {
    if (!Use->isMachineOpcode()) {
      // Don't allow stores of the value. It must be used as the address.
      if (Use->getOpcode() == ISD::STORE &&
          cast<StoreSDNode>(Use)->getValue() == Addr)
        return false;
      if (Use->getOpcode() == ISD::ATOMIC_STORE &&
          cast<AtomicSDNode>(Use)->getVal() == Addr)
        return false;
      // If the user is direct load/store, there is no offset.
      if (Use->getOpcode() == ISD::LOAD || Use->getOpcode() == ISD::STORE ||
          Use->getOpcode() == ISD::ATOMIC_LOAD ||
          Use->getOpcode() == ISD::ATOMIC_STORE)
        continue;
      // An ADD of a constant is acceptable only if that constant is within
      // the alignment.
      if (Use->getOpcode() == ISD::ADD &&
          IsOffsetWithinAlignment(Use->getOperand(1)))
        continue;

      return false;
    }

    // If user is already selected, get offsets from load/store instructions.
    unsigned int Opcode = Use->getMachineOpcode();
    if (Opcode == RISCV::LB || Opcode == RISCV::LBU || Opcode == RISCV::LH ||
        Opcode == RISCV::LHU || Opcode == RISCV::LW || Opcode == RISCV::LWU ||
        Opcode == RISCV::LD || Opcode == RISCV::FLH || Opcode == RISCV::FLW ||
        Opcode == RISCV::FLD) {
      // Operand 1 of a selected load is treated as its offset.
      if (IsOffsetWithinAlignment(Use->getOperand(1)))
        continue;
      return false;
    }
    if (Opcode == RISCV::SB || Opcode == RISCV::SH || Opcode == RISCV::SW ||
        Opcode == RISCV::SD || Opcode == RISCV::FSH || Opcode == RISCV::FSW ||
        Opcode == RISCV::FSD) {
      // Also check if Addr is used as the value of store.
      if (Use->getOperand(0) == Addr)
        return false;
      // Operand 2 of a selected store is treated as its offset.
      if (IsOffsetWithinAlignment(Use->getOperand(2)))
        continue;
      return false;
    }

    // Any other selected user is not understood; be conservative.
    return false;
  }

  return true;
}
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index,
Expand Down Expand Up @@ -2520,9 +2575,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
MVT VT = Addr.getSimpleValueType();

if (Addr.getOpcode() == RISCVISD::ADD_LO) {
Base = Addr.getOperand(0);
Offset = Addr.getOperand(1);
return true;
bool CanFold = true;
// Unconditionally fold if operand 1 is not a global address (e.g.
// externsymbol)
if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
const DataLayout &DL = CurDAG->getDataLayout();
Align Alignment = commonAlignment(
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
if (!areUserOffsetsWithinAlignment(Addr, Alignment))
CanFold = false;
}
if (CanFold) {
Base = Addr.getOperand(0);
Offset = Addr.getOperand(1);
return true;
}
}

int64_t RV32ZdinxRange = IsINX ? 4 : 0;
Expand All @@ -2541,7 +2608,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
const DataLayout &DL = CurDAG->getDataLayout();
Align Alignment = commonAlignment(
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
if (CVal == 0 || Alignment > CVal) {
if (areUserOffsetsWithinAlignment(Base, Alignment)) {
int64_t CombinedOffset = CVal + GA->getOffset();
Base = Base.getOperand(0);
Offset = CurDAG->getTargetGlobalAddress(
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,25 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),

/// HI and ADD_LO address nodes.

// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
// It will be expanded after register allocation.
// $hi is the operand handed to the LUI and $lo the operand handed to the ADDI
// when the pseudo is expanded; $dst receives the resulting address.
// The instruction carries no selection pattern of its own ([]); it is only
// produced by separate Pat definitions.
// NOTE(review): Size = 8 presumably accounts for the two 4-byte instructions
// of the expansion - confirm.
// FIXME: The scheduling information does not reflect the multiple instructions.
let Size = 8, isReMaterializable = 1 in
def PseudoMovAddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
Sched<[WriteIALU]>;

// Matches a complete hi/lo address computation: a riscv_add_lo whose first
// operand is the riscv_hi of $hi and whose second operand is $lo.
def addr_hi_lo : PatFrag<(ops node:$hi, node:$lo),
(riscv_add_lo (riscv_hi node:$hi), node:$lo)>;

// Select the whole hi/lo pair as a single PseudoMovAddr for each supported
// kind of target address operand: global address, block address, jump table
// and constant pool.
def : Pat<(addr_hi_lo tglobaladdr:$hi, tglobaladdr:$lo),
(PseudoMovAddr tglobaladdr:$hi, tglobaladdr:$lo)>;
def : Pat<(addr_hi_lo tblockaddress:$hi, tblockaddress:$lo),
(PseudoMovAddr tblockaddress:$hi, tblockaddress:$lo)>;
def : Pat<(addr_hi_lo tjumptable:$hi, tjumptable:$lo),
(PseudoMovAddr tjumptable:$hi, tjumptable:$lo)>;
def : Pat<(addr_hi_lo tconstpool:$hi, tconstpool:$lo),
(PseudoMovAddr tconstpool:$hi, tconstpool:$lo)>;

def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
Expand Down
35 changes: 27 additions & 8 deletions llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
// 3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
MachineInstr *&Lo) {
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
Hi.getOpcode() != RISCV::PseudoMovAddr)
return false;

const MachineOperand &HiOp1 = Hi.getOperand(1);
Expand All @@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
HiOp1.getOffset() != 0)
return false;

Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
// Most of the code handles this case correctly without modification when
// Lo and Hi both point to the PseudoMovAddr.
Lo = &Hi;
} else {
Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;

Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
}

const MachineOperand &LoOp2 = Lo->getOperand(2);
if (Hi.getOpcode() == RISCV::LUI) {
if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
LoOp2.getOffset() != 0)
Expand Down Expand Up @@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,

Hi.getOperand(1).setOffset(NewOffset);
MachineOperand &ImmOp = Lo.getOperand(2);
// Expand PseudoMovAddr into LUI
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
auto *TII = ST->getInstrInfo();
Hi.setDesc(TII->get(RISCV::LUI));
Hi.removeOperand(2);
}

if (Hi.getOpcode() != RISCV::AUIPC)
ImmOp.setOffset(NewOffset);

Expand Down Expand Up @@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
}
}

// Prevent Lo (originally the PseudoMovAddr, which Hi also points to) from
// being erased.
if (&Lo == &Hi)
return true;

MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
Lo.eraseFromParent();
return true;
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandLIaddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
};

char RISCVPostRAExpandPseudo::ID = 0;
Expand Down Expand Up @@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMovImm:
return expandMovImm(MBB, MBBI);
case RISCV::PseudoMovAddr:
return expandLIaddr(MBB, MBBI);
default:
return false;
}
Expand All @@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
return true;
}

// Replaces the pseudo at MBBI with a LUI followed by an ADDI, both writing the
// pseudo's destination register. Operand 1 of the pseudo becomes the LUI
// operand and operand 2 becomes the ADDI operand. The pseudo is erased
// afterwards and the function always returns true.
bool RISCVPostRAExpandPseudo::expandLIaddr(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI) {
  DebugLoc Loc = MBBI->getDebugLoc();

  // Capture everything needed from the destination operand up front.
  const auto &DstOp = MBBI->getOperand(0);
  Register Dest = DstOp.getReg();
  const auto RenamableFlag = getRenamableRegState(DstOp.isRenamable());
  const auto DeadFlag = getDeadRegState(DstOp.isDead());

  // LUI Dest, <operand 1>
  BuildMI(MBB, MBBI, Loc, TII->get(RISCV::LUI))
      .addReg(Dest, RegState::Define | RenamableFlag)
      .add(MBBI->getOperand(1));

  // ADDI Dest, Dest, <operand 2>. The intermediate LUI result is killed here,
  // and the pseudo's dead flag is carried over to the final definition.
  BuildMI(MBB, MBBI, Loc, TII->get(RISCV::ADDI))
      .addReg(Dest, RegState::Define | DeadFlag | RenamableFlag)
      .addReg(Dest, RegState::Kill | RenamableFlag)
      .add(MBBI->getOperand(2));

  MBBI->eraseFromParent();
  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/bfloat-mem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: fadd.s fa5, fa4, fa5
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: lui a0, %hi(G)
; CHECK-NEXT: flh fa5, %lo(G)(a0)
; CHECK-NEXT: addi a1, a0, %lo(G)
; CHECK-NEXT: fsh fa0, %lo(G)(a0)
; CHECK-NEXT: flh fa5, 18(a1)
; CHECK-NEXT: fsh fa0, 18(a1)
; CHECK-NEXT: addi a0, a0, %lo(G)
; CHECK-NEXT: flh fa5, 0(a0)
; CHECK-NEXT: fsh fa0, 0(a0)
; CHECK-NEXT: flh fa5, 18(a0)
; CHECK-NEXT: fsh fa0, 18(a0)
; CHECK-NEXT: ret
%1 = fadd bfloat %a, %b
%2 = load volatile bfloat, ptr @G
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/byval.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ define void @caller() nounwind {
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, %hi(foo)
; RV32I-NEXT: lw a1, %lo(foo)(a0)
; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: addi a0, a0, %lo(foo)
; RV32I-NEXT: lw a1, 12(a0)
; RV32I-NEXT: sw a1, 24(sp)
; RV32I-NEXT: lw a1, 8(a0)
; RV32I-NEXT: sw a1, 20(sp)
; RV32I-NEXT: lw a0, 4(a0)
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: lw a1, 4(a0)
; RV32I-NEXT: sw a1, 16(sp)
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: call callee
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
Expand Down
Loading
Loading