Skip to content

[RISCV][WIP] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. #93320

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,22 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),

/// HI and ADD_LO address nodes.

// Pseudo instruction that produces an address in $dst from a high part ($hi,
// a LUI-style 20-bit immediate operand) and a low part ($lo, a 12-bit signed
// immediate operand). It is marked rematerializable and carries no selection
// pattern of its own (the pattern list is empty).
// NOTE(review): Size = 8 presumably accounts for the LUI + ADDI pair this
// pseudo is later expanded into -- confirm against the post-RA expansion.
let Size = 8, isReMaterializable = 1 in
def PseudoLIaddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
Sched<[WriteIALU]>;

// Pattern fragment matching a riscv_add_lo node whose first operand is
// riscv_hi of $hi and whose second operand is $lo, i.e. the combined
// HI + ADD_LO form of an address.
def LUIADDI : PatFrag<(ops node:$hi, node:$lo),
(riscv_add_lo (riscv_hi node:$hi), node:$lo)>;

// Select the combined HI + ADD_LO form to a single PseudoLIaddr for each
// supported address kind: global address, block address, jump table and
// constant pool. The $hi and $lo operands are forwarded unchanged.
def : Pat<(LUIADDI tglobaladdr:$hi, tglobaladdr:$lo),
(PseudoLIaddr tglobaladdr:$hi, tglobaladdr:$lo)>;
def : Pat<(LUIADDI tblockaddress:$hi, tblockaddress:$lo),
(PseudoLIaddr tblockaddress:$hi, tblockaddress:$lo)>;
def : Pat<(LUIADDI tjumptable:$hi, tjumptable:$lo),
(PseudoLIaddr tjumptable:$hi, tjumptable:$lo)>;
def : Pat<(LUIADDI tconstpool:$hi, tconstpool:$lo),
(PseudoLIaddr tconstpool:$hi, tconstpool:$lo)>;

def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
Expand Down
35 changes: 27 additions & 8 deletions llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
// 3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
MachineInstr *&Lo) {
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
Hi.getOpcode() != RISCV::PseudoLIaddr)
return false;

const MachineOperand &HiOp1 = Hi.getOperand(1);
Expand All @@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
HiOp1.getOffset() != 0)
return false;

Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;
if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
// Most of the remaining code handles this case without modification when
// both Lo and Hi point to the PseudoLIaddr itself.
Lo = &Hi;
} else {
Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;

Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
}

const MachineOperand &LoOp2 = Lo->getOperand(2);
if (Hi.getOpcode() == RISCV::LUI) {
if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoLIaddr) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
LoOp2.getOffset() != 0)
Expand Down Expand Up @@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,

Hi.getOperand(1).setOffset(NewOffset);
MachineOperand &ImmOp = Lo.getOperand(2);
// Expand PseudoLIaddr into LUI
if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
auto *TII = ST->getInstrInfo();
Hi.setDesc(TII->get(RISCV::LUI));
Hi.removeOperand(2);
}

if (Hi.getOpcode() != RISCV::AUIPC)
ImmOp.setOffset(NewOffset);

Expand Down Expand Up @@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
}
}

// Lo aliases Hi when the instruction was originally a PseudoLIaddr; return
// early so that it is not erased.
if (&Lo == &Hi)
return true;

MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
Lo.eraseFromParent();
return true;
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandLIaddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
};

char RISCVPostRAExpandPseudo::ID = 0;
Expand Down Expand Up @@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMovImm:
return expandMovImm(MBB, MBBI);
case RISCV::PseudoLIaddr:
return expandLIaddr(MBB, MBBI);
default:
return false;
}
Expand All @@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
return true;
}

/// Replace the PseudoLIaddr at \p MBBI with an LUI followed by an ADDI.
///
/// Both new instructions are inserted before \p MBBI and write the pseudo's
/// destination register: the LUI receives operand 1 of the pseudo and the
/// ADDI receives operand 2. The dead and renamable flags of the original
/// destination operand are carried over. The pseudo is erased afterwards.
///
/// \returns true always, since the block is modified unconditionally.
bool RISCVPostRAExpandPseudo::expandLIaddr(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI) {
  MachineInstr &Pseudo = *MBBI;
  DebugLoc Loc = Pseudo.getDebugLoc();

  // Capture everything needed from the destination operand up front.
  const MachineOperand &DstOp = Pseudo.getOperand(0);
  Register Dst = DstOp.getReg();
  const auto DeadFlag = getDeadRegState(DstOp.isDead());
  const auto RenamableFlag = getRenamableRegState(DstOp.isRenamable());

  // lui Dst, <operand 1>
  BuildMI(MBB, MBBI, Loc, TII->get(RISCV::LUI))
      .addReg(Dst, RegState::Define | RenamableFlag)
      .add(Pseudo.getOperand(1));

  // addi Dst, Dst, <operand 2>
  // The intermediate LUI result is killed here; only this final definition
  // may carry the dead flag of the original pseudo.
  BuildMI(MBB, MBBI, Loc, TII->get(RISCV::ADDI))
      .addReg(Dst, RegState::Define | DeadFlag | RenamableFlag)
      .addReg(Dst, RegState::Kill | RenamableFlag)
      .add(Pseudo.getOperand(2));

  Pseudo.eraseFromParent();
  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/RISCV/bfloat-mem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: lui a0, %hi(G)
; CHECK-NEXT: flh fa5, %lo(G)(a0)
; CHECK-NEXT: addi a1, a0, %lo(G)
; CHECK-NEXT: lui a1, %hi(G+18)
; CHECK-NEXT: fsh fa0, %lo(G)(a0)
; CHECK-NEXT: flh fa5, 18(a1)
; CHECK-NEXT: fsh fa0, 18(a1)
; CHECK-NEXT: flh fa5, %lo(G+18)(a1)
; CHECK-NEXT: fsh fa0, %lo(G+18)(a1)
; CHECK-NEXT: ret
%1 = fadd bfloat %a, %b
%2 = load volatile bfloat, ptr @G
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/RISCV/byval.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,16 @@ define void @caller() nounwind {
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, %hi(foo)
; RV32I-NEXT: lw a1, %lo(foo)(a0)
; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: addi a0, a0, %lo(foo)
; RV32I-NEXT: lw a1, 12(a0)
; RV32I-NEXT: sw a1, 24(sp)
; RV32I-NEXT: lw a1, 8(a0)
; RV32I-NEXT: sw a1, 20(sp)
; RV32I-NEXT: lw a0, 4(a0)
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: lui a0, %hi(foo)
; RV32I-NEXT: lw a0, %lo(foo)(a0)
; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: call callee
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
Expand Down
48 changes: 32 additions & 16 deletions llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ define void @callee() nounwind {
; ILP32-NEXT: flw fa4, %lo(var+4)(a0)
; ILP32-NEXT: flw fa3, %lo(var+8)(a0)
; ILP32-NEXT: flw fa2, %lo(var+12)(a0)
; ILP32-NEXT: addi a1, a0, %lo(var)
; ILP32-NEXT: lui a1, %hi(var)
; ILP32-NEXT: addi a1, a1, %lo(var)
; ILP32-NEXT: flw fa1, 16(a1)
; ILP32-NEXT: flw fa0, 20(a1)
; ILP32-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -102,7 +103,8 @@ define void @callee() nounwind {
; ILP32E-NEXT: flw fa4, %lo(var+4)(a0)
; ILP32E-NEXT: flw fa3, %lo(var+8)(a0)
; ILP32E-NEXT: flw fa2, %lo(var+12)(a0)
; ILP32E-NEXT: addi a1, a0, %lo(var)
; ILP32E-NEXT: lui a1, %hi(var)
; ILP32E-NEXT: addi a1, a1, %lo(var)
; ILP32E-NEXT: flw fa1, 16(a1)
; ILP32E-NEXT: flw fa0, 20(a1)
; ILP32E-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -172,7 +174,8 @@ define void @callee() nounwind {
; LP64-NEXT: flw fa4, %lo(var+4)(a0)
; LP64-NEXT: flw fa3, %lo(var+8)(a0)
; LP64-NEXT: flw fa2, %lo(var+12)(a0)
; LP64-NEXT: addi a1, a0, %lo(var)
; LP64-NEXT: lui a1, %hi(var)
; LP64-NEXT: addi a1, a1, %lo(var)
; LP64-NEXT: flw fa1, 16(a1)
; LP64-NEXT: flw fa0, 20(a1)
; LP64-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -242,7 +245,8 @@ define void @callee() nounwind {
; LP64E-NEXT: flw fa4, %lo(var+4)(a0)
; LP64E-NEXT: flw fa3, %lo(var+8)(a0)
; LP64E-NEXT: flw fa2, %lo(var+12)(a0)
; LP64E-NEXT: addi a1, a0, %lo(var)
; LP64E-NEXT: lui a1, %hi(var)
; LP64E-NEXT: addi a1, a1, %lo(var)
; LP64E-NEXT: flw fa1, 16(a1)
; LP64E-NEXT: flw fa0, 20(a1)
; LP64E-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -325,7 +329,8 @@ define void @callee() nounwind {
; ILP32F-NEXT: flw fa4, %lo(var+4)(a0)
; ILP32F-NEXT: flw fa3, %lo(var+8)(a0)
; ILP32F-NEXT: flw fa2, %lo(var+12)(a0)
; ILP32F-NEXT: addi a1, a0, %lo(var)
; ILP32F-NEXT: lui a1, %hi(var)
; ILP32F-NEXT: addi a1, a1, %lo(var)
; ILP32F-NEXT: flw fa1, 16(a1)
; ILP32F-NEXT: flw fa0, 20(a1)
; ILP32F-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -421,7 +426,8 @@ define void @callee() nounwind {
; LP64F-NEXT: flw fa4, %lo(var+4)(a0)
; LP64F-NEXT: flw fa3, %lo(var+8)(a0)
; LP64F-NEXT: flw fa2, %lo(var+12)(a0)
; LP64F-NEXT: addi a1, a0, %lo(var)
; LP64F-NEXT: lui a1, %hi(var)
; LP64F-NEXT: addi a1, a1, %lo(var)
; LP64F-NEXT: flw fa1, 16(a1)
; LP64F-NEXT: flw fa0, 20(a1)
; LP64F-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -517,7 +523,8 @@ define void @callee() nounwind {
; ILP32D-NEXT: flw fa4, %lo(var+4)(a0)
; ILP32D-NEXT: flw fa3, %lo(var+8)(a0)
; ILP32D-NEXT: flw fa2, %lo(var+12)(a0)
; ILP32D-NEXT: addi a1, a0, %lo(var)
; ILP32D-NEXT: lui a1, %hi(var)
; ILP32D-NEXT: addi a1, a1, %lo(var)
; ILP32D-NEXT: flw fa1, 16(a1)
; ILP32D-NEXT: flw fa0, 20(a1)
; ILP32D-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -613,7 +620,8 @@ define void @callee() nounwind {
; LP64D-NEXT: flw fa4, %lo(var+4)(a0)
; LP64D-NEXT: flw fa3, %lo(var+8)(a0)
; LP64D-NEXT: flw fa2, %lo(var+12)(a0)
; LP64D-NEXT: addi a1, a0, %lo(var)
; LP64D-NEXT: lui a1, %hi(var)
; LP64D-NEXT: addi a1, a1, %lo(var)
; LP64D-NEXT: flw fa1, 16(a1)
; LP64D-NEXT: flw fa0, 20(a1)
; LP64D-NEXT: flw ft0, 24(a1)
Expand Down Expand Up @@ -716,7 +724,8 @@ define void @caller() nounwind {
; ILP32-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill
; ILP32-NEXT: flw fa5, %lo(var+12)(s0)
; ILP32-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
; ILP32-NEXT: addi s1, s0, %lo(var)
; ILP32-NEXT: lui s1, %hi(var)
; ILP32-NEXT: addi s1, s1, %lo(var)
; ILP32-NEXT: flw fa5, 16(s1)
; ILP32-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
; ILP32-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -859,7 +868,8 @@ define void @caller() nounwind {
; ILP32E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
; ILP32E-NEXT: flw fa5, %lo(var+12)(s0)
; ILP32E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
; ILP32E-NEXT: addi s1, s0, %lo(var)
; ILP32E-NEXT: lui s1, %hi(var)
; ILP32E-NEXT: addi s1, s1, %lo(var)
; ILP32E-NEXT: flw fa5, 16(s1)
; ILP32E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill
; ILP32E-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -1002,7 +1012,8 @@ define void @caller() nounwind {
; LP64-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill
; LP64-NEXT: flw fa5, %lo(var+12)(s0)
; LP64-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill
; LP64-NEXT: addi s1, s0, %lo(var)
; LP64-NEXT: lui s1, %hi(var)
; LP64-NEXT: addi s1, s1, %lo(var)
; LP64-NEXT: flw fa5, 16(s1)
; LP64-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
; LP64-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -1145,7 +1156,8 @@ define void @caller() nounwind {
; LP64E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
; LP64E-NEXT: flw fa5, %lo(var+12)(s0)
; LP64E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
; LP64E-NEXT: addi s1, s0, %lo(var)
; LP64E-NEXT: lui s1, %hi(var)
; LP64E-NEXT: addi s1, s1, %lo(var)
; LP64E-NEXT: flw fa5, 16(s1)
; LP64E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill
; LP64E-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -1300,7 +1312,8 @@ define void @caller() nounwind {
; ILP32F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
; ILP32F-NEXT: flw fa5, %lo(var+12)(s0)
; ILP32F-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
; ILP32F-NEXT: addi s1, s0, %lo(var)
; ILP32F-NEXT: lui s1, %hi(var)
; ILP32F-NEXT: addi s1, s1, %lo(var)
; ILP32F-NEXT: flw fa5, 16(s1)
; ILP32F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
; ILP32F-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -1443,7 +1456,8 @@ define void @caller() nounwind {
; LP64F-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
; LP64F-NEXT: flw fa5, %lo(var+12)(s0)
; LP64F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
; LP64F-NEXT: addi s1, s0, %lo(var)
; LP64F-NEXT: lui s1, %hi(var)
; LP64F-NEXT: addi s1, s1, %lo(var)
; LP64F-NEXT: flw fa5, 16(s1)
; LP64F-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
; LP64F-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -1586,7 +1600,8 @@ define void @caller() nounwind {
; ILP32D-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
; ILP32D-NEXT: flw fa5, %lo(var+12)(s0)
; ILP32D-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
; ILP32D-NEXT: addi s1, s0, %lo(var)
; ILP32D-NEXT: lui s1, %hi(var)
; ILP32D-NEXT: addi s1, s1, %lo(var)
; ILP32D-NEXT: flw fa5, 16(s1)
; ILP32D-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill
; ILP32D-NEXT: flw fa5, 20(s1)
Expand Down Expand Up @@ -1729,7 +1744,8 @@ define void @caller() nounwind {
; LP64D-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
; LP64D-NEXT: flw fa5, %lo(var+12)(s0)
; LP64D-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
; LP64D-NEXT: addi s1, s0, %lo(var)
; LP64D-NEXT: lui s1, %hi(var)
; LP64D-NEXT: addi s1, s1, %lo(var)
; LP64D-NEXT: flw fa5, 16(s1)
; LP64D-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
; LP64D-NEXT: flw fa5, 20(s1)
Expand Down
Loading
Loading