Skip to content

Commit f736cd5

Browse files
committed
[RISCV][WIP] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses.
This is what llvm#91432 looks like without llvm#93129.
1 parent 7c265e9 commit f736cd5

37 files changed

+900
-779
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,22 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
13111311

13121312
/// HI and ADD_LO address nodes.
13131313

1314+
// Pseudo instruction that produces an address in $dst from a $hi/$lo operand
// pair. It is flagged isReMaterializable and is later expanded into an
// LUI + ADDI pair by RISCVPostRAExpandPseudo::expandLIaddr.
// NOTE(review): Size = 8 presumably accounts for the two 4-byte instructions
// of that expansion - confirm.
let Size = 8, isReMaterializable = 1 in
1315+
def PseudoLIaddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
1316+
Sched<[WriteIALU]>;
1317+
1318+
// Pattern fragment matching riscv_add_lo applied to (riscv_hi $hi) and $lo,
// i.e. the combined HI + ADD_LO address computation.
def LUIADDI : PatFrag<(ops node:$hi, node:$lo),
1319+
(riscv_add_lo (riscv_hi node:$hi), node:$lo)>;
1320+
1321+
// Select PseudoLIaddr for the combined HI + ADD_LO form of each symbolic
// address kind: global address, block address, jump table and constant pool.
def : Pat<(LUIADDI tglobaladdr:$hi, tglobaladdr:$lo),
1322+
(PseudoLIaddr tglobaladdr:$hi, tglobaladdr:$lo)>;
1323+
def : Pat<(LUIADDI tblockaddress:$hi, tblockaddress:$lo),
1324+
(PseudoLIaddr tblockaddress:$hi, tblockaddress:$lo)>;
1325+
def : Pat<(LUIADDI tjumptable:$hi, tjumptable:$lo),
1326+
(PseudoLIaddr tjumptable:$hi, tjumptable:$lo)>;
1327+
def : Pat<(LUIADDI tconstpool:$hi, tconstpool:$lo),
1328+
(PseudoLIaddr tconstpool:$hi, tconstpool:$lo)>;
1329+
13141330
def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
13151331
def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
13161332
def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;

llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
8484
// 3) The offset value in the Global Address or Constant Pool is 0.
8585
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
8686
MachineInstr *&Lo) {
87-
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
87+
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
88+
Hi.getOpcode() != RISCV::PseudoLIaddr)
8889
return false;
8990

9091
const MachineOperand &HiOp1 = Hi.getOperand(1);
@@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
9798
HiOp1.getOffset() != 0)
9899
return false;
99100

100-
Register HiDestReg = Hi.getOperand(0).getReg();
101-
if (!MRI->hasOneUse(HiDestReg))
102-
return false;
101+
if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
102+
// Most of the code should handle it correctly without modification by
103+
// setting both Lo and Hi to point to PseudoLIaddr.
104+
Lo = &Hi;
105+
} else {
106+
Register HiDestReg = Hi.getOperand(0).getReg();
107+
if (!MRI->hasOneUse(HiDestReg))
108+
return false;
103109

104-
Lo = &*MRI->use_instr_begin(HiDestReg);
105-
if (Lo->getOpcode() != RISCV::ADDI)
106-
return false;
110+
Lo = &*MRI->use_instr_begin(HiDestReg);
111+
if (Lo->getOpcode() != RISCV::ADDI)
112+
return false;
113+
}
107114

108115
const MachineOperand &LoOp2 = Lo->getOperand(2);
109-
if (Hi.getOpcode() == RISCV::LUI) {
116+
if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoLIaddr) {
110117
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
111118
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
112119
LoOp2.getOffset() != 0)
@@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
466473

467474
Hi.getOperand(1).setOffset(NewOffset);
468475
MachineOperand &ImmOp = Lo.getOperand(2);
476+
// Expand PseudoLIaddr into LUI
477+
if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
478+
auto *TII = ST->getInstrInfo();
479+
Hi.setDesc(TII->get(RISCV::LUI));
480+
Hi.removeOperand(2);
481+
}
482+
469483
if (Hi.getOpcode() != RISCV::AUIPC)
470484
ImmOp.setOffset(NewOffset);
471485

@@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
501515
}
502516
}
503517

518+
// Prevent Lo (originally PseudoLIaddr, which is also pointed to by Hi) from
519+
// being erased
520+
if (&Lo == &Hi)
521+
return true;
522+
504523
MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
505524
Lo.eraseFromParent();
506525
return true;

llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
4444
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
4545
MachineBasicBlock::iterator &NextMBBI);
4646
bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
47+
bool expandLIaddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
4748
};
4849

4950
char RISCVPostRAExpandPseudo::ID = 0;
@@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
7576
switch (MBBI->getOpcode()) {
7677
case RISCV::PseudoMovImm:
7778
return expandMovImm(MBB, MBBI);
79+
case RISCV::PseudoLIaddr:
80+
return expandLIaddr(MBB, MBBI);
7881
default:
7982
return false;
8083
}
@@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
101104
return true;
102105
}
103106

107+
// Expands the PseudoLIaddr at MBBI into an LUI followed by an ADDI, both
// writing the pseudo's destination register, and then erases the pseudo.
// The new instructions are inserted before MBBI in MBB and reuse its debug
// location. Always returns true.
bool RISCVPostRAExpandPseudo::expandLIaddr(MachineBasicBlock &MBB,
108+
MachineBasicBlock::iterator MBBI) {
109+
DebugLoc DL = MBBI->getDebugLoc();
110+
111+
// Capture the destination register and its dead/renamable flags before the
// pseudo is erased so they can be carried over to the new instructions.
Register DstReg = MBBI->getOperand(0).getReg();
112+
bool DstIsDead = MBBI->getOperand(0).isDead();
113+
bool Renamable = MBBI->getOperand(0).isRenamable();
114+
115+
// LUI DstReg, <operand 1 of the pseudo>. The dead flag is not applied here
// because the ADDI below reads DstReg.
BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI))
116+
.addReg(DstReg, RegState::Define | getRenamableRegState(Renamable))
117+
.add(MBBI->getOperand(1));
// ADDI DstReg, DstReg, <operand 2 of the pseudo>. The source use of DstReg
// is marked as a kill, and the pseudo's dead flag moves to this final def.
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
119+
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) |
120+
getRenamableRegState(Renamable))
121+
.addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable))
122+
.add(MBBI->getOperand(2));
123+
MBBI->eraseFromParent();
124+
return true;
125+
}
126+
104127
} // end of anonymous namespace
105128

106129
INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",

llvm/test/CodeGen/RISCV/bfloat-mem.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
5454
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
5555
; CHECK-NEXT: lui a0, %hi(G)
5656
; CHECK-NEXT: flh fa5, %lo(G)(a0)
57-
; CHECK-NEXT: addi a1, a0, %lo(G)
57+
; CHECK-NEXT: lui a1, %hi(G+18)
5858
; CHECK-NEXT: fsh fa0, %lo(G)(a0)
59-
; CHECK-NEXT: flh fa5, 18(a1)
60-
; CHECK-NEXT: fsh fa0, 18(a1)
59+
; CHECK-NEXT: flh fa5, %lo(G+18)(a1)
60+
; CHECK-NEXT: fsh fa0, %lo(G+18)(a1)
6161
; CHECK-NEXT: ret
6262
%1 = fadd bfloat %a, %b
6363
%2 = load volatile bfloat, ptr @G

llvm/test/CodeGen/RISCV/byval.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,16 @@ define void @caller() nounwind {
2222
; RV32I-NEXT: addi sp, sp, -32
2323
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2424
; RV32I-NEXT: lui a0, %hi(foo)
25-
; RV32I-NEXT: lw a1, %lo(foo)(a0)
26-
; RV32I-NEXT: sw a1, 12(sp)
2725
; RV32I-NEXT: addi a0, a0, %lo(foo)
2826
; RV32I-NEXT: lw a1, 12(a0)
2927
; RV32I-NEXT: sw a1, 24(sp)
3028
; RV32I-NEXT: lw a1, 8(a0)
3129
; RV32I-NEXT: sw a1, 20(sp)
3230
; RV32I-NEXT: lw a0, 4(a0)
3331
; RV32I-NEXT: sw a0, 16(sp)
32+
; RV32I-NEXT: lui a0, %hi(foo)
33+
; RV32I-NEXT: lw a0, %lo(foo)(a0)
34+
; RV32I-NEXT: sw a0, 12(sp)
3435
; RV32I-NEXT: addi a0, sp, 12
3536
; RV32I-NEXT: call callee
3637
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload

llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ define void @callee() nounwind {
3232
; ILP32-NEXT: flw fa4, %lo(var+4)(a0)
3333
; ILP32-NEXT: flw fa3, %lo(var+8)(a0)
3434
; ILP32-NEXT: flw fa2, %lo(var+12)(a0)
35-
; ILP32-NEXT: addi a1, a0, %lo(var)
35+
; ILP32-NEXT: lui a1, %hi(var)
36+
; ILP32-NEXT: addi a1, a1, %lo(var)
3637
; ILP32-NEXT: flw fa1, 16(a1)
3738
; ILP32-NEXT: flw fa0, 20(a1)
3839
; ILP32-NEXT: flw ft0, 24(a1)
@@ -102,7 +103,8 @@ define void @callee() nounwind {
102103
; ILP32E-NEXT: flw fa4, %lo(var+4)(a0)
103104
; ILP32E-NEXT: flw fa3, %lo(var+8)(a0)
104105
; ILP32E-NEXT: flw fa2, %lo(var+12)(a0)
105-
; ILP32E-NEXT: addi a1, a0, %lo(var)
106+
; ILP32E-NEXT: lui a1, %hi(var)
107+
; ILP32E-NEXT: addi a1, a1, %lo(var)
106108
; ILP32E-NEXT: flw fa1, 16(a1)
107109
; ILP32E-NEXT: flw fa0, 20(a1)
108110
; ILP32E-NEXT: flw ft0, 24(a1)
@@ -172,7 +174,8 @@ define void @callee() nounwind {
172174
; LP64-NEXT: flw fa4, %lo(var+4)(a0)
173175
; LP64-NEXT: flw fa3, %lo(var+8)(a0)
174176
; LP64-NEXT: flw fa2, %lo(var+12)(a0)
175-
; LP64-NEXT: addi a1, a0, %lo(var)
177+
; LP64-NEXT: lui a1, %hi(var)
178+
; LP64-NEXT: addi a1, a1, %lo(var)
176179
; LP64-NEXT: flw fa1, 16(a1)
177180
; LP64-NEXT: flw fa0, 20(a1)
178181
; LP64-NEXT: flw ft0, 24(a1)
@@ -242,7 +245,8 @@ define void @callee() nounwind {
242245
; LP64E-NEXT: flw fa4, %lo(var+4)(a0)
243246
; LP64E-NEXT: flw fa3, %lo(var+8)(a0)
244247
; LP64E-NEXT: flw fa2, %lo(var+12)(a0)
245-
; LP64E-NEXT: addi a1, a0, %lo(var)
248+
; LP64E-NEXT: lui a1, %hi(var)
249+
; LP64E-NEXT: addi a1, a1, %lo(var)
246250
; LP64E-NEXT: flw fa1, 16(a1)
247251
; LP64E-NEXT: flw fa0, 20(a1)
248252
; LP64E-NEXT: flw ft0, 24(a1)
@@ -325,7 +329,8 @@ define void @callee() nounwind {
325329
; ILP32F-NEXT: flw fa4, %lo(var+4)(a0)
326330
; ILP32F-NEXT: flw fa3, %lo(var+8)(a0)
327331
; ILP32F-NEXT: flw fa2, %lo(var+12)(a0)
328-
; ILP32F-NEXT: addi a1, a0, %lo(var)
332+
; ILP32F-NEXT: lui a1, %hi(var)
333+
; ILP32F-NEXT: addi a1, a1, %lo(var)
329334
; ILP32F-NEXT: flw fa1, 16(a1)
330335
; ILP32F-NEXT: flw fa0, 20(a1)
331336
; ILP32F-NEXT: flw ft0, 24(a1)
@@ -421,7 +426,8 @@ define void @callee() nounwind {
421426
; LP64F-NEXT: flw fa4, %lo(var+4)(a0)
422427
; LP64F-NEXT: flw fa3, %lo(var+8)(a0)
423428
; LP64F-NEXT: flw fa2, %lo(var+12)(a0)
424-
; LP64F-NEXT: addi a1, a0, %lo(var)
429+
; LP64F-NEXT: lui a1, %hi(var)
430+
; LP64F-NEXT: addi a1, a1, %lo(var)
425431
; LP64F-NEXT: flw fa1, 16(a1)
426432
; LP64F-NEXT: flw fa0, 20(a1)
427433
; LP64F-NEXT: flw ft0, 24(a1)
@@ -517,7 +523,8 @@ define void @callee() nounwind {
517523
; ILP32D-NEXT: flw fa4, %lo(var+4)(a0)
518524
; ILP32D-NEXT: flw fa3, %lo(var+8)(a0)
519525
; ILP32D-NEXT: flw fa2, %lo(var+12)(a0)
520-
; ILP32D-NEXT: addi a1, a0, %lo(var)
526+
; ILP32D-NEXT: lui a1, %hi(var)
527+
; ILP32D-NEXT: addi a1, a1, %lo(var)
521528
; ILP32D-NEXT: flw fa1, 16(a1)
522529
; ILP32D-NEXT: flw fa0, 20(a1)
523530
; ILP32D-NEXT: flw ft0, 24(a1)
@@ -613,7 +620,8 @@ define void @callee() nounwind {
613620
; LP64D-NEXT: flw fa4, %lo(var+4)(a0)
614621
; LP64D-NEXT: flw fa3, %lo(var+8)(a0)
615622
; LP64D-NEXT: flw fa2, %lo(var+12)(a0)
616-
; LP64D-NEXT: addi a1, a0, %lo(var)
623+
; LP64D-NEXT: lui a1, %hi(var)
624+
; LP64D-NEXT: addi a1, a1, %lo(var)
617625
; LP64D-NEXT: flw fa1, 16(a1)
618626
; LP64D-NEXT: flw fa0, 20(a1)
619627
; LP64D-NEXT: flw ft0, 24(a1)
@@ -716,7 +724,8 @@ define void @caller() nounwind {
716724
; ILP32-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill
717725
; ILP32-NEXT: flw fa5, %lo(var+12)(s0)
718726
; ILP32-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
719-
; ILP32-NEXT: addi s1, s0, %lo(var)
727+
; ILP32-NEXT: lui s1, %hi(var)
728+
; ILP32-NEXT: addi s1, s1, %lo(var)
720729
; ILP32-NEXT: flw fa5, 16(s1)
721730
; ILP32-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
722731
; ILP32-NEXT: flw fa5, 20(s1)
@@ -859,7 +868,8 @@ define void @caller() nounwind {
859868
; ILP32E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
860869
; ILP32E-NEXT: flw fa5, %lo(var+12)(s0)
861870
; ILP32E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
862-
; ILP32E-NEXT: addi s1, s0, %lo(var)
871+
; ILP32E-NEXT: lui s1, %hi(var)
872+
; ILP32E-NEXT: addi s1, s1, %lo(var)
863873
; ILP32E-NEXT: flw fa5, 16(s1)
864874
; ILP32E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill
865875
; ILP32E-NEXT: flw fa5, 20(s1)
@@ -1002,7 +1012,8 @@ define void @caller() nounwind {
10021012
; LP64-NEXT: fsw fa5, 124(sp) # 4-byte Folded Spill
10031013
; LP64-NEXT: flw fa5, %lo(var+12)(s0)
10041014
; LP64-NEXT: fsw fa5, 120(sp) # 4-byte Folded Spill
1005-
; LP64-NEXT: addi s1, s0, %lo(var)
1015+
; LP64-NEXT: lui s1, %hi(var)
1016+
; LP64-NEXT: addi s1, s1, %lo(var)
10061017
; LP64-NEXT: flw fa5, 16(s1)
10071018
; LP64-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
10081019
; LP64-NEXT: flw fa5, 20(s1)
@@ -1145,7 +1156,8 @@ define void @caller() nounwind {
11451156
; LP64E-NEXT: fsw fa5, 116(sp) # 4-byte Folded Spill
11461157
; LP64E-NEXT: flw fa5, %lo(var+12)(s0)
11471158
; LP64E-NEXT: fsw fa5, 112(sp) # 4-byte Folded Spill
1148-
; LP64E-NEXT: addi s1, s0, %lo(var)
1159+
; LP64E-NEXT: lui s1, %hi(var)
1160+
; LP64E-NEXT: addi s1, s1, %lo(var)
11491161
; LP64E-NEXT: flw fa5, 16(s1)
11501162
; LP64E-NEXT: fsw fa5, 108(sp) # 4-byte Folded Spill
11511163
; LP64E-NEXT: flw fa5, 20(s1)
@@ -1300,7 +1312,8 @@ define void @caller() nounwind {
13001312
; ILP32F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
13011313
; ILP32F-NEXT: flw fa5, %lo(var+12)(s0)
13021314
; ILP32F-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
1303-
; ILP32F-NEXT: addi s1, s0, %lo(var)
1315+
; ILP32F-NEXT: lui s1, %hi(var)
1316+
; ILP32F-NEXT: addi s1, s1, %lo(var)
13041317
; ILP32F-NEXT: flw fa5, 16(s1)
13051318
; ILP32F-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
13061319
; ILP32F-NEXT: flw fa5, 20(s1)
@@ -1443,7 +1456,8 @@ define void @caller() nounwind {
14431456
; LP64F-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
14441457
; LP64F-NEXT: flw fa5, %lo(var+12)(s0)
14451458
; LP64F-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
1446-
; LP64F-NEXT: addi s1, s0, %lo(var)
1459+
; LP64F-NEXT: lui s1, %hi(var)
1460+
; LP64F-NEXT: addi s1, s1, %lo(var)
14471461
; LP64F-NEXT: flw fa5, 16(s1)
14481462
; LP64F-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
14491463
; LP64F-NEXT: flw fa5, 20(s1)
@@ -1586,7 +1600,8 @@ define void @caller() nounwind {
15861600
; ILP32D-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
15871601
; ILP32D-NEXT: flw fa5, %lo(var+12)(s0)
15881602
; ILP32D-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill
1589-
; ILP32D-NEXT: addi s1, s0, %lo(var)
1603+
; ILP32D-NEXT: lui s1, %hi(var)
1604+
; ILP32D-NEXT: addi s1, s1, %lo(var)
15901605
; ILP32D-NEXT: flw fa5, 16(s1)
15911606
; ILP32D-NEXT: fsw fa5, 60(sp) # 4-byte Folded Spill
15921607
; ILP32D-NEXT: flw fa5, 20(s1)
@@ -1729,7 +1744,8 @@ define void @caller() nounwind {
17291744
; LP64D-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
17301745
; LP64D-NEXT: flw fa5, %lo(var+12)(s0)
17311746
; LP64D-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill
1732-
; LP64D-NEXT: addi s1, s0, %lo(var)
1747+
; LP64D-NEXT: lui s1, %hi(var)
1748+
; LP64D-NEXT: addi s1, s1, %lo(var)
17331749
; LP64D-NEXT: flw fa5, 16(s1)
17341750
; LP64D-NEXT: fsw fa5, 68(sp) # 4-byte Folded Spill
17351751
; LP64D-NEXT: flw fa5, 20(s1)

0 commit comments

Comments
 (0)