Skip to content

Commit b225b15

Browse files
authored
[LoongArch] Merge base and offset for large offsets (#113277)
This PR folds large offsets into the instructions that load the base address.
1 parent 401d123 commit b225b15

File tree

2 files changed

+119
-105
lines changed

2 files changed

+119
-105
lines changed

llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp

Lines changed: 99 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
183183
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
184184
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
185185
int64_t Offset) {
186-
assert(isInt<32>(Offset) && "Unexpected offset");
187186
// Put the offset back in Hi and the Lo
188187
Hi20.getOperand(1).setOffset(Offset);
189188
Lo12.getOperand(2).setOffset(Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
209208
// instructions and deletes TailAdd and the instructions that produced the
210209
// offset.
211210
//
212-
// Base address lowering is of the form:
213-
// Hi20: pcalau12i vreg1, %pc_hi20(s)
214-
// Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
215-
// / \
216-
// / \
217-
// / \
218-
// / The large offset can be of two forms: \
219-
// 1) Offset that has non zero bits in lower 2) Offset that has non zero
220-
// 12 bits and upper 20 bits bits in upper 20 bits only
221-
// OffsetHi: lu12i.w vreg3, 4
222-
// OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
223-
// \ /
224-
// \ /
225-
// \ /
226-
// \ /
227-
// TailAdd: add.d vreg4, vreg2, voff
211+
// (The instructions marked with "!" are not necessarily present)
212+
//
213+
// Base address lowering is of the form:
214+
// Hi20: pcalau12i vreg1, %pc_hi20(s)
215+
// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
216+
// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
217+
// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
218+
// |
219+
// | The large offset can take one of the following forms:
220+
// |
221+
// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
222+
// | OffsetHi20: lu12i.w vreg3, 4
223+
// | OffsetLo12: ori voff, vreg3, 188 ------------------+
224+
// | |
225+
// +-> 2) Offset that has non zero bits in Hi20 bits only: |
226+
// | OffsetHi20: lu12i.w voff, 128 ------------------+
227+
// | |
228+
// +-> 3) Offset that has non zero bits in Lo20 bits: |
229+
// | OffsetHi20: lu12i.w vreg3, 121 ! |
230+
// | OffsetLo12: ori voff, vreg3, 122 ! |
231+
// | OffsetLo20: lu32i.d voff, 123 ------------------+
232+
// +-> 4) Offset that has non zero bits in Hi12 bits: |
233+
// OffsetHi20: lu12i.w vreg3, 121 ! |
234+
// OffsetLo12: ori voff, vreg3, 122 ! |
235+
// OffsetLo20: lu32i.d vreg3, 123 ! |
236+
// OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
237+
// |
238+
// TailAdd: add.d vreg4, vreg2, voff <------------------+
239+
//
228240
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
229241
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
230242
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
235247
Register Rs = TailAdd.getOperand(1).getReg();
236248
Register Rt = TailAdd.getOperand(2).getReg();
237249
Register Reg = Rs == GAReg ? Rt : Rs;
250+
SmallVector<MachineInstr *, 4> Instrs;
251+
int64_t Offset = 0;
252+
int64_t Mask = -1;
253+
254+
// This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
255+
for (int i = 0; i < 4; i++) {
256+
// Stop walking the definition chain once Reg is R0.
257+
if (Reg == LoongArch::R0)
258+
break;
238259

239-
// Can't fold if the register has more than one use.
240-
if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
241-
return false;
242-
// This can point to an ORI or a LU12I.W:
243-
MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
244-
if (OffsetTail.getOpcode() == LoongArch::ORI) {
245-
// The offset value has non zero bits in both %hi and %lo parts.
246-
// Detect an ORI that feeds from a LU12I.W instruction.
247-
MachineOperand &OriImmOp = OffsetTail.getOperand(2);
248-
if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
260+
// Can't fold if the register has more than one use.
261+
if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
249262
return false;
250-
Register OriReg = OffsetTail.getOperand(1).getReg();
251-
int64_t OffLo = OriImmOp.getImm();
252-
253-
// Handle rs1 of ORI is R0.
254-
if (OriReg == LoongArch::R0) {
255-
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
256-
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
257-
OffsetTail.eraseFromParent();
258-
return true;
259-
}
260263

261-
MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
262-
MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
263-
if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
264-
Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
265-
!MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
264+
MachineInstr *Curr = MRI->getVRegDef(Reg);
265+
if (!Curr)
266+
break;
267+
268+
switch (Curr->getOpcode()) {
269+
default:
270+
// Can't fold if the instruction opcode is unexpected.
266271
return false;
267-
int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
268-
Offset += OffLo;
269-
// LU12I.W+ORI sign extends the result.
270-
Offset = SignExtend64<32>(Offset);
271-
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
272-
<< " " << OffsetLu12i);
273-
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
274-
OffsetTail.eraseFromParent();
275-
OffsetLu12i.eraseFromParent();
276-
return true;
277-
} else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
278-
// The offset value has all zero bits in the lower 12 bits. Only LU12I.W
279-
// exists.
280-
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
281-
int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
282-
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
283-
OffsetTail.eraseFromParent();
284-
return true;
272+
case LoongArch::ORI: {
273+
MachineOperand ImmOp = Curr->getOperand(2);
274+
if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
275+
return false;
276+
Offset += ImmOp.getImm();
277+
Reg = Curr->getOperand(1).getReg();
278+
Instrs.push_back(Curr);
279+
break;
280+
}
281+
case LoongArch::LU12I_W: {
282+
MachineOperand ImmOp = Curr->getOperand(1);
283+
if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
284+
return false;
285+
Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
286+
Reg = LoongArch::R0;
287+
Instrs.push_back(Curr);
288+
break;
289+
}
290+
case LoongArch::LU32I_D: {
291+
MachineOperand ImmOp = Curr->getOperand(2);
292+
if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
293+
return false;
294+
Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
295+
Mask ^= 0x000FFFFF00000000ULL;
296+
Reg = Curr->getOperand(1).getReg();
297+
Instrs.push_back(Curr);
298+
break;
299+
}
300+
case LoongArch::LU52I_D: {
301+
MachineOperand ImmOp = Curr->getOperand(2);
302+
if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
303+
return false;
304+
Offset += ImmOp.getImm() << 52;
305+
Mask ^= 0xFFF0000000000000ULL;
306+
Reg = Curr->getOperand(1).getReg();
307+
Instrs.push_back(Curr);
308+
break;
309+
}
310+
}
285311
}
286-
return false;
312+
313+
// Can't fold if the offset is not extracted.
314+
if (!Offset)
315+
return false;
316+
317+
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
318+
LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
319+
for (auto I : Instrs) {
320+
LLVM_DEBUG(dbgs() << " " << *I);
321+
I->eraseFromParent();
322+
}
323+
324+
return true;
287325
}
288326

289327
bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
344382
[[fallthrough]];
345383
case LoongArch::ADD_D:
346384
// The offset is too large to fit in the immediate field of ADDI.
347-
// This can be in two forms:
348-
// 1) LU12I.W hi_offset followed by:
349-
// ORI lo_offset
350-
// This happens in case the offset has non zero bits in
351-
// both hi 20 and lo 12 bits.
352-
// 2) LU12I.W (offset20)
353-
// This happens in case the lower 12 bits of the offset are zeros.
354385
return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
355386
break;
356387
}

llvm/test/CodeGen/LoongArch/merge-base-offset.ll

Lines changed: 20 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind {
11001100
;
11011101
; LA64-LARGE-LABEL: load_addr_offset_281474439839744:
11021102
; LA64-LARGE: # %bb.0: # %entry
1103-
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
1104-
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
1105-
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
1106-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
1103+
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+2251795518717952)
1104+
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952)
1105+
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952)
1106+
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952)
11071107
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
1108-
; LA64-LARGE-NEXT: ori $a1, $zero, 0
1109-
; LA64-LARGE-NEXT: lu32i.d $a1, 524287
1110-
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
11111108
; LA64-LARGE-NEXT: ret
11121109
entry:
11131110
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
@@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind {
11311128
;
11321129
; LA64-LARGE-LABEL: load_addr_offset_248792680471040:
11331130
; LA64-LARGE: # %bb.0: # %entry
1134-
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
1135-
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
1136-
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
1137-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
1131+
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+1990341443768320)
1132+
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320)
1133+
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320)
1134+
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320)
11381135
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
1139-
; LA64-LARGE-NEXT: lu12i.w $a1, 502733
1140-
; LA64-LARGE-NEXT: lu32i.d $a1, 463412
1141-
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
11421136
; LA64-LARGE-NEXT: ret
11431137
entry:
11441138
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
@@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind {
11631157
;
11641158
; LA64-LARGE-LABEL: load_addr_offset_9380351707272:
11651159
; LA64-LARGE: # %bb.0: # %entry
1166-
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
1167-
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
1168-
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
1169-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
1160+
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+75042813658176)
1161+
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176)
1162+
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+75042813658176)
1163+
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176)
11701164
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
1171-
; LA64-LARGE-NEXT: lu12i.w $a1, 279556
1172-
; LA64-LARGE-NEXT: ori $a1, $a1, 1088
1173-
; LA64-LARGE-NEXT: lu32i.d $a1, 17472
1174-
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
11751165
; LA64-LARGE-NEXT: ret
11761166
entry:
11771167
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
@@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind {
11941184
;
11951185
; LA64-LARGE-LABEL: load_addr_offset_562949953421312:
11961186
; LA64-LARGE: # %bb.0: # %entry
1197-
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
1198-
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
1199-
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
1200-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
1187+
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4503599627370496)
1188+
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4503599627370496)
1189+
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496)
1190+
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496)
12011191
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
1202-
; LA64-LARGE-NEXT: lu52i.d $a1, $zero, 1
1203-
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
12041192
; LA64-LARGE-NEXT: ret
12051193
entry:
12061194
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
@@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind {
12261214
;
12271215
; LA64-LARGE-LABEL: load_addr_offset_614749556925924693:
12281216
; LA64-LARGE: # %bb.0: # %entry
1229-
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
1230-
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
1231-
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
1232-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
1217+
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544)
1218+
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544)
1219+
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544)
1220+
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544)
12331221
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
1234-
; LA64-LARGE-NEXT: lu12i.w $a1, 209666
1235-
; LA64-LARGE-NEXT: ori $a1, $a1, 2728
1236-
; LA64-LARGE-NEXT: lu32i.d $a1, 15288
1237-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
1238-
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
12391222
; LA64-LARGE-NEXT: ret
12401223
entry:
12411224
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)

0 commit comments

Comments
 (0)