@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
183
183
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
184
184
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
185
185
int64_t Offset) {
186
- assert (isInt<32 >(Offset) && " Unexpected offset" );
187
186
// Put the offset back in Hi and the Lo
188
187
Hi20.getOperand (1 ).setOffset (Offset);
189
188
Lo12.getOperand (2 ).setOffset (Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
209
208
// instructions and deletes TailAdd and the instructions that produced the
210
209
// offset.
211
210
//
212
- // Base address lowering is of the form:
213
- // Hi20: pcalau12i vreg1, %pc_hi20(s)
214
- // Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
215
- // / \
216
- // / \
217
- // / \
218
- // / The large offset can be of two forms: \
219
- // 1) Offset that has non zero bits in lower 2) Offset that has non zero
220
- // 12 bits and upper 20 bits bits in upper 20 bits only
221
- // OffsetHi: lu12i.w vreg3, 4
222
- // OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
223
- // \ /
224
- // \ /
225
- // \ /
226
- // \ /
227
- // TailAdd: add.d vreg4, vreg2, voff
211
+ // (The instructions marked with "!" are not necessarily present)
212
+ //
213
+ // Base address lowering is of the form:
214
+ // Hi20: pcalau12i vreg1, %pc_hi20(s)
215
+ // +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
216
+ // | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
217
+ // +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
218
+ // |
219
+ // | The large offset can be one of the forms:
220
+ // |
221
+ // +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
222
+ // | OffsetHi20: lu12i.w vreg3, 4
223
+ // | OffsetLo12: ori voff, vreg3, 188 ------------------+
224
+ // | |
225
+ // +-> 2) Offset that has non zero bits in Hi20 bits only: |
226
+ // | OffsetHi20: lu12i.w voff, 128 ------------------+
227
+ // | |
228
+ // +-> 3) Offset that has non zero bits in Lo20 bits: |
229
+ // | OffsetHi20: lu12i.w vreg3, 121 ! |
230
+ // | OffsetLo12: ori voff, vreg3, 122 ! |
231
+ // | OffsetLo20: lu32i.d voff, 123 ------------------+
232
+ // +-> 4) Offset that has non zero bits in Hi12 bits: |
233
+ // OffsetHi20: lu12i.w vreg3, 121 ! |
234
+ // OffsetLo12: ori voff, vreg3, 122 ! |
235
+ // OffsetLo20: lu32i.d vreg3, 123 ! |
236
+ // OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
237
+ // |
238
+ // TailAdd: add.d vreg4, vreg2, voff <------------------+
239
+ //
228
240
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset (
229
241
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
230
242
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
235
247
Register Rs = TailAdd.getOperand (1 ).getReg ();
236
248
Register Rt = TailAdd.getOperand (2 ).getReg ();
237
249
Register Reg = Rs == GAReg ? Rt : Rs;
250
+ SmallVector<MachineInstr *, 4 > Instrs;
251
+ int64_t Offset = 0 ;
252
+ int64_t Mask = -1 ;
253
+
254
+ // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
255
+ for (int i = 0 ; i < 4 ; i++) {
256
+ // Handle Reg is R0.
257
+ if (Reg == LoongArch::R0)
258
+ break ;
238
259
239
- // Can't fold if the register has more than one use.
240
- if (!Reg.isVirtual () || !MRI->hasOneUse (Reg))
241
- return false ;
242
- // This can point to an ORI or a LU12I.W:
243
- MachineInstr &OffsetTail = *MRI->getVRegDef (Reg);
244
- if (OffsetTail.getOpcode () == LoongArch::ORI) {
245
- // The offset value has non zero bits in both %hi and %lo parts.
246
- // Detect an ORI that feeds from a LU12I.W instruction.
247
- MachineOperand &OriImmOp = OffsetTail.getOperand (2 );
248
- if (OriImmOp.getTargetFlags () != LoongArchII::MO_None)
260
+ // Can't fold if the register has more than one use.
261
+ if (!Reg.isVirtual () || !MRI->hasOneUse (Reg))
249
262
return false ;
250
- Register OriReg = OffsetTail.getOperand (1 ).getReg ();
251
- int64_t OffLo = OriImmOp.getImm ();
252
-
253
- // Handle rs1 of ORI is R0.
254
- if (OriReg == LoongArch::R0) {
255
- LLVM_DEBUG (dbgs () << " Offset Instrs: " << OffsetTail);
256
- foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
257
- OffsetTail.eraseFromParent ();
258
- return true ;
259
- }
260
263
261
- MachineInstr &OffsetLu12i = *MRI->getVRegDef (OriReg);
262
- MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand (1 );
263
- if (OffsetLu12i.getOpcode () != LoongArch::LU12I_W ||
264
- Lu12iImmOp.getTargetFlags () != LoongArchII::MO_None ||
265
- !MRI->hasOneUse (OffsetLu12i.getOperand (0 ).getReg ()))
264
+ MachineInstr *Curr = MRI->getVRegDef (Reg);
265
+ if (!Curr)
266
+ break ;
267
+
268
+ switch (Curr->getOpcode ()) {
269
+ default :
270
+ // Can't fold if the instruction opcode is unexpected.
266
271
return false ;
267
- int64_t Offset = SignExtend64<32 >(Lu12iImmOp.getImm () << 12 );
268
- Offset += OffLo;
269
- // LU12I.W+ORI sign extends the result.
270
- Offset = SignExtend64<32 >(Offset);
271
- LLVM_DEBUG (dbgs () << " Offset Instrs: " << OffsetTail
272
- << " " << OffsetLu12i);
273
- foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
274
- OffsetTail.eraseFromParent ();
275
- OffsetLu12i.eraseFromParent ();
276
- return true ;
277
- } else if (OffsetTail.getOpcode () == LoongArch::LU12I_W) {
278
- // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
279
- // exists.
280
- LLVM_DEBUG (dbgs () << " Offset Instr: " << OffsetTail);
281
- int64_t Offset = SignExtend64<32 >(OffsetTail.getOperand (1 ).getImm () << 12 );
282
- foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
283
- OffsetTail.eraseFromParent ();
284
- return true ;
272
+ case LoongArch::ORI: {
273
+ MachineOperand ImmOp = Curr->getOperand (2 );
274
+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None)
275
+ return false ;
276
+ Offset += ImmOp.getImm ();
277
+ Reg = Curr->getOperand (1 ).getReg ();
278
+ Instrs.push_back (Curr);
279
+ break ;
280
+ }
281
+ case LoongArch::LU12I_W: {
282
+ MachineOperand ImmOp = Curr->getOperand (1 );
283
+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None)
284
+ return false ;
285
+ Offset += SignExtend64<32 >(ImmOp.getImm () << 12 ) & Mask;
286
+ Reg = LoongArch::R0;
287
+ Instrs.push_back (Curr);
288
+ break ;
289
+ }
290
+ case LoongArch::LU32I_D: {
291
+ MachineOperand ImmOp = Curr->getOperand (2 );
292
+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None || !Lo20)
293
+ return false ;
294
+ Offset += SignExtend64<52 >(ImmOp.getImm () << 32 ) & Mask;
295
+ Mask ^= 0x000FFFFF00000000ULL ;
296
+ Reg = Curr->getOperand (1 ).getReg ();
297
+ Instrs.push_back (Curr);
298
+ break ;
299
+ }
300
+ case LoongArch::LU52I_D: {
301
+ MachineOperand ImmOp = Curr->getOperand (2 );
302
+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None || !Hi12)
303
+ return false ;
304
+ Offset += ImmOp.getImm () << 52 ;
305
+ Mask ^= 0xFFF0000000000000ULL ;
306
+ Reg = Curr->getOperand (1 ).getReg ();
307
+ Instrs.push_back (Curr);
308
+ break ;
309
+ }
310
+ }
285
311
}
286
- return false ;
312
+
313
+ // Can't fold if the offset is not extracted.
314
+ if (!Offset)
315
+ return false ;
316
+
317
+ foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
318
+ LLVM_DEBUG (dbgs () << " Offset Instrs:\n " );
319
+ for (auto I : Instrs) {
320
+ LLVM_DEBUG (dbgs () << " " << *I);
321
+ I->eraseFromParent ();
322
+ }
323
+
324
+ return true ;
287
325
}
288
326
289
327
bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset (MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
344
382
[[fallthrough]];
345
383
case LoongArch::ADD_D:
346
384
// The offset is too large to fit in the immediate field of ADDI.
347
- // This can be in two forms:
348
- // 1) LU12I.W hi_offset followed by:
349
- // ORI lo_offset
350
- // This happens in case the offset has non zero bits in
351
- // both hi 20 and lo 12 bits.
352
- // 2) LU12I.W (offset20)
353
- // This happens in case the lower 12 bits of the offset are zeros.
354
385
return foldLargeOffset (Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
355
386
break ;
356
387
}
0 commit comments