@@ -206,6 +206,14 @@ class AArch64InstructionSelector : public InstructionSelector {
206
206
ComplexRendererFns
207
207
selectAddrModeShiftedExtendXReg (MachineOperand &Root,
208
208
unsigned SizeInBytes) const ;
209
+
210
+ /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
211
+ /// or not a shift + extend should be folded into an addressing mode. Returns
212
+ /// None when this is not profitable or possible (e.g. Log2_32(\p SizeInBytes) is 0). \p Base and \p Offset must be register operands; \p WantsExt requests that an extend feeding the offset be folded as well.
213
+ ComplexRendererFns
214
+ selectExtendedSHL (MachineOperand &Root, MachineOperand &Base,
215
+ MachineOperand &Offset, unsigned SizeInBytes,
216
+ bool WantsExt) const ;
209
217
ComplexRendererFns selectAddrModeRegisterOffset (MachineOperand &Root) const ;
210
218
ComplexRendererFns selectAddrModeXRO (MachineOperand &Root,
211
219
unsigned SizeInBytes) const ;
@@ -214,6 +222,13 @@ class AArch64InstructionSelector : public InstructionSelector {
214
222
return selectAddrModeXRO (Root, Width / 8 );
215
223
}
216
224
225
+ ComplexRendererFns selectAddrModeWRO (MachineOperand &Root,
226
+ unsigned SizeInBytes) const ; // Matches addresses such as [xBase, wOffset, sxtw #shift] for an access of SizeInBytes bytes.
227
+ template <int Width>
228
+ ComplexRendererFns selectAddrModeWRO (MachineOperand &Root) const {
229
+ return selectAddrModeWRO (Root, Width / 8 ); // Width is in bits; the non-template overload takes bytes.
230
+ }
231
+
217
232
ComplexRendererFns selectShiftedRegister (MachineOperand &Root) const ;
218
233
219
234
ComplexRendererFns selectArithShiftedRegister (MachineOperand &Root) const {
@@ -228,6 +243,15 @@ class AArch64InstructionSelector : public InstructionSelector {
228
243
return selectShiftedRegister (Root);
229
244
}
230
245
246
+ /// Given an extend instruction, determine the correct shift-extend type for
247
+ /// that instruction. Returns AArch64_AM::InvalidShiftExtend when there is none.
248
+ ///
249
+ /// If the instruction is going to be used in a load or store, pass
250
+ /// \p IsLoadStore = true. In that mode the 0xFF and 0xFFFF cases yield InvalidShiftExtend instead of UXTB / UXTH.
251
+ AArch64_AM::ShiftExtendType
252
+ getExtendTypeForInst (MachineInstr &MI, MachineRegisterInfo &MRI,
253
+ bool IsLoadStore = false ) const ;
254
+
231
255
// / Instructions that accept extend modifiers like UXTW expect the register
232
256
// / being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
233
257
// / subregister copy if necessary. Return either ExtReg, or the result of the
@@ -4234,52 +4258,26 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4234
4258
[](MachineInstr &Use) { return Use.mayLoadOrStore (); });
4235
4259
}
4236
4260
4237
- // / This is used for computing addresses like this:
4238
- // /
4239
- // / ldr x1, [x2, x3, lsl #3]
4240
- // /
4241
- // / Where x2 is the base register, and x3 is an offset register. The shift-left
4242
- // / is a constant value specific to this load instruction. That is, we'll never
4243
- // / see anything other than a 3 here (which corresponds to the size of the
4244
- // / element being loaded.)
4245
4261
InstructionSelector::ComplexRendererFns
4246
- AArch64InstructionSelector::selectAddrModeShiftedExtendXReg (
4247
- MachineOperand &Root, unsigned SizeInBytes) const {
4248
- if (!Root. isReg ())
4249
- return None ;
4250
- MachineRegisterInfo &MRI = Root. getParent ()-> getMF ()-> getRegInfo ( );
4262
+ AArch64InstructionSelector::selectExtendedSHL (
4263
+ MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
4264
+ unsigned SizeInBytes, bool WantsExt) const {
4265
+ assert (Base. isReg () && " Expected base to be a register operand " ) ;
4266
+ assert (Offset. isReg () && " Expected offset to be a register operand " );
4251
4267
4252
- // Make sure that the memory op is a valid size.
4253
- int64_t LegalShiftVal = Log2_32 (SizeInBytes);
4254
- if (LegalShiftVal == 0 )
4255
- return None;
4256
-
4257
- // We want to find something like this:
4258
- //
4259
- // val = G_CONSTANT LegalShiftVal
4260
- // shift = G_SHL off_reg val
4261
- // ptr = G_PTR_ADD base_reg shift
4262
- // x = G_LOAD ptr
4263
- //
4264
- // And fold it into this addressing mode:
4265
- //
4266
- // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4267
-
4268
- // Check if we can find the G_PTR_ADD.
4269
- MachineInstr *Gep = getOpcodeDef (TargetOpcode::G_PTR_ADD, Root.getReg (), MRI);
4270
- if (!Gep || !isWorthFoldingIntoExtendedReg (*Gep, MRI))
4271
- return None;
4272
-
4273
- // Now, try to match an opcode which will match our specific offset.
4274
- // We want a G_SHL or a G_MUL.
4275
- MachineInstr *OffsetInst = getDefIgnoringCopies (Gep->getOperand (2 ).getReg (), MRI);
4268
+ MachineRegisterInfo &MRI = Root.getParent ()->getMF ()->getRegInfo ();
4269
+ MachineInstr *OffsetInst = MRI.getVRegDef (Offset.getReg ());
4276
4270
if (!OffsetInst)
4277
4271
return None;
4278
4272
4279
4273
unsigned OffsetOpc = OffsetInst->getOpcode ();
4280
4274
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4281
4275
return None;
4282
4276
4277
+ // Make sure that the memory op is a valid size.
4278
+ int64_t LegalShiftVal = Log2_32 (SizeInBytes);
4279
+ if (LegalShiftVal == 0 )
4280
+ return None;
4283
4281
if (!isWorthFoldingIntoExtendedReg (*OffsetInst, MRI))
4284
4282
return None;
4285
4283
@@ -4324,20 +4322,75 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4324
4322
if (ImmVal != LegalShiftVal)
4325
4323
return None;
4326
4324
4325
+ unsigned SignExtend = 0 ;
4326
+ if (WantsExt) {
4327
+ // Check if the offset is defined by an extend.
4328
+ MachineInstr *ExtInst = getDefIgnoringCopies (OffsetReg, MRI);
4329
+ auto Ext = getExtendTypeForInst (*ExtInst, MRI, true );
4330
+ if (Ext == AArch64_AM::InvalidShiftExtend)
4331
+ return None;
4332
+
4333
+ SignExtend = Ext == AArch64_AM::SXTW;
4334
+
4335
+ // Need a 32-bit wide register here.
4336
+ MachineIRBuilder MIB (*MRI.getVRegDef (Root.getReg ()));
4337
+ OffsetReg = ExtInst->getOperand (1 ).getReg ();
4338
+ OffsetReg = narrowExtendRegIfNeeded (OffsetReg, MIB);
4339
+ }
4340
+
4327
4341
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
4328
4342
// offset. Signify that we are shifting by setting the shift flag to 1.
4329
- return {{[=](MachineInstrBuilder &MIB) {
4330
- MIB.addUse (Gep->getOperand (1 ).getReg ());
4331
- },
4343
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse (Base.getReg ()); },
4332
4344
[=](MachineInstrBuilder &MIB) { MIB.addUse (OffsetReg); },
4333
4345
[=](MachineInstrBuilder &MIB) {
4334
4346
// Need to add both immediates here to make sure that they are both
4335
4347
// added to the instruction.
4336
- MIB.addImm (0 );
4348
+ MIB.addImm (SignExtend );
4337
4349
MIB.addImm (1 );
4338
4350
}}};
4339
4351
}
4340
4352
4353
+ // / This is used for computing addresses like this:
4354
+ // /
4355
+ // / ldr x1, [x2, x3, lsl #3]
4356
+ // /
4357
+ // / Where x2 is the base register, and x3 is an offset register. The shift-left
4358
+ // / is a constant value specific to this load instruction. That is, we'll never
4359
+ // / see anything other than a 3 here (which corresponds to the size of the
4360
+ // / element being loaded.)
4361
+ InstructionSelector::ComplexRendererFns
4362
+ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg (
4363
+ MachineOperand &Root, unsigned SizeInBytes) const {
4364
+ if (!Root.isReg ())
4365
+ return None;
4366
+ MachineRegisterInfo &MRI = Root.getParent ()->getMF ()->getRegInfo ();
4367
+
4368
+ // We want to find something like this:
4369
+ //
4370
+ // val = G_CONSTANT LegalShiftVal
4371
+ // shift = G_SHL off_reg val
4372
+ // ptr = G_PTR_ADD base_reg shift
4373
+ // x = G_LOAD ptr
4374
+ //
4375
+ // And fold it into this addressing mode:
4376
+ //
4377
+ // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4378
+
4379
+ // Check if we can find the G_PTR_ADD.
4380
+ MachineInstr *PtrAdd =
4381
+ getOpcodeDef (TargetOpcode::G_PTR_ADD, Root.getReg (), MRI);
4382
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg (*PtrAdd, MRI))
4383
+ return None;
4384
+
4385
+ // Now, try to match an opcode which will match our specific offset.
4386
+ // We want a G_SHL or a G_MUL.
4387
+ MachineInstr *OffsetInst =
4388
+ getDefIgnoringCopies (PtrAdd->getOperand (2 ).getReg (), MRI);
4389
+ return selectExtendedSHL (Root, PtrAdd->getOperand (1 ),
4390
+ OffsetInst->getOperand (0 ), SizeInBytes,
4391
+ /* WantsExt=*/ false );
4392
+ }
4393
+
4341
4394
// / This is used for computing addresses like this:
4342
4395
// /
4343
4396
// / ldr x1, [x2, x3]
@@ -4399,6 +4452,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4399
4452
return selectAddrModeRegisterOffset (Root);
4400
4453
}
4401
4454
4455
+ // / This is used for computing addresses like this:
4456
+ // /
4457
+ // / ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
4458
+ // /
4459
+ // / Where we have a 64-bit base register, a 32-bit offset register, and an
4460
+ // / extend (which may or may not be signed).
4461
+ InstructionSelector::ComplexRendererFns
4462
+ AArch64InstructionSelector::selectAddrModeWRO (MachineOperand &Root,
4463
+ unsigned SizeInBytes) const {
4464
+ MachineRegisterInfo &MRI = Root.getParent ()->getMF ()->getRegInfo ();
4465
+
4466
+ MachineInstr *PtrAdd =
4467
+ getOpcodeDef (TargetOpcode::G_PTR_ADD, Root.getReg (), MRI);
4468
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg (*PtrAdd, MRI))
4469
+ return None;
4470
+
4471
+ MachineOperand &LHS = PtrAdd->getOperand (1 );
4472
+ MachineOperand &RHS = PtrAdd->getOperand (2 );
4473
+ MachineInstr *OffsetInst = getDefIgnoringCopies (RHS.getReg (), MRI);
4474
+
4475
+ // The first case is the same as selectAddrModeXRO, except we need an extend.
4476
+ // In this case, we try to find a shift and extend, and fold them into the
4477
+ // addressing mode.
4478
+ //
4479
+ // E.g.
4480
+ //
4481
+ // off_reg = G_Z/S/ANYEXT ext_reg
4482
+ // val = G_CONSTANT LegalShiftVal
4483
+ // shift = G_SHL off_reg val
4484
+ // ptr = G_PTR_ADD base_reg shift
4485
+ // x = G_LOAD ptr
4486
+ //
4487
+ // In this case we can get a load like this:
4488
+ //
4489
+ // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
4490
+ auto ExtendedShl = selectExtendedSHL (Root, LHS, OffsetInst->getOperand (0 ),
4491
+ SizeInBytes, /* WantsExt=*/ true );
4492
+ if (ExtendedShl)
4493
+ return ExtendedShl;
4494
+
4495
+ // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
4496
+ //
4497
+ // e.g.
4498
+ // ldr something, [base_reg, ext_reg, sxtw]
4499
+ if (!isWorthFoldingIntoExtendedReg (*OffsetInst, MRI))
4500
+ return None;
4501
+
4502
+ // Check if this is an extend. We'll get an extend type if it is.
4503
+ AArch64_AM::ShiftExtendType Ext =
4504
+ getExtendTypeForInst (*OffsetInst, MRI, /* IsLoadStore=*/ true );
4505
+ if (Ext == AArch64_AM::InvalidShiftExtend)
4506
+ return None;
4507
+
4508
+ // Need a 32-bit wide register.
4509
+ MachineIRBuilder MIB (*PtrAdd);
4510
+ Register ExtReg =
4511
+ narrowExtendRegIfNeeded (OffsetInst->getOperand (1 ).getReg (), MIB);
4512
+ unsigned SignExtend = Ext == AArch64_AM::SXTW;
4513
+
4514
+ // Base is LHS, offset is ExtReg.
4515
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse (LHS.getReg ()); },
4516
+ [=](MachineInstrBuilder &MIB) { MIB.addUse (ExtReg); },
4517
+ [=](MachineInstrBuilder &MIB) {
4518
+ MIB.addImm (SignExtend);
4519
+ MIB.addImm (0 );
4520
+ }}};
4521
+ }
4522
+
4402
4523
// / Select a "register plus unscaled signed 9-bit immediate" address. This
4403
4524
// / should only match when there is an offset that is not valid for a scaled
4404
4525
// / immediate addressing mode. The "Size" argument is the size in bytes of the
@@ -4561,9 +4682,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4561
4682
[=](MachineInstrBuilder &MIB) { MIB.addImm (ShiftVal); }}};
4562
4683
}
4563
4684
4564
- // / Get the correct ShiftExtendType for an extend instruction.
4565
- static AArch64_AM::ShiftExtendType
4566
- getExtendTypeForInst (MachineInstr &MI, MachineRegisterInfo &MRI) {
4685
+ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst (
4686
+ MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
4567
4687
unsigned Opc = MI.getOpcode ();
4568
4688
4569
4689
// Handle explicit extend instructions first.
@@ -4610,9 +4730,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
4610
4730
default :
4611
4731
return AArch64_AM::InvalidShiftExtend;
4612
4732
case 0xFF :
4613
- return AArch64_AM::UXTB;
4733
+ return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend ;
4614
4734
case 0xFFFF :
4615
- return AArch64_AM::UXTH;
4735
+ return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend ;
4616
4736
case 0xFFFFFFFF :
4617
4737
return AArch64_AM::UXTW;
4618
4738
}
0 commit comments