Skip to content

Commit 9949b1a

Browse files
author
Jessica Paquette
committed
[GlobalISel][AArch64] Import + select LDR*roW and STR*roW patterns
This adds support for selecting a large chunk of the load/store *roW patterns.

This is pretty much a straight port of AArch64DAGToDAGISel::SelectAddrModeWRO into GISel. The code is very similar to the XRO code. The main difference is that in the *roW patterns, we want to try and fold in an extend, and *possibly* a shift along with it.

A good portion of this patch is refactoring the existing XRO code.

- Add selectAddrModeWRO
- Factor out the code from selectAddrModeShiftedExtendXReg which is used by both selectAddrModeXRO and selectAddrModeWRO into selectExtendedSHL. This is similar to the function of the same name in AArch64DAGToDAGISel.
- Add support for extends to the factored out code in selectExtendedSHL.
- Teach getExtendTypeForInst how to handle AND masks that are intended to be used in loads/stores (necessary for this addressing mode.)
- Make getExtendTypeForInst not static because moving it made an annoying diff and I wanted to have the WRO/XRO functions close to each other while I was writing the code.

Differential Revision: https://reviews.llvm.org/D72426
1 parent b35f5d4 commit 9949b1a

File tree

4 files changed

+665
-46
lines changed

4 files changed

+665
-46
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3131,6 +3131,22 @@ def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
31313131
def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
31323132
def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
31333133

3134+
def gi_ro_Windexed8 :
3135+
GIComplexOperandMatcher<s64, "selectAddrModeWRO<8>">,
3136+
GIComplexPatternEquiv<ro_Windexed8>;
3137+
def gi_ro_Windexed16 :
3138+
GIComplexOperandMatcher<s64, "selectAddrModeWRO<16>">,
3139+
GIComplexPatternEquiv<ro_Windexed16>;
3140+
def gi_ro_Windexed32 :
3141+
GIComplexOperandMatcher<s64, "selectAddrModeWRO<32>">,
3142+
GIComplexPatternEquiv<ro_Windexed32>;
3143+
def gi_ro_Windexed64 :
3144+
GIComplexOperandMatcher<s64, "selectAddrModeWRO<64>">,
3145+
GIComplexPatternEquiv<ro_Windexed64>;
3146+
def gi_ro_Windexed128 :
3147+
GIComplexOperandMatcher<s64, "selectAddrModeWRO<128>">,
3148+
GIComplexPatternEquiv<ro_Windexed128>;
3149+
31343150
class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
31353151
let Name = "Mem" # Reg # "Extend" # Width;
31363152
let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 166 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,14 @@ class AArch64InstructionSelector : public InstructionSelector {
206206
ComplexRendererFns
207207
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
208208
unsigned SizeInBytes) const;
209+
210+
/// Returns a \p ComplexRendererFns which contains a base, offset, and whether
211+
/// or not a shift + extend should be folded into an addressing mode. Returns
212+
/// None when this is not profitable or possible.
213+
ComplexRendererFns
214+
selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
215+
MachineOperand &Offset, unsigned SizeInBytes,
216+
bool WantsExt) const;
209217
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
210218
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
211219
unsigned SizeInBytes) const;
@@ -214,6 +222,13 @@ class AArch64InstructionSelector : public InstructionSelector {
214222
return selectAddrModeXRO(Root, Width / 8);
215223
}
216224

225+
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
226+
unsigned SizeInBytes) const;
227+
template <int Width>
228+
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
229+
return selectAddrModeWRO(Root, Width / 8);
230+
}
231+
217232
ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
218233

219234
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
@@ -228,6 +243,15 @@ class AArch64InstructionSelector : public InstructionSelector {
228243
return selectShiftedRegister(Root);
229244
}
230245

246+
/// Given an extend instruction, determine the correct shift-extend type for
247+
/// that instruction.
248+
///
249+
/// If the instruction is going to be used in a load or store, pass
250+
/// \p IsLoadStore = true.
251+
AArch64_AM::ShiftExtendType
252+
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
253+
bool IsLoadStore = false) const;
254+
231255
/// Instructions that accept extend modifiers like UXTW expect the register
232256
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
233257
/// subregister copy if necessary. Return either ExtReg, or the result of the
@@ -4234,52 +4258,26 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
42344258
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
42354259
}
42364260

4237-
/// This is used for computing addresses like this:
4238-
///
4239-
/// ldr x1, [x2, x3, lsl #3]
4240-
///
4241-
/// Where x2 is the base register, and x3 is an offset register. The shift-left
4242-
/// is a constant value specific to this load instruction. That is, we'll never
4243-
/// see anything other than a 3 here (which corresponds to the size of the
4244-
/// element being loaded.)
42454261
InstructionSelector::ComplexRendererFns
4246-
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4247-
MachineOperand &Root, unsigned SizeInBytes) const {
4248-
if (!Root.isReg())
4249-
return None;
4250-
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4262+
AArch64InstructionSelector::selectExtendedSHL(
4263+
MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
4264+
unsigned SizeInBytes, bool WantsExt) const {
4265+
assert(Base.isReg() && "Expected base to be a register operand");
4266+
assert(Offset.isReg() && "Expected offset to be a register operand");
42514267

4252-
// Make sure that the memory op is a valid size.
4253-
int64_t LegalShiftVal = Log2_32(SizeInBytes);
4254-
if (LegalShiftVal == 0)
4255-
return None;
4256-
4257-
// We want to find something like this:
4258-
//
4259-
// val = G_CONSTANT LegalShiftVal
4260-
// shift = G_SHL off_reg val
4261-
// ptr = G_PTR_ADD base_reg shift
4262-
// x = G_LOAD ptr
4263-
//
4264-
// And fold it into this addressing mode:
4265-
//
4266-
// ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4267-
4268-
// Check if we can find the G_PTR_ADD.
4269-
MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
4270-
if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4271-
return None;
4272-
4273-
// Now, try to match an opcode which will match our specific offset.
4274-
// We want a G_SHL or a G_MUL.
4275-
MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4268+
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4269+
MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
42764270
if (!OffsetInst)
42774271
return None;
42784272

42794273
unsigned OffsetOpc = OffsetInst->getOpcode();
42804274
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
42814275
return None;
42824276

4277+
// Make sure that the memory op is a valid size.
4278+
int64_t LegalShiftVal = Log2_32(SizeInBytes);
4279+
if (LegalShiftVal == 0)
4280+
return None;
42834281
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
42844282
return None;
42854283

@@ -4324,20 +4322,75 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
43244322
if (ImmVal != LegalShiftVal)
43254323
return None;
43264324

4325+
unsigned SignExtend = 0;
4326+
if (WantsExt) {
4327+
// Check if the offset is defined by an extend.
4328+
MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
4329+
auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
4330+
if (Ext == AArch64_AM::InvalidShiftExtend)
4331+
return None;
4332+
4333+
SignExtend = Ext == AArch64_AM::SXTW;
4334+
4335+
// Need a 32-bit wide register here.
4336+
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
4337+
OffsetReg = ExtInst->getOperand(1).getReg();
4338+
OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
4339+
}
4340+
43274341
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
43284342
// offset. Signify that we are shifting by setting the shift flag to 1.
4329-
return {{[=](MachineInstrBuilder &MIB) {
4330-
MIB.addUse(Gep->getOperand(1).getReg());
4331-
},
4343+
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
43324344
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
43334345
[=](MachineInstrBuilder &MIB) {
43344346
// Need to add both immediates here to make sure that they are both
43354347
// added to the instruction.
4336-
MIB.addImm(0);
4348+
MIB.addImm(SignExtend);
43374349
MIB.addImm(1);
43384350
}}};
43394351
}
43404352

4353+
/// This is used for computing addresses like this:
4354+
///
4355+
/// ldr x1, [x2, x3, lsl #3]
4356+
///
4357+
/// Where x2 is the base register, and x3 is an offset register. The shift-left
4358+
/// is a constant value specific to this load instruction. That is, we'll never
4359+
/// see anything other than a 3 here (which corresponds to the size of the
4360+
/// element being loaded.)
4361+
InstructionSelector::ComplexRendererFns
4362+
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4363+
MachineOperand &Root, unsigned SizeInBytes) const {
4364+
if (!Root.isReg())
4365+
return None;
4366+
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4367+
4368+
// We want to find something like this:
4369+
//
4370+
// val = G_CONSTANT LegalShiftVal
4371+
// shift = G_SHL off_reg val
4372+
// ptr = G_PTR_ADD base_reg shift
4373+
// x = G_LOAD ptr
4374+
//
4375+
// And fold it into this addressing mode:
4376+
//
4377+
// ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4378+
4379+
// Check if we can find the G_PTR_ADD.
4380+
MachineInstr *PtrAdd =
4381+
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
4382+
if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
4383+
return None;
4384+
4385+
// Now, try to match an opcode which will match our specific offset.
4386+
// We want a G_SHL or a G_MUL.
4387+
MachineInstr *OffsetInst =
4388+
getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
4389+
return selectExtendedSHL(Root, PtrAdd->getOperand(1),
4390+
OffsetInst->getOperand(0), SizeInBytes,
4391+
/*WantsExt=*/false);
4392+
}
4393+
43414394
/// This is used for computing addresses like this:
43424395
///
43434396
/// ldr x1, [x2, x3]
@@ -4399,6 +4452,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
43994452
return selectAddrModeRegisterOffset(Root);
44004453
}
44014454

4455+
/// This is used for computing addresses like this:
4456+
///
4457+
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
4458+
///
4459+
/// Where we have a 64-bit base register, a 32-bit offset register, and an
4460+
/// extend (which may or may not be signed).
4461+
InstructionSelector::ComplexRendererFns
4462+
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
4463+
unsigned SizeInBytes) const {
4464+
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4465+
4466+
MachineInstr *PtrAdd =
4467+
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
4468+
if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
4469+
return None;
4470+
4471+
MachineOperand &LHS = PtrAdd->getOperand(1);
4472+
MachineOperand &RHS = PtrAdd->getOperand(2);
4473+
MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
4474+
4475+
// The first case is the same as selectAddrModeXRO, except we need an extend.
4476+
// In this case, we try to find a shift and extend, and fold them into the
4477+
// addressing mode.
4478+
//
4479+
// E.g.
4480+
//
4481+
// off_reg = G_Z/S/ANYEXT ext_reg
4482+
// val = G_CONSTANT LegalShiftVal
4483+
// shift = G_SHL off_reg val
4484+
// ptr = G_PTR_ADD base_reg shift
4485+
// x = G_LOAD ptr
4486+
//
4487+
// In this case we can get a load like this:
4488+
//
4489+
// ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
4490+
auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
4491+
SizeInBytes, /*WantsExt=*/true);
4492+
if (ExtendedShl)
4493+
return ExtendedShl;
4494+
4495+
// There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
4496+
//
4497+
// e.g.
4498+
// ldr something, [base_reg, ext_reg, sxtw]
4499+
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4500+
return None;
4501+
4502+
// Check if this is an extend. We'll get an extend type if it is.
4503+
AArch64_AM::ShiftExtendType Ext =
4504+
getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
4505+
if (Ext == AArch64_AM::InvalidShiftExtend)
4506+
return None;
4507+
4508+
// Need a 32-bit wide register.
4509+
MachineIRBuilder MIB(*PtrAdd);
4510+
Register ExtReg =
4511+
narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
4512+
unsigned SignExtend = Ext == AArch64_AM::SXTW;
4513+
4514+
// Base is LHS, offset is ExtReg.
4515+
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
4516+
[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4517+
[=](MachineInstrBuilder &MIB) {
4518+
MIB.addImm(SignExtend);
4519+
MIB.addImm(0);
4520+
}}};
4521+
}
4522+
44024523
/// Select a "register plus unscaled signed 9-bit immediate" address. This
44034524
/// should only match when there is an offset that is not valid for a scaled
44044525
/// immediate addressing mode. The "Size" argument is the size in bytes of the
@@ -4561,9 +4682,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
45614682
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
45624683
}
45634684

4564-
/// Get the correct ShiftExtendType for an extend instruction.
4565-
static AArch64_AM::ShiftExtendType
4566-
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
4685+
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
4686+
MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
45674687
unsigned Opc = MI.getOpcode();
45684688

45694689
// Handle explicit extend instructions first.
@@ -4610,9 +4730,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
46104730
default:
46114731
return AArch64_AM::InvalidShiftExtend;
46124732
case 0xFF:
4613-
return AArch64_AM::UXTB;
4733+
return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
46144734
case 0xFFFF:
4615-
return AArch64_AM::UXTH;
4735+
return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
46164736
case 0xFFFFFFFF:
46174737
return AArch64_AM::UXTW;
46184738
}

0 commit comments

Comments
 (0)