[AArch64][GISel] Always fold G_SHL into addressing mode where possible, unless the subtarget has addr-lsl-slow-14 #96603

Merged · 1 commit · Jul 18, 2024
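
Context for the diff below: GlobalISel previously declined to fold a G_SHL feeding an address computation into a load/store addressing mode when the shifted value had more than one use. After this change, the fold is always taken for address operands, except for shift amounts of 1 or 4 on subtargets with the addr-lsl-slow-14 tuning feature, where register-offset accesses with those shifts cost extra. A rough C-level sketch of the trade-off (hypothetical function, not part of the patch; the instruction sequences in the comments show the two candidate selections):

    #include <cstdint>

    // Both loads compute the address base + (idx << 1). The selector can
    // either fold the shift into each load's addressing mode, or materialize
    // the address once and reuse it:
    //
    //   Folded (chosen after this patch, unless addr-lsl-slow-14):
    //     ldrh w8, [x0, x1, lsl #1]
    //     ldrh w9, [x0, x1, lsl #1]
    //
    //   Unfolded (chosen on addr-lsl-slow-14 subtargets for LSL #1 / #4):
    //     add  x8, x0, x1, lsl #1
    //     ldrh w9, [x8]
    //     ldrh w10, [x8]
    uint32_t sum_twice(const uint16_t *base, uint64_t idx) {
      uint32_t a = base[idx]; // first use of the shifted address
      uint32_t b = base[idx]; // second use of the same address
      return a + b;
    }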
76 changes: 66 additions & 10 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -414,8 +414,13 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectAddrModeIndexed(Root, Width / 8);
   }
 
+  std::optional<bool>
+  isWorthFoldingIntoAddrMode(MachineInstr &MI,
+                             const MachineRegisterInfo &MRI) const;
+
   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
-                                     const MachineRegisterInfo &MRI) const;
+                                     const MachineRegisterInfo &MRI,
+                                     bool IsAddrOperand) const;
   ComplexRendererFns
   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                   unsigned SizeInBytes) const;
@@ -6869,19 +6874,70 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
   return select12BitValueWithLeftShift(Immed);
 }
 
+/// Checks if we are sure that folding MI into load/store addressing mode is
+/// beneficial or not.
+///
+/// Returns:
+/// - true if folding MI would be beneficial.
+/// - false if folding MI would be bad.
+/// - std::nullopt if it is not sure whether folding MI is beneficial.
+///
+/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
+///
+///   %13:gpr(s64) = G_CONSTANT i64 1
+///   %8:gpr(s64) = G_SHL %6, %13(s64)
+///   %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+///   %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
+    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+  if (MI.getOpcode() == AArch64::G_SHL) {
+    // Address operands with shifts are free, except for running on subtargets
+    // with AddrLSLSlow14.
+    if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
+            MI.getOperand(2).getReg(), MRI)) {
+      const APInt ShiftVal = ValAndVReg->Value;
+
+      // Don't fold if we know this will be slow.
+      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
+    }
+  }
+  return std::nullopt;
+}
+
 /// Return true if it is worth folding MI into an extended register. That is,
 /// if it's safe to pull it into the addressing mode of a load or store as a
 /// shift.
+/// \p IsAddrOperand specifies whether the def of MI is used as an address
+/// operand (e.g. feeding into an LDR/STR).
 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
-    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+    MachineInstr &MI, const MachineRegisterInfo &MRI,
+    bool IsAddrOperand) const {
 
   // Always fold if there is one use, or if we're optimizing for size.
   Register DefReg = MI.getOperand(0).getReg();
   if (MRI.hasOneNonDBGUse(DefReg) ||
       MI.getParent()->getParent()->getFunction().hasOptSize())
     return true;
 
-  // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
-  // appropriate.
+  if (IsAddrOperand) {
+    // If we are already sure that folding MI is good or bad, return the result.
+    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
+      return *Worth;
+
+    // Fold G_PTR_ADD if its offset operand can be folded.
+    if (MI.getOpcode() == AArch64::G_PTR_ADD) {
+      MachineInstr *OffsetInst =
+          getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+
+      // Note, we already know G_PTR_ADD is used by at least two instructions.
+      // If we are also sure about whether folding is beneficial or not,
+      // return the result.
+      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
+        return *Worth;
+    }
+  }
+
+  // FIXME: Consider checking HasALULSLFast as appropriate.
+
   // We have a fastpath, so folding a shift in and potentially computing it
   // many times may be beneficial. Check if this is only used in memory ops.
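
Restating the new heuristic outside of LLVM may help review. A minimal standalone sketch (plain C++ stand-ins; Subtarget and worthFoldingShift are illustrative names here, not the LLVM API):

    #include <cstdint>
    #include <optional>

    struct Subtarget {
      bool AddrLSLSlow14; // tuning: LSL #1 / #4 in addressing modes is slow
    };

    // Mirrors isWorthFoldingIntoAddrMode for a shift feeding an address:
    // a definite yes/no when the shift amount is a known constant, and
    // std::nullopt ("not sure") otherwise, in which case the caller falls
    // back to the generic use-count heuristic.
    std::optional<bool> worthFoldingShift(const Subtarget &STI,
                                          std::optional<uint64_t> ConstShift) {
      if (!ConstShift)
        return std::nullopt; // non-constant shift: defer to the caller
      // Shifted address operands are free, except for LSL #1 and LSL #4 on
      // AddrLSLSlow14 subtargets.
      return !(STI.AddrLSLSlow14 && (*ConstShift == 1 || *ConstShift == 4));
    }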
@@ -6929,7 +6985,7 @@ AArch64InstructionSelector::selectExtendedSHL(
   int64_t LegalShiftVal = Log2_32(SizeInBytes);
   if (LegalShiftVal == 0)
     return std::nullopt;
-  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
     return std::nullopt;
 
   // Now, try to find the specific G_CONSTANT. Start by assuming that the
@@ -7036,7 +7092,7 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   // Check if we can find the G_PTR_ADD.
   MachineInstr *PtrAdd =
       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
     return std::nullopt;
 
   // Now, try to match an opcode which will match our specific offset.
@@ -7170,7 +7226,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
 
   MachineInstr *PtrAdd =
       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
     return std::nullopt;
 
   MachineOperand &LHS = PtrAdd->getOperand(1);
@@ -7201,7 +7257,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
   //
   // e.g.
   // ldr something, [base_reg, ext_reg, sxtw]
-  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
     return std::nullopt;
 
   // Check if this is an extend. We'll get an extend type if it is.
@@ -7396,7 +7452,7 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
     return std::nullopt;
   if (ShType == AArch64_AM::ROR && !AllowROR)
     return std::nullopt;
-  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
     return std::nullopt;
 
   // Need an immediate on the RHS.
@@ -7510,7 +7566,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
   if (!RootDef)
     return std::nullopt;
 
-  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
     return std::nullopt;
 
   // Check if we can fold a shift and an extend.
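
The test's RUN lines fall outside the captured hunks. Given the CHECK-FAST/CHECK-SLOW prefixes introduced below, the file is presumably checked under two configurations along these lines (an assumption, reconstructed from the check prefixes and the feature name in the title):

    # RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select \
    # RUN:   -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
    # RUN: llc -mtriple=aarch64-unknown-unknown -mattr=+addr-lsl-slow-14 -run-pass=instruction-select \
    # RUN:   -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW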
156 changes: 103 additions & 53 deletions llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -535,13 +535,13 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
-    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
+    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
     ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
     ; CHECK-NEXT: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -571,19 +571,36 @@ body: |
     liveins: $x0, $x1, $x2
     liveins: $w1, $x0
 
-    ; CHECK-LABEL: name: ldrhrox_more_than_one_mem_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
-    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+    ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+    ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
    %15:gpr(s64) = G_CONSTANT i64 9
@@ -612,19 +629,36 @@ body: |
     liveins: $x0, $x1, $x2
     liveins: $w1, $x0
 
-    ; CHECK-LABEL: name: ldrhrox_more_than_one_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
-    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+    ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+    ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
    %15:gpr(s64) = G_CONSTANT i64 9
@@ -656,15 +690,15 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 62, 61
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[ADDXrr]], 0 :: (load (s32) from %ir.addr)
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWui]], 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY1]], [[COPY]], 0, 1 :: (load (s32) from %ir.addr)
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWroX]], 0
     ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 2, 0
     ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[SUBREG_TO_REG]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
     ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
     ; CHECK-NEXT: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -692,21 +726,37 @@ machineFunctionInfo: {}
 body: |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; CHECK-LABEL: name: ldrqrox_more_than_one_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
-    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
-    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+    ; CHECK-FAST-LABEL: name: ldrqrox_more_than_one_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-FAST-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-FAST-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-FAST-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-FAST-NEXT: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY1]], [[COPY]], 0, 1 :: (load (s128) from %ir.addr)
+    ; CHECK-FAST-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+    ; CHECK-FAST-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[LDRQroX]].dsub
+    ; CHECK-FAST-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[COPY3]]
+    ; CHECK-FAST-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXri]]
+    ; CHECK-FAST-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrqrox_more_than_one_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-SLOW-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+    ; CHECK-SLOW-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
+    ; CHECK-SLOW-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+    ; CHECK-SLOW-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
     %0:gpr(s64) = COPY $x0
     %1:gpr(s64) = G_CONSTANT i64 4
     %2:gpr(s64) = G_SHL %0, %1(s64)