Commit 93edeea

[AArch64][GISel] Always fold G_SHL into addressing mode where possible, unless the subtarget has addr-lsl-slow-14
1 parent e813b6d commit 93edeea
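
In effect, the selector can now emit a register-offset load with the shift folded into the addressing mode instead of a separate shift-and-add, except on subtargets where that form is slow. A minimal sketch of the trade-off (the instruction sequences are illustrative, not taken from this commit):

    #include <cstdint>

    // A 16-bit indexed access: the address computed is base + (idx << 1).
    uint16_t load_elem(const uint16_t *base, uint64_t idx) {
      return base[idx];
    }
    // Folded form (one instruction):    ldrh w0, [x0, x1, lsl #1]
    // Unfolded form (two instructions): lsl  x8, x1, #1
    //                                   ldrh w0, [x0, x8]
    // On subtargets with FeatureAddrLSLSlow14, an addressing-mode shift of
    // 1 or 4 is slow, so this commit keeps the unfolded form there and
    // folds everywhere else.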

File tree

4 files changed: +231 -108 lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 66 additions & 10 deletions
@@ -412,8 +412,13 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectAddrModeIndexed(Root, Width / 8);
   }
 
+  std::optional<bool>
+  isWorthFoldingIntoAddrMode(MachineInstr &MI,
+                             const MachineRegisterInfo &MRI) const;
+
   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
-                                     const MachineRegisterInfo &MRI) const;
+                                     const MachineRegisterInfo &MRI,
+                                     bool IsAddrOperand) const;
   ComplexRendererFns
   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                   unsigned SizeInBytes) const;
@@ -6717,19 +6722,70 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
   return select12BitValueWithLeftShift(Immed);
 }
 
+/// Checks if we are sure that folding MI into a load/store addressing mode is
+/// beneficial or not.
+///
+/// Returns:
+/// - true if folding MI would be beneficial.
+/// - false if folding MI would be bad.
+/// - std::nullopt if it is not sure whether folding MI is beneficial.
+///
+/// \p MI can be the offset operand of a G_PTR_ADD, e.g. the G_SHL in:
+///
+///     %13:gpr(s64) = G_CONSTANT i64 1
+///     %8:gpr(s64) = G_SHL %6, %13(s64)
+///     %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+///     %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
+    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+  if (MI.getOpcode() == AArch64::G_SHL) {
+    // Address operands with shifts are free, except when running on subtargets
+    // with AddrLSLSlow14.
+    if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
+            MI.getOperand(2).getReg(), MRI)) {
+      const APInt ShiftVal = ValAndVReg->Value;
+
+      // Don't fold if we know this will be slow.
+      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
+    }
+  }
+  return std::nullopt;
+}
+
 /// Return true if it is worth folding MI into an extended register. That is,
 /// if it's safe to pull it into the addressing mode of a load or store as a
 /// shift.
+/// \p IsAddrOperand whether the def of MI is used as an address operand
+/// (e.g. feeding into an LDR/STR).
 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
-    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+    MachineInstr &MI, const MachineRegisterInfo &MRI,
+    bool IsAddrOperand) const {
+
   // Always fold if there is one use, or if we're optimizing for size.
   Register DefReg = MI.getOperand(0).getReg();
   if (MRI.hasOneNonDBGUse(DefReg) ||
       MI.getParent()->getParent()->getFunction().hasOptSize())
     return true;
 
-  // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
-  // appropriate.
+  if (IsAddrOperand) {
+    // If we are already sure whether folding MI is good or bad, return the
+    // result.
+    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
+      return *Worth;
+
+    // Fold a G_PTR_ADD if its offset operand can be folded.
+    if (MI.getOpcode() == AArch64::G_PTR_ADD) {
+      MachineInstr *OffsetInst =
+          getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+
+      // Note, we already know the G_PTR_ADD is used by at least two
+      // instructions. If we are also sure whether folding is beneficial or
+      // not, return the result.
+      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
+        return *Worth;
+    }
+  }
+
+  // FIXME: Consider checking HasALULSLFast as appropriate.
 
   // We have a fastpath, so folding a shift in and potentially computing it
   // many times may be beneficial. Check if this is only used in memory ops.
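
The tri-state result is what keeps the existing heuristics intact: a definite verdict from isWorthFoldingIntoAddrMode short-circuits, and std::nullopt falls through to the use-count checks that follow. A standalone sketch of the same pattern (illustrative names, not the LLVM API):

    #include <optional>

    // Illustrative tri-state gate: a definite verdict when the shift amount
    // is a known constant, std::nullopt to defer to the caller's heuristics.
    std::optional<bool> definiteFoldVerdict(std::optional<int> ShiftAmt,
                                            bool HasAddrLSLSlow14) {
      if (ShiftAmt) // constant shift found: we can answer for sure
        return !(HasAddrLSLSlow14 && (*ShiftAmt == 1 || *ShiftAmt == 4));
      return std::nullopt; // unknown shift: not our call to make
    }

    bool worthFolding(std::optional<int> ShiftAmt, bool Slow14,
                      bool HeuristicSaysFold) {
      if (const auto Verdict = definiteFoldVerdict(ShiftAmt, Slow14))
        return *Verdict;        // a sure answer wins either way
      return HeuristicSaysFold; // otherwise fall back, as the code above does
    }
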
@@ -6777,7 +6833,7 @@ AArch64InstructionSelector::selectExtendedSHL(
   int64_t LegalShiftVal = Log2_32(SizeInBytes);
   if (LegalShiftVal == 0)
     return std::nullopt;
-  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
     return std::nullopt;
 
   // Now, try to find the specific G_CONSTANT. Start by assuming that the
@@ -6884,7 +6940,7 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   // Check if we can find the G_PTR_ADD.
   MachineInstr *PtrAdd =
       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
     return std::nullopt;
 
   // Now, try to match an opcode which will match our specific offset.
@@ -7018,7 +7074,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
 
   MachineInstr *PtrAdd =
       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
     return std::nullopt;
 
   MachineOperand &LHS = PtrAdd->getOperand(1);
@@ -7049,7 +7105,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
   //
   // e.g.
   // ldr something, [base_reg, ext_reg, sxtw]
-  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
     return std::nullopt;
 
   // Check if this is an extend. We'll get an extend type if it is.
@@ -7244,7 +7300,7 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
     return std::nullopt;
   if (ShType == AArch64_AM::ROR && !AllowROR)
     return std::nullopt;
-  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
     return std::nullopt;
 
   // Need an immediate on the RHS.
@@ -7358,7 +7414,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
   if (!RootDef)
     return std::nullopt;
 
-  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
     return std::nullopt;
 
   // Check if we can fold a shift and an extend.
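
Note the pattern across the call sites above: the address-mode selectors (selectExtendedSHL, selectAddrModeShiftedExtendXReg, selectAddrModeWRO) pass IsAddrOperand = true, while the ALU-side selectors (selectShiftedRegister, selectArithExtendedRegister) pass false, since AddrLSLSlow14 penalizes shifts only inside load/store addressing modes. Schematically (a hypothetical condensation, not LLVM code):

    #include <optional>

    // Only address operands consult the subtarget-specific verdict; ALU
    // contexts keep the old use-count heuristic unconditionally.
    bool isWorthFolding(bool IsAddrOperand, std::optional<bool> AddrVerdict,
                        bool UseCountHeuristic) {
      if (IsAddrOperand && AddrVerdict)
        return *AddrVerdict;
      return UseCountHeuristic;
    }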

llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir

Lines changed: 103 additions & 53 deletions
@@ -535,13 +535,13 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
-    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
+    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
     ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
     ; CHECK-NEXT: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -571,19 +571,36 @@ body: |
     liveins: $x0, $x1, $x2
     liveins: $w1, $x0
 
-    ; CHECK-LABEL: name: ldrhrox_more_than_one_mem_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
-    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+    ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+    ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
    %15:gpr(s64) = G_CONSTANT i64 9
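
The split into CHECK-FAST and CHECK-SLOW prefixes implies the test now runs under two configurations, with and without the slow-shift feature; the shared CHECK lines in the other hunks are matched by both. The RUN lines are outside this diff; a plausible shape, assuming the feature name from the commit title (hypothetical, the actual lines may differ):

    # RUN: llc -mtriple=aarch64 -run-pass=instruction-select %s -o - \
    # RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
    # RUN: llc -mtriple=aarch64 -mattr=+addr-lsl-slow-14 -run-pass=instruction-select %s -o - \
    # RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
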
@@ -612,19 +629,36 @@ body: |
     liveins: $x0, $x1, $x2
     liveins: $w1, $x0
 
-    ; CHECK-LABEL: name: ldrhrox_more_than_one_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
-    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+    ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+    ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
    %15:gpr(s64) = G_CONSTANT i64 9
@@ -656,15 +690,15 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 62, 61
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[ADDXrr]], 0 :: (load (s32) from %ir.addr)
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWui]], 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY1]], [[COPY]], 0, 1 :: (load (s32) from %ir.addr)
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWroX]], 0
     ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 2, 0
     ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[SUBREG_TO_REG]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
     ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
     ; CHECK-NEXT: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -692,21 +726,37 @@ machineFunctionInfo: {}
 body: |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; CHECK-LABEL: name: ldrqrox_more_than_one_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
-    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
-    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+    ; CHECK-FAST-LABEL: name: ldrqrox_more_than_one_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-FAST-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-FAST-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-FAST-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-FAST-NEXT: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY1]], [[COPY]], 0, 1 :: (load (s128) from %ir.addr)
+    ; CHECK-FAST-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+    ; CHECK-FAST-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[LDRQroX]].dsub
+    ; CHECK-FAST-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[COPY3]]
+    ; CHECK-FAST-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXri]]
+    ; CHECK-FAST-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrqrox_more_than_one_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-SLOW-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+    ; CHECK-SLOW-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
+    ; CHECK-SLOW-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+    ; CHECK-SLOW-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
     %0:gpr(s64) = COPY $x0
     %1:gpr(s64) = G_CONSTANT i64 4
     %2:gpr(s64) = G_SHL %0, %1(s64)
