Commit e4a2d74
[AArch64][GISel] Always fold G_SHL into addressing mode where possible, unless the subtarget has addr-lsl-slow-14 (#96603)
Before this patch, we folded G_SHL into the lsl of a load/store addressing mode only when it had exactly one use, all of its uses were memory ops, or we were optimizing for size. However, lsl is free on all AArch64 targets except those with FeatureAddrLSLSlow14. This patch uses that fact to always fold G_SHL into lsl for memory ops, except on subtargets with FeatureAddrLSLSlow14. It also fixes a 15% GISel regression in TSVC kernel s482 and reduces the regression in s291 from 20% to 10%.
1 parent ba8e492 commit e4a2d74
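
As a minimal illustration of the fold (registers and shift amount are invented for the example, not taken from the patch): without folding, the shift is materialized once and every load uses a plain register offset; with folding, each load applies the shift in its own addressing mode, which is free on most cores but slow for shift amounts 1 and 4 on subtargets with FeatureAddrLSLSlow14.

    // Shift materialized separately.
    lsl x8, x2, #3
    ldr x0, [x1, x8]
    ldr x9, [x1, x8]

    // Shift folded into each load's addressing mode.
    ldr x0, [x1, x2, lsl #3]
    ldr x9, [x1, x2, lsl #3]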

4 files changed, +231 -112 lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 66 additions & 10 deletions
@@ -414,8 +414,13 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectAddrModeIndexed(Root, Width / 8);
   }
 
+  std::optional<bool>
+  isWorthFoldingIntoAddrMode(MachineInstr &MI,
+                             const MachineRegisterInfo &MRI) const;
+
   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
-                                     const MachineRegisterInfo &MRI) const;
+                                     const MachineRegisterInfo &MRI,
+                                     bool IsAddrOperand) const;
   ComplexRendererFns
   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                   unsigned SizeInBytes) const;
@@ -6869,19 +6874,70 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
   return select12BitValueWithLeftShift(Immed);
 }
 
+/// Checks if we are sure that folding MI into load/store addressing mode is
+/// beneficial or not.
+///
+/// Returns:
+/// - true if folding MI would be beneficial.
+/// - false if folding MI would be bad.
+/// - std::nullopt if it is not sure whether folding MI is beneficial.
+///
+/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
+///
+/// %13:gpr(s64) = G_CONSTANT i64 1
+/// %8:gpr(s64) = G_SHL %6, %13(s64)
+/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
+    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+  if (MI.getOpcode() == AArch64::G_SHL) {
+    // Address operands with shifts are free, except for running on subtargets
+    // with AddrLSLSlow14.
+    if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
+            MI.getOperand(2).getReg(), MRI)) {
+      const APInt ShiftVal = ValAndVeg->Value;
+
+      // Don't fold if we know this will be slow.
+      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
+    }
+  }
+  return std::nullopt;
+}
+
 /// Return true if it is worth folding MI into an extended register. That is,
 /// if it's safe to pull it into the addressing mode of a load or store as a
 /// shift.
+/// \p IsAddrOperand whether the def of MI is used as an address operand
+/// (e.g. feeding into an LDR/STR).
 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
-    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+    MachineInstr &MI, const MachineRegisterInfo &MRI,
+    bool IsAddrOperand) const {
+
   // Always fold if there is one use, or if we're optimizing for size.
   Register DefReg = MI.getOperand(0).getReg();
   if (MRI.hasOneNonDBGUse(DefReg) ||
       MI.getParent()->getParent()->getFunction().hasOptSize())
     return true;
 
-  // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
-  // appropriate.
+  if (IsAddrOperand) {
+    // If we are already sure that folding MI is good or bad, return the result.
+    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
+      return *Worth;
+
+    // Fold G_PTR_ADD if its offset operand can be folded
+    if (MI.getOpcode() == AArch64::G_PTR_ADD) {
+      MachineInstr *OffsetInst =
+          getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+
+      // Note, we already know G_PTR_ADD is used by at least two instructions.
+      // If we are also sure about whether folding is beneficial or not,
+      // return the result.
+      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
+        return *Worth;
+    }
+  }
+
+  // FIXME: Consider checking HasALULSLFast as appropriate.
 
   // We have a fastpath, so folding a shift in and potentially computing it
   // many times may be beneficial. Check if this is only used in memory ops.
@@ -6929,7 +6985,7 @@ AArch64InstructionSelector::selectExtendedSHL(
   int64_t LegalShiftVal = Log2_32(SizeInBytes);
   if (LegalShiftVal == 0)
     return std::nullopt;
-  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
     return std::nullopt;
 
   // Now, try to find the specific G_CONSTANT. Start by assuming that the
@@ -7036,7 +7092,7 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   // Check if we can find the G_PTR_ADD.
   MachineInstr *PtrAdd =
       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
     return std::nullopt;
 
   // Now, try to match an opcode which will match our specific offset.
@@ -7170,7 +7226,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
 
   MachineInstr *PtrAdd =
       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
     return std::nullopt;
 
   MachineOperand &LHS = PtrAdd->getOperand(1);
@@ -7201,7 +7257,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
   //
   // e.g.
   // ldr something, [base_reg, ext_reg, sxtw]
-  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
     return std::nullopt;
 
   // Check if this is an extend. We'll get an extend type if it is.
@@ -7396,7 +7452,7 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
     return std::nullopt;
   if (ShType == AArch64_AM::ROR && !AllowROR)
     return std::nullopt;
-  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
     return std::nullopt;
 
   // Need an immediate on the RHS.
@@ -7510,7 +7566,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
   if (!RootDef)
     return std::nullopt;
 
-  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
     return std::nullopt;
 
   // Check if we can fold a shift and an extend.

llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir

Lines changed: 103 additions & 53 deletions
@@ -535,13 +535,13 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
-    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
+    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
     ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
     ; CHECK-NEXT: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -571,19 +571,36 @@ body: |
     liveins: $x0, $x1, $x2
     liveins: $w1, $x0
 
-    ; CHECK-LABEL: name: ldrhrox_more_than_one_mem_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
-    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+    ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+    ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
     %15:gpr(s64) = G_CONSTANT i64 9
@@ -612,19 +629,36 @@ body: |
     liveins: $x0, $x1, $x2
     liveins: $w1, $x0
 
-    ; CHECK-LABEL: name: ldrhrox_more_than_one_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
-    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
-    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
-    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
-    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+    ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+    ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+    ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+    ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+    ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+    ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+    ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
     %0:gpr(p0) = COPY $x0
     %1:gpr(s32) = COPY $w1
    %15:gpr(s64) = G_CONSTANT i64 9
@@ -656,15 +690,15 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 62, 61
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[ADDXrr]], 0 :: (load (s32) from %ir.addr)
-    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWui]], 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY1]], [[COPY]], 0, 1 :: (load (s32) from %ir.addr)
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWroX]], 0
     ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 2, 0
     ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[SUBREG_TO_REG]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
     ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
     ; CHECK-NEXT: RET_ReallyLR implicit $x2
     %0:gpr(s64) = COPY $x0
@@ -692,21 +726,37 @@ machineFunctionInfo: {}
 body: |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; CHECK-LABEL: name: ldrqrox_more_than_one_use_shl
-    ; CHECK: liveins: $x0, $x1, $x2
-    ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
-    ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
-    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
-    ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
-    ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
-    ; CHECK-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+    ; CHECK-FAST-LABEL: name: ldrqrox_more_than_one_use_shl
+    ; CHECK-FAST: liveins: $x0, $x1, $x2
+    ; CHECK-FAST-NEXT: {{ $}}
+    ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-FAST-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+    ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-FAST-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+    ; CHECK-FAST-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+    ; CHECK-FAST-NEXT: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY1]], [[COPY]], 0, 1 :: (load (s128) from %ir.addr)
+    ; CHECK-FAST-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+    ; CHECK-FAST-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[LDRQroX]].dsub
+    ; CHECK-FAST-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[COPY3]]
+    ; CHECK-FAST-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXri]]
+    ; CHECK-FAST-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
+    ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+    ;
+    ; CHECK-SLOW-LABEL: name: ldrqrox_more_than_one_use_shl
+    ; CHECK-SLOW: liveins: $x0, $x1, $x2
+    ; CHECK-SLOW-NEXT: {{ $}}
+    ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-SLOW-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+    ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-SLOW-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+    ; CHECK-SLOW-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
+    ; CHECK-SLOW-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+    ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+    ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+    ; CHECK-SLOW-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
+    ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+    ; CHECK-SLOW-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
+    ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
     %0:gpr(s64) = COPY $x0
     %1:gpr(s64) = G_CONSTANT i64 4
     %2:gpr(s64) = G_SHL %0, %1(s64)
