Skip to content

Commit 2cf6269

Browse files
committed
[AArch64] Remove superfluous sxtw in peephole opt
Across a basic-block we might have an i32 extract from a value that only operates on upper bits (for example a sxtw). We can replace the COPY with a new version skipping the sxtw.
1 parent e887624 commit 2cf6269

File tree

2 files changed

+35
-8
lines changed

2 files changed

+35
-8
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
128128
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
129129
bool visitINSvi64lane(MachineInstr &MI);
130130
bool visitFMOVDr(MachineInstr &MI);
131+
bool visitCopy(MachineInstr &MI);
131132
bool runOnMachineFunction(MachineFunction &MF) override;
132133

133134
StringRef getPassName() const override {
@@ -690,6 +691,33 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
690691
return true;
691692
}
692693

694+
// Across a basic-block we might have an i32 extract from a value that only
695+
// operates on upper bits (for example a sxtw). We can replace the COPY with a
696+
// new version skipping the sxtw.
697+
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  // Only a COPY that reads the sub_32 part of a register whose single
  // non-debug user is this COPY is a candidate.
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  // Locate the instruction defining the input, looking through at most one
  // intermediate full COPY whose own source is also single-use. CopyMI keeps
  // pointing at that intermediate COPY (if any) so it can be erased later.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  MachineInstr *CopyMI = SrcMI;
  if (SrcMI && SrcMI->isFullCopy() &&
      MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg()))
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());

  // The definition must be SBFMXri with immediates 0 and 31 (the sxtw form),
  // which only affects bits above the low 32 that the COPY extracts.
  if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
      SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
    return false;

  // Bail out before mutating anything if the extension's input cannot take the
  // register class required by the COPY's current source operand.
  Register SrcReg = SrcMI->getOperand(1).getReg();
  if (!SrcReg.isVirtual() ||
      !MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg)))
    return false;

  // Read the low 32 bits directly from the extension's input.
  MI.getOperand(1).setReg(SrcReg);
  // SrcReg is now read by MI, so a kill flag on any of its other uses may no
  // longer mark the last use; drop them all to keep liveness info valid.
  MRI->clearKillFlags(SrcReg);

  // The intermediate COPY (if there was one) and the extension are now dead.
  if (CopyMI != SrcMI)
    CopyMI->eraseFromParent();
  SrcMI->eraseFromParent();
  return true;
}
720+
693721
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
694722
if (skipFunction(MF.getFunction()))
695723
return false;
@@ -771,6 +799,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
771799
case AArch64::FMOVDr:
772800
Changed |= visitFMOVDr(MI);
773801
break;
802+
case AArch64::COPY:
803+
Changed |= visitCopy(MI);
804+
break;
774805
}
775806
}
776807
}

llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
281281
; CHECK-LABEL: smull_ldrsw_shift:
282282
; CHECK: // %bb.0: // %entry
283283
; CHECK-NEXT: ldrsw x8, [x0]
284-
; CHECK-NEXT: sxtw x9, w1
285-
; CHECK-NEXT: smull x0, w8, w9
284+
; CHECK-NEXT: smull x0, w8, w1
286285
; CHECK-NEXT: ret
287286
entry:
288287
%ext64 = load i32, ptr %x0
@@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
490489
; CHECK-LABEL: smaddl_ldrsw_shift:
491490
; CHECK: // %bb.0: // %entry
492491
; CHECK-NEXT: ldrsw x8, [x0]
493-
; CHECK-NEXT: sxtw x9, w1
494-
; CHECK-NEXT: smaddl x0, w8, w9, x2
492+
; CHECK-NEXT: smaddl x0, w8, w1, x2
495493
; CHECK-NEXT: ret
496494
entry:
497495
%ext64 = load i32, ptr %x0
@@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) {
654652
; CHECK-LABEL: smnegl_ldrsw_shift:
655653
; CHECK: // %bb.0: // %entry
656654
; CHECK-NEXT: ldrsw x8, [x0]
657-
; CHECK-NEXT: sxtw x9, w1
658-
; CHECK-NEXT: smnegl x0, w8, w9
655+
; CHECK-NEXT: smnegl x0, w8, w1
659656
; CHECK-NEXT: ret
660657
entry:
661658
%ext64 = load i32, ptr %x0
@@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
818815
; CHECK-LABEL: smsubl_ldrsw_shift:
819816
; CHECK: // %bb.0: // %entry
820817
; CHECK-NEXT: ldrsw x8, [x0]
821-
; CHECK-NEXT: sxtw x9, w1
822-
; CHECK-NEXT: smsubl x0, w8, w9, x2
818+
; CHECK-NEXT: smsubl x0, w8, w1, x2
823819
; CHECK-NEXT: ret
824820
entry:
825821
%ext64 = load i32, ptr %x0

0 commit comments

Comments
 (0)