Skip to content

Commit 7f2a5df

Browse files
authored
[AArch64] Remove superfluous sxtw in peephole opt (#96293)
Across a basic-block we might have an i32 extract from a value that only operates on upper bits (for example a sxtw). We can replace the COPY with a new version skipping the sxtw.
1 parent 01a9888 commit 7f2a5df

File tree

3 files changed

+82
-8
lines changed

3 files changed

+82
-8
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
128128
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
129129
bool visitINSvi64lane(MachineInstr &MI);
130130
bool visitFMOVDr(MachineInstr &MI);
131+
bool visitCopy(MachineInstr &MI);
131132
bool runOnMachineFunction(MachineFunction &MF) override;
132133

133134
StringRef getPassName() const override {
@@ -690,6 +691,34 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
690691
return true;
691692
}
692693

694+
// Across a basic-block we might have in i32 extract from a value that only
695+
// operates on upper bits (for example a sxtw). We can replace the COPY with a
696+
// new version skipping the sxtw.
697+
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
698+
Register InputReg = MI.getOperand(1).getReg();
699+
if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
700+
!MRI->hasOneNonDBGUse(InputReg))
701+
return false;
702+
703+
MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
704+
MachineInstr *CopyMI = SrcMI;
705+
while (SrcMI && SrcMI->isFullCopy() &&
706+
MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg()))
707+
SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
708+
709+
if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
710+
SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
711+
return false;
712+
713+
Register SrcReg = SrcMI->getOperand(1).getReg();
714+
MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
715+
MI.getOperand(1).setReg(SrcReg);
716+
if (CopyMI != SrcMI)
717+
CopyMI->eraseFromParent();
718+
SrcMI->eraseFromParent();
719+
return true;
720+
}
721+
693722
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
694723
if (skipFunction(MF.getFunction()))
695724
return false;
@@ -771,6 +800,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
771800
case AArch64::FMOVDr:
772801
Changed |= visitFMOVDr(MI);
773802
break;
803+
case AArch64::COPY:
804+
Changed |= visitCopy(MI);
805+
break;
774806
}
775807
}
776808
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
3+
4+
---
5+
name: removeSxtw
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0.entry:
9+
liveins: $x0
10+
; CHECK-LABEL: name: removeSxtw
11+
; CHECK: liveins: $x0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
15+
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
16+
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
17+
; CHECK-NEXT: RET_ReallyLR implicit $w0
18+
%0:gpr64 = COPY $x0
19+
%1:gpr64 = SBFMXri %0:gpr64, 0, 31
20+
%2:gpr32sp = COPY %1.sub_32:gpr64
21+
%3:gpr32sp = ADDWri %2:gpr32sp, 1, 0
22+
$w0 = COPY %3:gpr32sp
23+
RET_ReallyLR implicit $w0
24+
...
25+
---
26+
name: extraCopy
27+
tracksRegLiveness: true
28+
body: |
29+
bb.0.entry:
30+
liveins: $x0
31+
; CHECK-LABEL: name: extraCopy
32+
; CHECK: liveins: $x0
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
35+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
36+
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
37+
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
38+
; CHECK-NEXT: RET_ReallyLR implicit $w0
39+
%0:gpr64 = COPY $x0
40+
%1:gpr64 = SBFMXri %0:gpr64, 0, 31
41+
%2:gpr64all = COPY %1:gpr64
42+
%3:gpr32sp = COPY %2.sub_32:gpr64all
43+
%4:gpr32sp = ADDWri %3:gpr32sp, 1, 0
44+
$w0 = COPY %4:gpr32sp
45+
RET_ReallyLR implicit $w0
46+
...

llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
281281
; CHECK-LABEL: smull_ldrsw_shift:
282282
; CHECK: // %bb.0: // %entry
283283
; CHECK-NEXT: ldrsw x8, [x0]
284-
; CHECK-NEXT: sxtw x9, w1
285-
; CHECK-NEXT: smull x0, w8, w9
284+
; CHECK-NEXT: smull x0, w8, w1
286285
; CHECK-NEXT: ret
287286
entry:
288287
%ext64 = load i32, ptr %x0
@@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
490489
; CHECK-LABEL: smaddl_ldrsw_shift:
491490
; CHECK: // %bb.0: // %entry
492491
; CHECK-NEXT: ldrsw x8, [x0]
493-
; CHECK-NEXT: sxtw x9, w1
494-
; CHECK-NEXT: smaddl x0, w8, w9, x2
492+
; CHECK-NEXT: smaddl x0, w8, w1, x2
495493
; CHECK-NEXT: ret
496494
entry:
497495
%ext64 = load i32, ptr %x0
@@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) {
654652
; CHECK-LABEL: smnegl_ldrsw_shift:
655653
; CHECK: // %bb.0: // %entry
656654
; CHECK-NEXT: ldrsw x8, [x0]
657-
; CHECK-NEXT: sxtw x9, w1
658-
; CHECK-NEXT: smnegl x0, w8, w9
655+
; CHECK-NEXT: smnegl x0, w8, w1
659656
; CHECK-NEXT: ret
660657
entry:
661658
%ext64 = load i32, ptr %x0
@@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
818815
; CHECK-LABEL: smsubl_ldrsw_shift:
819816
; CHECK: // %bb.0: // %entry
820817
; CHECK-NEXT: ldrsw x8, [x0]
821-
; CHECK-NEXT: sxtw x9, w1
822-
; CHECK-NEXT: smsubl x0, w8, w9, x2
818+
; CHECK-NEXT: smsubl x0, w8, w1, x2
823819
; CHECK-NEXT: ret
824820
entry:
825821
%ext64 = load i32, ptr %x0

0 commit comments

Comments
 (0)