Skip to content

Commit fe946bf

Browse files
authored
[AArch64] Extend sxtw peephole to uxtw. (#104516)
This extends the existing sxtw peephole optimization (#96293) to uxtw, which in LLVM is an ORRWrr that clears the top bits. Fixes #98481.
1 parent e6da78a commit fe946bf

File tree

3 files changed

+42
-21
lines changed

3 files changed

+42
-21
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -733,11 +733,42 @@ bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
733733
DeadInstrs.insert(SrcMI);
734734
}
735735

736-
if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
737-
SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
736+
if (!SrcMI)
737+
return false;
738+
739+
// Look for SXTW(X) and return Reg.
740+
auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
741+
if (SrcMI->getOpcode() != AArch64::SBFMXri ||
742+
SrcMI->getOperand(2).getImm() != 0 ||
743+
SrcMI->getOperand(3).getImm() != 31)
744+
return AArch64::NoRegister;
745+
return SrcMI->getOperand(1).getReg();
746+
};
747+
// Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
748+
auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
749+
if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
750+
SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
751+
!MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
752+
return AArch64::NoRegister;
753+
MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
754+
if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
755+
Orr->getOperand(1).getReg() != AArch64::WZR ||
756+
!MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
757+
return AArch64::NoRegister;
758+
MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
759+
if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
760+
Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
761+
return AArch64::NoRegister;
762+
DeadInstrs.insert(Orr);
763+
return Cpy->getOperand(1).getReg();
764+
};
765+
766+
Register SrcReg = getSXTWSrcReg(SrcMI);
767+
if (!SrcReg)
768+
SrcReg = getUXTWSrcReg(SrcMI);
769+
if (!SrcReg)
738770
return false;
739771

740-
Register SrcReg = SrcMI->getOperand(1).getReg();
741772
MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
742773
LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
743774
MI.getOperand(1).setReg(SrcReg);

llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -997,8 +997,7 @@ define i64 @umull_ldr2_d(ptr %x0, i64 %x1) {
997997
; CHECK-LABEL: umull_ldr2_d:
998998
; CHECK: // %bb.0: // %entry
999999
; CHECK-NEXT: ldr w8, [x0]
1000-
; CHECK-NEXT: mov w9, w1
1001-
; CHECK-NEXT: umull x0, w8, w9
1000+
; CHECK-NEXT: umull x0, w8, w1
10021001
; CHECK-NEXT: ret
10031002
entry:
10041003
%ext64 = load i64, ptr %x0
@@ -1110,8 +1109,7 @@ define i64 @umaddl_ldr2_d(ptr %x0, i64 %x1, i64 %x2) {
11101109
; CHECK-LABEL: umaddl_ldr2_d:
11111110
; CHECK: // %bb.0: // %entry
11121111
; CHECK-NEXT: ldr w8, [x0]
1113-
; CHECK-NEXT: mov w9, w1
1114-
; CHECK-NEXT: umaddl x0, w8, w9, x2
1112+
; CHECK-NEXT: umaddl x0, w8, w1, x2
11151113
; CHECK-NEXT: ret
11161114
entry:
11171115
%ext64 = load i64, ptr %x0
@@ -1224,8 +1222,7 @@ define i64 @umnegl_ldr2_d(ptr %x0, i64 %x1) {
12241222
; CHECK-LABEL: umnegl_ldr2_d:
12251223
; CHECK: // %bb.0: // %entry
12261224
; CHECK-NEXT: ldr w8, [x0]
1227-
; CHECK-NEXT: mov w9, w1
1228-
; CHECK-NEXT: umnegl x0, w8, w9
1225+
; CHECK-NEXT: umnegl x0, w8, w1
12291226
; CHECK-NEXT: ret
12301227
entry:
12311228
%ext64 = load i64, ptr %x0
@@ -1338,8 +1335,7 @@ define i64 @umsubl_ldr2_d(ptr %x0, i64 %x1, i64 %x2) {
13381335
; CHECK-LABEL: umsubl_ldr2_d:
13391336
; CHECK: // %bb.0: // %entry
13401337
; CHECK-NEXT: ldr w8, [x0]
1341-
; CHECK-NEXT: mov w9, w1
1342-
; CHECK-NEXT: umsubl x0, w8, w9, x2
1338+
; CHECK-NEXT: umsubl x0, w8, w1, x2
13431339
; CHECK-NEXT: ret
13441340
entry:
13451341
%ext64 = load i64, ptr %x0
@@ -1400,8 +1396,7 @@ define i64 @umull_and_lshr(i64 %x) {
14001396
; CHECK-LABEL: umull_and_lshr:
14011397
; CHECK: // %bb.0:
14021398
; CHECK-NEXT: lsr x8, x0, #32
1403-
; CHECK-NEXT: mov w9, w0
1404-
; CHECK-NEXT: umull x0, w9, w8
1399+
; CHECK-NEXT: umull x0, w0, w8
14051400
; CHECK-NEXT: ret
14061401
%lo = and i64 %x, u0xffffffff
14071402
%hi = lshr i64 %x, 32
@@ -1424,8 +1419,7 @@ define i64 @umaddl_and_lshr(i64 %x, i64 %a) {
14241419
; CHECK-LABEL: umaddl_and_lshr:
14251420
; CHECK: // %bb.0:
14261421
; CHECK-NEXT: lsr x8, x0, #32
1427-
; CHECK-NEXT: mov w9, w0
1428-
; CHECK-NEXT: umaddl x0, w9, w8, x1
1422+
; CHECK-NEXT: umaddl x0, w0, w8, x1
14291423
; CHECK-NEXT: ret
14301424
%lo = and i64 %x, u0xffffffff
14311425
%hi = lshr i64 %x, 32
@@ -1437,9 +1431,7 @@ define i64 @umaddl_and_lshr(i64 %x, i64 %a) {
14371431
define i64 @umaddl_and_and(i64 %x, i64 %y, i64 %a) {
14381432
; CHECK-LABEL: umaddl_and_and:
14391433
; CHECK: // %bb.0:
1440-
; CHECK-NEXT: mov w8, w0
1441-
; CHECK-NEXT: mov w9, w1
1442-
; CHECK-NEXT: umaddl x0, w8, w9, x2
1434+
; CHECK-NEXT: umaddl x0, w0, w1, x2
14431435
; CHECK-NEXT: ret
14441436
%lo = and i64 %x, u0xffffffff
14451437
%hi = and i64 %y, u0xffffffff

llvm/test/CodeGen/AArch64/peephole-sxtw.mir

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,7 @@ body: |
106106
; CHECK-NEXT: {{ $}}
107107
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
108108
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32
109-
; CHECK-NEXT: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr $wzr, [[COPY1]]
110-
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrr]], %subreg.sub_32
111-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
109+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
112110
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY2]], 1, 0
113111
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
114112
; CHECK-NEXT: RET_ReallyLR implicit $w0

0 commit comments

Comments
 (0)