Skip to content

Commit 33b00bb

Browse files
authored
Merge pull request #9105 from citymarina/csel-cherry-pick
[AArch64] Cherry-pick fix for redundant csel instructions
2 parents ddcb446 + ad330d7 commit 33b00bb

File tree

3 files changed

+172
-0
lines changed

3 files changed

+172
-0
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@
6161
// %6:fpr128 = IMPLICIT_DEF
6262
// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
6363
//
64+
// 8. Remove redundant CSELs that select between identical registers, by
65+
// replacing them with unconditional moves.
66+
//
6467
//===----------------------------------------------------------------------===//
6568

6669
#include "AArch64ExpandImm.h"
@@ -124,6 +127,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
124127
template <typename T>
125128
bool visitAND(unsigned Opc, MachineInstr &MI);
126129
bool visitORR(MachineInstr &MI);
130+
bool visitCSEL(MachineInstr &MI);
127131
bool visitINSERT(MachineInstr &MI);
128132
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
129133
bool visitINSvi64lane(MachineInstr &MI);
@@ -283,6 +287,26 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
283287
return true;
284288
}
285289

290+
bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  // A CSEL chooses between operand 1 and operand 2. If both name the same
  // register the choice is moot, so the instruction can be replaced with a
  // MOV of that register. Anything else is left untouched.
  const auto SrcReg = MI.getOperand(1).getReg();
  if (SrcReg != MI.getOperand(2).getReg())
    return false;

  // Use the X-register zero register and ORR opcode for CSELXr; every other
  // opcode that reaches here gets the W-register variants.
  const bool IsXForm = MI.getOpcode() == AArch64::CSELXr;
  auto ZeroReg = IsXForm ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode = IsXForm ? AArch64::ORRXrs : AArch64::ORRWrs;

  // Build `Dst = ORR ZeroReg, Src, 0` immediately before the CSEL, reusing
  // the CSEL's destination register and debug location.
  auto &ParentBlock = *MI.getParent();
  const auto DstReg = MI.getOperand(0).getReg();
  BuildMI(ParentBlock, MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(DstReg, RegState::Define)
      .addReg(ZeroReg)
      .addReg(SrcReg)
      .addImm(0);

  // The ORR now defines the result; drop the redundant CSEL and report that
  // the function was modified.
  MI.eraseFromParent();
  return true;
}
309+
286310
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
287311
// Check this INSERT_SUBREG comes from below zero-extend pattern.
288312
//
@@ -788,6 +812,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
788812
visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
789813
{AArch64::ADDXri, AArch64::ADDSXri}, MI);
790814
break;
815+
case AArch64::CSELWr:
816+
case AArch64::CSELXr:
817+
Changed |= visitCSEL(MI);
818+
break;
791819
case AArch64::INSvi64gpr:
792820
Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
793821
break;
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
4+
define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
5+
; CHECK-LABEL: peephole_csel:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: tst w2, #0x1
8+
; CHECK-NEXT: mov w8, #1 // =0x1
9+
; CHECK-NEXT: mov x9, xzr
10+
; CHECK-NEXT: tst w1, #0x1
11+
; CHECK-NEXT: csel x8, x8, x9, eq
12+
; CHECK-NEXT: str x8, [x0]
13+
; CHECK-NEXT: ret
14+
entry:
15+
br i1 %0, label %then, label %exit
16+
17+
then: ; preds = %entry
18+
; The donothing() is needed to make this block less interesting to
19+
; SimplifyCFG. Otherwise we may not get the csel that we want to test.
20+
call void @llvm.donothing()
21+
br i1 %cmp, label %true, label %exit
22+
23+
true: ; preds = %then
24+
; Same as above
25+
call void @llvm.donothing()
26+
br label %exit
27+
28+
exit: ; preds = %true, %then, %entry
29+
%x = phi i64 [ 0, %true ], [ 0, %then ], [ 1, %entry ]
30+
store i64 %x, ptr %dst, align 8
31+
ret void
32+
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc %s -o - -mtriple=aarch64-unknown-linux -run-pass=aarch64-mi-peephole-opt -verify-machineinstrs | FileCheck %s
3+
4+
---
5+
name: peephole_cselxr_same
6+
registers:
7+
- { id: 1, class: gpr64, preferred-register: '' }
8+
- { id: 2, class: gpr64, preferred-register: '' }
9+
liveins:
10+
- { reg: '$x0', virtual-reg: '%1' }
11+
- { reg: '$x1', virtual-reg: '%2' }
12+
body: |
13+
bb.0.entry:
14+
liveins: $x0, $x1
15+
16+
; CHECK-LABEL: name: peephole_cselxr_same
17+
; CHECK: liveins: $x0, $x1
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
20+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
21+
; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
22+
; CHECK-NEXT: [[ORRXrs:%[0-9]+]]:gpr64 = ORRXrs $xzr, [[COPY1]], 0
23+
; CHECK-NEXT: RET_ReallyLR
24+
%3:gpr64 = COPY $x1
25+
%4:gpr64 = COPY $x0
26+
$xzr = ANDSXri %3, 0, implicit-def $nzcv
27+
%5:gpr64 = CSELXr %4, %4, 0, implicit $nzcv
28+
RET_ReallyLR
29+
30+
...
31+
---
32+
name: peephole_cselwr_same
33+
registers:
34+
- { id: 1, class: gpr32, preferred-register: '' }
35+
- { id: 2, class: gpr32, preferred-register: '' }
36+
liveins:
37+
- { reg: '$w0', virtual-reg: '%1' }
38+
- { reg: '$w1', virtual-reg: '%2' }
39+
body: |
40+
bb.0.entry:
41+
liveins: $w0, $w1
42+
43+
; CHECK-LABEL: name: peephole_cselwr_same
44+
; CHECK: liveins: $w0, $w1
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
47+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
48+
; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
49+
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
50+
; CHECK-NEXT: RET_ReallyLR
51+
%3:gpr32 = COPY $w1
52+
%4:gpr32 = COPY $w0
53+
$wzr = ANDSWri %3, 0, implicit-def $nzcv
54+
%5:gpr32 = CSELWr %4, %4, 0, implicit $nzcv
55+
RET_ReallyLR
56+
57+
...
58+
---
59+
name: peephole_cselxr_different
60+
registers:
61+
- { id: 1, class: gpr64, preferred-register: '' }
62+
- { id: 2, class: gpr64, preferred-register: '' }
63+
liveins:
64+
- { reg: '$x0', virtual-reg: '%1' }
65+
- { reg: '$x1', virtual-reg: '%2' }
66+
body: |
67+
bb.0.entry:
68+
liveins: $x0, $x1
69+
70+
; CHECK-LABEL: name: peephole_cselxr_different
71+
; CHECK: liveins: $x0, $x1
72+
; CHECK-NEXT: {{ $}}
73+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
74+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
75+
; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
76+
; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY]], [[COPY1]], 0, implicit $nzcv
77+
; CHECK-NEXT: RET_ReallyLR
78+
%3:gpr64 = COPY $x1
79+
%4:gpr64 = COPY $x0
80+
$xzr = ANDSXri %3, 0, implicit-def $nzcv
81+
%5:gpr64 = CSELXr %3, %4, 0, implicit $nzcv
82+
RET_ReallyLR
83+
84+
...
85+
---
86+
name: peephole_cselwr_different
87+
registers:
88+
- { id: 1, class: gpr32, preferred-register: '' }
89+
- { id: 2, class: gpr32, preferred-register: '' }
90+
liveins:
91+
- { reg: '$w0', virtual-reg: '%1' }
92+
- { reg: '$w1', virtual-reg: '%2' }
93+
body: |
94+
bb.0.entry:
95+
liveins: $w0, $w1
96+
97+
; CHECK-LABEL: name: peephole_cselwr_different
98+
; CHECK: liveins: $w0, $w1
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
101+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
102+
; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
103+
; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY]], [[COPY1]], 0, implicit $nzcv
104+
; CHECK-NEXT: RET_ReallyLR
105+
%3:gpr32 = COPY $w1
106+
%4:gpr32 = COPY $w0
107+
$wzr = ANDSWri %3, 0, implicit-def $nzcv
108+
%5:gpr32 = CSELWr %3, %4, 0, implicit $nzcv
109+
RET_ReallyLR
110+
111+
...
112+

0 commit comments

Comments
 (0)