Skip to content

Commit f5c62ee

Browse files
authored
[PHIElimination] Reuse existing COPY in predecessor basic block (llvm#131837)
The insertion point of the COPY isn't always optimal and could eventually lead to a worse block layout; see the regression test in the first commit. This change affects many architectures, but the total number of instructions in the test cases seems to be slightly lower.
1 parent 37ffe89 commit f5c62ee

File tree

143 files changed

+44514
-44154
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+44514
-44154
lines changed

llvm/lib/CodeGen/PHIElimination.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,15 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
581581
continue;
582582
}
583583

584+
// Reuse an existing copy in the block if possible.
585+
if (MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg)) {
586+
if (DefMI->isCopy() && DefMI->getParent() == &opBlock &&
587+
MRI->use_empty(SrcReg)) {
588+
DefMI->getOperand(0).setReg(IncomingReg);
589+
continue;
590+
}
591+
}
592+
584593
// Find a safe location to insert the copy, this may be the first terminator
585594
// in the block (or end()).
586595
MachineBasicBlock::iterator InsertPos =

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; -O0-LABEL: store_atomic_i128_aligned_unordered:
120120
; -O0: bl __aarch64_cas16_relax
121-
; -O0: subs x10, x10, x11
122-
; -O0: ccmp x8, x9, #0, eq
121+
; -O0: subs x9, x0, x9
122+
; -O0: ccmp x1, x8, #0, eq
123123
;
124124
; -O1-LABEL: store_atomic_i128_aligned_unordered:
125125
; -O1: ldxp xzr, x8, [x2]
@@ -131,8 +131,8 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
131131
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
132132
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
133133
; -O0: bl __aarch64_cas16_relax
134-
; -O0: subs x10, x10, x11
135-
; -O0: ccmp x8, x9, #0, eq
134+
; -O0: subs x9, x0, x9
135+
; -O0: ccmp x1, x8, #0, eq
136136
;
137137
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
138138
; -O1: ldxp xzr, x8, [x2]
@@ -144,8 +144,8 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
144144
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
145145
; -O0-LABEL: store_atomic_i128_aligned_release:
146146
; -O0: bl __aarch64_cas16_rel
147-
; -O0: subs x10, x10, x11
148-
; -O0: ccmp x8, x9, #0, eq
147+
; -O0: subs x9, x0, x9
148+
; -O0: ccmp x1, x8, #0, eq
149149
;
150150
; -O1-LABEL: store_atomic_i128_aligned_release:
151151
; -O1: ldxp xzr, x8, [x2]
@@ -157,8 +157,8 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
157157
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
158158
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
159159
; -O0: bl __aarch64_cas16_acq_rel
160-
; -O0: subs x10, x10, x11
161-
; -O0: ccmp x8, x9, #0, eq
160+
; -O0: subs x9, x0, x9
161+
; -O0: ccmp x1, x8, #0, eq
162162
;
163163
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
164164
; -O1: ldaxp xzr, x8, [x2]

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; -O0-LABEL: store_atomic_i128_aligned_unordered:
120-
; -O0: ldxp x10, x12, [x9]
120+
; -O0: ldxp x8, x10, [x13]
121+
; -O0: cmp x8, x9
121122
; -O0: cmp x10, x11
122-
; -O0: cmp x12, x13
123-
; -O0: stxp w8, x14, x15, [x9]
124-
; -O0: stxp w8, x10, x12, [x9]
125-
; -O0: subs x12, x12, x13
126-
; -O0: ccmp x10, x11, #0, eq
123+
; -O0: stxp w12, x14, x15, [x13]
124+
; -O0: stxp w12, x8, x10, [x13]
125+
; -O0: subs x10, x10, x11
126+
; -O0: ccmp x8, x9, #0, eq
127127
;
128128
; -O1-LABEL: store_atomic_i128_aligned_unordered:
129129
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
134134

135135
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
136136
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
137-
; -O0: ldxp x10, x12, [x9]
137+
; -O0: ldxp x8, x10, [x13]
138+
; -O0: cmp x8, x9
138139
; -O0: cmp x10, x11
139-
; -O0: cmp x12, x13
140-
; -O0: stxp w8, x14, x15, [x9]
141-
; -O0: stxp w8, x10, x12, [x9]
142-
; -O0: subs x12, x12, x13
143-
; -O0: ccmp x10, x11, #0, eq
140+
; -O0: stxp w12, x14, x15, [x13]
141+
; -O0: stxp w12, x8, x10, [x13]
142+
; -O0: subs x10, x10, x11
143+
; -O0: ccmp x8, x9, #0, eq
144144
;
145145
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
146146
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
151151

152152
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
153153
; -O0-LABEL: store_atomic_i128_aligned_release:
154-
; -O0: ldxp x10, x12, [x9]
154+
; -O0: ldxp x8, x10, [x13]
155+
; -O0: cmp x8, x9
155156
; -O0: cmp x10, x11
156-
; -O0: cmp x12, x13
157-
; -O0: stlxp w8, x14, x15, [x9]
158-
; -O0: stlxp w8, x10, x12, [x9]
159-
; -O0: subs x12, x12, x13
160-
; -O0: ccmp x10, x11, #0, eq
157+
; -O0: stlxp w12, x14, x15, [x13]
158+
; -O0: stlxp w12, x8, x10, [x13]
159+
; -O0: subs x10, x10, x11
160+
; -O0: ccmp x8, x9, #0, eq
161161
;
162162
; -O1-LABEL: store_atomic_i128_aligned_release:
163163
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
168168

169169
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
170170
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
171-
; -O0: ldaxp x10, x12, [x9]
171+
; -O0: ldaxp x8, x10, [x13]
172+
; -O0: cmp x8, x9
172173
; -O0: cmp x10, x11
173-
; -O0: cmp x12, x13
174-
; -O0: stlxp w8, x14, x15, [x9]
175-
; -O0: stlxp w8, x10, x12, [x9]
176-
; -O0: subs x12, x12, x13
177-
; -O0: ccmp x10, x11, #0, eq
174+
; -O0: stlxp w12, x14, x15, [x13]
175+
; -O0: stlxp w12, x8, x10, [x13]
176+
; -O0: subs x10, x10, x11
177+
; -O0: ccmp x8, x9, #0, eq
178178
;
179179
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
180180
; -O1: ldaxp xzr, x8, [x2]

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; -O0-LABEL: store_atomic_i128_aligned_unordered:
120-
; -O0: ldxp x10, x12, [x9]
120+
; -O0: ldxp x8, x10, [x13]
121+
; -O0: cmp x8, x9
121122
; -O0: cmp x10, x11
122-
; -O0: cmp x12, x13
123-
; -O0: stxp w8, x14, x15, [x9]
124-
; -O0: stxp w8, x10, x12, [x9]
125-
; -O0: subs x12, x12, x13
126-
; -O0: ccmp x10, x11, #0, eq
123+
; -O0: stxp w12, x14, x15, [x13]
124+
; -O0: stxp w12, x8, x10, [x13]
125+
; -O0: subs x10, x10, x11
126+
; -O0: ccmp x8, x9, #0, eq
127127
;
128128
; -O1-LABEL: store_atomic_i128_aligned_unordered:
129129
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
134134

135135
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
136136
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
137-
; -O0: ldxp x10, x12, [x9]
137+
; -O0: ldxp x8, x10, [x13]
138+
; -O0: cmp x8, x9
138139
; -O0: cmp x10, x11
139-
; -O0: cmp x12, x13
140-
; -O0: stxp w8, x14, x15, [x9]
141-
; -O0: stxp w8, x10, x12, [x9]
142-
; -O0: subs x12, x12, x13
143-
; -O0: ccmp x10, x11, #0, eq
140+
; -O0: stxp w12, x14, x15, [x13]
141+
; -O0: stxp w12, x8, x10, [x13]
142+
; -O0: subs x10, x10, x11
143+
; -O0: ccmp x8, x9, #0, eq
144144
;
145145
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
146146
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
151151

152152
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
153153
; -O0-LABEL: store_atomic_i128_aligned_release:
154-
; -O0: ldxp x10, x12, [x9]
154+
; -O0: ldxp x8, x10, [x13]
155+
; -O0: cmp x8, x9
155156
; -O0: cmp x10, x11
156-
; -O0: cmp x12, x13
157-
; -O0: stlxp w8, x14, x15, [x9]
158-
; -O0: stlxp w8, x10, x12, [x9]
159-
; -O0: subs x12, x12, x13
160-
; -O0: ccmp x10, x11, #0, eq
157+
; -O0: stlxp w12, x14, x15, [x13]
158+
; -O0: stlxp w12, x8, x10, [x13]
159+
; -O0: subs x10, x10, x11
160+
; -O0: ccmp x8, x9, #0, eq
161161
;
162162
; -O1-LABEL: store_atomic_i128_aligned_release:
163163
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
168168

169169
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
170170
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
171-
; -O0: ldaxp x10, x12, [x9]
171+
; -O0: ldaxp x8, x10, [x13]
172+
; -O0: cmp x8, x9
172173
; -O0: cmp x10, x11
173-
; -O0: cmp x12, x13
174-
; -O0: stlxp w8, x14, x15, [x9]
175-
; -O0: stlxp w8, x10, x12, [x9]
176-
; -O0: subs x12, x12, x13
177-
; -O0: ccmp x10, x11, #0, eq
174+
; -O0: stlxp w12, x14, x15, [x13]
175+
; -O0: stlxp w12, x8, x10, [x13]
176+
; -O0: subs x10, x10, x11
177+
; -O0: ccmp x8, x9, #0, eq
178178
;
179179
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
180180
; -O1: ldaxp xzr, x8, [x2]

llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ body: |
3737
bb.1:
3838
%x:gpr32 = COPY $wzr
3939
; Test that the debug location is not copied into bb1!
40-
; CHECK: %3:gpr32 = COPY killed %x{{$}}
40+
; CHECK: %3:gpr32 = COPY $wzr
4141
; CHECK-LABEL: bb.2:
4242
bb.2:
4343
%y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0, debug-location !14
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -run-pass=phi-node-elimination -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
# Verify that the original COPY in bb.1 is reappropriated as the PHI source in bb.2,
5+
# instead of creating a new COPY with the same source register.
6+
7+
---
8+
name: copy_virtual_reg
9+
tracksRegLiveness: true
10+
body: |
11+
; CHECK-LABEL: name: copy_virtual_reg
12+
; CHECK: bb.0:
13+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
14+
; CHECK-NEXT: liveins: $nzcv, $w0
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: %a:gpr32 = COPY $w0
17+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
18+
; CHECK-NEXT: Bcc 8, %bb.2, implicit $nzcv
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: bb.1:
21+
; CHECK-NEXT: successors: %bb.2(0x80000000)
22+
; CHECK-NEXT: {{ $}}
23+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY %a
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: bb.2:
26+
; CHECK-NEXT: %c:gpr32 = COPY [[DEF]]
27+
; CHECK-NEXT: %d:gpr32 = COPY %c
28+
bb.0:
29+
liveins: $nzcv, $w0
30+
%a:gpr32 = COPY $w0
31+
Bcc 8, %bb.2, implicit $nzcv
32+
bb.1:
33+
%b:gpr32 = COPY %a:gpr32
34+
bb.2:
35+
%c:gpr32 = PHI %b:gpr32, %bb.1, undef %undef:gpr32, %bb.0
36+
%d:gpr32 = COPY %c:gpr32
37+
...
38+
39+
---
40+
name: copy_physical_reg
41+
tracksRegLiveness: true
42+
body: |
43+
; CHECK-LABEL: name: copy_physical_reg
44+
; CHECK: bb.0:
45+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
46+
; CHECK-NEXT: liveins: $nzcv, $w0
47+
; CHECK-NEXT: {{ $}}
48+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
49+
; CHECK-NEXT: Bcc 8, %bb.2, implicit $nzcv
50+
; CHECK-NEXT: {{ $}}
51+
; CHECK-NEXT: bb.1:
52+
; CHECK-NEXT: successors: %bb.2(0x80000000)
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: $x0 = IMPLICIT_DEF
55+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY $w0
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.2:
58+
; CHECK-NEXT: %b:gpr32 = COPY [[DEF]]
59+
bb.0:
60+
liveins: $nzcv, $w0
61+
Bcc 8, %bb.2, implicit $nzcv
62+
bb.1:
63+
$x0 = IMPLICIT_DEF
64+
%a:gpr32 = COPY $w0
65+
bb.2:
66+
%b:gpr32 = PHI %a:gpr32, %bb.1, undef %undef:gpr32, %bb.0
67+
...
68+

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,8 +583,8 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A,
583583
; CHECK-SD-NEXT: mov w10, w2
584584
; CHECK-SD-NEXT: b.hi .LBB5_4
585585
; CHECK-SD-NEXT: // %bb.2:
586-
; CHECK-SD-NEXT: mov x11, xzr
587586
; CHECK-SD-NEXT: mov w8, wzr
587+
; CHECK-SD-NEXT: mov x11, xzr
588588
; CHECK-SD-NEXT: b .LBB5_7
589589
; CHECK-SD-NEXT: .LBB5_3:
590590
; CHECK-SD-NEXT: mov w8, wzr

0 commit comments

Comments
 (0)